reduced unnecessary arrays from wiktionary json

This commit is contained in:
Andrzej Stepien 2023-08-04 19:59:02 +02:00
parent 536b1330aa
commit e3bfaeb8ea
7 changed files with 64 additions and 9 deletions

Binary file not shown.

View File

@ -5,4 +5,10 @@ export const db = Knex({
connection: { connection: {
filename: "../database" filename: "../database"
} }
}) })
/**
 * Query the `word` column of the `dictionary` table (no filtering applied).
 *
 * @param db - Knex-style query builder exposing `select(...).from(...)`.
 * @returns a promise for whatever the built query resolves to
 *          (presumably an array of `{ word }` rows — confirm against the driver).
 */
export const getWords = async (db) => {
  const wordColumn = db.select("word")
  return wordColumn.from("dictionary")
}

View File

@ -1,11 +1,7 @@
import {db} from './db.mjs' import {db, getWords} from './db.mjs'
import Spellchecker, { isMisspelled } from 'spellchecker' import Spellchecker, { isMisspelled } from 'spellchecker'
// Reads the `word` column from the `dictionary` table through the given query builder.
const getWords = async (db) => db.select("word").from("dictionary")
const words = await getWords(db) const words = await getWords(db)
console.dir(words) console.dir(words)

View File

@ -16,7 +16,8 @@ for (const element of wiktionary) {
await db('dictionary') await db('dictionary')
.where('word', element.word) .where('word', element.word)
.update({ .update({
pronunciation: element.pronunciation, //pronunciation import deprecated now that I have a better source
//pronunciation: element.pronunciation,
meanings: JSON.stringify(element.meanings) meanings: JSON.stringify(element.meanings)
}).then(res=>{ }).then(res=>{
if(res==1){console.log("added data for "+element.word) if(res==1){console.log("added data for "+element.word)

View File

@ -4,7 +4,7 @@ cd ../processing
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \ jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \
wiktionary.json > wiktionary-p1.json wiktionary.json > wiktionary-p1.json
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)]}) | sort_by(.index)[] | del(.index) ] }' \ jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
wiktionary-p1.json > wiktionary-p2.json wiktionary-p1.json > wiktionary-p2.json
jq --slurp '.' wiktionary-p2.json > wiktionary-p3.json jq --slurp '.' wiktionary-p2.json > wiktionary-p3.json

View File

@ -0,0 +1,52 @@
import { db, getWords } from './db.mjs'
import fs from 'fs'
//const words = getWords(db)
// Hand-written `meanings` fixture: one entry per part of speech, each holding
// a list of definitions flagged with `form_of` and `plural`.
const sampleMeanings = [
  {
    type: "noun",
    definitions: [
      { definition: "plural of look", form_of: true, plural: true },
      { definition: "One's appearance or attractiveness.", form_of: false, plural: true },
    ],
  },
  {
    type: "verb",
    definitions: [
      {
        definition: "third-person singular simple present indicative form of look",
        form_of: true,
        plural: false,
      },
    ],
  },
]
/**
 * Walk every definition of every meaning, dump each one with `console.dir`,
 * and report how many of them have `form_of` set.
 *
 * NOTE(review): despite the name, the `plural` flag is never consulted and the
 * result is a human-readable summary string rather than a boolean — confirm
 * whether that is intended before relying on it.
 *
 * @param meanings - iterable of objects that each carry an iterable `definitions`.
 * @returns {string} `Total defs: <n>, formsOf: <m>, output: <m === n>`
 */
const allDefinitionsArePluralOrFormOf = (meanings) => {
  const tally = { total: 0, formOf: 0 }
  for (const { definitions } of meanings) {
    for (const entry of definitions) {
      tally.total += 1
      console.dir(entry)
      // Loose comparison kept on purpose: it mirrors the existing check exactly.
      tally.formOf += entry.form_of == true ? 1 : 0
    }
  }
  const everyDefinitionIsFormOf = tally.formOf === tally.total
  return `Total defs: ${tally.total}, formsOf: ${tally.formOf}, output: ${everyDefinitionIsFormOf}`
}
// Manual smoke run: print the summary string produced for the sample fixture.
console.log(allDefinitionsArePluralOrFormOf(sampleMeanings))