reduced unnecessary arrays from wiktionary json
This commit is contained in:
parent
536b1330aa
commit
e3bfaeb8ea
Binary file not shown.
|
@ -6,3 +6,9 @@ export const db = Knex({
|
|||
filename: "../database"
|
||||
}
|
||||
})
|
||||
|
||||
/**
 * Selects the `word` column from the `dictionary` table, with no filtering,
 * using the query builder that is passed in.
 *
 * @param {object} db - Query builder exposing `.select()` / `.from()`;
 *   presumably the Knex instance exported by this module — TODO confirm.
 * @returns {Promise<*>} Resolves with whatever the built query yields
 *   (presumably an array of `{ word }` rows — verify against Knex).
 */
export const getWords = async (db) => {
|
||||
return db
|
||||
.select("word")
|
||||
.from("dictionary")
|
||||
}
|
|
@ -1,11 +1,7 @@
|
|||
import {db} from './db.mjs'
|
||||
import {db, getWords} from './db.mjs'
|
||||
import Spellchecker, { isMisspelled } from 'spellchecker'
|
||||
|
||||
const getWords = async (db) => {
|
||||
return db
|
||||
.select("word")
|
||||
.from("dictionary")
|
||||
}
|
||||
|
||||
|
||||
const words = await getWords(db)
|
||||
console.dir(words)
|
||||
|
|
|
@ -16,7 +16,8 @@ for (const element of wiktionary) {
|
|||
await db('dictionary')
|
||||
.where('word', element.word)
|
||||
.update({
|
||||
pronunciation: element.pronunciation,
|
||||
//pronunciation import deprecated now that I have a better source
|
||||
//pronunciation: element.pronunciation,
|
||||
meanings: JSON.stringify(element.meanings)
|
||||
}).then(res=>{
|
||||
if(res==1){console.log("added data for "+element.word)
|
||||
|
|
|
@ -4,7 +4,7 @@ cd ../processing
|
|||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \
|
||||
wiktionary.json > wiktionary-p1.json
|
||||
|
||||
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)]}) | sort_by(.index)[] | del(.index) ] }' \
|
||||
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
|
||||
wiktionary-p1.json > wiktionary-p2.json
|
||||
|
||||
jq --slurp '.' wiktionary-p2.json > wiktionary-p3.json
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
import { db, getWords } from './db.mjs'
|
||||
import fs from 'fs'
|
||||
|
||||
//const words = getWords(db)
|
||||
// Hand-written fixture for exercising the definition checks below.
// Shape: an array with one entry per part of speech (`type`), each carrying a
// `definitions` array whose items have a `definition` text plus boolean
// `form_of` and `plural` flags.
// It holds three definitions in total: two with `form_of: true` and one with
// `form_of: false` (that one is still flagged `plural: true`).
// NOTE(review): looks like the entry for the word "looks" — confirm.
const sampleMeanings =[
|
||||
{
|
||||
"type": "noun",
|
||||
"definitions": [
|
||||
{
|
||||
"definition": "plural of look",
|
||||
"form_of": true,
|
||||
"plural": true
|
||||
},
|
||||
{
|
||||
"definition": "One's appearance or attractiveness.",
|
||||
"form_of": false,
|
||||
"plural": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "verb",
|
||||
"definitions": [
|
||||
{
|
||||
"definition": "third-person singular simple present indicative form of look",
|
||||
"form_of": true,
|
||||
"plural": false
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Walks every definition in a word's `meanings` and reports how many of them
 * are flagged as a "form of" another word.
 *
 * NOTE(review): despite the name, only the `form_of` flag is examined; the
 * `plural` flag is never read. Confirm whether plural-only definitions are
 * meant to count as well.
 *
 * @param {Array<{definitions: Array<{form_of?: boolean}>}>|null|undefined} meanings
 *   One entry per part of speech, each with a `definitions` array. A missing
 *   value (null/undefined) is treated as "no definitions" instead of throwing
 *   (presumably words without imported meanings have none — verify).
 * @returns {string} Summary of the form
 *   `Total defs: <n>, formsOf: <m>, output: <m === n>`. With zero definitions
 *   the output part is `true`, because both counters are 0.
 */
const allDefinitionsArePluralOrFormOf = (meanings) => {
  let formsOf = 0
  let totalDefs = 0
  // Fall back to an empty list so a missing `meanings` value yields 0/0.
  for (const meaning of meanings ?? []) {
    for (const definition of meaning.definitions) {
      totalDefs++
      // Debug output: dumps each definition as it is visited.
      console.dir(definition)
      // Strict comparison: only a real boolean `true` counts as a form-of entry.
      if (definition.form_of === true) {
        formsOf++
      }
    }
  }
  return `Total defs: ${totalDefs}, formsOf: ${formsOf}, output: ${formsOf === totalDefs}`
}
|
||||
|
||||
console.log(allDefinitionsArePluralOrFormOf(sampleMeanings))
|
Loading…
Reference in New Issue