reduced unnecessary arrays from wiktionary json
This commit is contained in:
parent
536b1330aa
commit
e3bfaeb8ea
Binary file not shown.
|
@ -6,3 +6,9 @@ export const db = Knex({
|
||||||
filename: "../database"
|
filename: "../database"
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/**
 * Query the `word` column of the `dictionary` table.
 *
 * @param {object} db - Query-builder handle exposing chainable `select` and
 *   `from` (the Knex instance exported by this module in normal use).
 * @returns {Promise<*>} Resolves to whatever the `select("word").from("dictionary")`
 *   chain resolves to — presumably an array of `{ word }` rows; verify against the schema.
 */
export const getWords = async (db) => {
  return db
    .select("word")
    .from("dictionary")
}
|
|
@ -1,11 +1,7 @@
|
||||||
import {db} from './db.mjs'
|
import {db, getWords} from './db.mjs'
|
||||||
import Spellchecker, { isMisspelled } from 'spellchecker'
|
import Spellchecker, { isMisspelled } from 'spellchecker'
|
||||||
|
|
||||||
const getWords = async (db) => {
|
|
||||||
return db
|
|
||||||
.select("word")
|
|
||||||
.from("dictionary")
|
|
||||||
}
|
|
||||||
|
|
||||||
const words = await getWords(db)
|
const words = await getWords(db)
|
||||||
console.dir(words)
|
console.dir(words)
|
||||||
|
|
|
@ -16,7 +16,8 @@ for (const element of wiktionary) {
|
||||||
await db('dictionary')
|
await db('dictionary')
|
||||||
.where('word', element.word)
|
.where('word', element.word)
|
||||||
.update({
|
.update({
|
||||||
pronunciation: element.pronunciation,
|
//pronunciation import deprecated now that I have a better source
|
||||||
|
//pronunciation: element.pronunciation,
|
||||||
meanings: JSON.stringify(element.meanings)
|
meanings: JSON.stringify(element.meanings)
|
||||||
}).then(res=>{
|
}).then(res=>{
|
||||||
if(res==1){console.log("added data for "+element.word)
|
if(res==1){console.log("added data for "+element.word)
|
||||||
|
|
|
@ -4,7 +4,7 @@ cd ../processing
|
||||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \
|
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \
|
||||||
wiktionary.json > wiktionary-p1.json
|
wiktionary.json > wiktionary-p1.json
|
||||||
|
|
||||||
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)]}) | sort_by(.index)[] | del(.index) ] }' \
|
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
|
||||||
wiktionary-p1.json > wiktionary-p2.json
|
wiktionary-p1.json > wiktionary-p2.json
|
||||||
|
|
||||||
jq --slurp '.' wiktionary-p2.json > wiktionary-p3.json
|
jq --slurp '.' wiktionary-p2.json > wiktionary-p3.json
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
import { db, getWords } from './db.mjs'
|
||||||
|
import fs from 'fs'
|
||||||
|
|
||||||
|
//const words = getWords(db)
|
||||||
|
const sampleMeanings =[
|
||||||
|
{
|
||||||
|
"type": "noun",
|
||||||
|
"definitions": [
|
||||||
|
{
|
||||||
|
"definition": "plural of look",
|
||||||
|
"form_of": true,
|
||||||
|
"plural": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"definition": "One's appearance or attractiveness.",
|
||||||
|
"form_of": false,
|
||||||
|
"plural": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "verb",
|
||||||
|
"definitions": [
|
||||||
|
{
|
||||||
|
"definition": "third-person singular simple present indicative form of look",
|
||||||
|
"form_of": true,
|
||||||
|
"plural": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Count the definitions across all meanings of a word and how many of them
 * are flagged `form_of`, then report the tallies as a summary string.
 *
 * NOTE(review): despite the name, only `form_of` is counted; the `plural`
 * flag is never consulted, and a string (not a boolean) is returned — confirm
 * whether this is still work in progress.
 *
 * @param {Array<{type?: string, definitions?: Array<{form_of?: boolean}>}>} meanings
 *   Meaning groups, each optionally carrying a `definitions` array.
 * @returns {string} `Total defs: N, formsOf: M, output: <M === N>`.
 */
const allDefinitionsArePluralOrFormOf = (meanings) => {
  let formsOf = 0
  let totalDefs = 0
  for (const meaning of meanings) {
    // A meaning without a definitions array contributes nothing instead of throwing.
    for (const definition of meaning.definitions ?? []) {
      totalDefs++
      // Debug output of every definition inspected.
      console.dir(definition)
      if (definition.form_of === true) {
        formsOf++
      }
    }
  }
  return `Total defs: ${totalDefs}, formsOf: ${formsOf}, output: ${formsOf === totalDefs}`
}
|
||||||
|
|
||||||
|
console.log(allDefinitionsArePluralOrFormOf(sampleMeanings))
|
Loading…
Reference in New Issue