diff --git a/src/data/database b/src/data/database index cd56d1f..aa1c616 100644 Binary files a/src/data/database and b/src/data/database differ diff --git a/src/data/scripts/db.mjs b/src/data/scripts/db.mjs index b171dd9..69c28db 100644 --- a/src/data/scripts/db.mjs +++ b/src/data/scripts/db.mjs @@ -5,4 +5,10 @@ export const db = Knex({ connection: { filename: "../database" } - }) \ No newline at end of file + }) + + export const getWords = async (db) => { + return db + .select("word") + .from("dictionary") +} \ No newline at end of file diff --git a/src/data/scripts/deleteMisspelled.mjs b/src/data/scripts/deleteMisspelled.mjs index d88c7ad..4209671 100644 --- a/src/data/scripts/deleteMisspelled.mjs +++ b/src/data/scripts/deleteMisspelled.mjs @@ -1,11 +1,7 @@ -import {db} from './db.mjs' +import {db, getWords} from './db.mjs' import Spellchecker, { isMisspelled } from 'spellchecker' -const getWords = async (db) => { - return db - .select("word") - .from("dictionary") -} + const words = await getWords(db) console.dir(words) diff --git a/src/data/scripts/importWiktionaryData.mjs b/src/data/scripts/importWiktionaryData.mjs index 915d425..1b794ae 100644 --- a/src/data/scripts/importWiktionaryData.mjs +++ b/src/data/scripts/importWiktionaryData.mjs @@ -16,7 +16,8 @@ for (const element of wiktionary) { await db('dictionary') .where('word', element.word) .update({ - pronunciation: element.pronunciation, + //pronunciation import deprecated now that I have a better source + //pronunciation: element.pronunciation, meanings: JSON.stringify(element.meanings) }).then(res=>{ if(res==1){console.log("added data for "+element.word) diff --git a/src/data/scripts/look-test.json b/src/data/scripts/look-test.json deleted file mode 100644 index e69de29..0000000 diff --git a/src/data/scripts/pipeline.sh b/src/data/scripts/pipeline.sh index aac7f97..e891bb4 100755 --- a/src/data/scripts/pipeline.sh +++ b/src/data/scripts/pipeline.sh @@ -4,7 +4,7 @@ cd ../processing jq -r '. 
| select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \ wiktionary.json > wiktionary-p1.json -jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)]}) | sort_by(.index)[] | del(.index) ] }' \ +jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \ wiktionary-p1.json > wiktionary-p2.json jq --slurp '.' 
//const words = getWords(db)

// Hand-written `meanings` fixture used to exercise the check below:
// a noun entry holding one form-of definition and one regular definition,
// and a verb entry holding a single form-of definition.
const sampleMeanings = [
  {
    "type": "noun",
    "definitions": [
      {
        "definition": "plural of look",
        "form_of": true,
        "plural": true
      },
      {
        "definition": "One's appearance or attractiveness.",
        "form_of": false,
        "plural": true
      }
    ]
  },
  {
    "type": "verb",
    "definitions": [
      {
        "definition": "third-person singular simple present indicative form of look",
        "form_of": true,
        "plural": false
      }
    ]
  }
]

/**
 * Counts the definitions across every entry of `meanings` and reports how many
 * of them are flagged `form_of`.
 *
 * Only the `form_of` flag is consulted; despite the function's name, the
 * `plural` flag on a definition is never read.
 *
 * @param {Array<{type?: string, definitions?: Array<{form_of?: boolean}>}>|null|undefined} meanings
 *   Entries each carrying a `definitions` array. A missing list, a missing
 *   entry, or an entry without `definitions` contributes zero definitions.
 * @returns {string} Summary of the form
 *   `Total defs: <n>, formsOf: <m>, output: <true|false>`, where `output` is
 *   `true` only when at least one definition exists and every definition has
 *   `form_of === true`.
 */
const allDefinitionsArePluralOrFormOf = (meanings) => {
  let formsOf = 0
  let totalDefs = 0
  for (const obj of meanings ?? []) {
    for (const definition of obj?.definitions ?? []) {
      totalDefs++
      // Debug trace of every definition visited.
      console.dir(definition)
      if (definition?.form_of === true) {
        formsOf++
      }
    }
  }
  // Require at least one definition: otherwise 0 === 0 would report `true`
  // for input that has no definitions at all.
  const allFormOf = totalDefs > 0 && formsOf === totalDefs
  return `Total defs: ${totalDefs}, formsOf: ${formsOf}, output: ${allFormOf}`
}

console.log(allDefinitionsArePluralOrFormOf(sampleMeanings))