Integrated the Node.js Wiktionary import (with a database backup) into the processing pipeline

This commit is contained in:
Andrzej Stepien 2023-08-04 20:18:48 +02:00
parent e3bfaeb8ea
commit 253a44da21
4 changed files with 9 additions and 3 deletions

Binary file not shown.

View File

@ -2,4 +2,4 @@
cd .. cd ..
timestamp=$(date +%s) timestamp=$(date +%s)
sqlite3 database ".backup db-backups/backup"$timestamp".db" sqlite3 database ".backup db-backups/backup"$timestamp".db"
node importJSON.js node scripts/importWiktionaryData.js

View File

@ -16,7 +16,7 @@ for (const element of wiktionary) {
await db('dictionary') await db('dictionary')
.where('word', element.word) .where('word', element.word)
.update({ .update({
//pronunciation import deprecated now that I have a better source //deprecated now that I have a better source of IPAs
//pronunciation: element.pronunciation, //pronunciation: element.pronunciation,
meanings: JSON.stringify(element.meanings) meanings: JSON.stringify(element.meanings)
}).then(res=>{ }).then(res=>{

View File

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
cd ../processing cd ../processing
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \ jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) }))}' \
wiktionary.json > wiktionary-p1.json wiktionary.json > wiktionary-p1.json
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \ jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
@ -18,3 +18,9 @@ jsonl=wiktionary-p2.json
jq '. | select(.word=="look")' $jsonl > ../samples/look-$timestamp.json jq '. | select(.word=="look")' $jsonl > ../samples/look-$timestamp.json
jq '. | select(.word=="looks")' $jsonl > ../samples/looks-$timestamp.json jq '. | select(.word=="looks")' $jsonl > ../samples/looks-$timestamp.json
jq '. | select(.word=="edges")' $jsonl > ../samples/edges-$timestamp.json jq '. | select(.word=="edges")' $jsonl > ../samples/edges-$timestamp.json
#import into database
cd ..
sqlite3 database ".backup db-backups/backup"$timestamp".db"
cd scripts
node importWiktionaryData.mjs