integrated node import into pipeline
This commit is contained in:
parent
e3bfaeb8ea
commit
253a44da21
Binary file not shown.
|
@ -2,4 +2,4 @@
|
|||
cd ..
|
||||
timestamp=$(date +%s)
|
||||
sqlite3 database ".backup db-backups/backup"$timestamp".db"
|
||||
node importJSON.js
|
||||
node scripts/importWiktionaryData.js
|
|
@ -16,7 +16,7 @@ for (const element of wiktionary) {
|
|||
await db('dictionary')
|
||||
.where('word', element.word)
|
||||
.update({
|
||||
//pronunciation import deprecated now that I have a better source
|
||||
//deprecated now that I have a better source of IPAs
|
||||
//pronunciation: element.pronunciation,
|
||||
meanings: JSON.stringify(element.meanings)
|
||||
}).then(res=>{
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/bin/bash
|
||||
cd ../processing
|
||||
|
||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) , plural: (try .tags catch []|any(.=="plural")) }))}' \
|
||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) }))}' \
|
||||
wiktionary.json > wiktionary-p1.json
|
||||
|
||||
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
|
||||
|
@ -18,3 +18,9 @@ jsonl=wiktionary-p2.json
|
|||
jq '. | select(.word=="look")' $jsonl > ../samples/look-$timestamp.json
|
||||
jq '. | select(.word=="looks")' $jsonl > ../samples/looks-$timestamp.json
|
||||
jq '. | select(.word=="edges")' $jsonl > ../samples/edges-$timestamp.json
|
||||
|
||||
#import into database
|
||||
cd ..
|
||||
sqlite3 database ".backup db-backups/backup"$timestamp".db"
|
||||
cd scripts
|
||||
node importWiktionaryData.mjs
|
||||
|
|
Loading…
Reference in New Issue