flattened definition arrays
This commit is contained in:
parent
16181504ad
commit
d16ed6cb9a
|
@ -1,5 +1,6 @@
|
||||||
node_modules/
|
node_modules/
|
||||||
src/data/db-backups/
|
src/data/db-backups/
|
||||||
|
src/data/samples
|
||||||
src/data/processing/
|
src/data/processing/
|
||||||
prod/
|
prod/
|
||||||
*.log
|
*.log
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,12 +1,12 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
cd ../processing
|
cd ../processing
|
||||||
|
|
||||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definition: .senses[].glosses}' \
|
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: .senses|map(.glosses)}' \
|
||||||
wiktionary.json > wiktionary-processed.json
|
wiktionary.json > wiktionary-processed.json
|
||||||
|
|
||||||
jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json
|
jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json
|
||||||
|
|
||||||
jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:([.[] | {type:.type, definition:(try .definition[])}]|unique)}' \
|
jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[.[]|{type:.type, definitions:[try .definitions[][]]| select(.!=[]) |map({(.):1})|add|keys_unsorted}]}' \
|
||||||
wiktionary-processed-array.json > wiktionary-grouped-objects.json
|
wiktionary-processed-array.json > wiktionary-grouped-objects.json
|
||||||
|
|
||||||
jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-array.json
|
jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-array.json
|
||||||
|
@ -14,4 +14,7 @@ jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-arra
|
||||||
#extract samples
|
#extract samples
|
||||||
timestamp=$(date +%s)
|
timestamp=$(date +%s)
|
||||||
jq '. | select(.word=="chocolate")' wiktionary-grouped-objects.json > ../samples/chocolate-$timestamp.json
|
jq '. | select(.word=="chocolate")' wiktionary-grouped-objects.json > ../samples/chocolate-$timestamp.json
|
||||||
|
jq '. | select(.word=="write")' wiktionary-grouped-objects.json > ../samples/write-$timestamp.json
|
||||||
|
jq '. | select(.word=="terrible")' wiktionary-grouped-objects.json > ../samples/terrible-$timestamp.json
|
||||||
|
jq '. | select(.word=="look")' wiktionary-grouped-objects.json > ../samples/look-$timestamp.json
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
[]
|
|
@ -0,0 +1 @@
|
||||||
|
[]
|
Loading…
Reference in New Issue