#!/bin/bash
#
# Build a grouped word list from a Wiktionary JSON dump with jq, then copy
# one word's grouped entry into ../samples for inspection.
#
# Usage: <this script> [sample_word]
#   sample_word  word whose grouped entry is extracted (default: chocolate)
#
# All paths are relative to the caller's working directory: the script first
# changes into ../processing, reads wiktionary.json there, and writes every
# intermediate file next to it. Samples go to ../samples (relative to
# ../processing).
#
# Files written in ../processing:
#   wiktionary-processed.json              stream of per-sense objects
#   wiktionary-processed-array.json        the same objects as one array
#   wiktionary-grouped-objects.json        stream of per-word objects
#   wiktionary-grouped-objects-array.json  the same objects as one array

# Abort on the first failing command or unset variable so that a broken
# step never feeds a truncated file into the next one.
set -euo pipefail

# Print a message to stderr and exit with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

main() {
  local sample_word=${1:-chocolate}
  local samples_dir=../samples
  local timestamp

  command -v jq >/dev/null || die "jq is required but was not found on PATH"
  cd ../processing || die "cannot cd to ../processing"
  [[ -r wiktionary.json ]] || die "cannot read ${PWD}/wiktionary.json"

  # Step 1: keep entries whose .pos is noun/verb/adj/adv and whose .word has
  # no character outside a-z. `.senses[]` fans out, so one object is emitted
  # per sense, carrying that sense's glosses as `definition`.
  jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definition: .senses[].glosses}' \
    wiktionary.json > wiktionary-processed.json

  # Step 2: collect the object stream into a single array so that group_by
  # can see all entries at once.
  jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json

  # Step 3: one object per distinct word. The pronunciation is taken from the
  # first entry of each group; `meanings` is the de-duplicated list of
  # {type, definition} pairs with one pair per gloss. `try` skips entries
  # whose `definition` cannot be iterated.
  jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:([.[] | {type:.type, definition:(try .definition[])}]|unique)}' \
    wiktionary-processed-array.json > wiktionary-grouped-objects.json

  # Step 4: array form of the grouped objects.
  jq --slurp '.' wiktionary-grouped-objects.json \
    > wiktionary-grouped-objects-array.json

  # Extract samples: the epoch-seconds suffix keeps samples from separate
  # runs apart.
  timestamp=$(date +%s)
  mkdir -p -- "$samples_dir"
  # The word is handed to jq via --arg rather than spliced into the filter
  # text, so it is always compared as a plain string.
  jq --arg word "$sample_word" '. | select(.word==$word)' \
    wiktionary-grouped-objects.json \
    > "${samples_dir}/${sample_word}-${timestamp}.json"
}

main "$@"