flattened definition arrays

This commit is contained in:
Andrzej Stepien 2023-07-18 23:55:33 +02:00
parent 16181504ad
commit d16ed6cb9a
8 changed files with 8 additions and 4237 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
node_modules/ node_modules/
src/data/db-backups/ src/data/db-backups/
src/data/samples
src/data/processing/ src/data/processing/
prod/ prod/
*.log *.log

File diff suppressed because it is too large. Load Diff

View File

View File

@@ -1,12 +1,12 @@
#!/bin/bash #!/bin/bash
cd ../processing cd ../processing
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definition: .senses[].glosses}' \ jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: .senses|map(.glosses)}' \
wiktionary.json > wiktionary-processed.json wiktionary.json > wiktionary-processed.json
jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json
jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:([.[] | {type:.type, definition:(try .definition[])}]|unique)}' \ jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[.[]|{type:.type, definitions:[try .definitions[][]]| select(.!=[]) |map({(.):1})|add|keys_unsorted}]}' \
wiktionary-processed-array.json > wiktionary-grouped-objects.json wiktionary-processed-array.json > wiktionary-grouped-objects.json
jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-array.json jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-array.json
@@ -14,4 +14,7 @@ jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-arra
#extract samples #extract samples
timestamp=$(date +%s) timestamp=$(date +%s)
jq '. | select(.word=="chocolate")' wiktionary-grouped-objects.json > ../samples/chocolate-$timestamp.json jq '. | select(.word=="chocolate")' wiktionary-grouped-objects.json > ../samples/chocolate-$timestamp.json
jq '. | select(.word=="write")' wiktionary-grouped-objects.json > ../samples/write-$timestamp.json
jq '. | select(.word=="terrible")' wiktionary-grouped-objects.json > ../samples/terrible-$timestamp.json
jq '. | select(.word=="look")' wiktionary-grouped-objects.json > ../samples/look-$timestamp.json

View File

@@ -0,0 +1 @@
[]

View File

View File

@@ -0,0 +1 @@
[]

View File