file structure rationalised

This commit is contained in:
Andrzej Stepien 2023-07-17 18:57:56 +02:00
parent 82215e85f8
commit 48a9326c0d
10 changed files with 5897 additions and 46 deletions

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
node_modules/
src/data/
src/data/db-backups/
src/data/processing
prod/
*.log

BIN
src/data/database Normal file

Binary file not shown.

BIN
src/data/regexp.so Executable file

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
const Spellchecker = require("spellchecker")
const sqlite3 = require("sqlite3").verbose()
const db = new sqlite3.Database("data/database")
const db = new sqlite3.Database("../database")
db.serialize(() => {
db.each("SELECT * FROM prompts", [],

View File

@ -0,0 +1,33 @@
const fs = require('fs');
const { AsyncDatabase } = require("promised-sqlite3")

/**
 * Copies pronunciation and meanings from the grouped Wiktionary JSON array
 * into the matching rows of the `prompts` table.
 *
 * Entries without a truthy `word` are skipped. Updates are awaited one at a
 * time so the database handle is only closed after every UPDATE has finished
 * (an async callback passed to `forEach` is not awaited).
 *
 * @returns {Promise<void>} Resolves once all rows are written and the database
 *   is closed; rejects on the first read, parse or SQL error.
 */
const importJson = async () => {
  // NOTE(review): this commit places the database at src/data/database and this
  // script reads its input from ../processing, so the database is opened as
  // ../database instead of a cwd-local "database" - confirm the script is
  // always run from src/data/scripts.
  const db = await AsyncDatabase.open("../database")
  try {
    const entries = JSON.parse(fs.readFileSync('../processing/wiktionary-grouped-objects-array.json'))
    for (const data of entries) {
      if (!data?.word) continue
      const { word, pronunciation } = data
      // meanings is stored as a JSON string in a single column.
      const meanings = JSON.stringify(data.meanings)
      await db.run('UPDATE prompts SET pronunciation=?, meanings=? WHERE word=?', [pronunciation, meanings, word])
    }
  } finally {
    // Always release the handle, even when an update fails part-way through.
    await db.close()
  }
}

importJson().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Regenerates the processed Wiktionary JSON, then imports it into the database.
# Abort on the first failure so the import never runs on stale or missing data.
set -euo pipefail
# pipeline.sh and importJSON.cjs are expected to sit next to this script and use
# paths relative to that directory, so run from the script's own location.
cd "$(dirname "${BASH_SOURCE[0]}")"
# "./" is required: a bare "pipeline.sh" is only found if this directory is on PATH.
./pipeline.sh
node importJSON.cjs

13
src/data/scripts/pipeline.sh Executable file
View File

@ -0,0 +1,13 @@
#!/bin/bash
# Builds wiktionary-grouped-objects-array.json from wiktionary.json in the
# processing directory, using four jq stages that each write an intermediate file.

# Resolve the processing directory relative to this script rather than the
# caller's working directory, and stop if it is missing so the jq stages below
# never read or overwrite files somewhere else.
cd "$(dirname "${BASH_SOURCE[0]}")/../processing" || exit 1

# Stage 1: keep noun/verb/adj/adv entries whose word contains only a-z, and emit
# one {word, type, pronunciation, definition} object per sense.
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definition: .senses[].glosses}' \
wiktionary.json > wiktionary-processed.json

# Stage 2: collect the stream of objects into a single JSON array.
jq --slurp '.' wiktionary-processed.json > wiktionary-processed-array.json

# Stage 3: group by word; each group becomes one object carrying the first
# entry's pronunciation and the de-duplicated list of {type, definition} pairs
# (entries whose definition cannot be iterated are dropped by `try`).
jq '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:([.[] | {type:.type, definition:(try .definition[])}]|unique)}' \
wiktionary-processed-array.json > wiktionary-grouped-objects.json

# Stage 4: collect the grouped objects into the array consumed by the importer.
jq --slurp '.' wiktionary-grouped-objects.json > wiktionary-grouped-objects-array.json

View File

@ -1,44 +0,0 @@
const StreamArray = require('stream-json/streamers/StreamArray');
const fs = require('fs');
const { AsyncDatabase } = require("promised-sqlite3")

/**
 * Copies pronunciation and meanings from the grouped Wiktionary JSON array
 * into the matching rows of the `prompts` table.
 *
 * The file is parsed in one go with JSON.parse, so each element is the entry
 * object itself (there is no `{ key, value }` wrapper as with a streaming
 * parser). Entries whose `word` is null or undefined are skipped.
 *
 * @returns {Promise<void>} Resolves once all rows are written and the database
 *   is closed; rejects on the first read, parse or SQL error.
 */
const importJson = async () => {
  const db = await AsyncDatabase.open("data/database")
  try {
    // Kept in its own variable: reusing `db` for the parsed data would discard
    // the database handle that run() and close() are called on.
    const entries = JSON.parse(fs.readFileSync('data/dp/wiktionary-grouped-objects-array.json'))
    // for...of so each UPDATE is awaited; an async forEach callback is not.
    for (const data of entries) {
      if (data?.word == null) continue
      const { word, pronunciation } = data
      const meanings = JSON.stringify(data.meanings)
      await db.run('UPDATE prompts SET pronunciation=?, meanings=? WHERE word=?', [pronunciation, meanings, word])
    }
  } finally {
    // Always release the handle, even when an update fails part-way through.
    await db.close()
  }
}

importJson().catch((err) => {
  console.error(err)
  process.exitCode = 1
})