flag scientific script added

This commit is contained in:
Andrzej Stepien 2023-08-05 22:11:43 +02:00
parent acc4625f1c
commit 9d9afba81c
5 changed files with 55 additions and 6 deletions

Binary file not shown.

View File

@ -11,4 +11,10 @@ export const db = Knex({
return db return db
.select("word") .select("word")
.from("dictionary") .from("dictionary")
}
export const getBadWords = async (db) => {
return db
.select("word")
.from("bad_words")
} }

View File

@ -1,5 +1,4 @@
import { db, getWords } from './db.mjs' import { db, getWords } from './db.mjs'
import fs from 'fs'
//const words = getWords(db) //const words = getWords(db)
const allDefinitionsAreFormOf = (meanings) => { const allDefinitionsAreFormOf = (meanings) => {
@ -43,7 +42,7 @@ const sampleMeanings =[
} }
] ]
const words = await getWords(db) const words = await getWords(db)
const deleted = [] const flagged = []
for (const word of words) { for (const word of words) {
const res = const res =
await db('dictionary') await db('dictionary')
@ -51,11 +50,14 @@ for (const word of words) {
.where('word', word.word) .where('word', word.word)
const meanings = JSON.parse(res[0].meanings) const meanings = JSON.parse(res[0].meanings)
if(allDefinitionsAreFormOf(meanings)){ if(allDefinitionsAreFormOf(meanings)){
deleted.push(word) await db('dictionary').
where('word', word.word)
.update('derivative',1)
flagged.push(word)
} }
} }
console.log(`${deleted.length} entries deleted`) console.log(`${flagged.length} entries flagged`)
console.dir(deleted) //console.dir(flagged)
db.destroy() db.destroy()

View File

@ -0,0 +1,41 @@
import { db, getWords } from './db.mjs'
// Load the word list once, up front. getWords is imported from ./db.mjs; the
// loop further down reads each returned entry through its `word` property.
const words = await getWords(db)
/**
 * Tells whether every definition across all meanings carries the 'sciences'
 * topic.
 *
 * A definition counts as scientific when its `topics` value is truthy and one
 * of its entries is strictly equal to 'sciences'. Definitions without topics
 * (missing, null, empty) count as non-scientific.
 *
 * Note: when there are no definitions at all (empty `meanings`, or only empty
 * `definitions` lists) the result is `true`, since nothing contradicts it.
 *
 * @param {Array<{definitions: Array<{topics?: string[]|null}>}>} meanings
 *   Parsed meanings of a single word.
 * @returns {boolean} `true` if no definition lacks the 'sciences' topic.
 */
const allDefinitionsAreScientific = (meanings) => {
  // True as soon as one of the definition's topics is exactly 'sciences'.
  const hasSciencesTopic = (definition) => {
    const { topics } = definition
    if (!topics) {
      return false
    }
    for (const entry of topics) {
      if (entry === 'sciences') {
        return true
      }
    }
    return false
  }

  // Count the misses rather than the hits; every definition is still visited.
  let nonScientific = 0
  for (const { definitions } of meanings) {
    for (const definition of definitions) {
      if (!hasSciencesTopic(definition)) {
        nonScientific += 1
      }
    }
  }
  return nonScientific === 0
}
// Flag every word for which allDefinitionsAreScientific() returns true.
// For each entry in `words`: read its `meanings` column from `dictionary`,
// parse it as JSON, and on a match set that row's `scientific` column to 1.
// Matching words are collected only so the summary line can report a count.
const updated = []
try {
  // Rows are processed one at a time; each iteration awaits its own queries.
  for (const word of words) {
    const res = await db('dictionary')
      .select('meanings')
      .where('word', word.word)
    const meanings = JSON.parse(res[0].meanings)
    if (allDefinitionsAreScientific(meanings)) {
      await db('dictionary')
        .where('word', word.word)
        .update('scientific', 1)
      updated.push(word.word)
    }
  }
  console.log(`${updated.length} words with only scientific definitions found.`)
} finally {
  // Tear the connection pool down even when a query or JSON.parse throws, and
  // wait for the teardown to finish before the module completes.
  await db.destroy()
}

View File

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
cd ../processing cd ../processing
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) }))}' \ jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null), topics:(try .topics) }))}' \
wiktionary.json > wiktionary-p1.json wiktionary.json > wiktionary-p1.json
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \ jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \