flag scientific script added

This commit is contained in:
Andrzej Stepien 2023-08-05 22:11:43 +02:00
parent acc4625f1c
commit 9d9afba81c
5 changed files with 55 additions and 6 deletions

Binary file not shown.

View File

@ -12,3 +12,9 @@ export const db = Knex({
.select("word")
.from("dictionary")
}
/**
 * Queries the `word` column of every row in the `bad_words` table.
 *
 * @param db query builder exposing `select(...).from(...)`, e.g. the Knex
 *   instance exported by this module as `db`
 * @returns a promise that settles with the result of the query
 */
export const getBadWords = async (db) => {
  const badWordsQuery = db.select("word").from("bad_words")
  return badWordsQuery
}

View File

@ -1,5 +1,4 @@
import { db, getWords } from './db.mjs'
import fs from 'fs'
//const words = getWords(db)
const allDefinitionsAreFormOf = (meanings) => {
@ -43,7 +42,7 @@ const sampleMeanings =[
}
]
const words = await getWords(db)
const deleted = []
const flagged = []
for (const word of words) {
const res =
await db('dictionary')
@ -51,11 +50,14 @@ for (const word of words) {
.where('word', word.word)
const meanings = JSON.parse(res[0].meanings)
if(allDefinitionsAreFormOf(meanings)){
deleted.push(word)
await db('dictionary').
where('word', word.word)
.update('derivative',1)
flagged.push(word)
}
}
console.log(`${deleted.length} entries deleted`)
console.dir(deleted)
console.log(`${flagged.length} entries flagged`)
//console.dir(flagged)
db.destroy()

View File

@ -0,0 +1,41 @@
import { db, getWords } from './db.mjs'
// Fetch the word list once, up front; the flagging loop below iterates over it.
const words = await getWords(db)
/**
 * Reports whether every definition of a word is tagged with the 'sciences' topic.
 *
 * @param {Array<{definitions: Array<{topics?: string[]|null}>}>} meanings
 *   Parsed `meanings` value: each meaning carries a `definitions` array, and
 *   each definition may carry a `topics` array.
 * @returns {boolean} true only when there is at least one definition and
 *   every definition lists 'sciences' among its topics.
 */
const allDefinitionsAreScientific = (meanings) => {
  const definitions = meanings.flatMap((meaning) => meaning.definitions)
  // A word without any definitions must not be reported as "only scientific":
  // comparing two zero counters would be vacuously true and flag such entries.
  if (definitions.length === 0) {
    return false
  }
  // Missing, null or non-array `topics` simply count as "not scientific".
  return definitions.every((definition) =>
    Array.isArray(definition.topics) && definition.topics.includes('sciences'))
}
// Flag pass: read each word's stored meanings and set `scientific` to 1 on
// the rows for which allDefinitionsAreScientific returns true.
const flaggedWords = []
for (const entry of words) {
  const rows = await db('dictionary')
    .select('meanings')
    .where('word', entry.word)
  // `meanings` is stored as a JSON string; only the first matching row is used.
  const parsedMeanings = JSON.parse(rows[0].meanings)
  if (!allDefinitionsAreScientific(parsedMeanings)) {
    continue
  }
  await db('dictionary')
    .where('word', entry.word)
    .update('scientific', 1)
  flaggedWords.push(entry.word)
}
// Print the summary, then call destroy() on the db handle.
console.log(`${flaggedWords.length} words with only scientific definitions found.`)
db.destroy()

View File

@ -1,7 +1,7 @@
#!/bin/bash
cd ../processing
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) }))}' \
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null), topics:(try .topics) }))}' \
wiktionary.json > wiktionary-p1.json
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \