flag scientific script added
This commit is contained in:
parent
acc4625f1c
commit
9d9afba81c
Binary file not shown.
|
@ -11,4 +11,10 @@ export const db = Knex({
|
|||
return db
|
||||
.select("word")
|
||||
.from("dictionary")
|
||||
}
|
||||
|
||||
/**
 * Fetch the `word` column of every row in the `bad_words` table.
 *
 * @param {object} db - Query builder exposing `select(...).from(...)`
 *   (presumably the Knex instance exported by this module; confirm at call sites).
 * @returns {Promise<Array<object>>} Resolves to whatever the query yields,
 *   expected to be one `{ word }` row per table entry.
 */
export const getBadWords = async (db) => {
  const badWordsQuery = db.select("word").from("bad_words")
  return badWordsQuery
}
|
|
@ -1,5 +1,4 @@
|
|||
import { db, getWords } from './db.mjs'
|
||||
import fs from 'fs'
|
||||
|
||||
//const words = getWords(db)
|
||||
const allDefinitionsAreFormOf = (meanings) => {
|
||||
|
@ -43,7 +42,7 @@ const sampleMeanings =[
|
|||
}
|
||||
]
|
||||
const words = await getWords(db)
|
||||
const deleted = []
|
||||
const flagged = []
|
||||
for (const word of words) {
|
||||
const res =
|
||||
await db('dictionary')
|
||||
|
@ -51,11 +50,14 @@ for (const word of words) {
|
|||
.where('word', word.word)
|
||||
const meanings = JSON.parse(res[0].meanings)
|
||||
if(allDefinitionsAreFormOf(meanings)){
|
||||
deleted.push(word)
|
||||
await db('dictionary').
|
||||
where('word', word.word)
|
||||
.update('derivative',1)
|
||||
flagged.push(word)
|
||||
}
|
||||
}
|
||||
console.log(`${deleted.length} entries deleted`)
|
||||
console.dir(deleted)
|
||||
console.log(`${flagged.length} entries flagged`)
|
||||
//console.dir(flagged)
|
||||
db.destroy()
|
||||
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
import { db, getWords } from './db.mjs'
|
||||
|
||||
const words = await getWords(db)
|
||||
/**
 * Decide whether every definition of a word is tagged with the `sciences` topic.
 *
 * @param {Array<{definitions?: Array<{topics?: string[]}>}>} meanings -
 *   Parsed `meanings` value of a dictionary row; each meaning carries a list
 *   of definitions, and each definition may carry a `topics` array.
 * @returns {boolean} `true` only when there is at least one definition and
 *   every definition lists `'sciences'` among its topics.
 */
const allDefinitionsAreScientific = (meanings) => {
  const definitions = meanings.flatMap((meaning) => meaning.definitions ?? [])

  // A word without any definitions must not be flagged: comparing two zero
  // counters would otherwise report it as "entirely scientific".
  if (definitions.length === 0) {
    return false
  }

  // Definitions with missing/null topics count as non-scientific.
  return definitions.every(
    (definition) =>
      Array.isArray(definition.topics) && definition.topics.includes('sciences')
  )
}
|
||||
|
||||
// Flag every dictionary entry whose definitions are all tagged as scientific
// by setting its `scientific` column to 1, then report how many were flagged.
const updated = []

try {
  for (const word of words) {
    const [row] = await db('dictionary')
      .select('meanings')
      .where('word', word.word)

    // The row can be missing if the table changed after `words` was loaded;
    // skip it instead of crashing on `undefined.meanings`.
    if (!row) {
      continue
    }

    const meanings = JSON.parse(row.meanings)
    if (allDefinitionsAreScientific(meanings)) {
      await db('dictionary')
        .where('word', word.word)
        .update('scientific', 1)
      updated.push(word.word)
    }
  }

  console.log(`${updated.length} words with only scientific definitions found.`)
} finally {
  // Release the connection pool even when a query or JSON.parse throws.
  await db.destroy()
}
|
|
@ -1,7 +1,7 @@
|
|||
#!/bin/bash
|
||||
cd ../processing
|
||||
|
||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null) }))}' \
|
||||
jq -r '. | select((.pos=="noun") or (.pos=="verb") or (.pos=="adj") or (.pos=="adv")) | select((.word | test("[^a-z]"))|not) | {word: .word, type: .pos, pronunciation: .sounds[0].ipa, definitions: (try .senses|map({definition: .glosses|join(" "), form_of: (.form_of!=null), topics:(try .topics) }))}' \
|
||||
wiktionary.json > wiktionary-p1.json
|
||||
|
||||
jq --slurp '. | group_by(.word)[] | {word:.[0].word, pronunciation:.[0].pronunciation, meanings:[[([.[]|{type:.type, definitions:[try .definitions][]}] | to_entries[] | [{type: .value.type, index: .key, definitions: .value.definitions}][] )] | group_by(.type) | map({type: .[0].type, index: .[0].index, definitions: [.[].definitions|select(length > 0)[]]}) | sort_by(.index)[] | del(.index) ] }' \
|
||||
|
|
Loading…
Reference in New Issue