From 2e5a9bf1d96ca05532ff8b319c6b1f6d405e8b3d Mon Sep 17 00:00:00 2001 From: Andrzej Stepien Date: Wed, 9 Aug 2023 19:27:09 +0200 Subject: [PATCH] now blocking medical procedures and diseases --- src/data/.medical_procedures.csv.swp | Bin 0 -> 12288 bytes src/data/database | Bin 35274752 -> 35291136 bytes src/data/diseases.csv | 108 +++++++++++++++++++++++++++ src/data/medical_procedures.csv | 74 ++++++++++++++++++ src/getNewPrompt.mjs | 12 ++- 5 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 src/data/.medical_procedures.csv.swp create mode 100644 src/data/diseases.csv create mode 100644 src/data/medical_procedures.csv diff --git a/src/data/.medical_procedures.csv.swp b/src/data/.medical_procedures.csv.swp new file mode 100644 index 0000000000000000000000000000000000000000..661751bfa86b9cb3bc3199fa26cf91ef6d96617e GIT binary patch literal 12288 zcmeI2TZh^O1y+6SWO6~t>tx)Rh0Qu-vrFEjrn=Ho(pA+|RWI~{iXXs7 z6%;QJ(1;h3c)E*KAGqOoom48c)NYI;YO*({+ATJ+N#~j_+=q zUpsy5WQOa(EGur_D#qFUnR)TvjGMyk%V%Tcj@KDkIBWGCb7JuMRB-U56^)iA_(37} z23iScPd)MYz&kN0c;Evq3nh5g0*z zG2^jEA6a|&p`AmLX_^QS0U|&IhyW2F0z`la5P`dqK&X#oFQBn|QgcTR8Slp4sV5>p z1c(3;AOb{y2oM1xKm>>Y5g-Ep7XrMq8;O}LvKUxKv!-Xufl&f&Bge8(EAIz2Kxi(LnzuGLD!*=p--S2(3i=+iSY~Q zv*dq{G0uGjeGPq@>~E9*4aR7H2YnCykn|JAd2BxZh`E>>^P>H8(k+a?LBBx1Lcc?Q zKz~AiL4QMZ5dk7V1c(3;AOb{y2oQn)NZ^t*HaKp4U2zjM_d$wab(`mGqHOJ>A*AO{ zas!i3tP9FUQ!6jHbBdP|-n7M?uDkcZwO-hIR>4+X2;bQH!28DtTUpa*qE6?m$Gh-J zE_a2prQ`K97Mst;m57yTX@sn&yjlt9c4f-Q#^KqwS2kD!FZIeiJ|bQ%>ts-iB}~yR zf3(yYyt)r9w)qsO14?4G$!osoxz?0j@acLQ(46ikdRp5GYgYZ^&-2pAU{`?*P1)gO zLtvbD$~IdKc8lR%d^PRVLgQ>?b=?G6tl^k+H?E86@;0%vRb%IEunWB3K(;NE399N) zvhDHu=wi-xDjY=L?+3d$m9BrpR@^M5d8tg9voQ`4{Iqp&D;4&oQ~P{&JlQ#^ZJ~J8 zdgb$(cUi^y2hnlGy~c6EiJirVy{WWr@Jb-Wadg>eib9&S#->ExtnULg`d? z{@Ax*(z!#?)Qu5Xv&`8L{qNz$(>dEPp_!yrZOKVfbsY>RRhl10;K@F7@KUmoYpriw zDIFGxW>v+tGU+8_8^9!wmW?W~PhHyPV#=lpu^GJS?7(!Y4%Wf5lQh32?Ow!f@!Q}7 X5@*}km0R|9mok@sR?^yYmb2_1QQ-!e literal 0 HcmV?d00001 diff --git a/src/data/database b/src/data/database index 8062965dbf697fcd558c14a963434203ff1b24b1..5c76342d4e385b87771a75b74d760c55d8c873c1 100644 GIT binary patch delta 8530 zcmZvg349dQ0mj+4RO9&x^03n3?KEkOYpdcVH*&VVYyF2U7ZeY=Nv1qlZ z6;DJhR;^0Ctyi@w0<~&e@1xdwTfFaT>w#^*Z+17?fc$>{KRefZZ{EE3=F4o`_CfKs zZ66lztSQ;iub*#kzute97mjQjv7_Ipm5UVhm42F}^-~XP2i2X*ZnaY#Cm&D@rB&{b zFPAQq`$?L#?~E5xu6_OY4PLt1W#kDdv1|XzlQv8~R?V-htrb>W?+h6M(>8S5u(SXB z_Rd*0Z`O)=gICO&y=30t?C#;7h#odBQoIFCW3>FrIkgw>k*mVlqw8CgDL#w}{Ca48G-mmYKq6+?NAK}n->1r>Pe@mO@zbn*u99E5vX!+DKl;S=tw-CEyrvEI1A<1;>MBU^!R;P5>u@m0%S( z39JTdz*=xJSO?aF4L}E-zz+hz02@IU2m%wF0ycpV2!jZ)Kopz`V!#G*kO19aGuQ%> zU@JHcoDO~hehN}x8#n{}3~UEyg0sNS!P(#(@C&d5oD0qa=YwB@3&2irA-D)!3@!ne zg3G`za5>lwt^ikptH7_o9&k1IHMj;`3$6pd0oQ{Yz>Q!p_${~z+zf63w}RWi?cfe@ zC-@z>3)~Iv0r!IY!0*BR-~sRl@E~{yJPh`MN5Fn?02~B=1doD0fycn(;0f?&@E7n` z@FaK&JPn=!&w}T`^WX*WB6tb>4ZIBg4qgHO0I!1A!0X@*@FsW*ybay~{{)A?yWlqm6ly9p zjhar)pk`9DsM*vUYA!X8>Y(OR3#f(EB5ELhA4wT49iOo$93gRDd$5jZ_yEq)h4*Y7-Tr!c>H^s3>(R6{Bn_P9>;r zYBRNkN>W>?)2P#_pHM%gQq(r;4C-gpcIr&(Eb8af+0;4IFQ^^Vxzu^o`P46|3#gsc zh15mV#ndI#rPO8AF6we>H+2PdC3O|`D{2pQHT7%i8tPi=I_fvn_0$d2jnrQ1x71D4 z&D1T_t<-JQ?bIFAoz(BByQsUVd#HP<`>5Yj_frp0f1ne`>6xeLF$jx zqtu_M$Ee4tC#XMDf1&NVJ92m>MiPR>K*Ez)FJ9!>OJax>I3RS>Lco7>J#cy>NDzd>I>>i>R;4X)YsI%ssB*l zP~TGDQQuRCsUN8SQvY*d`4jlzq6Fop`crvSK2<;!Qbm-9k|^g|g}+ovql&2#s+20D z$|)~ZK~+*!RJGWLc8zw4wq1*B8@1!K30kdI zq<*2ktR7HrR`;ko)icx>g6&duuG*$ns~+Whg+MqqHZj%lJfXoSt|xx93$sQ1T`*;dF?gijAg6X`a4CW~6+ z`*iy!iH3;LnFyI%oo#ud@R^Z#qSM)uCWt!Rso8C$B34)#FY0xFBAST!;kNw~9! z5x0zp-@ujGva}k}Xq#PO%M7F&=NTYs@pznu7FUZVD;kXB^stq3cBxeuu6isP=}MoX zRC0ty^~|j)6{5ZivmmpHN$>MpA@nuxTPhcIMkG^@w@kEUhDywcWA>!`p}bVIW_DZt zo;$8A5fkQRc1MzSTt|b`=T{Vq_Ur*Zz-&3C8u~ar62r;NxmBu|wCmKpZ+TP63Z>^V zKYGpdXHN0TJ*~=pI#}*}e$<<}XAh>96+&IkZD~aqO}Jr9EsO?CJ*iOC;dpvOE#L&O zG2GKUZje>WAw7 zlzN|fi@F=@=Q?${x>%j6j#cZFLs&yw)iUKfmYW+O9q;S<*&nvsAuXS|GJcL-7^Dt$e19^?c{~&~tT3ZG-DgXR+O%=lpkyJLPd# zyZkzaY;1$+1HO5;$3M*SO+ItT1L^t?Kr< z;t7ma9ZRFWCeN+8I#(CF8?dv}h;ek`(vwQvO)k!_AXZsSVC-5|;%;=stu0nG#kGdJ zRyyV43F;C3L{=8>jBPxjz+HwP#tP@45!WLLdpWPg_&2bY`z^meVJ~CDv0{ZV4YnN5 zQ_<=`r)e+cP1r%RD^u-pEEf;kK9=R$o!!Tjq*$0g$cv2#+T68-_r!Br{8o3%*u_q5 zoSoUWFH+ofF08hCw;4;=3wa#(w#kUcZKtOeut#Hhr)k*pd5|k3hOB}0(X=~w5bJAK z!kFhAOqns~t>*N>T(%c8UC;FI999*Ul(#MH58f>WY_r7{xHO?+f}d)w~uY!V)>t+%ta*+r3^ zFpfK-cpHQ*e+o}f54Ew#uo1zF!Fi^7Xe^&lFxuVf{BL%T;e7=S+%8KP&4+}=Ir9)k zvA`h4pcRV+@p|#M@Ftuc(V!Vh*duv<7e+RYTZZ23Y?^#Iq}cuuPVeA5T*$DSSRq~| z!`XfVi;UUGXT~Rzu!pf+yezQPIFF=(y=DXr|4^3T95DU$9PfDhV8!M5WS0$b3hT5& zx{p-{OfLE8V100k)dhkZ>sU#^h+tq0VsHA9Bamql_TbHWEN<6&-J@Ly8_r{}VkX0n zX%A%eet&Rd4NJk~9Kfpm))rg$Z!!(1iPbFFZ*9gPsA4NHs?9(p?*q%S6-lLsafLI5 zaE~S)y_Yu=!we3E^m6A+Oj=CffSCxF@j3X-t^}6im|n^=@tzIgcZqW(h=7ztFerAe z3!0k}xTQ!^*LbuW!_UuBRrUgU$iFFNM9?y&!rh8hC>%{9wCVor*ppdEXDq>Mc)t=` ziBN|mWrsb^K|E2+EFLUk4N)T&PuSQhWDQv414f(`7C4)Fr;(nc`P{^!AJJ1zC*|=z zFr0!heM^51CM-fu3Sz--)~bg#1rxEbE(+Y)F6UdJLmJdi`bv9J9xWYGK9zmS z>+)XhJ?$0wEbV-4lX|*xmGq*vRykXnt2JxI>i6o~>f`F2>TdaX^;~7M8j|+Qi(i$W zkOSH++Lh8?q~=oTa&@h`7~wc6g_I|h|01nFB)g8$C9P(95VH@&HPs`I3JV)Yc~ zS!8ncWHux>SWn`~>13Va+kGM%md@6Q))QECPPQKJEWo+py4{(%ebTkFR^`U)HkOi; zug9_xxdFSC#hR(yi0v$1J%M^OYw4S@M{!r*kln&^b5r(6_SO+GyV<#g+?+ik-6x4u zPttDUp4_NCoCO^bwHvuKI9Yobj~)@W8+gw-X?rNQbK-V=xx0lcUQXUd02{&~_#=ti z$Fnn$yC;7R=FTG{cb)UZdNcPRXS+9a*A8$`!dvafQg><~yQWX>uHo3p25)Dj>`C5E z%B*HboWUu0(fb)dqQ@1 z-vD01`SW85yx8gXBOb2_m>Q*(Xn(7*LnL1N#S6kFUYPl+@{grQ(j}XqEQXW?BKs>)nxl}n*i7Nqo=vbgk zP=?_{hbAAEzmPwWUzMMi56TZ9qVJK2_(4be?oN(zz+ElTMW8OH=RxWaqoC8J=BZ9`Q7%3O9)AbQJfj7q!_WuB;RF zIYB)CWZ`8FFIp>VGBHtJBYeFn+`U?8o$5*W^ExMk7q1dceIvNGQZzaVJR87^QYWIs zbO6skL3o+J^Hzv5#P6czIFe4^+A{n#n;X78$BWu*_AXi~YBJGVIZo8)B=7uVg_of^ z?-)^r+^sD^DY>azUMzg6-qfuv!tvbDEiDvvJ(*iuAR7Bb?!x(^I-R(+4t{sY3EYMA zM0GlEYjbfZH*G6(@S#NS3ENU?wy5g~+Xb^krIWSQnf!3k8?_5&h)O4EOVdSNPteYv zCcMnqMN>shCT5pR5zX0@oeA0blZBTVTbYDE>3bu#Hc>PpVW)Z{wl;x#auRmQcofz* zU>CHDN+(~d<3vMlyygd?OuBZ$wKi5X_Q}?Ht)dLkI&TdAGrRfy#|hRcCTn?=@bw03 zwFQ4#=j3W-q^Qq{)pE1&^`>fh1h$+|%}ia?gq}!8>b&8i3~!5qMp5Ym>cU~7I-RGL z22tM=r&9?#f2i;>P8ZdSnoOE59)b^EeZq8s59KgR%Y%ilH%jN#i82J~!a<@souf-? zMe`9cx@@2reN>9huMu8`=v3YSQHGhtZ!&!|bYT@HrW2tHDn+G}p!pfb3DD{M?D1k2 zWa4vCxv0scXQd2h^@eAu6rU}!*}14h)MTQwRE+&W{3+~2XGOzBy~$ZpMO{yDmKA&^ zVQzL}bGa;9k50{sBQ91 hUUuSgQGZdBNz1BRG~|Y5kHE*3Ojgc!2`{7a{{gF^S^@w7 delta 2525 zcmYM!RhSk;7(n6OrD6AHmrf}Ok&s40kXX7A1f;uDIu=lASV{$@8-qqgVQJ~^?oztj zbGUJy=e+Y>%v{byMcoRIinP*A4%LI1D7#9Re3_YcZbzj9dMR8XSO$e_Th z#9IQ_PWQX=wv7zHRXg;NA2p`?_Il2ZywNvS9`rJ=Nxj?&Y6lz}o*Cdy1%=zYpc z*(f{Zpbsb~eMq?|H|3$c^bzHw{8WGnQXwi#MW`r!OvR`;m7tRJ36-MK^eL60vQ&=B zQzTWOid2azQx&R8)u=kvpqf;RYEvDmOP^6as!yL&18PW(=nHC0O{gg~qvq6tT2d=& zO>L+xwWIdbfjUwr>P%gzD|Msp)Ps6bFZz;tQy=O}{ir_;pn)`q2GbCVqMxXy5A-9=pqVs_X44#+OY>+x zEue+8h!)clT1r3B&-4rZO3P?Dt)P{(idNGaT1&st@AL<)qd(~{T2C7&hBneB+Duz$ zD{Z6g^f&FGowSQ~(;nJO`)EHMpo4UX4$~1jO2_CpouHF+icZrRI!ov1JYArRbcrt0 z6}n2-=sMk?n{^`yRhE)AriG?Fi*u{4pU(oC95 z3u!5>q_wn>w$e`8O9$yFouspLk*?BBx=Rn~DZS)N=`DSvuk@4tGC&5(AQ>z}Bua+L zFc~gi$=C9YjF6EsN=C~V87t#tyiAa2nJANFvP_Yw@~wO)-^(_DONhjh*b+zLN<4`#2_&H;l6QB*oC1 diff --git a/src/data/diseases.csv b/src/data/diseases.csv new file mode 100644 index 0000000..8709165 --- /dev/null +++ b/src/data/diseases.csv @@ -0,0 +1,108 @@ +AIDS +alkhurma +anaplasmosis +anthrax +arenavirus +babesiosis +bordetella +pertussis +borreliosis +botulism +brucellosis +campylobacteriosis +chickenpox +varicella +chikungunya +chlamydia +cholera +ciguatera +clostridium +rubella +syphilis +coronavirus +covid +cowpox +coxsackievirus +cjd +cchf +cryptosporidiosis +cutaneous warts +dengue +dhf +diphtheria +ebola +echinococcosis +enteric +ehec +enterovirus +typhus +ecoli +flu +measles +rubella +giardiasis +gonorrhoea +haemophilus +haemorrhagic +haemorrhagic +hantavirus +hepatitis +hiv +hpv +haemophilus +meningococcal +pneumococcal +encephalitis +lassa +legionnaires’ +leishmaniasis +leptospirosis +listeriosis +lyme +borreliosis +lymphogranuloma +malaria +meningococcal +mpox +monkeypox +mumps +nephropathia +nipah +norovirus +paratyphoid +pertussis +piroplasmosis +plague +pneumococcal +poliomyelitis +rabies +rickettsiosis +rotavirus +rubella +salmonellosis +sars-cov-2 +schmallenberg +sbv +sars +shigellosis +sindbis +smallpox +streptococcus +syphilis +tetanus +encephalitis +tbe +tbrf +toscana +toxoplasmosis +trichinellosis +tuberculosis +b +tularaemia +typhoid +paratyphoid +vcjd +varicella +hepatitis +yersiniosis +zoonosis diff --git a/src/data/medical_procedures.csv b/src/data/medical_procedures.csv new file mode 100644 index 0000000..9f1ef23 --- /dev/null +++ b/src/data/medical_procedures.csv @@ -0,0 +1,74 @@ +mastectomy +biopsy +cesarean +abortion +miscarriage +hysterectomy +hysteroscopy +colectomy +prostatectomy +colonoscopy +cystoscopy +electrocardiography +electrocorticography +electroencephalography +electromyography +electroneuronography +electronystagmography +electrooculography +electroretinography +endoscopy +colonoscopy +colposcopy +cystoscopy +gastroscopy +laparoscopy +laryngoscopy +ophthalmoscopy +otoscopy +sigmoidoscopy +magnetoencephalography +angiography +aortography +lymphangiography +ventriculography +computed tomography +echocardiography +fluoroscopy +radiography +scintillography +spect +ultrasonography +thermography +neuroimaging +posturography +politzerization +hemodialysis +hemofiltration +plasmapheresis +apheresis +immunotherapy +chemotherapy +phototerapy +hydrotherapy +electrotherapy +psychotherapy +acupuncture +antivenom +intubation +nebulization +ablation +amputation +biopsy +cryosurgery +hemilaminectomy +image-guided surgery +laminectomy +lithotomy +lithotriptor +lobotomy +neovaginoplasty +radiosurgery +radiosurgery +vaginoplasty +xenotransplantation diff --git a/src/getNewPrompt.mjs b/src/getNewPrompt.mjs index 72b18bb..e197295 100644 --- a/src/getNewPrompt.mjs +++ b/src/getNewPrompt.mjs @@ -1,7 +1,11 @@ import { db } from "./db.mjs" -const badWords = db('bad_words') - .select('word') +const blocklist = db.union([ + db('bad_words').select('word'), + db('medical_procedures').select('name'), + db('diseases').select('name') +]) + export default async function getNewPrompt({ minCount = 200000, maxCount = 30000000, rarityBias = 0.5 }) { @@ -14,7 +18,7 @@ export default async function getNewPrompt({ minCount = 200000, maxCount = 30000 }) .andWhere('count', '<', maxCount) .andWhere('count', '>', minCount) - .andWhere('word', 'not in', badWords) + .andWhere('word', 'not in', blocklist) .whereNotNull('pronunciation') .orderByRaw('count desc') @@ -42,5 +46,5 @@ export default async function getNewPrompt({ minCount = 200000, maxCount = 30000 //console.dir(await getNewPrompt({})) - +console.log(await blocklist)