From 9fd86dfdc3671007fa4d16475058f37addf433f7 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Tue, 27 Jun 2023 11:23:54 -0600 Subject: [PATCH 001/126] add dependabot --- .github/workflows/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/dependabot.yml diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 0000000..4a410ce --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" \ No newline at end of file From e7475d486aacd87fda4ab7eb603999a1ba9a3bfd Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Tue, 27 Jun 2023 11:24:45 -0600 Subject: [PATCH 002/126] fix path --- .github/{workflows => }/dependabot.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflows => }/dependabot.yml (100%) diff --git a/.github/workflows/dependabot.yml b/.github/dependabot.yml similarity index 100% rename from .github/workflows/dependabot.yml rename to .github/dependabot.yml From ad115f78a2fa09a35e60a370c0011ce2ddd4222b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:25:45 +0000 Subject: [PATCH 003/126] Bump actions/download-artifact from 3 to 4.1.7 in /.github/workflows Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.1.7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4.1.7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .github/workflows/wheelbuilder.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index 067256e..a8d73b1 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -102,7 +102,7 @@ jobs: run: | pip install twine - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4.1.7 with: path: dist From 2487414442df498384c9aaa1c50c63013132a768 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 01:48:39 +0000 Subject: [PATCH 004/126] reconfigure requirements to support python 3.12 --- .devcontainer/devcontainer.json | 9 +++++++++ PyRuSH/version.py | 2 +- requirements.txt | 6 +++--- setup.cfg | 2 +- ...RushSentencizer.cpython-312-pytest-8.3.3.pyc | Bin 0 -> 8899 bytes .../test_Rush.cpython-312-pytest-8.3.3.pyc | Bin 0 -> 18934 bytes ...t_Rush_w_Logger.cpython-312-pytest-8.3.3.pyc | Bin 0 -> 16425 bytes 7 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 .devcontainer/devcontainer.json create mode 100644 tests/__pycache__/test_PyRushSentencizer.cpython-312-pytest-8.3.3.pyc create mode 100644 tests/__pycache__/test_Rush.cpython-312-pytest-8.3.3.pyc create mode 100644 tests/__pycache__/test_Rush_w_Logger.cpython-312-pytest-8.3.3.pyc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..23e93de --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,9 @@ +{ + "image": "mcr.microsoft.com/devcontainers/universal:2", + "features": { + "ghcr.io/rocker-org/devcontainer-features/miniforge:2": { + "version": "latest", + "variant": "Miniforge3" + } + } +} \ No newline at end of file diff --git a/PyRuSH/version.py b/PyRuSH/version.py index 590a0a9..91f09f7 100644 --- a/PyRuSH/version.py +++ b/PyRuSH/version.py @@ -1,4 +1,4 @@ -__version__ = '1.0.8' +__version__ = '1.0.9a' if __name__ == '__main__': print(__version__) # ****************************************************************************** diff --git a/requirements.txt b/requirements.txt index 882dc85..ebd3a61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -Cython>=0.25,<3.0 +Cython setuptools -numpy -spacy>=3.0.0 +spacy<3.8; python_version < "3.12" +spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 quicksectx>=0.3.5 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index f103568..d1415ed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,4 @@ description_file = README.md [bdist_wheel] -python-tag=py3 \ No newline at end of file +python_tag=py3 \ No newline at end of file diff --git a/tests/__pycache__/test_PyRushSentencizer.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_PyRushSentencizer.cpython-312-pytest-8.3.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25adbccb2963c6e15d6824dd2febaf0aa8852be2 GIT binary patch literal 8899 zcmeGhOK=>;b!PuF`_b2qO~4+$W^HRN$+rBJLAD&(e1wcd0Hw&*XnV9f%k0e3Gb?$; zY{ivukPA|VRH}>}j0W_DL%u^@aZ zhji_}>3RM7b@%Jn@6+>-WK!gy{ORPu^c+XRN~E~PAExm<7j)~B z$ts_Ye2g!nY2N7=t3Fy8KQylBmZIm>IfVi;dQi`6s#(Z2x^o=wy8KN%P)rL)$@x5x6-of-boDs^CpinCTZ#d{)_4|u zc{NZg^@ui+dQ~gt6r9mFjXyaia2+$uO~Oc3Rbx zY&IQmLZ+fkJHhfSF>$_>bs{ydA(X?!#33iTv^W3FEkNdg@I6RQ`-{$~a$GT(@H}wexuJDlTY=Sg|h? z>x-~N5NloJ=a3Xijko!trwlH}g@pHofy!j_B;dc-0#;r8Bv@=ASUXUxi?+dYm^%TA zoab_Zr;;#k(2DzQcF^bjQs9NQ5Kz?W7{7(*nnn#3fy44*wfUG8#}DE7b`VC3v>vVH zsNt5Qb~Hxydo6$RTou;Dvvtij)zXnsr>&$ARFZ18T&189b3GFU)s)~TOQw4am5fx! z>4sHMXrtu@FV1y~6NV*gb)>mldbT#VRwPD_@zEt!H!WGuDUvY_O=*FG05~Tm6Ot{f zWyJ}Xsj6E}sA#ANw%Dzrsz({s+N^>h@$MBpstmjAPnK~ritt% zC=|EAiJ_Wjp=qw8uE;(L`aXfKh)jbzu8$L*NT<9EHOzoc! zei%#6lN0K3_0_IFu06ZQF#n=OkY~!U=-$JVI^}Aj(|D0Y-?R_^k_{Ehl zw|3hXPTz=`fEm3R=-j_A@wb3*AlSa-455azGt>*~1mn!t_!j3V09x1T9M#VaJ4XS# zey&ghizvBe1!w;@|DJH=Y)AmH6(@kv8{O-JtYqU+$svBu0d1OpOIac7Hy_% zF_Q)*%s^=3+KM*nuWtpqm`_K<)1kQt?2<_0Bw>px&)(wJ+PWkfJ9mqZYdr0^&&hxO z62gq5d@Txp0kj}YlE|u-uoFelwB-eKCACafjy28wt97Mqsjf8jX(gA0Kf4v^zPbF` zPY1sj`6U`MtXuogESK&Wl?T9IqItw3itZC?e&t~^BMt38Fzg!rfn7aSMW#K5MkGxp z)y>ju!H{NUa0sNNT9!g2%aFheNCm?zt7!OVRjVN73Sf1mvaITotP_bU&`4#=pj9;g zmFa0Ur$S6u%}CNXb_RyI0yT6sCutQuFA>#LWK(gOO+iBr`OBsS1hSr2bvYwGt{_>? zm{BN9Lsw>{iJn<`Mv>$x9UX47hAyF#I{4rS3^oB(;0nWm^T`<%3S-F9G*o(K z6cfTGX{rjtn37qcd4hJCCgRX2i0@OJJ<|9KJPQ=Jlxx^TQlO#&X0Otk^g$Y4a z2Chq*Q9uSjG;M**KolMtJe&d%oFZDpzdw}9Rn`FA7v+NAZBQRNyxf1bl@P+7St&f zbQFXO(t`=2dKF|)1Xv1z?Spm0aMOmS8MEeY5w)V#0>pkCLoLmKzRIvU(zae)ov!8! zmR8M(TcPl0M4a+i0(!;U^ z7KgxYV5(>Vl@5ZRQL2maJIe$7m-4u%-!Jk-3aa##gEaQo#k%?1_#spT<_iY zmaALSrueK^kp?BK^`^~x@!6L)(_&2;bg=_zll~aGQ_O|aA;{FKy$JdcY(+3|BL!9n z^q6tknT80sT;2IGCQLV;`N2)}S$=NwS1b?~ma&wh){@1Y4N?f*IGRR)DS5h$1^ILp zB5=_Z-7H1d&II9z_m`_IDwnC#k<$iiNBxgGm4mPHKqBQ8n~~lp56O%DP!ZRykVMWKEu`QZ-}9WUN_I{5PN?}eAPyfU)Tx#``+=1+pqc0Ixg zcU+3y{(b+hg?Ql+{_+E(Z{Kwt#(jkU9A*ZH!O?}mQ5SRF#T*=c^GO%+>52%qiwkk7 zz;Ms#!k#4zUHlhUl3d3I6K@n=7Y=oWT4!Dx;ZnuTyhwlqHLk?hAfJYNiGZI|G~W;S zIF@16^CTF-DnrXq!c93@*baL+S)>-WBVJAxZRA!Y+=yH4dW_0) zvdEWnGCM+|Ear~cu~ykuO(3zFXbZN;^0T-dH;>&suOWgRhy0B7Y2;^3eOlyPc0I>f zfllzHw*sB$(~-Crb0oFL zAjlxN8^IO?y$CiT*bE?@bmH#KXa?_t=|OD3yM&frYZo^3A?QaifM5rLodD7)=1J&o zL@+)r90}cntwRX*A{a!l55av1?gwCE0^6i3P+jI2wzhB#Td}|SDFAc~kyWvI;aZ9l zHhmE5d?E1__gvrBL0pUR!ejj9v5AGT2{g-(G0P2@0}~4eJPaCOz&tqd_AZwME%(h4 zZWkA#g$C-OiG`sh3|;&eSK?^7;Vy!ElNW~Dmzr+801m?c>RSL;GTmU*LmGVNs0G1{ z*RPI?^-Cb&x#*yMyFjmw<6D~o>fnXJ&GeofSv!mqD%R@vI1S3Pe(dDr`ka}NtfN>In_=JQvg<)IAKPxtk|7CC;#a=)g(C7 z6#Uz3ZFQ$Af>Z4tl^_4VQ~mBQvfSW=IaE3fn*?86L+B3YQusY6ODmcJ37@G2xm6zJL>V2ju>tuc0Tw) zTi0(7oDQ6co{FAHoJzbheqrP8#Wi~tyZ8Qm_+s0U`Oy2_Yc6$fTIk;N>fy!i0mwT$ zy}a|o&hC>NUfOVS^Glmg(=)TDX3zZM)Gy8-U0lEO!tMtaJ0F}E8XwqROT6*qg)JlV ziHosA@TKj$h(M0;mmeB?dl;jOBP;*`#C>B6_l>z2j3WRuGWKTGMPLAObA;Q)g&1f6 z_2AgT;1Y%|z9|5CDD8@Am;2AK{2|dSM3Gv}wS0a({zANqbh9BXtOz>Ud$M~xorSK8n z?Tg*gDJSM%ozSg-aMr@tT~w=#dw&5JAQ`I6Qq@u%p^kE*9*#x1POJiamN@}UPMTe2*ZawNo}W095>TFPR$GbEQ< z?yT+%DRHw4tF433Xo^Y!0?Dla+id`gkOAi-9|8K41V#PPA2FmrVk*FBi=yBkE2#}2 z^rPq8d2%(RD7i|6crSL(+JQ!woB7+gQ2J+%~bTGmZj8~2lffb~f883L-Lf3`JZR@ZFQPX76qgeE?$y%? zZ8SqC@`{$$waI}Q-l$Uw(okN>4phSBP&tgRTP0GCRK#+GZg$E=%h56DM|G&g@ht6FSJg)u#G2&uLesnx=NiKk;QUdj%TU* za&XLfIo$q~>+Nzp2Xmx(dgJ#p2=6Cgq|x_d@5k3W()Xiw3!b|DW+b*wyts`g$E=Yy z$ZqWVU>|?u*>nBrzacSxv*npSU~L;Jjb!wYn1Ec`f{VyLQ-$CaXmHpEdJ%B z-KKs0bJ$+a^A!>PLHG;s55YeI|1kWc@Q=YiQHfiW-tYiQ->?d$o8?xg^u`*c+s|;7 zcbZW;t_JI+^m2{TVM-sK(qZFF_LWGFR3OJ_ntX?3N0saulGD@VO;tS;BO|IRr(+cN z;U#?YX83!70|#dK(vBC}ic`B&TI>f_gd89k+nLhRL&`|55UY>_X4c-6HWP$Bb!sPq z-3a#H6A`(yBZb>BT%pExmg7{}2{EhAPOMZZ$ZnOLW}R4cC#zsLgI2X&R<&In_ZlQk zQ>f0m)&jP$SVCssh}AlHW(ua%^R`T zic`3|XC!JgX0lQ~pP@>TX4UbcL=_{N(WGGobBwT5EMkfJ%&2PPJiQ-Hf(OjW8p=Xc*iE37#G$Rgu$e4rb^d`EziCx}A zw>PoJo9HkSp4z%rq;{=Hb+1TSm6$`@XWlXsot-OEEADlzxYuPhXO`>QrQw*#{ej|B zGD0}*ogM~?rEz`McEhv0z%4fKosKRxZ@PT+($R(Hj@jmppSR96zcL;DO6*y9GJf-7 z&sSpa!jtj4DfZHhFn#{4;{O8g4^ml;Rjjd!KqKBzuCaQN_ly2>&V}5e<<2%^+qo#Dm!%t{1v*h0{{2d`Zsx&eI8hAMQpCWBXdU{fX z{N@u9gJ2>YQQ%%KOHlJ)9F-o3s!}j5$P;aQW4QkN-iE;nSfmn+$9bMkYwY1h0=j&6jTOoPFbc8ka(M1OqYyvij`bLYIMzmXJ77k!x=~2n#`0S; zWq+d(w+zbVRl#EvvNoP;6tXtndi0-umQdA3A#N)^yMyd+6ylcg8L7u8WKBGerhjd` z^%#Ztjnr!t;cQ#e(kEgiCK#_&P=*u%k5emEyfhK1XvC<79`W_O0tU@wAoDuQ-ZZ3Vlq z2JIgJ;Q8ru{)(VQF|i=F&Wf#9QlETpUTptLe0|}`_|1#2L+935{|K0l)z&}9D9(K| zd{%|p`UkJUicnks9$*ybztgX#C2@`7{CE1*v?MOo`tEGY@c)lNFyxvx<=7&=$!1Krv2d#7uZ4tv)lnSa7b z=ZS$)g`oBW4>!@6F%QR?icF*-bwVdP+#EQ12vj9_1Ujyat8|LwG;IRBEXeamA^$BZ zgVm$VFpnKSO0=w0NZai3R_NFxvd5+d1rJA;ki#{#ATMKb0F$Q?tdsTKUB&wDMzxA| z(>en^yI$6}W=iODR{BJpi9%-_p}_0U;&{Q7;F*I16+kn{yzbz+-Y&L+qnA591d51Y znJ-(%L%O39kpnu`%0fA4zkDS~%Rz*myTX{yR!p`Bj zAIrkSj2X-j{KpK+AK$S+u8+^kW^WI+3mk~hkv^szEhR0x>^8@;`Mpy7v}rkxcH^Z6 zA{Aufxf?x=Cg6DQ9-m5ujS!UzBZ?8sD+MFOWM_&FC9dS03Nbrs$_yr6hFb=oGeoCG ze2&cAm{}Op??bZG;y7q|g>h88YF`2XKdr`%7tdbZawYe->AA+#Z7vu+!A~Dre2RS1 zc&R*nC2~o{q)>{KFoNFUwnH2Bk%D4&GB_7_~j6`cUR<+ zPp#-hAo}vkPFze~Ir4GeM}61!ezAXM|Bd9{n~8nPAwXLexoF+}BC`O4!?)lL5c^{n znSlxdegE1IZy@eFClpA2^sYCCl2-Wr(m-o63I(oeDe~? ztA&vq+{}UU{UxFffpPyNvzkX;JS(IQ19sM|2I5DKo*=oxu#(krM*{)^YkY8WkjX^B z$#(icm8_DR#C~MKpr4$}>7!uSgFE1fL*%`#(`hm=3cayvKPPEKRBS@!$u1bl~G-RM<$^<;KZdg`u8QGUd#sA# zXmG(94d;Rda8L|?9VM~7rw)Cg$5tf~Iry|H=B?NTH zJl?@NE3EyGj;1Xv`1!trtQ7{=(ioO;t*Yd>{)|)VSv%%luUpYcPHXklPh1Q;raPK= zyZ6*v;&~g-z20AtcX0?Bn(1?J=$ zkX^ zhiq{7*7BadYGluTJLL~+(_Qt-_hlIrTr3pDCPf9sThSIq-ctLh{K-NhW9Q%~>L?W_nus^nh zOCl4rQ+%nz^29WqGr{fAa-;;m$TsF>nbK0OGv%I?M#r!;?iVQm{WCidbW&UZFa`{Q z$Bzp72Gy8V^c;ORVVT?^WV&jjRqZ43TyTdd<6IOr? z-$w^vZS727U@u7K)(#7i&{t|Fro;iuh2#jsmK1_{1r>x7>fWJ~&>TGn0Hj}JFib9S z)h$n(iaQ&d#$~d9#>zg-hgkh;OhwAVu@ukC zfE>QFpxe#Qo-6zDj=zH&EKr4g-L9n&*G_G{?g?jQ*nGU%2Rha8KP8 whIk%Ug!2mxWVV6aoi<(@r;A;ub(0CSNXv92QXwfj z(kLX|I?Mw)wAoAAEm+fGKx?GHI$*%Ir*+*G*h^9;OTYyvuwf5=OOp=8f<5g2Nl~H| z%5mH-OIsaCKhOR8pL4$Rpa1-&wl+k8`Qe8f6C({2^$&E|##gXh`vF)^Qar`eNotZd zzn+vg?xCp^GwGwP5dWm#3<*pI;Oj}Uso-RgraaVR6z_eW;u#?@+hS*3`kwJx_Q@I{ z%=->+p(v%vLqXh)r8u_jOu<4^SvpHc{LAPbjnD?OKQp!`Zb!o}U2t661(uVPVAgxm z!_&`GlV08fl)+j}`gjJYpZ5U`@P42y9{?KUS)d_42(%^}iqshFxS(ogIdQt;)f>vwC(&~#g+d{e4 zuCW+gwCL2V$sa`z)56nd3aztC<~?tEZ9k|*(2k#_BFt#6PRXbBGii(;`HU8Q%0fuV8;0UR6^J zW>yv@gTd^P3sfOFtzw^$>(P#RSvd^J&bx=11EQ$*!k1$*9Tr3czx$-@Rfsa-2byJi(Ah24ljLv_fp%VXD24lw>-PRd_Q;%vZLZo z9>Os;d8prS&NDhsL%(CRGBb;@%8X9iW03bis^a*UqUj!pQy4FrN>F*P?%_Rc5FyX# z%q)~yZt_0fllSXB-H-DzAJ7ANRu3o}i|K-TaJI;WrPTQl@6FffA-%>-7uLg&j(v$% zH)}O^tl6+_D}8xJ4@}d%4@;c(oMEgGN1D1)T-&BIcDrv9&$q^SemfrTKjqCqujFg_ z0MF{RB5k_rbnmR&45_oz(G>KN>ZpxtOOJ)2rNLvNV>N4S>9L?)f+KIY9to{6UX2}( z4;5Nk&)4u_y}r=Wh6*ihu+!0$qb5!Eh?KrMtgtr6iImxUjeIR%r#BW#Xwq8k*p8fV zwv`#Z$u86Wehc=Wqja8yzZd=t{C)5bz~2x5ApAq{59ezNoL+wuPH(8d=~lYRoZeXF zbbGKW%xRO;aWz;gr|VTthc3N!PKRB#X*5TMIEn04qU3QBpAq7RiKs=%6S90bL=v*h zM?(s30?W-n7U}Dr9Xl53T>ArC(%HU<8v16zLUs_e^+eR@l#mdmP@e3tytYNuMK5f? z*&ZZ)NVZ*PF>+UD1ov+ELXI7#Pch3buod#`!b}B;FJ;+f<%vmq%nbTWTFAD$kZpJ8 z^?FWK1w}Kf)(zQUwCV}hg!Qd}5IbCPO=(~2l* z!-B?%N!9R~O*6bu86))7bQXa@j8lxpSe#2HV~UVg;&Lj@DS{DITiqiL$g?QDntvw!?*^=_Ds{8iXu1E4X(UK~YL)v>0NSpjxP* zYScg-VpbEooQ`g%qu1#ea60;(j!q-&$gO*Yt9ONKV1=uYiPf|Rtsu+M)wRO4BCLBw zSa+c~D_wW5io8Z{Gz#t+h7Y^F%K^DeH^P2(H;n9Y>Qc+Lh2W)@yHAds7`fQed9J1N zt+u6>hry2xUu4_Pv28C$PCs*=?RbwJx_D>&&a*@BvBMYdjNb)zSi#xh4mSS-01ggy z>js-Q0LcPug93|YO+eBOfK~>Q#iNDm!NeU%meRODG6#@cJzfb&t{!hKK(crwD|Qv7 zmjlUCDQ-tF#uZ4G(zvwL0VG$ArhabBB;8u-1d^o^aPclPYXFkC#F$1a<>Vq} z_*j9YJEz|qNV;=+B_LU9kL&om9+^sMT*m*C%IUfrkX*f`B_O$ayeg-=9hsVnM<#bp zZ?14;veMy^$(_?%DjbeX)@H`K0pSC zh98ZRQJG|v1mJQa%mG!2JSQlUn3#c+)RUYjk+>{riku|-Mx(?!PN5)1xeEzS8U^92 z@&zP#BvA;GdyzbVWHXX4ApyA>r9^-%;|96>WhCbD>?>${7|3Fcd7M$Uf?bIs=|Iwn zqzg$mk{%?zNcxcUBN+g)2I$>g2EDtnp6WXV=)K<^dRL*gE4V+;4p(tMuus7N?)O&M zBP_vlBnm6;G&@SqNU)K()5rAbhW$04kpV zc3))gh~EWv_bQVAEucnHmHboGCsiI=mHdNpPLp;UP5`i5)Q{$BhJ~c4h?x{=F9>~5 z_i4SW_pL0_y=9AZ?}NBVp8@K*fU1cSz-a*1pf~l@VRzH;J zI&f=cWwwuB7O_&9?KZ4LHz>2Y-^^5!MU<4;+;3*qCyV$D-L_ZnAaB>6Tj2CNOBPY( z^up47n`IH7>NVYzMdT*NX9R*W2E2AewZkM_GzvW7rsRx9Gzc6Y83cg@UQVWjl&oZl zsHz!wNhcaMO6C5{#`X^|@#g-cdJ=P?;jYcfL(QMFsOk%a37<;Z+n+%kZ>T zj8}TOV!ss_rVdTPHsjT4y>RKG72mV)9u42gm`3;77ydbL^IoKmP{!?T%8C?BS&EhU z`N~2WH&w$G`S~%0?L$*9@C@asd0@8q7R#)@NpyOcdeIm6B&fLO5M2Mml_@*x22?$| z&#gV+>!K{Pu~>@}o)@+5Y@Uh)M@_M0uBo77y``v^{A{k~kfjBV zI^el_!bv=Fga!|xIyZ)0rz1H}D3oI!`DT85gf)B$CnW^Kn-nC&XR2N+rk+GejR9L~ z_$@`@h~HF>R?KB)8wSv#!P*5Gw5lR#wbY1B)nTO_+_~1`TP>$@H%dd*-vNOqTa6oD zI&x~$%i@osOO27MlsCA2Veryj98Uld)t{fZBY`yfOG>lUV8Jp8@Ge;)bk#)-F&yxjllj#qZP(e_r? zF9&`;@T;!h^!>W;Y-I4lJ-goBII&cB;5_>@%&31~^-}j7pv`u_Q{OQZ3a`-NMeJ`* zFPJSKNs_mg2V<~Hc}6)2rZ8?)6M`nd;HqR09tR{qd_E(CUjCSt;X!;pF*r09f!&F4 zB*}pu%~q0>rGyB9BFMzI5_t-=_~%T8codPFiR5XB9k;Rp`<{^r60$YMMQK`yYq%Ez zn}NnXJZs=dMk;z2E29W;L7c-1#X)JG%!}F#DC^+fIWb6{?LHJG<1G*qH@&D-or55}RX|#EbGAr^YkTSi*~{ zz^Q^tlHy?j(?PdP!)t?#0^Pu+pdZY(73+>oax|sbX1h(%MXy8u2N@nb@C|{2GsK)L zD)IpIl|!Arzig-Ne-fgo9YBs#S3KUqmZc_afD)krGjvIwG{|Ba%f$3~szzZ1{ zvln2{;I5%zi|hIyY|lc^x64T$IFQe7CG#^f!SUw!l&B#q!FVHI%*bk5)PM_-SMa@z zssRWCronMh1PD|(&BR)gnwg#!;{ea%*(f1nm>4pQ&nU74_d>}GjBH`>t<0t%AddNS zDqbAHN(kH^T~Yf30w#&0b{SMj66VQx`#d)%0O*tOZAnrDI0)Kj5-^fNc0c|Y6b!Qu zSsRLmX%d$+iVD%mG+5f_1bBgxBvV-^QY9*?upvs6NOxyv7bG={gINU0pl%$5VWs4R z!lj~!EFDIXkh^5$nj5)mpp3f)o_2DVzqJ9lt8UHQH5v&U@Uj=5`ohcJP%M^`d3YiW z_HZnAB*P`G5Cx?x%GZ#LA$bPLK_qb`I5U(q5(UU2Wiqa{OCWYokf1)e+5_Zy>MG;$ z9i*?aKHqNoYMn*veUH*t?_+$mSHpfF4FMoceqZOchM=$0$`5nF%BFxtVzEe_!A@nw zq$c9iB7AOBbhX8-=R{F!H-)D^QALLusHbhY|DucC9=D=EN>~(g#)qRgB&p`kJE_DdV6)#QGAAXLa;ogSWnK?lMg6#UHOKGV!{i#cfD Date: Fri, 25 Oct 2024 01:52:17 +0000 Subject: [PATCH 005/126] update dev_requirements.txt --- .github/workflows/run_pytests.yml | 1 - dev-requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index d552e43..7a4a036 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -48,7 +48,6 @@ jobs: run: | python -m pip install --upgrade pip pip install -r dev-requirements.txt - pip install -U spacy==3.4.1 - name: pytests run: | diff --git a/dev-requirements.txt b/dev-requirements.txt index 5006211..e2efe82 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ -spacy>=3.0.0 +spacy<3.8; python_version < "3.12" +spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 pytest quicksectx>=0.3.5 From 40e80e847f9b10a72ace1935ec5556738fd7f813 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 01:53:21 +0000 Subject: [PATCH 006/126] add python3.12 for multiversion test --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 7a4a036..ec4670b 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -30,7 +30,7 @@ jobs: - [ windows-2022, windows-2022 ] # spacy doesn't compile win32 # python: ["cp36"] # Note: Wheels not needed for PyPy - python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11"] # Note: Wheels not needed for PyPy + python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11","3.12"] # Note: Wheels not needed for PyPy timeout-minutes: 45 steps: - name: Checkout From 97ae45ac166133ef125792b268369218677c186f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 01:54:47 +0000 Subject: [PATCH 007/126] rename the job step to avoid confusion --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index ec4670b..832f339 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -8,7 +8,7 @@ concurrency: jobs: build_wheels: - name: Build wheel for ${{ matrix.python-version }}-${{ matrix.buildplat[1] }} + name: Compile for ${{ matrix.python-version }}-${{ matrix.buildplat[1] }} if: >- github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || From 8cfac590f4789391ea300096aa7828c70f411c40 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:03:38 +0000 Subject: [PATCH 008/126] reorder dev-requirements.txt --- dev-requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index e2efe82..7405389 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ +Cython +setuptools spacy<3.8; python_version < "3.12" spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 -pytest quicksectx>=0.3.5 +pytest \ No newline at end of file From 87fd3137971d3bc085bccd02ecc529dbab4cfbfb Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:08:59 +0000 Subject: [PATCH 009/126] avoid pip install ./ override dependencies --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 832f339..c34893c 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -52,7 +52,7 @@ jobs: - name: pytests run: | python --version - pip install ./ + pip install --no-deps ./ pytest tests From 39973c9e486c564d53c274cead258aab72a89cc2 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:11:17 +0000 Subject: [PATCH 010/126] simplify os matrix --- .github/workflows/run_pytests.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index c34893c..41db564 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -22,12 +22,9 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-10.15, macosx_10 ] - - [ macos-11, macosx_11 ] - - [ macos-12, macosx_12 ] - - [ windows-2019, windows-2019 ] - - [ windows-2022, windows-2022 ] + - [ubuntu-latest, manylinux_x86_64] + - [macos-latest, macosx_universal2] + - [windows-latest, win_amd64] # spacy doesn't compile win32 # python: ["cp36"] # Note: Wheels not needed for PyPy python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11","3.12"] # Note: Wheels not needed for PyPy From edb5670d4ac625395fa1f787473e137978371903 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:21:11 +0000 Subject: [PATCH 011/126] update pyproject.toml to fix the conditional dependencies --- pyproject.toml | 27 ++++++++++++++++++++++++++- setup.py | 12 ++++++------ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 92010d7..39cbd2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,27 @@ [build-system] -requires = ["setuptools", "wheel", 'Cython>=0.25,<3.0', 'numpy>=1.10', "spacy>=3.0.0", "cymem", "preshed","quicksectx>=0.3.5","PyFastNER>=1.0.8"] \ No newline at end of file +requires = ["setuptools>=61.0.0", 'Cython',"wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "PyRuSH" +dynamic = ["dependencies","readme","version"] +authors = [{name = "Jianlin", email="jianlinshi.cn@gmail.com"}] +description = '''PyRuSH is the python implementation of RuSH (Rule-based sentence Segmenter using Hashing), which is originally developed using Java. RuSH is an efficient, reliable, and easy adaptable rule-based sentence segmentation solution. It is specifically designed to handle the telegraphic written text in clinical note. It leverages a nested hash table to execute simultaneous rule processing, which reduces the impact of the rule-base growth on execution time and eliminates the effect of rule order on accuracy. +If you wish to cite RuSH in a publication, please use: + +Jianlin Shi ; Danielle Mowery ; Kristina M. Doing-Harris ; John F. Hurdle.RuSH: a Rule-based Segmentation Tool Using Hashing for Extremely Accurate Sentence Segmentation of Clinical Text. AMIA Annu Symp Proc. 2016: 1587. +''' +requires-python = ">=3.6" +license = { file = "LICENSE" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} +readme={file = ['README.rst']} +version = {attr = "PyRuSH.__version__" } + +[project.urls] +Source = "https://github.com/jianlins/PyRuSH" \ No newline at end of file diff --git a/setup.py b/setup.py index 7d5d533..5be1669 100644 --- a/setup.py +++ b/setup.py @@ -13,12 +13,12 @@ long_description = f.read() -def parse_requirements(filename): - """ load requirements from a pip requirements file """ - lineiter = (line.strip() for line in open(filename)) - return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] +# def parse_requirements(filename): +# """ load requirements from a pip requirements file """ +# lineiter = (line.strip() for line in open(filename)) +# return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] -print(parse_requirements('requirements.txt')) +# print(parse_requirements('requirements.txt')) def get_version(): """Load the version from version.py, without importing it. @@ -83,7 +83,7 @@ def get_version(): license='Apache License', zip_safe=False, include_package_data=True, - install_requires=parse_requirements('requirements.txt'), + # install_requires=parse_requirements('requirements.txt'), ext_modules=cythonize(extensions, compiler_directives=COMPILER_DIRECTIVES), tests_require='pytest', package_data={'': ['*.pyx', '*.pxd', '*.so', '*.dll', '*.lib', '*.cpp', '*.c','../conf/rush_rules.tsv','../requirements.txt']}, From de98089d5c069843bd282756d574eb8bfb601841 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:25:05 +0000 Subject: [PATCH 012/126] os latest doesn't support older python --- .github/workflows/run_pytests.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 41db564..16393ca 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -22,9 +22,13 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ubuntu-latest, manylinux_x86_64] - - [macos-latest, macosx_universal2] - - [windows-latest, win_amd64] + - [ ubuntu-20.04, ubuntu-20.04 ] + - [ macos-10.15, macosx_10 ] + - [ macos-11, macosx_11 ] + - [ macos-12, macosx_12 ] + - [ windows-2019, windows-2019 ] + - [ windows-2022, windows-2022 ] + # spacy doesn't compile win32 # python: ["cp36"] # Note: Wheels not needed for PyPy python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11","3.12"] # Note: Wheels not needed for PyPy From 13de9d04d66a646b1536a05e83603a422566bd8f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:27:32 +0000 Subject: [PATCH 013/126] relax setuptools version to support older version python --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 39cbd2c..e08d07f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0.0", 'Cython',"wheel"] +requires = ["setuptools", 'Cython',"wheel"] build-backend = "setuptools.build_meta" [project] From 1a79a4784a4bd5b3405d1aa59627a23e16f43fcb Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:29:31 +0000 Subject: [PATCH 014/126] test --- dev-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 7405389..e02cb42 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,4 +4,5 @@ spacy<3.8; python_version < "3.12" spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 quicksectx>=0.3.5 -pytest \ No newline at end of file +pytest +numpy \ No newline at end of file From ad67ed30f174159b43810e7e60bfa503cd7c24ba Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:31:46 +0000 Subject: [PATCH 015/126] remove --no-deps --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 16393ca..f30553e 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -53,7 +53,7 @@ jobs: - name: pytests run: | python --version - pip install --no-deps ./ + pip install ./ pytest tests From 4a9b3581ff745fb0304c0b3a16e5d879cfb31160 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:36:25 +0000 Subject: [PATCH 016/126] use cibuildwheel instead --- .github/workflows/run_pytests.yml | 49 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index f30553e..0e4d810 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -8,13 +8,14 @@ concurrency: jobs: build_wheels: - name: Compile for ${{ matrix.python-version }}-${{ matrix.buildplat[1] }} + name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }} if: >- github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Build System')) runs-on: ${{ matrix.buildplat[0] }} + timeout-minutes: 50 strategy: # Ensure that a wheel builder finishes even if another fails fail-fast: false @@ -22,39 +23,35 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-10.15, macosx_10 ] - - [ macos-11, macosx_11 ] - - [ macos-12, macosx_12 ] - - [ windows-2019, windows-2019 ] - - [ windows-2022, windows-2022 ] - - # spacy doesn't compile win32 - # python: ["cp36"] # Note: Wheels not needed for PyPy - python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11","3.12"] # Note: Wheels not needed for PyPy - timeout-minutes: 45 + - [ubuntu-20.04, manylinux_x86_64] + - [ubuntu-20.04, musllinux_x86_64] + - [macos-10.15, macosx_*] + - [windows-2019, win_amd64] + # - [windows-2019, win32] +# spacy doesn't compile win32 +# python: ["cp37"] # Note: Wheels not needed for PyPy + python: ["cp36", "cp37", "cp38", "cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy steps: - name: Checkout uses: actions/checkout@v3 - - name: set up python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python3 -m pip install --upgrade pip - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r dev-requirements.txt - - - name: pytests - run: | - python --version - pip install ./ - pytest tests + - name: Build wheels + uses: pypa/cibuildwheel@v2.21.3 + env: + # TODO: Build Cython with the compile-all flag? + # Unfortunately, there is no way to modify cibuildwheel's build command + # so there is no way to pass this in directly. + # This would require modifying cython's setup.py to look for these flags + # in env vars. + CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' + CIBW_BEFORE_TEST: pip install pytest + CIBW_TEST_COMMAND: pytest {package}/tests From af11eac4e4facbe58044b3c9112a06a72002eb6e Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:40:27 +0000 Subject: [PATCH 017/126] test --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 0e4d810..dbcfff0 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -47,7 +47,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install pytest From 19fd6abfbcdec602a12fd9cff1860926fb85f9d5 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:46:52 +0000 Subject: [PATCH 018/126] test --- .github/workflows/run_pytests.yml | 2 +- setup.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index dbcfff0..2d4a929 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -40,7 +40,7 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 + uses: pypa/cibuildwheel@v2.11.3 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command diff --git a/setup.py b/setup.py index 5be1669..7d5d533 100644 --- a/setup.py +++ b/setup.py @@ -13,12 +13,12 @@ long_description = f.read() -# def parse_requirements(filename): -# """ load requirements from a pip requirements file """ -# lineiter = (line.strip() for line in open(filename)) -# return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] +def parse_requirements(filename): + """ load requirements from a pip requirements file """ + lineiter = (line.strip() for line in open(filename)) + return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] -# print(parse_requirements('requirements.txt')) +print(parse_requirements('requirements.txt')) def get_version(): """Load the version from version.py, without importing it. @@ -83,7 +83,7 @@ def get_version(): license='Apache License', zip_safe=False, include_package_data=True, - # install_requires=parse_requirements('requirements.txt'), + install_requires=parse_requirements('requirements.txt'), ext_modules=cythonize(extensions, compiler_directives=COMPILER_DIRECTIVES), tests_require='pytest', package_data={'': ['*.pyx', '*.pxd', '*.so', '*.dll', '*.lib', '*.cpp', '*.c','../conf/rush_rules.tsv','../requirements.txt']}, From dd3d5effc700b209933159a04a426896cd81a614 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 02:55:21 +0000 Subject: [PATCH 019/126] test --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e08d07f..5e6922a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,6 @@ [build-system] -requires = ["setuptools", 'Cython',"wheel"] +requires = ["setuptools", 'Cython',"wheel", "numpy>=2.0.0; python_version < '3.12'", + "numpy>=2.0.0; python_version >= '3.12'"] build-backend = "setuptools.build_meta" [project] From 4d98f907f05c13f5c7049b3fa3d2e15e16de3ffd Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:03:41 +0000 Subject: [PATCH 020/126] test --- .github/workflows/run_pytests.yml | 54 +++++++++++++++++-------------- pyproject.toml | 3 +- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 2d4a929..f469f6f 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -8,14 +8,13 @@ concurrency: jobs: build_wheels: - name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }} + name: Build wheel for ${{ matrix.python-version }}-${{ matrix.buildplat[1] }} if: >- github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Build System')) runs-on: ${{ matrix.buildplat[0] }} - timeout-minutes: 50 strategy: # Ensure that a wheel builder finishes even if another fails fail-fast: false @@ -23,36 +22,41 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ubuntu-20.04, manylinux_x86_64] - - [ubuntu-20.04, musllinux_x86_64] - - [macos-10.15, macosx_*] - - [windows-2019, win_amd64] - # - [windows-2019, win32] -# spacy doesn't compile win32 -# python: ["cp37"] # Note: Wheels not needed for PyPy - python: ["cp36", "cp37", "cp38", "cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy + - [ ubuntu-20.04, ubuntu-20.04 ] + # - [ macos-10.15, macosx_10 ] + # - [ macos-11, macosx_11 ] + # - [ macos-12, macosx_12 ] + # - [ windows-2019, windows-2019 ] + # - [ windows-2022, windows-2022 ] + # spacy doesn't compile win32 + # python: ["cp36"] # Note: Wheels not needed for PyPy + python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11.x", "3.12"] # Note: Wheels not needed for PyPy + timeout-minutes: 45 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 + - name: set up python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: 'requirements.txt' - name: Install dependencies run: | python3 -m pip install --upgrade pip - - name: Build wheels - uses: pypa/cibuildwheel@v2.11.3 - env: - # TODO: Build Cython with the compile-all flag? - # Unfortunately, there is no way to modify cibuildwheel's build command - # so there is no way to pass this in directly. - # This would require modifying cython's setup.py to look for these flags - # in env vars. - CIBW_BEFORE_BUILD: pip install -r dev-requirements.txt - CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} - CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_BEFORE_TEST: pip install pytest - CIBW_TEST_COMMAND: pytest {package}/tests + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r dev-requirements.txt + - name: pytests + run: | + python --version + python setup.py build_ext --inplace + pip install ./ + pytest tests - + diff --git a/pyproject.toml b/pyproject.toml index 5e6922a..e08d07f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [build-system] -requires = ["setuptools", 'Cython',"wheel", "numpy>=2.0.0; python_version < '3.12'", - "numpy>=2.0.0; python_version >= '3.12'"] +requires = ["setuptools", 'Cython',"wheel"] build-backend = "setuptools.build_meta" [project] From 2932bea56947538fd78c8c1a11e520072b8c8625 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:04:12 +0000 Subject: [PATCH 021/126] test --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index f469f6f..41e1096 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -36,7 +36,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: set up python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' From f73ff2ecd013c25f68b6135a07583455dd3284f8 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:04:26 +0000 Subject: [PATCH 022/126] test --- .github/workflows/run_pytests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 41e1096..01c588c 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -40,7 +40,7 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: 'pip' - cache-dependency-path: 'requirements.txt' + cache-dependency-path: 'dev-requirements.txt' - name: Install dependencies run: | From b94997fd5b958c9cadf291d670f799210f4fc860 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:09:21 +0000 Subject: [PATCH 023/126] test --- .github/workflows/run_pytests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 01c588c..67c93a7 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -54,6 +54,7 @@ jobs: - name: pytests run: | python --version + python -c "import numpy;print(numpy.__version__)" python setup.py build_ext --inplace pip install ./ pytest tests From 8c506a365f6a5162117687cf5bba63ee5d9ac0e6 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:40:24 +0000 Subject: [PATCH 024/126] test --- .gitignore | 3 ++- PyRuSH/__init__.py | 2 +- PyRuSH/version.py | 20 -------------------- pyproject.toml | 2 ++ setup.cfg | 2 +- setup.py | 43 ++++++++++++++++++++----------------------- 6 files changed, 26 insertions(+), 46 deletions(-) delete mode 100644 PyRuSH/version.py diff --git a/.gitignore b/.gitignore index 3f0afaf..abc18a7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ dist *.so *.c *.cpp -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints +__pycache__ diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index fe7d966..7dc03c0 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -from .version import __version__ +__version__ = '1.0.9a' diff --git a/PyRuSH/version.py b/PyRuSH/version.py deleted file mode 100644 index 91f09f7..0000000 --- a/PyRuSH/version.py +++ /dev/null @@ -1,20 +0,0 @@ -__version__ = '1.0.9a' -if __name__ == '__main__': - print(__version__) -# ****************************************************************************** -# MIT License -# -# Copyright (c) 2020 Jianlin Shi -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -# modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# ****************************************************************************** diff --git a/pyproject.toml b/pyproject.toml index e08d07f..fad587b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,9 +2,11 @@ requires = ["setuptools", 'Cython',"wheel"] build-backend = "setuptools.build_meta" + [project] name = "PyRuSH" dynamic = ["dependencies","readme","version"] +keywords = ['PyRuSH', 'NLP', 'sentenczier','sentence segmentation'] authors = [{name = "Jianlin", email="jianlinshi.cn@gmail.com"}] description = '''PyRuSH is the python implementation of RuSH (Rule-based sentence Segmenter using Hashing), which is originally developed using Java. RuSH is an efficient, reliable, and easy adaptable rule-based sentence segmentation solution. It is specifically designed to handle the telegraphic written text in clinical note. It leverages a nested hash table to execute simultaneous rule processing, which reduces the impact of the rule-base growth on execution time and eliminates the effect of rule order on accuracy. If you wish to cite RuSH in a publication, please use: diff --git a/setup.cfg b/setup.cfg index d1415ed..5567273 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,4 @@ description_file = README.md [bdist_wheel] -python_tag=py3 \ No newline at end of file +python_tag=py3 diff --git a/setup.py b/setup.py index 7d5d533..dc07ea3 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,11 @@ from setuptools.extension import Extension from codecs import open from os import path +import os from Cython.Build import cythonize -import numpy -import spacy, cymem, preshed +# import numpy +# import spacy, cymem, preshed from distutils.sysconfig import get_python_inc here = path.abspath(path.dirname(__file__)) @@ -13,25 +14,21 @@ long_description = f.read() -def parse_requirements(filename): - """ load requirements from a pip requirements file """ - lineiter = (line.strip() for line in open(filename)) - return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] +# def parse_requirements(filename): +# """ load requirements from a pip requirements file """ +# lineiter = (line.strip() for line in open(filename)) +# return [line.split("#")[0].strip() for line in lineiter if line and not line.startswith("#")] -print(parse_requirements('requirements.txt')) +# print(parse_requirements('requirements.txt')) def get_version(): - """Load the version from version.py, without importing it. + for line in open(os.path.join(os.path.dirname(__file__), 'PyRuSH', '__init__.py')).read().splitlines(): + if line.startswith('__version__'): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + else: + raise RuntimeError("Unable to find version string.") - This function assumes that the last line in the file contains a variable defining the - version string with single quotes. - - """ - try: - with open('PyRuSH/version.py', 'r') as f: - return f.read().split('\n')[0].split('=')[-1].replace('\'', '').strip() - except IOError: - return "0.0.0a1" COMPILER_DIRECTIVES = { "language_level": 3, @@ -41,10 +38,11 @@ def get_version(): dir_path = path.dirname(path.realpath(__file__)) include_dirs = [dir_path + "/PyRuSH", dir_path, - numpy.get_include(), - path.dirname(spacy.__file__), - path.dirname(cymem.__file__), - path.dirname(preshed.__file__)] + # numpy.get_include(), + # path.dirname(spacy.__file__), + # path.dirname(cymem.__file__), + # path.dirname(preshed.__file__) + ] extensions = [ Extension( 'PyRuSH.StaticSentencizerFun', @@ -83,8 +81,7 @@ def get_version(): license='Apache License', zip_safe=False, include_package_data=True, - install_requires=parse_requirements('requirements.txt'), + # install_requires=parse_requirements('requirements.txt'), ext_modules=cythonize(extensions, compiler_directives=COMPILER_DIRECTIVES), - tests_require='pytest', package_data={'': ['*.pyx', '*.pxd', '*.so', '*.dll', '*.lib', '*.cpp', '*.c','../conf/rush_rules.tsv','../requirements.txt']}, ) From 5736e870f49af9395aab8cfd945dcd7588463da6 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:43:24 +0000 Subject: [PATCH 025/126] add os back --- .github/workflows/run_pytests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 67c93a7..58055a9 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -23,11 +23,11 @@ jobs: # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - [ ubuntu-20.04, ubuntu-20.04 ] - # - [ macos-10.15, macosx_10 ] - # - [ macos-11, macosx_11 ] - # - [ macos-12, macosx_12 ] - # - [ windows-2019, windows-2019 ] - # - [ windows-2022, windows-2022 ] + - [ macos-10.15, macosx_10 ] + - [ macos-11, macosx_11 ] + - [ macos-12, macosx_12 ] + - [ windows-2019, windows-2019 ] + - [ windows-2022, windows-2022 ] # spacy doesn't compile win32 # python: ["cp36"] # Note: Wheels not needed for PyPy python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11.x", "3.12"] # Note: Wheels not needed for PyPy From 4fd846f65aab7f1835ac257540ef2a573a47a145 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 03:54:18 +0000 Subject: [PATCH 026/126] update actions versions --- .github/workflows/wheelbuilder.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index a8d73b1..ebd4b0c 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -58,7 +58,7 @@ jobs: ls -l wheelhouse - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.python }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }} path: ./wheelhouse/*.whl @@ -70,12 +70,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout quicksectx - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Used to push the built wheels - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: # Build sdist on lowest supported Python - python-version: '3.6' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -84,7 +84,7 @@ jobs: - name: Build sdist run: | python setup.py sdist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: sdist path: ./dist/*.tar.gz @@ -97,12 +97,12 @@ jobs: # alternatively, to publish when a GitHub Release is created, use the following rule: # if: github.event_name == 'release' && github.event.action == 'published' steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - name: Install dependencies run: | pip install twine - - uses: actions/download-artifact@v4.1.7 + - uses: actions/download-artifact@v4 with: path: dist From e8f765ac7b397923a55e9f9d92ab2bf2a84efeed Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 04:16:39 +0000 Subject: [PATCH 027/126] remove not supported macos versions --- .github/workflows/run_pytests.yml | 2 -- .github/workflows/wheelbuilder.yml | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 58055a9..517afb6 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -23,8 +23,6 @@ jobs: # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-10.15, macosx_10 ] - - [ macos-11, macosx_11 ] - [ macos-12, macosx_12 ] - [ windows-2019, windows-2019 ] - [ windows-2022, windows-2022 ] diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index ebd4b0c..db0f06d 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -23,10 +23,10 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ubuntu-20.04, manylinux_x86_64] - - [ubuntu-20.04, musllinux_x86_64] - - [macos-10.15, macosx_*] - - [windows-2019, win_amd64] + - [ ubuntu-20.04, ubuntu-20.04 ] + - [ macos-12, macosx_12 ] + - [ windows-2019, windows-2019 ] + - [ windows-2022, windows-2022 ] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy From 5d2d911ab840ad1ccdbb176fe7c5ba5dbfce1149 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 04:18:15 +0000 Subject: [PATCH 028/126] test --- .github/workflows/wheelbuilder.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index db0f06d..7eaff3e 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -50,7 +50,7 @@ jobs: CIBW_BEFORE_BUILD: pip install -r requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_BEFORE_TEST: pip install pytest + CIBW_BEFORE_TEST: pip install -r dev-requirements.txt CIBW_TEST_COMMAND: pytest {package}/tests - name: check build From 2f4611c3a5a721393f68f828fa26dfca1210ba85 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 04:19:59 +0000 Subject: [PATCH 029/126] test --- .github/workflows/wheelbuilder.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index 7eaff3e..37a4927 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -23,10 +23,10 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-12, macosx_12 ] - - [ windows-2019, windows-2019 ] - - [ windows-2022, windows-2022 ] + - [ubuntu-20.04, manylinux_x86_64] + - [ubuntu-20.04, musllinux_x86_64] + - [macos-12, macosx_*] + - [windows-2022, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy From a870afe4dfdaf4e7b176d4ef91d3155171f6097f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 04:27:08 +0000 Subject: [PATCH 030/126] add cp312 --- .github/workflows/wheelbuilder.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index 37a4927..8dc95ce 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -30,7 +30,7 @@ jobs: # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy - python: ["cp36", "cp37", "cp38", "cp39", "cp310","cp311"] # Note: Wheels not needed for PyPy + python: ["cp36", "cp37", "cp38", "cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy steps: - name: Checkout uses: actions/checkout@v3 From 716f59d2d88c173e9da8be55a1035f16c7d4734f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 04:43:45 +0000 Subject: [PATCH 031/126] test --- .github/workflows/wheelbuilder.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder.yml index 8dc95ce..c6eb651 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder.yml @@ -40,7 +40,7 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.11.1 + uses: pypa/cibuildwheel@v2.21.3 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command From 01a245a24d4d0246c53d9ebd3227c6f6a876fd86 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:18:02 +0000 Subject: [PATCH 032/126] split the build for deprecated pythons --- .github/workflows/wheelbuilder3.6.yml | 99 +++++++++++++++++++ .../{wheelbuilder.yml => wheelbuilder3.9.yml} | 4 +- 2 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/wheelbuilder3.6.yml rename .github/workflows/{wheelbuilder.yml => wheelbuilder3.9.yml} (96%) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml new file mode 100644 index 0000000..e617e80 --- /dev/null +++ b/.github/workflows/wheelbuilder3.6.yml @@ -0,0 +1,99 @@ +name: Build_Pub<3.9 +on: + workflow_dispatch + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build_wheels: + name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }} + if: >- + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && + contains(github.event.pull_request.labels.*.name, 'Build System')) + runs-on: ${{ matrix.buildplat[0] }} + timeout-minutes: 50 + strategy: + # Ensure that a wheel builder finishes even if another fails + fail-fast: false + matrix: + # Github Actions doesn't support pairing matrix values together, let's improvise + # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 + buildplat: + - [ubuntu-20.04, manylinux_x86_64] + - [ubuntu-20.04, musllinux_x86_64] + - [macos-12, macosx_*] + - [windows-2022, win_amd64] + # - [windows-2019, win32] +# spacy doesn't compile win32 +# python: ["cp37"] # Note: Wheels not needed for PyPy + python: ["cp36", "cp37", "cp38"] # Note: Wheels not needed for PyPy + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + + - name: Build wheels + uses: pypa/cibuildwheel@v2.13.0 + env: + # TODO: Build Cython with the compile-all flag? + # Unfortunately, there is no way to modify cibuildwheel's build command + # so there is no way to pass this in directly. + # This would require modifying cython's setup.py to look for these flags + # in env vars. + CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' + CIBW_BEFORE_TEST: pip install -r dev-requirements.txt + CIBW_TEST_COMMAND: pytest {package}/tests + + - name: check build + run: | + ls -l wheelhouse + + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.python }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }} + path: ./wheelhouse/*.whl + + + + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + # upload to PyPI on every tag starting with 'v' +# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + # alternatively, to publish when a GitHub Release is created, use the following rule: + # if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/setup-python@v5 + - name: Install dependencies + run: | + pip install twine + + - uses: actions/download-artifact@v4 + with: + path: dist + + - name: check downloaded + run: | + mv ./dist/**/PyRuSH* ./dist/ + rm -rf dist/cp* + rm -rf dist/sdist + ls -R + + - name: Build and publish + run: | + twine upload --skip-existing --verbose dist/* + env: +# TWINE_REPOSITORY: testpypi + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + diff --git a/.github/workflows/wheelbuilder.yml b/.github/workflows/wheelbuilder3.9.yml similarity index 96% rename from .github/workflows/wheelbuilder.yml rename to .github/workflows/wheelbuilder3.9.yml index c6eb651..a057546 100644 --- a/.github/workflows/wheelbuilder.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -1,4 +1,4 @@ -name: Wheel Builder +name: Build_Pub>=3.9 on: workflow_dispatch @@ -30,7 +30,7 @@ jobs: # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy - python: ["cp36", "cp37", "cp38", "cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy + python: ["cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy steps: - name: Checkout uses: actions/checkout@v3 From 744ebf9285a6f5eeb325ac48bbfbbaae09b6218c Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:25:49 +0000 Subject: [PATCH 033/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index e617e80..45baeb1 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -40,7 +40,7 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.13.0 + uses: pypa/cibuildwheel@v2.13.1 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command From 501f6f233390f8f544155968a2f4ba42e07546d8 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:35:36 +0000 Subject: [PATCH 034/126] update pip install test --- .github/workflows/pip_install_unitest.yml | 106 ++++++++++++---------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index afd1a5c..c2d07ca 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -1,60 +1,68 @@ name: pip_install_unitest on: - workflow_dispatch + workflow_dispatch: + inputs: + install_prerelease: + description: 'Check this to install the prerelease version of medspacy if available and the version is newer than formal release.' + type: boolean + required: false + default: false -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true jobs: - build_wheels: - name: Build wheel for ${{ matrix.python-version }}-${{ matrix.buildplat[1] }} - if: >- - github.event_name == 'schedule' || - github.event_name == 'workflow_dispatch' || - (github.event_name == 'pull_request' && - contains(github.event.pull_request.labels.*.name, 'Build System')) - runs-on: ${{ matrix.buildplat[0] }} + + test: + # TODO: Do we care if this only runs on allowed branches since we check push/pull_request above? + # if: github.ref == 'refs/heads/master' OR github.ref == 'refs/heads/develop' strategy: - # Ensure that a wheel builder finishes even if another fails - fail-fast: false matrix: - # Github Actions doesn't support pairing matrix values together, let's improvise - # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 - buildplat: - - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-10.15, macosx_10 ] - - [ macos-11, macosx_11 ] - - [ macos-12, macosx_12 ] - - [ windows-2019, windows-2019 ] - - [ windows-2022, windows-2022 ] - # spacy doesn't compile win32 - # python: ["cp36"] # Note: Wheels not needed for PyPy - python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11.0-rc.2"] # Note: Wheels not needed for PyPy - # python-version: [ "3.7"] - timeout-minutes: 45 + os: [ubuntu-latest, macos-latest, windows-latest] + # python-version: [3.8] + python-version: [3.9, "3.10.x", "3.11.x","3.12"] + # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip + runs-on: ${{ matrix.os }} + steps: - - name: Checkout - uses: actions/checkout@v3 - - name: set up python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install cython - pip install PyRuSH==1.0.8 pytest - - - name: nose tests - run: | - # ls /opt/hostedtoolcache/Python/3*/x64/lib/python*/site-packages/conf - python -c "import shutil;shutil.rmtree('PyRuSH')" - python -c "import shutil;shutil.rmtree('conf')" - ls - python --version - pytest + - uses: actions/checkout@v4 + - name: Git clone repo and remove source code + run: | + pwd + ls + + - name: Set up pip + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: | + 'requirements/requirements.txt' + - run: | + python --version + pip install --upgrade pip + + + - name: Install prereleased PyRuSH + if: ${{ github.event.inputs.install_prerelease == 'true' }} + run: | + # use this to avoid install prereleases of dependencies packages + pip install PyRuSH + pip uninstall -y PyRuSH + pip install --pre PyRuSH + + - name: Install formal released medspacy + if: ${{ github.event.inputs.install_prerelease == 'false' }} + run: | + pip install PyRuSH + + + - name: tests + run: | + # ls /opt/hostedtoolcache/Python/3*/x64/lib/python*/site-packages/conf + python -c "import shutil;shutil.rmtree('PyRuSH')" + python -c "import shutil;shutil.rmtree('conf')" + ls + python --version + pytest From 7e13350ccf288d4aaca847e1d4fbd220ede92775 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:39:01 +0000 Subject: [PATCH 035/126] test --- .github/workflows/wheelbuilder3.6.yml | 4 ++-- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 45baeb1..feed69b 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -26,14 +26,14 @@ jobs: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - [macos-12, macosx_*] - - [windows-2022, win_amd64] + - [windows-2019, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy python: ["cp36", "cp37", "cp38"] # Note: Wheels not needed for PyPy steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index a057546..7a70d56 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -26,14 +26,14 @@ jobs: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - [macos-12, macosx_*] - - [windows-2022, win_amd64] + - [windows-2019, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy python: ["cp39", "cp310","cp311", "cp312"] # Note: Wheels not needed for PyPy steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | From ba590ef8da5c47a6bca4f0ee2b37fae18c12439f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:41:10 +0000 Subject: [PATCH 036/126] test --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 7a70d56..561d859 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -48,7 +48,7 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install -r requirements.txt - CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_BUILD: "${{ matrix.python }}-${{ matrix.buildplat[1] }}" CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt CIBW_TEST_COMMAND: pytest {package}/tests From 9a72b36ce75c53cda4dc5ea04ee59a8cb6003430 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:45:02 +0000 Subject: [PATCH 037/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index feed69b..0cffe78 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -48,7 +48,7 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install -r requirements.txt - CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_BUILD: "${{ matrix.python }}-${{ matrix.buildplat[1] }}" CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt CIBW_TEST_COMMAND: pytest {package}/tests diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 561d859..0618e07 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -26,7 +26,7 @@ jobs: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - [macos-12, macosx_*] - - [windows-2019, win_amd64] + - [windows-2022, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy @@ -48,7 +48,7 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install -r requirements.txt - CIBW_BUILD: "${{ matrix.python }}-${{ matrix.buildplat[1] }}" + CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt CIBW_TEST_COMMAND: pytest {package}/tests From 2636c752fcfea9d542f2d39f7924c0f49a93bee6 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:48:28 +0000 Subject: [PATCH 038/126] test --- .github/workflows/wheelbuilder3.6.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 0cffe78..9ab52ae 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -25,7 +25,7 @@ jobs: buildplat: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - - [macos-12, macosx_*] + - [macos-12, macosx_x86_64] - [windows-2019, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 @@ -48,7 +48,7 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install -r requirements.txt - CIBW_BUILD: "${{ matrix.python }}-${{ matrix.buildplat[1] }}" + CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt CIBW_TEST_COMMAND: pytest {package}/tests From c9de97334a94917fc397725414357848a7175daf Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:48:55 +0000 Subject: [PATCH 039/126] test --- .github/workflows/wheelbuilder3.6.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 9ab52ae..dbe8e10 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -2,10 +2,6 @@ name: Build_Pub<3.9 on: workflow_dispatch -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - jobs: build_wheels: name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }} From b99f7dbcb8ed32de89e2814512c22a96d6c221ee Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 05:58:01 +0000 Subject: [PATCH 040/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index dbe8e10..236b4fc 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -36,7 +36,7 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.13.1 + uses: pypa/cibuildwheel@v2.21.3 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command From 1c455986a12e81a877ef230c318e43f6d0a841dd Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 10:11:37 -0600 Subject: [PATCH 041/126] test --- .github/workflows/pip_install_unitest.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index c2d07ca..b068fa1 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -45,8 +45,7 @@ jobs: if: ${{ github.event.inputs.install_prerelease == 'true' }} run: | # use this to avoid install prereleases of dependencies packages - pip install PyRuSH - pip uninstall -y PyRuSH + pip install -r requirements.txt pip install --pre PyRuSH - name: Install formal released medspacy From 0b473255195b6bcdfdde5f29c68986d521a15429 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 10:15:54 -0600 Subject: [PATCH 042/126] add pytest --- .github/workflows/pip_install_unitest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index b068fa1..fef2ad1 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -57,6 +57,7 @@ jobs: - name: tests run: | # ls /opt/hostedtoolcache/Python/3*/x64/lib/python*/site-packages/conf + pip install pytest python -c "import shutil;shutil.rmtree('PyRuSH')" python -c "import shutil;shutil.rmtree('conf')" ls From 1bde093e69c86b0798a8f47c7ac47767e0e18937 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 10:26:18 -0600 Subject: [PATCH 043/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 236b4fc..49ee7c4 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -43,7 +43,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From 30beacda97f862ccb4622086073825911d425025 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Fri, 25 Oct 2024 10:49:32 -0600 Subject: [PATCH 044/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 49ee7c4..cb9e1fa 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -43,7 +43,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From 8385b98ad762c180540a12ac669430dba9c7fb44 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:04:18 -0600 Subject: [PATCH 045/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index cb9e1fa..b32bb44 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -46,7 +46,7 @@ jobs: CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_BEFORE_TEST: pip install -r dev-requirements.txt + CIBW_BEFORE_TEST: pip install Cython & pip install -r dev-requirements.txt & pip install urllib3==1.26.14 CIBW_TEST_COMMAND: pytest {package}/tests - name: check build From 8baa5c5feed46a3bcb3a7f003a380e98f2c9dccc Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:07:55 -0600 Subject: [PATCH 046/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index b32bb44..b9df4dc 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -46,7 +46,7 @@ jobs: CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_BEFORE_TEST: pip install Cython & pip install -r dev-requirements.txt & pip install urllib3==1.26.14 + CIBW_BEFORE_TEST: pip install Cython & pip install -r dev-requirements.txt & pip install pytest & pip install urllib3==1.26.14 CIBW_TEST_COMMAND: pytest {package}/tests - name: check build From 98bfe4fb23bfcfb43e81acb99647c047152acf56 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:10:51 -0600 Subject: [PATCH 047/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index b9df4dc..bee1010 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -43,7 +43,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 + CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install Cython & pip install -r dev-requirements.txt & pip install pytest & pip install urllib3==1.26.14 From 80f9e3be081e2751c0ce3511b3fbaf417c67b529 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:13:36 -0600 Subject: [PATCH 048/126] test --- .github/workflows/wheelbuilder3.6.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index bee1010..61633f0 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -46,7 +46,6 @@ jobs: CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest CIBW_BUILD: ${{ matrix.python }}-* CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_BEFORE_TEST: pip install Cython & pip install -r dev-requirements.txt & pip install pytest & pip install urllib3==1.26.14 CIBW_TEST_COMMAND: pytest {package}/tests - name: check build From 4bd6d25ddc02996121d3a562071768b766dfd86d Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:14:39 -0600 Subject: [PATCH 049/126] tte --- .github/workflows/wheelbuilder3.6.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 61633f0..2f6acc5 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -44,9 +44,13 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest - CIBW_BUILD: ${{ matrix.python }}-* - CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' - CIBW_TEST_COMMAND: pytest {package}/tests + CIBW_BUILD: ${{ matrix.python.py_ver }}-${{ matrix.buildplat[1] }} + CIBW_ARCHS_MACOS: universal2 + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest {project}/tests + CIBW_ENVIRONMENT: | + CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' + LDFLAGS='-fPIC' - name: check build run: | From 689f5a4037d413029b766612062f3f8bf00e9f6a Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:16:28 -0600 Subject: [PATCH 050/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 2f6acc5..937e81a 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -44,7 +44,7 @@ jobs: # This would require modifying cython's setup.py to look for these flags # in env vars. CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest - CIBW_BUILD: ${{ matrix.python.py_ver }}-${{ matrix.buildplat[1] }} + CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ARCHS_MACOS: universal2 CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: pytest {project}/tests From e9ab92239e60a88d314339edfb7591b360bf7cb7 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 17:53:52 -0600 Subject: [PATCH 051/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 937e81a..301a5d7 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -46,7 +46,7 @@ jobs: CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ARCHS_MACOS: universal2 - CIBW_TEST_REQUIRES: pytest + CIBW_TEST_REQUIRES: pytest, spacy>=3.0.0 CIBW_TEST_COMMAND: pytest {project}/tests CIBW_ENVIRONMENT: | CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' From 29d3fc1a20c1ac37d19770490b01e8c7d4995c81 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 19:44:21 -0600 Subject: [PATCH 052/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 301a5d7..baa923a 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -46,7 +46,7 @@ jobs: CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ARCHS_MACOS: universal2 - CIBW_TEST_REQUIRES: pytest, spacy>=3.0.0 + CIBW_TEST_REQUIRES: pytest spacy>=3.0.0 CIBW_TEST_COMMAND: pytest {project}/tests CIBW_ENVIRONMENT: | CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' From 8d97de7800f2bb3c055232295063bc3cfbbe60d8 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 19:45:59 -0600 Subject: [PATCH 053/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index baa923a..03d38ed 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -14,7 +14,7 @@ jobs: timeout-minutes: 50 strategy: # Ensure that a wheel builder finishes even if another fails - fail-fast: false + fail-fast: true matrix: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 From e211b6cc9ba4c6815a93d21eeebd8248daf9254f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:07:51 -0600 Subject: [PATCH 054/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 03d38ed..4386e36 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -21,7 +21,7 @@ jobs: buildplat: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - - [macos-12, macosx_x86_64] + - [macos-12, macosx_*] - [windows-2019, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 From aa05f45ab5116c1f590f0ce69b937f11f476dc08 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:23:01 -0600 Subject: [PATCH 055/126] test --- .github/workflows/wheelbuilder3.6.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 4386e36..15056a0 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -43,8 +43,8 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install Cython & pip install -r requirements.txt & pip install urllib3==1.26.14 pytest - CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_BEFORE_BUILD: pip install Cython; pip install -r requirements.txt; pip install urllib3==1.26.14; cd PyRuSH; python setup.py build_ext --inplace + CIBW_BUILD: ${{ matrix.python }}-* CIBW_ARCHS_MACOS: universal2 CIBW_TEST_REQUIRES: pytest spacy>=3.0.0 CIBW_TEST_COMMAND: pytest {project}/tests From de0e57c8b264e2a107406d85c17cbb731d10da12 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:34:42 -0600 Subject: [PATCH 056/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 15056a0..6edde14 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -36,7 +36,7 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 + uses: pypa/cibuildwheel@v2.13.0 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command From 246899ce6efcf91c47a13bf2d0b1ba79c483f82f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:36:44 -0600 Subject: [PATCH 057/126] test --- .github/workflows/wheelbuilder3.6.yml | 53 ++++++++++++++++++++------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 6edde14..21c1d1d 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -1,7 +1,11 @@ -name: Build_Pub<3.9 +name: Build_Pub>=3.9 on: workflow_dispatch +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + jobs: build_wheels: name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }} @@ -14,7 +18,7 @@ jobs: timeout-minutes: 50 strategy: # Ensure that a wheel builder finishes even if another fails - fail-fast: true + fail-fast: false matrix: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 @@ -22,11 +26,11 @@ jobs: - [ubuntu-20.04, manylinux_x86_64] - [ubuntu-20.04, musllinux_x86_64] - [macos-12, macosx_*] - - [windows-2019, win_amd64] + - [windows-2022, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy - python: ["cp36", "cp37", "cp38"] # Note: Wheels not needed for PyPy + python: ["cp36", "cp37","cp38"] # Note: Wheels not needed for PyPy steps: - name: Checkout uses: actions/checkout@v4 @@ -36,21 +40,18 @@ jobs: python3 -m pip install --upgrade pip - name: Build wheels - uses: pypa/cibuildwheel@v2.13.0 + uses: pypa/cibuildwheel@v2.21.3 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install Cython; pip install -r requirements.txt; pip install urllib3==1.26.14; cd PyRuSH; python setup.py build_ext --inplace - CIBW_BUILD: ${{ matrix.python }}-* - CIBW_ARCHS_MACOS: universal2 - CIBW_TEST_REQUIRES: pytest spacy>=3.0.0 - CIBW_TEST_COMMAND: pytest {project}/tests - CIBW_ENVIRONMENT: | - CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' - LDFLAGS='-fPIC' + CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} + CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' + CIBW_BEFORE_TEST: pip install -r dev-requirements.txt + CIBW_TEST_COMMAND: pytest {package}/tests - name: check build run: | @@ -64,8 +65,32 @@ jobs: + build_sdist: + name: Build sdist + runs-on: ubuntu-latest + steps: + - name: Checkout quicksectx + uses: actions/checkout@v4 + # Used to push the built wheels + - uses: actions/setup-python@v5 + with: + # Build sdist on lowest supported Python + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Build sdist + run: | + python setup.py sdist + - uses: actions/upload-artifact@v4 + with: + name: sdist + path: ./dist/*.tar.gz + upload_pypi: - needs: [build_wheels] + needs: [build_wheels, build_sdist] runs-on: ubuntu-latest # upload to PyPI on every tag starting with 'v' # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') From d3d1d3849d5a5ffd2e613aa5730bd185c6022899 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:37:06 -0600 Subject: [PATCH 058/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 21c1d1d..e8e933b 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -1,4 +1,4 @@ -name: Build_Pub>=3.9 +name: Build_Pub<3.9 on: workflow_dispatch From aa2838fe5f0c38f2ed94f639b9f5ed231aa1dd04 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:39:14 -0600 Subject: [PATCH 059/126] test --- .github/workflows/wheelbuilder3.6.yml | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index e8e933b..18b6eeb 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -65,30 +65,6 @@ jobs: - build_sdist: - name: Build sdist - runs-on: ubuntu-latest - steps: - - name: Checkout quicksectx - uses: actions/checkout@v4 - # Used to push the built wheels - - uses: actions/setup-python@v5 - with: - # Build sdist on lowest supported Python - python-version: '3.10' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Build sdist - run: | - python setup.py sdist - - uses: actions/upload-artifact@v4 - with: - name: sdist - path: ./dist/*.tar.gz - upload_pypi: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest From dd2ba5b0c23434953c76b3748faf8a4e3d4f332e Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:40:29 -0600 Subject: [PATCH 060/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 18b6eeb..c95c107 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -66,7 +66,7 @@ jobs: upload_pypi: - needs: [build_wheels, build_sdist] + needs: [build_wheels] runs-on: ubuntu-latest # upload to PyPI on every tag starting with 'v' # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') From c0f673e1304dac3b06de6b3db3f72316945bab68 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:51:58 -0600 Subject: [PATCH 061/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index c95c107..85d9ce7 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -47,7 +47,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install -r requirements.txt; pip install urllib3==1.26.14 CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From 1d12c1fb42d20f8225324693213f976c18b36e38 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 20:55:48 -0600 Subject: [PATCH 062/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 85d9ce7..1c07217 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -47,7 +47,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -r requirements.txt; pip install urllib3==1.26.14 + CIBW_BEFORE_BUILD: pwd;ls;pip install -r requirements.txt; pip install urllib3==1.26.14 CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From dd0f56e2238d4917d48e44defefec22a5bdc64f7 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 21:20:00 -0600 Subject: [PATCH 063/126] test --- .github/workflows/wheelbuilder3.6.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 1c07217..438ec35 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -30,7 +30,8 @@ jobs: # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy - python: ["cp36", "cp37","cp38"] # Note: Wheels not needed for PyPy +# cp37 not working with manylinux_x86_64 somehow + python: ["cp36", "cp38"] # Note: Wheels not needed for PyPy steps: - name: Checkout uses: actions/checkout@v4 @@ -47,7 +48,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pwd;ls;pip install -r requirements.txt; pip install urllib3==1.26.14 + CIBW_BEFORE_BUILD: pwd;ls;pip install -r requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From 170f322d630304c68560e2d9454707be5558dacc Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 21:53:54 -0600 Subject: [PATCH 064/126] test --- .github/workflows/pip_install_unitest.yml | 2 +- .github/workflows/wheelbuilder3.6.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index fef2ad1..c87f169 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] # python-version: [3.8] - python-version: [3.9, "3.10.x", "3.11.x","3.12"] + python-version: [3.8, 3.9, "3.10.x", "3.11.x","3.12"] # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip runs-on: ${{ matrix.os }} diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 438ec35..6c2dc9e 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -48,7 +48,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pwd;ls;pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install -r requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt From 36224c1f68386c66be3be8bca9616883e4f04a7a Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sat, 26 Oct 2024 21:54:16 -0600 Subject: [PATCH 065/126] test --- .github/workflows/wheelbuilder3.6.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.6.yml b/.github/workflows/wheelbuilder3.6.yml index 6c2dc9e..d2210f6 100644 --- a/.github/workflows/wheelbuilder3.6.yml +++ b/.github/workflows/wheelbuilder3.6.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 50 strategy: # Ensure that a wheel builder finishes even if another fails - fail-fast: false + fail-fast: true matrix: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 From 59cedc76b44570383e19f8113cd85146a5ef77ff Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 07:53:07 -0600 Subject: [PATCH 066/126] test --- .github/workflows/pip_install_unitest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index c87f169..b5352fb 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] # python-version: [3.8] - python-version: [3.8, 3.9, "3.10.x", "3.11.x","3.12"] + python-version: [3.6,3.7,3.8, 3.9, "3.10.x", "3.11.x","3.12"] # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip runs-on: ${{ matrix.os }} From 4a699d671323fa7f1dafb16f9caaf99ecb1b37b9 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 07:58:28 -0600 Subject: [PATCH 067/126] test --- .github/workflows/pip_install_unitest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index b5352fb..c87f169 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] # python-version: [3.8] - python-version: [3.6,3.7,3.8, 3.9, "3.10.x", "3.11.x","3.12"] + python-version: [3.8, 3.9, "3.10.x", "3.11.x","3.12"] # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip runs-on: ${{ matrix.os }} From 5423498688f81c8216186839d9f3cf17df9ed08d Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 07:59:33 -0600 Subject: [PATCH 068/126] test --- .github/workflows/pip_old_install_unitest.yml | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/pip_old_install_unitest.yml diff --git a/.github/workflows/pip_old_install_unitest.yml b/.github/workflows/pip_old_install_unitest.yml new file mode 100644 index 0000000..923924a --- /dev/null +++ b/.github/workflows/pip_old_install_unitest.yml @@ -0,0 +1,68 @@ +name: pip_install_unitest +on: + workflow_dispatch: + inputs: + install_prerelease: + description: 'Check this to install the prerelease version of medspacy if available and the version is newer than formal release.' + type: boolean + required: false + default: false + + +jobs: + + test: + # TODO: Do we care if this only runs on allowed branches since we check push/pull_request above? + # if: github.ref == 'refs/heads/master' OR github.ref == 'refs/heads/develop' + strategy: + matrix: + os: [ubuntu-20.04, macos-latest, windows-latest] + # python-version: [3.8] + python-version: [3.6,3.7] + # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Git clone repo and remove source code + run: | + pwd + ls + + - name: Set up pip + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: | + 'requirements/requirements.txt' + - run: | + python --version + pip install --upgrade pip + + + - name: Install prereleased PyRuSH + if: ${{ github.event.inputs.install_prerelease == 'true' }} + run: | + # use this to avoid install prereleases of dependencies packages + pip install -r requirements.txt + pip install --pre PyRuSH + + - name: Install formal released medspacy + if: ${{ github.event.inputs.install_prerelease == 'false' }} + run: | + pip install PyRuSH + + + - name: tests + run: | + # ls /opt/hostedtoolcache/Python/3*/x64/lib/python*/site-packages/conf + pip install pytest + python -c "import shutil;shutil.rmtree('PyRuSH')" + python -c "import shutil;shutil.rmtree('conf')" + ls + python --version + pytest + + + From 365e2b91535a138ec650cf6fc840e241a7d50f0f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 07:59:52 -0600 Subject: [PATCH 069/126] test --- .github/workflows/pip_old_install_unitest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pip_old_install_unitest.yml b/.github/workflows/pip_old_install_unitest.yml index 923924a..eff083c 100644 --- a/.github/workflows/pip_old_install_unitest.yml +++ b/.github/workflows/pip_old_install_unitest.yml @@ -1,4 +1,4 @@ -name: pip_install_unitest +name: pip_old_install_unitest on: workflow_dispatch: inputs: From 7807da352b81e3f0b8194b6bd686345610e9e9f4 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 08:01:52 -0600 Subject: [PATCH 070/126] test --- .github/workflows/pip_old_install_unitest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pip_old_install_unitest.yml b/.github/workflows/pip_old_install_unitest.yml index eff083c..4e97cfc 100644 --- a/.github/workflows/pip_old_install_unitest.yml +++ b/.github/workflows/pip_old_install_unitest.yml @@ -16,7 +16,7 @@ jobs: # if: github.ref == 'refs/heads/master' OR github.ref == 'refs/heads/develop' strategy: matrix: - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-20.04, macos-12, windows-latest] # python-version: [3.8] python-version: [3.6,3.7] # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip From ba7d4595a8658402a994eacab9a3038b9c60293c Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Sun, 27 Oct 2024 08:21:25 -0600 Subject: [PATCH 071/126] 1.0.9 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 7dc03c0..33f62a6 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.9a' +__version__ = '1.0.9' From a6a2564bbcf7080dfb5f14f42dc3624876e4bcc9 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 02:52:18 +0000 Subject: [PATCH 072/126] Refactor PyRuSHSentencizer to support gap merging and splitting; update dependencies in devcontainer.json; add debug notebook for testing --- .devcontainer/devcontainer.json | 9 +- PyRuSH/PyRuSHSentencizer.py | 14 +- PyRuSH/StaticSentencizerFun.pyx | 57 +++++- notebooks/debug.ipynb | 334 ++++++++++++++++++++++++++++++++ tests/test_PyRushSentencizer.py | 13 +- 5 files changed, 409 insertions(+), 18 deletions(-) create mode 100644 notebooks/debug.ipynb diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 23e93de..806ca81 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,9 +1,4 @@ { - "image": "mcr.microsoft.com/devcontainers/universal:2", - "features": { - "ghcr.io/rocker-org/devcontainer-features/miniforge:2": { - "version": "latest", - "variant": "Miniforge3" - } - } + "image": "condaforge/miniforge3", + "postCreateCommand": "pip install -r dev-requirements.txt" } \ No newline at end of file diff --git a/PyRuSH/PyRuSHSentencizer.py b/PyRuSH/PyRuSHSentencizer.py index f296af5..cf68a3b 100644 --- a/PyRuSH/PyRuSHSentencizer.py +++ b/PyRuSH/PyRuSHSentencizer.py @@ -19,13 +19,13 @@ from spacy.pipeline import Sentencizer from .RuSH import RuSH -from .StaticSentencizerFun import cpredict, cset_annotations +from .StaticSentencizerFun import cpredict_merge_gaps,cpredict_split_gaps, cset_annotations @Language.factory("medspacy_pyrush") class PyRuSHSentencizer(Sentencizer): def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str = '', max_repeat: int = 50, - auto_fix_gaps: bool = True) -> Sentencizer: + auto_fix_gaps: bool = True, merge_gaps: bool = False) -> Sentencizer: """ @param rules_path: The string of the rule file path or rules themselves. By default, it will look for @@ -33,7 +33,8 @@ def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str @param max_repeat: Total number of replicates that allows to be handled by "+" wildcard. @param auto_fix_gaps: If gaps are caused by malcrafted rules, try to fix them. However, this has no control of sentence end, - TODO: need to see how the downsteam spacy components make use of doc.c + @param merge_gaps: When True, gaps between sentences are merged into the preceding sentence. + When False, gaps are split into separate sentences. """ self.nlp = nlp self.name = name @@ -43,6 +44,7 @@ def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str rules_path = str(os.path.join(root, 'conf', 'rush_rules.tsv')) self.rules_path = rules_path self.rush = RuSH(rules=rules_path, max_repeat=max_repeat, auto_fix_gaps=auto_fix_gaps) + self.merge_gaps = merge_gaps @classmethod def from_nlp(cls, nlp, **cfg): @@ -57,7 +59,11 @@ def predict(self, docs): """Apply the pipeline's model to a batch of docs, without modifying them. """ - guesses = cpredict(docs, self.rush.segToSentenceSpans) + if self.merge_gaps: + from .StaticSentencizerFun import cpredict_ww + guesses = cpredict_merge_gaps(docs, self.rush.segToSentenceSpans) + else: + guesses = cpredict_split_gaps(docs, self.rush.segToSentenceSpans) return guesses def set_annotations(self, docs, batch_tag_ids, tensors=None): diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index b22f67c..b1f861e 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -15,7 +15,7 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # ****************************************************************************** -cpdef cpredict(docs, sentencizer_fun): +cpdef cpredict_merge_gaps(docs, sentencizer_fun): cdef list guesses cdef int s cdef int t @@ -45,6 +45,61 @@ cpdef cpredict(docs, sentencizer_fun): guesses.append(doc_guesses) return guesses +cpdef cpredict_split_gaps(docs, sentencizer_fun): + cdef list guesses + cdef int s + cdef int t + cdef int last_span_end + guesses = [] + for doc in docs: + if len(doc) == 0: + guesses.append([]) + continue + doc_guesses = [False] * len(doc) + sentence_spans = sentencizer_fun(doc.text) + s = 0 + t = 0 + last_span_end = -1 # Track the end of the last span + + prev_span_end = None + while t < len(doc): + token = doc[t] + # Check for gap between previous span and current span + if s < len(sentence_spans): + span = sentence_spans[s] + # If there is a gap between previous span and current span + if prev_span_end is not None and span.begin >= prev_span_end: + # Always mark the first token after prev_span_end, even if whitespace + for gap_t in range(t, len(doc)): + gap_token = doc[gap_t] + if gap_token.idx >= prev_span_end: + doc_guesses[gap_t] = True + t = gap_t + break + prev_span_end = None + continue + # Mark the first token of the span + if token.idx <= span.begin < token.idx + len(token): + doc_guesses[t] = True + prev_span_end = span.end + t += 1 + s += 1 + elif token.idx + len(token) <= span.begin: + t += 1 + else: + prev_span_end = span.end + s += 1 + else: + # After all spans, handle any trailing tokens after last span + if prev_span_end is not None and token.idx > prev_span_end: + doc_guesses[t] = True + prev_span_end = None + t += 1 + continue + t += 1 + guesses.append(doc_guesses) + return guesses + cpdef cset_annotations(docs, batch_tag_ids, tensors=None): if type(docs) !=list: docs = [docs] diff --git a/notebooks/debug.ipynb b/notebooks/debug.ipynb new file mode 100644 index 0000000..5684d9f --- /dev/null +++ b/notebooks/debug.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "a297d69d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from PyRuSH import PyRuSHSentencizer\n", + "from spacy.lang.en import English\n", + "from PyRuSH import RuSH\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c9128bb3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n" + ] + } + ], + "source": [ + "rush = RuSH(os.path.join('../conf/rush_rules.tsv'), enable_logger=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "81ef94a6", + "metadata": {}, + "outputs": [], + "source": [ + "input_str = ''' \n", + "\n", + " \n", + " Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina.\n", + "We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease.\n", + "She may continue in the future to have angina and she will have nitroglycerin available for that if needed.\n", + "Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor.\n", + "So her discharge meds are as follows:\n", + "1. Coreg 6.25 mg b.i.d.\n", + "2. Simvastatin 40 mg nightly.\n", + "3. Lisinopril 5 mg b.i.d.\n", + "4. Protonix 40 mg a.m.\n", + "5. Aspirin 160 mg a day.\n", + "6. Lasix 20 mg b.i.d.\n", + "7. Spiriva puff daily.\n", + "8. Albuterol p.r.n. q.i.d.\n", + "9. Advair 500/50 puff b.i.d.\n", + "10. Xopenex q.i.d. and p.r.n.\n", + "I will see her in a month to six weeks. She is to follow up with Dr. X before that.\n", + " \n", + "\n", + "\n", + " Ezoic - MTSam Sample Bottom Matched Content - native_bottom \n", + "\n", + "\n", + "\n", + "\n", + " End Ezoic - MTSam Sample Bottom Matched Content - native_bottom\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2f8f59b4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-08-25 01:16:56,451 - PyRuSH.RuSH - DEBUG - stbegin\n", + "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t19-20:1.0\t \t[Rule 57:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t23-24:1.0\t Ms. \t[Rule 49:\t\\c.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,453 - PyRuSH.RuSH - DEBUG - \t132-136:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t189-192:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t241-244:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 565:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,455 - PyRuSH.RuSH - DEBUG - \t300-303:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t332-337:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 964:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t428-432:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,457 - PyRuSH.RuSH - DEBUG - \t461-462:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 204:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t19-20:1.0\t \t[Rule 57:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t23-24:1.0\t Ms. \t[Rule 49:\t\\c.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,453 - PyRuSH.RuSH - DEBUG - \t132-136:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t189-192:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t241-244:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 565:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,455 - PyRuSH.RuSH - DEBUG - \t300-303:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t332-337:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 964:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t428-432:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,457 - PyRuSH.RuSH - DEBUG - \t461-462:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 204:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,459 - PyRuSH.RuSH - DEBUG - \t499-504:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: <1>\t[Rule 784:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-25 01:16:56,460 - PyRuSH.RuSH - DEBUG - stend\n", + "2025-08-25 01:16:56,461 - PyRuSH.RuSH - DEBUG - \t0-20:1.0\t< >\t[Rule 517:\t\\s+\\n\\n+\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,462 - PyRuSH.RuSH - DEBUG - \t129-130:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,463 - PyRuSH.RuSH - DEBUG - \t187-188:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina<.>\t[Rule 417:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,466 - PyRuSH.RuSH - DEBUG - \t238-239:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG<.>\t[Rule 413:\t\\C\\C\\C(.)\\s+\\C\\c\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,467 - PyRuSH.RuSH - DEBUG - \t297-298:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001<.>\t[Rule 347:\t\\d(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,468 - PyRuSH.RuSH - DEBUG - \t327-328:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,469 - PyRuSH.RuSH - DEBUG - \t374-375:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d<.>\t[Rule 311:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t425-426:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t459-460:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor<.>\t[Rule 417:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t497-498:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows<:>\t[Rule 407:\t\\c(:)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-25 01:16:56,472 - PyRuSH.RuSH - DEBUG - Sentence(19-130):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "2025-08-25 01:16:56,475 - PyRuSH.RuSH - DEBUG - Sentence(132-188):\t>She has chronic lung disease with bronchospastic angina.<\n", + "2025-08-25 01:16:56,477 - PyRuSH.RuSH - DEBUG - Sentence(189-239):\t>We discovered new T-wave abnormalities on her EKG.<\n", + "2025-08-25 01:16:56,478 - PyRuSH.RuSH - DEBUG - Sentence(241-298):\t>There was of course a four-vessel bypass surgery in 2001.<\n", + "2025-08-25 01:16:56,481 - PyRuSH.RuSH - DEBUG - Sentence(300-328):\t>We did a coronary angiogram.<\n", + "2025-08-25 01:16:56,482 - PyRuSH.RuSH - DEBUG - Sentence(332-426):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", + "2025-08-25 01:16:56,483 - PyRuSH.RuSH - DEBUG - Sentence(428-460):\t>She also is on an ACE inhibitor.<\n", + "2025-08-25 01:16:56,484 - PyRuSH.RuSH - DEBUG - Sentence(461-498):\t>So her discharge meds are as follows:<\n", + "2025-08-25 01:16:56,484 - PyRuSH.RuSH - DEBUG - Sentence(499-523):\t>1. Coreg 6.25 mg b.i.d.<\n" + ] + } + ], + "source": [ + "sents=rush.segToSentenceSpans(input_str)\n", + "# 2025-08-24 23:45:48,415 - PyRuSH.RuSH - DEBUG - Sentence(19-130):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "# 2025-08-24 23:45:48,416 - PyRuSH.RuSH - DEBUG - Sentence(132-188):\t>She has chronic lung disease with bronchospastic angina.<\n", + "# 2025-08-24 23:45:48,416 - PyRuSH.RuSH - DEBUG - Sentence(189-239):\t>We discovered new T-wave abnormalities on her EKG.<\n", + "# 2025-08-24 23:45:48,420 - PyRuSH.RuSH - DEBUG - Sentence(241-298):\t>There was of course a four-vessel bypass surgery in 2001.<\n", + "# 2025-08-24 23:45:48,420 - PyRuSH.RuSH - DEBUG - Sentence(300-328):\t>We did a coronary angiogram.<\n", + "# 2025-08-24 23:45:48,421 - PyRuSH.RuSH - DEBUG - Sentence(332-426):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", + "# 2025-08-24 23:45:48,421 - PyRuSH.RuSH - DEBUG - Sentence(428-460):\t>She also is on an ACE inhibitor.<\n", + "# 2025-08-24 23:45:48,422 - PyRuSH.RuSH - DEBUG - Sentence(461-498):\t>So her discharge meds are as follows:<\n", + "# 2025-08-24 23:45:48,422 - PyRuSH.RuSH - DEBUG - Sentence(499-523):\t>1. Coreg 6.25 mg b.i.d.<" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "52bdf6d1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PyRuSH.RuSH - DEBUG - Sentence(0-19):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(19-130):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(131-132):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(132-188):\t>She has chronic lung disease with bronchospastic angina.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(188-189):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(189-239):\t>We discovered new T-wave abnormalities on her EKG.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(240-241):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(241-298):\t>There was of course a four-vessel bypass surgery in 2001.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(299-300):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(300-328):\t>We did a coronary angiogram.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(329-330):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(330-442):\t>This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(442-443):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(443-550):\t>She may continue in the future to have angina and she will have nitroglycerin available for that if needed.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(550-551):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(551-661):\t>Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(662-663):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(663-757):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(758-759):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(759-791):\t>She also is on an ACE inhibitor.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(791-792):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(792-829):\t>So her discharge meds are as follows:<\n", + "PyRuSH.RuSH - DEBUG - Sentence(829-830):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(830-854):\t>1. Coreg 6.25 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(854-855):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(855-885):\t>2. Simvastatin 40 mg nightly.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(885-886):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(886-912):\t>3. Lisinopril 5 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(912-913):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(913-936):\t>4. Protonix 40 mg a.m.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(936-937):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(937-962):\t>5. Aspirin 160 mg a day.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(962-963):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(963-985):\t>6. Lasix 20 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(985-986):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(986-1009):\t>7. Spiriva puff daily.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1009-1010):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1010-1037):\t>8. Albuterol p.r.n. q.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1037-1038):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1038-1067):\t>9. Advair 500/50 puff b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1067-1068):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1068-1098):\t>10. Xopenex q.i.d. and p.r.n.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1098-1099):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1099-1138):\t>I will see her in a month to six weeks.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1139-1140):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1140-1183):\t>She is to follow up with Dr. X before that.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1183-1193):\t> Ezoic<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1194-1247):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1248-1263):\t> End Ezoic<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1264-1318):\t>- MTSam Sample Bottom Matched Content - native_bottom <\n" + ] + } + ], + "source": [ + "\n", + "nlp = English()\n", + "nlp.add_pipe(\"medspacy_pyrush\")\n", + "doc = nlp(input_str)\n", + "for sent in doc.sents:\n", + " start = sent.start_char\n", + " end = sent.end_char\n", + " print(f\"PyRuSH.RuSH - DEBUG - Sentence({start}-{end}):\\t>{str(sent)}<\".replace('\\n',' '))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e5f9fe60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(list(doc.sents))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6f21337d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> \n", + "\n", + "\n", + " <\n", + "----\n", + "\n", + ">Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "----\n", + "\n", + "> <\n", + "----\n", + "\n", + ">She has chronic lung disease with bronchospastic angina.<\n", + "----\n", + "\n", + ">\n", + "<\n", + "----\n", + "\n", + ">We discovered new T-wave abnormalities on her EKG.<\n", + "----\n", + "\n", + "> <\n", + "----\n", + "\n", + ">There was of course a four-vessel bypass surgery in 2001.<\n", + "----\n", + "\n", + "> <\n", + "----\n", + "\n", + ">We did a coronary angiogram.<\n", + "----\n", + "\n", + ">\n", + "\n", + "<\n", + "----\n", + "\n" + ] + } + ], + "source": [ + "input_str = ''' \n", + "\n", + "\n", + " Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina.\n", + "We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \n", + "\n", + "'''\n", + "from PyRuSH.RuSH import initLogger\n", + "initLogger()\n", + "nlp = English()\n", + "nlp.add_pipe(\"medspacy_pyrush\")\n", + "doc = nlp(input_str)\n", + "sents = [s for s in doc.sents]\n", + "for sent in sents:\n", + " print('>' + str(sent) + '<\\n----\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cf31051", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index 0bb12fa..8670104 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -55,10 +55,11 @@ def test_doc2(self): for sent in sents: print('>' + str(sent) + '<\n\n') - assert (len(sents) == 26) - # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. - assert (sents[1].text=='Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' - ' chest pain and respiratory insufficiency. ') + # New expected count includes whitespace-only sentences + assert (len(sents) == 50) + # For content checks, filter out whitespace-only sentences + content_sents = [s for s in sents if s.text.strip()] + assert (content_sents[0].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.') def test_doc3(self): input_str = ''' @@ -79,7 +80,7 @@ def test_doc3(self): # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. assert (sents[1].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' - ' chest pain and respiratory insufficiency. ') + ' chest pain and respiratory insufficiency.') def test_customized_rules(self): input_str = ''' @@ -113,4 +114,4 @@ def test_customized_rules(self): # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. assert (sents[1].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' - ' chest pain and respiratory insufficiency. ') \ No newline at end of file + ' chest pain and respiratory insufficiency.') \ No newline at end of file From 9cbfba7a1370c993706093e6511d220405a43518 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 02:53:16 +0000 Subject: [PATCH 073/126] Bump version to 1.0.10dev --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 33f62a6..ad89474 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.9' +__version__ = '1.0.10dev' From ac95cb4554cdcd0ebe6d9090c3514153064891c1 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 03:06:11 +0000 Subject: [PATCH 074/126] Update build matrix to use latest OS versions for wheel building --- .github/workflows/wheelbuilder3.9.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 0618e07..2f1ee3e 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -23,10 +23,11 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ubuntu-20.04, manylinux_x86_64] - - [ubuntu-20.04, musllinux_x86_64] - - [macos-12, macosx_*] - - [windows-2022, win_amd64] + - [ubuntu-latest, manylinux_x86_64] + - [ubuntu-latest, musllinux_x86_64] + - [macos-13, macosx_13] + - [macos-14, macosx_14_arm64] + - [windows-latest, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 # python: ["cp37"] # Note: Wheels not needed for PyPy From 796596d5206ff1cb53e5b2f64623da7b9d367ee6 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 03:06:50 +0000 Subject: [PATCH 075/126] Update macOS build platforms in wheel builder configuration --- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 2f1ee3e..396da9b 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -25,8 +25,8 @@ jobs: buildplat: - [ubuntu-latest, manylinux_x86_64] - [ubuntu-latest, musllinux_x86_64] - - [macos-13, macosx_13] - - [macos-14, macosx_14_arm64] + - [macos-13, macosx_13_0_x86_64] + - [macos-14, macosx_14_0_arm64] - [windows-latest, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 From 5fdaaeb1fa94e0a35dc4a7941ee783a5d0c1cc5d Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 03:10:36 +0000 Subject: [PATCH 076/126] Update macOS build platform to use 'macos-latest' for ARM64 architecture --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 396da9b..b1699ba 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -26,7 +26,7 @@ jobs: - [ubuntu-latest, manylinux_x86_64] - [ubuntu-latest, musllinux_x86_64] - [macos-13, macosx_13_0_x86_64] - - [macos-14, macosx_14_0_arm64] + - [macos-latest, macosx_14_0_arm64] - [windows-latest, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 From 49be067583d289c454a1569d0501d53f5c43a764 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 04:55:27 +0000 Subject: [PATCH 077/126] Update macOS build platforms to use wildcard for version compatibility --- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index b1699ba..1d4f7b6 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -25,8 +25,8 @@ jobs: buildplat: - [ubuntu-latest, manylinux_x86_64] - [ubuntu-latest, musllinux_x86_64] - - [macos-13, macosx_13_0_x86_64] - - [macos-latest, macosx_14_0_arm64] + - [macos-13, macosx_*] + - [macos-latest, macosx_*] - [windows-latest, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 From da0d5fb1670dae13b9349790ef261f525051ead4 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 04:55:37 +0000 Subject: [PATCH 078/126] Update macOS build platforms to specify architecture for compatibility --- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 1d4f7b6..d2833a4 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -25,8 +25,8 @@ jobs: buildplat: - [ubuntu-latest, manylinux_x86_64] - [ubuntu-latest, musllinux_x86_64] - - [macos-13, macosx_*] - - [macos-latest, macosx_*] + - [macos-13, macosx_x86_64] + - [macos-latest, macosx_arm64] - [windows-latest, win_amd64] # - [windows-2019, win32] # spacy doesn't compile win32 From ef9366f97fcfcd015d33382be4677e0bf6cefddf Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 04:58:37 +0000 Subject: [PATCH 079/126] Update pip installation command to include --break-system-packages option --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index d2833a4..d778eab 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -38,7 +38,7 @@ jobs: - name: Install dependencies run: | - python3 -m pip install --upgrade pip + python3 -m pip install --upgrade pip --break-system-packages - name: Build wheels uses: pypa/cibuildwheel@v2.21.3 From ba9d872e2c8864fa3061a3c85c19617fb783c732 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 08:52:03 -0600 Subject: [PATCH 080/126] solve artificats name conflicts --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index d778eab..e70e021 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -61,7 +61,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.python }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }} + name: ${{ matrix.python }}-${{ matrix.buildplat[1] }}-${{ matrix.buildplat[0] }} path: ./wheelhouse/*.whl From 63ec8c7b14205cd4f2b072b493498d60bee3b478 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 09:28:40 -0600 Subject: [PATCH 081/126] update cibuildwheel version --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index e70e021..381a09f 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -41,7 +41,7 @@ jobs: python3 -m pip install --upgrade pip --break-system-packages - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 + uses: pypa/cibuildwheel@v3.1.4 env: # TODO: Build Cython with the compile-all flag? # Unfortunately, there is no way to modify cibuildwheel's build command From 40cc01f8f259918e1e59aa49da167b189293deb1 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 11:13:01 -0600 Subject: [PATCH 082/126] update license configuration --- setup.cfg | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 5567273..d7d8473 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -description_file = README.md +readme = README.md [bdist_wheel] python_tag=py3 diff --git a/setup.py b/setup.py index dc07ea3..afb38f6 100644 --- a/setup.py +++ b/setup.py @@ -73,12 +73,12 @@ def get_version(): 'Programming Language :: Python :: 3.11', 'Development Status :: 3 - Alpha', "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: MIT Software License", "Operating System :: OS Independent", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic", ], - license='Apache License', + license_files=['LICENSE'], zip_safe=False, include_package_data=True, # install_requires=parse_requirements('requirements.txt'), From ec9edb4b5bd80a8412006524d91ae1dd8a32c0d3 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 15:45:08 -0600 Subject: [PATCH 083/126] update license configuration --- pyproject.toml | 5 ++++- setup.cfg | 1 + setup.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fad587b..1a4ad78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,10 @@ If you wish to cite RuSH in a publication, please use: Jianlin Shi ; Danielle Mowery ; Kristina M. Doing-Harris ; John F. Hurdle.RuSH: a Rule-based Segmentation Tool Using Hashing for Extremely Accurate Sentence Segmentation of Clinical Text. AMIA Annu Symp Proc. 2016: 1587. ''' requires-python = ">=3.6" -license = { file = "LICENSE" } +license = "MIT" +license-files = [ + "LICENSE*" +] classifiers = [ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", diff --git a/setup.cfg b/setup.cfg index d7d8473..5dff641 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,6 @@ [metadata] readme = README.md +license = MIT [bdist_wheel] python_tag=py3 diff --git a/setup.py b/setup.py index afb38f6..1ddbef1 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ def get_version(): "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic", ], - license_files=['LICENSE'], + license='MIT License', zip_safe=False, include_package_data=True, # install_requires=parse_requirements('requirements.txt'), From 08d91687a66279ece57ccc933591993123fe991e Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 15:55:09 -0600 Subject: [PATCH 084/126] update license configure --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1a4ad78..01b08e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ license-files = [ "LICENSE*" ] classifiers = [ - "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", ] From 2fbc2772afde5107cf3a289c318d523974395778 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 16:10:45 -0600 Subject: [PATCH 085/126] Update setuptools before build --- .github/workflows/wheelbuilder3.9.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 381a09f..fceae49 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -48,7 +48,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -r requirements.txt + CIBW_BEFORE_BUILD: pip install -U -r requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt @@ -80,7 +80,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -U -r requirements.txt - name: Build sdist run: | From afd25cb255616de474f02310611b238d7a11e688 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Mon, 25 Aug 2025 22:51:13 +0000 Subject: [PATCH 086/126] Improve artifact handling by moving wheel and source distribution files to the correct directory --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index fceae49..ad0a8c6 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -109,7 +109,7 @@ jobs: - name: check downloaded run: | - mv ./dist/**/PyRuSH* ./dist/ + find ./dist -type f \( -name "*.whl" -o -name "*.tar.gz" \) -exec mv {} ./dist/ \; rm -rf dist/cp* rm -rf dist/sdist ls -R From 5a8b596362181a9cfadec6f140a38b393951eaae Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 18:24:10 -0600 Subject: [PATCH 087/126] Refactor license configuration and improve project metadata --- .vscode/settings.json | 5 +++++ MANIFEST.in | 1 + pyproject.toml | 6 ++---- setup.cfg | 2 ++ setup.py | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a8c2003 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:conda", + "python-envs.defaultPackageManager": "ms-python.python:conda", + "python-envs.pythonProjects": [] +} \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 6a5cefc..d874685 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ +include LICENSE include conf/rush_rules.tsv include PyRuSH/StaticSentencizerFun.pyx include requirements.txt diff --git a/pyproject.toml b/pyproject.toml index 01b08e5..f89f823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,13 +14,11 @@ If you wish to cite RuSH in a publication, please use: Jianlin Shi ; Danielle Mowery ; Kristina M. Doing-Harris ; John F. Hurdle.RuSH: a Rule-based Segmentation Tool Using Hashing for Extremely Accurate Sentence Segmentation of Clinical Text. AMIA Annu Symp Proc. 2016: 1587. ''' requires-python = ">=3.6" -license = "MIT" -license-files = [ - "LICENSE*" -] classifiers = [ "Programming Language :: Python :: 3", ] +license = "MIT" +license-files = ["LICENSE"] [tool.setuptools.dynamic] dependencies = {file = ["requirements.txt"]} diff --git a/setup.cfg b/setup.cfg index 5dff641..162c62a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,8 @@ [metadata] + readme = README.md license = MIT +license_files = LICENSE [bdist_wheel] python_tag=py3 diff --git a/setup.py b/setup.py index 1ddbef1..e3ac1bc 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ def get_version(): "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic", ], - license='MIT License', + license='MIT', zip_safe=False, include_package_data=True, # install_requires=parse_requirements('requirements.txt'), From ed16040bb97516f08613b7d731b05c9b785fa893 Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 19:11:55 -0600 Subject: [PATCH 088/126] Update build workflow to use dev-requirements.txt and publish package distributions to PyPI --- .github/workflows/wheelbuilder3.9.yml | 18 +++++++++--------- dev-requirements.txt | 3 ++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index ad0a8c6..8989e60 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -48,7 +48,7 @@ jobs: # so there is no way to pass this in directly. # This would require modifying cython's setup.py to look for these flags # in env vars. - CIBW_BEFORE_BUILD: pip install -U -r requirements.txt + CIBW_BEFORE_BUILD: pip install -U -r dev-requirements.txt CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }} CIBW_ENVIRONMENT: CFLAGS='-O3 -g0 -mtune=generic -pipe -fPIC' LDFLAGS='-fPIC' CIBW_BEFORE_TEST: pip install -r dev-requirements.txt @@ -80,7 +80,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -U -r requirements.txt + pip install -U -r dev-requirements.txt - name: Build sdist run: | @@ -114,11 +114,11 @@ jobs: rm -rf dist/sdist ls -R - - name: Build and publish - run: | - twine upload --skip-existing --verbose dist/* - env: -# TWINE_REPOSITORY: testpypi - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist + skip-existing: true + verbose: true + diff --git a/dev-requirements.txt b/dev-requirements.txt index e02cb42..7813532 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,4 +5,5 @@ spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 quicksectx>=0.3.5 pytest -numpy \ No newline at end of file +numpy +wheel \ No newline at end of file From 746e8ec752203bd2cf5fc87ed40d7ae921e6e4f7 Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 19:13:17 -0600 Subject: [PATCH 089/126] skip musllinux --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 8989e60..13a5ff0 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -24,7 +24,7 @@ jobs: # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - [ubuntu-latest, manylinux_x86_64] - - [ubuntu-latest, musllinux_x86_64] + # - [ubuntu-latest, musllinux_x86_64] - [macos-13, macosx_x86_64] - [macos-latest, macosx_arm64] - [windows-latest, win_amd64] From df7ee3e0a90b654c339edf88e5501f21c8bd2cfc Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 19:19:31 -0600 Subject: [PATCH 090/126] Add permissions for id-token in upload_pypi job --- .github/workflows/wheelbuilder3.9.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index 13a5ff0..f3c0d4c 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -93,6 +93,8 @@ jobs: upload_pypi: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest + permissions: + id-token: write # upload to PyPI on every tag starting with 'v' # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') # alternatively, to publish when a GitHub Release is created, use the following rule: From 2b9923ce50e121d56ac28019e4fe2e41c40d97b2 Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 19:26:41 -0600 Subject: [PATCH 091/126] Enable musllinux support in the build matrix --- .github/workflows/wheelbuilder3.9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheelbuilder3.9.yml b/.github/workflows/wheelbuilder3.9.yml index f3c0d4c..914f25c 100644 --- a/.github/workflows/wheelbuilder3.9.yml +++ b/.github/workflows/wheelbuilder3.9.yml @@ -24,7 +24,7 @@ jobs: # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - [ubuntu-latest, manylinux_x86_64] - # - [ubuntu-latest, musllinux_x86_64] + - [ubuntu-latest, musllinux_x86_64] - [macos-13, macosx_x86_64] - [macos-latest, macosx_arm64] - [windows-latest, win_amd64] From 4a64ad4a6f6405d53d811f5a5fc6ced6cd21e7f0 Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 19:57:48 -0600 Subject: [PATCH 092/126] Bump version to 1.0.10 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index ad89474..6031715 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.10dev' +__version__ = '1.0.10' From 88003b60d2e128d63d2b9ddd0310a2e781a07842 Mon Sep 17 00:00:00 2001 From: jianlins Date: Mon, 25 Aug 2025 20:06:50 -0600 Subject: [PATCH 093/126] Remove license-files entry from pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f89f823..8520781 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ classifiers = [ "Programming Language :: Python :: 3", ] license = "MIT" -license-files = ["LICENSE"] [tool.setuptools.dynamic] dependencies = {file = ["requirements.txt"]} From f85500e09fdc9d27ea48f7d2ad3c5a3911573c88 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Tue, 26 Aug 2025 03:16:30 +0000 Subject: [PATCH 094/126] Update devcontainer setup and adjust Python versions in CI workflow --- .devcontainer/devcontainer.json | 2 +- .github/workflows/pip_install_unitest.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 806ca81..c07c1ff 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,4 +1,4 @@ { "image": "condaforge/miniforge3", - "postCreateCommand": "pip install -r dev-requirements.txt" + "postCreateCommand": "apt-get update && apt-get install -y build-essential && pip install -U -r dev-requirements.txt" } \ No newline at end of file diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index c87f169..8066859 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -17,8 +17,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - # python-version: [3.8] - python-version: [3.8, 3.9, "3.10.x", "3.11.x","3.12"] + python-version: [3.9, "3.10.x", "3.11.x","3.12"] # revised from https://github.com/actions/cache/blob/main/examples.md#python---pip runs-on: ${{ matrix.os }} From f6b091766fac4b2f02438b56da7420ef2f49c27c Mon Sep 17 00:00:00 2001 From: jianlins Date: Wed, 27 Aug 2025 14:26:59 -0600 Subject: [PATCH 095/126] add max_sentence_length for cpredict_merge_gaps pytest passed --- .vscode/settings.json | 4 +- .../StaticSentencizerFun.cp310-win_amd64.pyd | Bin 0 -> 64512 bytes .../StaticSentencizerFun.cp313-win_amd64.pyd | Bin 0 -> 58368 bytes PyRuSH/StaticSentencizerFun.pyx | 54 +++++++---- tests/test_merge_gaps_max_length.py | 85 ++++++++++++++++++ 5 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd create mode 100644 PyRuSH/StaticSentencizerFun.cp313-win_amd64.pyd create mode 100644 tests/test_merge_gaps_max_length.py diff --git a/.vscode/settings.json b/.vscode/settings.json index a8c2003..1f77e20 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,7 @@ { "python-envs.defaultEnvManager": "ms-python.python:conda", "python-envs.defaultPackageManager": "ms-python.python:conda", - "python-envs.pythonProjects": [] + "python-envs.pythonProjects": [], + "chat.tools.autoApprove": true, + "chat.agent.maxRequests": 200 } \ No newline at end of file diff --git a/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd b/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..2920bbd8d87379b6a42f84a0e7303b66760852c5 GIT binary patch literal 64512 zcmd?Sdwf*Y_5VGA3`8JKRHmZ!5+zy`Q8c39NXD1nt)0GFDPC>tD?4b#!-t}1+VkGKWm?vOakq9ujh}4 z*NdEU_GRt0*Is+w_GOyCw;i*T$ z1L^|MU+=amws_8+(~AH9-2c#Ni})V<>0RgQ`OMeuIrh`vocoBSFFf~8_Waek5A!^7 z^Iev{c+OqZw&VNYPk(>zqxQTrc%C}VegE3NzjN*sYpd9^@lB!7Sy%K69eM5_rWMmR zhW4x-*k{}QLcb&BWS81_FV7l#pG&vhd!JCKpCxraLk;ALnEdM#n#_l*RD5jt9i(JK zp&8Vf!SgItuB`}d*0ZJ}G_u%;zDHJs8iBgtwNPjmegB((8q97SYFC_v0?v67YeymYdzYQl*ppAtVQJ|TW`{1h*9Z{5VN z3k7f9dv`-^&ksL)e<)P{sh4gV;`!^nbSm5E|AWVZMxVsgBL8dt-q`4Wz~g=|-BLH1 zmxX$nrav?In#=z+{O?jigZ_A4`oSuJPp3NcJ+irdWlME@%z?A35`Mf&jn%I#L|gu- zB29OD?H#G@lK$A4e+h+_Mj5xda?KW<1NI+bRU{szwvul+*F9<-_M;5C`v z>x2MIKY_^~S%`XnwwC{BCD&%RJKNO-!@Ua8KmUnLUUvq!YlVC!+VzkMEcg6v3eore zpt`qgghN3sHJC2_b}3!@2G}M2cXH>~he8Rzu?`|U<@wPG*1Ne2$Zc-#XbHy)Yx4aH z(Ptc0=6n9eB4MT!q2wkU1c=uC>MBy(0HhGjS(B>^(P^sHy19^w(rSINyyrL1&mBe` z&+p2n&MEe#<<+F$G$i3SRC%%Rw_$)(RiXfD>qNZjlwig>Qv;#-#Dw1kTHhl1s4bIg z=*4fCh$@`RYnod;=`X$@e>?Tz!YvI=rIM-;mnI^P%L=NdvQUl=bQ(5;?9* zV~(my4fXt^YLfnJIH+lUqR`(fguH#WRmEot(MeWuwdZ#x)3c^|o7N@Msp($lh9kVp zglS%8@T&MC=0qqtyi>!H9N3Yaa;}=%*xX)di6mMNut9yc5FHGlMC< z(FjYl?o*M1OE&(Vs_mZ|lQlk8VzRe$RU))iHl(^#&JV9-lsrEp))dZ~gt2go0Uq^x zC z+YC!@Ue{FreA53S>A!vUf~1ewNcdkS{qyhl{7H)t3!{q?!cW}@wGa>;sfnI{BxoH| z6UnR^X?Uy!j}*LjH_wmztCHyn2ou`!m*uacl~qF$g?`QL zpGMk#@GSroI+dB<-t$*6w~{uWB5haiFKknb7;M$#ZTl<~YN}refb4;cU=i?X05)KbkB+m#&5P;_NfEcv-&BY$(`YbD zBW=%*;0=1U-wyF449}r>5z^Yd_1>JNr*Z%JWu6uo5~a)J4mmD;a7QB3P?1Q-Yx2zPml8fu_W`Q4Hy4nA6I*!<$7s)=z)&`?Ll9I& z+P{ZNNQj4Tx(Bq#5{-l0+*mS`{;Wj_AKnb92mU(OiGHpV2p2Enk$Hz|r<)pz{rJaz zD*Q5Jp1t4MXP+I}y^YQ)srpS!5k@oNABWlruhmz&@;R@t%!{p^`3A#tBisvri}p+e zXkSnn|G~|0aMIu8`ENLF<9`iWo=9+X71C(^CS`9SQ-5s4-C&FxH>AQ>0?*BEP3&6F zKOIbF*J#L@*QH54pJMeLbkK*HCe5%}eb-!d!cFWMy(advn%DkRVX@iB@M=q>j48$d z&V~UI9`|2K4KgV{)TN5en*Fy@gFI1sTbGzbk2fG8B5kJ*2WooO{R#i+=JxknY7(vI zhmaA4=-+;8Kq2c#|3a7eA0_;ctXiNn8zG7*&2hYRE6ubG#n^9r>Xhc#YF-$vq<=I@ zb0n=hrFknb1Eu)|B~Y4Q@(`Gk(%b>^()vs*`EvHJr@OjNY0ef?1WNOJKt*Z(3{{cz zSD4a#7~>R*058&Z(Z13pB?Wo#Cn~nZVE$e}FmWb|4<&Mjt6G^-d;yT~^@2dbO@chB zi08M?3y=B4NRLT{i2dL|qi?#Ih9;<+C z+CDq_Dr>auzFKwaC)QVkpOC?Tu?X`$Mpw%v2jnV5f200Oc*ep91CEJO=%4fpo@1K) zNzCiBRHSGSs*BDb9JOV#Cd$t@3LxD%U-><`cK^LK2AfTvq9rLb%$g6M@L?#lSaxC3 zKTqyM`#LeTAO4kqrQ5Ua>s3neC4VuFn5a#r@7ln7GW~ms@mM0K6#KEiGaq*j%*X!D zdK`%LXvSltk(qFG?SE@%u4<;On#O?`oX6KJ?ng`5nnMzOn`_M8%m~$N)gD;Zu{>P5wVL8W4VeQjIs{Fewdc>`a>HpHw%|X(m7X<{j#nbEd`|S(Vz| z^DlxIzlWacCsno202!aF3egP|M1z}}dT_pQ+=MBuBc(C$itu`;DYCkL8w9upm^JApfI?PJ{{dU0* z_p?W`ZkhwW=~tvOucw(7UMb+ta(>7ae_8ngp2UguM|D|oIke8;b7ilxvdwb=f8{@e z@?&S|Ej?>)kCKxqiIKFXz9Z>R?Mk;RM$v8WG%rf}dF+Rg)Z2{tyFn)5_(Jqg>ZsXg z>-XeM>g5PnplClmL}C;E2`0D5e6w&2bY6z6G8(^0mPRYK>j8j_`=2{$-w?TeMU(## z$g*K=*ljNyo6Wlf6ieOD z?B0W}Y3ixt7gq?sX2v1<%zJu`%kfzdl0fZt-FKkz;@I8yO{;Og=f9No&(OQ;)Xyy) z&l~r{kDd##tpHkORNHJ+_^v8+cXbo^#}|cZ8C%WpZzrV)eLo79vBfH~f=LV;1XK9I zAThRUUz&;gAC-9JNv1R$(ntZvD}TDRNUUc9UReM^{Egz3Pb2fM-nj>Ar%^BAe~ln% z_WW53SQTay~21W-Bc68=5b6>m0yI>`OKeAem zR0mS4IKAl4x9MS#dA~58D|M%pEMueLI4wn?e}PM6N<9Y7J&1?2AUSoju@P?{+nYoV zY-FRDph}5&r}kvpD1b%0eYaCrjwV_Li$qVMV_WIaLZg>LJ(0F1!RdA%s<&R_0%{be z%MR2iaf=}~8dQs9{ls{*q{n8F6;L6VOp3b&iLtQ{fp%{yz&M+J8BC_JQ|&FFg0aas zs+6cO!6s9Q<`Dpk3UxK*RQL=_Xe(6s9{?4{uCNn5K(S?RxY2|}_3x>pa7DUZ35i9B zGX0y2VoA3zM`~Bk-?d%{5fiGyw93tgG8hJ%{(*bkz{>}`Ak35gQMkw3(JR9&JO^lj zdpt)2-l3*`>$3s?Ix}tjxeWK3qUE(G+8n^q(~E_@fLldE|b+C>_ zfiI*?$>^8k>x}+}Et6}Qoxl2<=#4C%p?s-25FSUXHXrdV){zct5oK@opRxwF?Fs3r zpn*4F(z14ytOgpa0V3WzQVn-9nKtjne9jev)lN}Ryab)MLYPbGqzK|A{SKqaPpG8; zCdkEvoI>D42lB_mA{X^UqS*OyTP6#)JKqjEe>wRInuz2(0zQc_@)9yrnN$!bMt(P< z?R04?h(05_lq34|4kbjd13+p3h$bm8(wwsuzaY2hoOO@Wjaw!Q)z97vP+g;Z0V&3F zleE$sUtfj%@B{QO9(ZRKN|2L=Dce3CQ9^zt`Fu@wU%R=l>F(=p_chmjZBLgexqvSs z{$Bc)iM-*rO|!>cGwkvE`8>=yTQr9`ShfnVsGx6K1z4n2fTl>}vcQw;zu3xtv*_|I zwxWY@`9QoT6pu6#O2uXD_6|!$op-3B<{eTL_ps@|a}U8_m3fBEH8_QbH4a>r=zDxk z)4;AoUoB2-1HM|EaE_wLm3`wlkpR9hE_n$ZS2i(s{yD>AR?!=QM_6SkL<<9lT*=IW z4{uOG#3buVO2VIfounSV<6TNf&t#;`b4;4&cu@`UVmXf`&+*q*@7bh36Pz+P+zraM zgz=!gX%%D1{Yo=0vd1FEGjbnq8?W-4as0G~QBs!Ufol5WfC^KV=HA9!qP$G`l%9kR zYNgI4yO5<6%Wu$mWh$hSe6ieQj}aU%k8N5qeO}$PM4^dd&I#Oby~>!~G~!M1_`kE8 zE`*>(Wh32c4xn@=V}i12+E9de4G>}OYk`IunTuRx*>RqIHEm|GYHqRMR0^&z9mM#C z^4l7!m}7ILvu1S9_Zv2d*YW+fbR1rxhef15>S6X&(co4p{8kmfxS=?G?-3(O#++fG zf&l#uma1YVgsi|rKT}Cemlu1(ILAt(&v#If5|19Cj44Fky(Tv{7IBYyt*`R){8JV02lfwa&m;%ZCh%_do&dao%f& ziLrQKVtn=nf*TWKY!@#WPckw37niNCf8CiFyZXG8maXqo2eFF$@Gig$OpHIOdV1_n z0w|am{bgcYXS`bd#b2#{_SByvk?Mz}Q?gFt$~@TPm(M7Hp9$#nnL9 zOo#4h=wd8_=^=(G7EvrbLrutTgMMaS{Gl9)u!?25MmmO|iG)+PLZ5v)|a*Od15DN8XXLjmhp8S#Oo#OfJr4%1*$$XJft_IN z|K-&|`u=X2)bzkRE>V_GgE^+m*$c3{%H;Rs6bbByo~DBf^4?|HW;%SdPM!aEro+M4 z6cI*1h_UmGP!K@g(tbD&u+a1P^?Ec*>>>W+`a)+aew?adeV#&jB&`_=Uo(M*QNTIK zkU0B%H;HKO4!Y&SJcRb~geg&sgXfPh5nghw-=rgqYWAlB5V+O{!7qVpP3V#MHPh2P zY{4@BI?{eZPOkOOd|d0Jc$G@AjZo=&_)@%y_YSxRoS++TiLseKtawfj-+tM9xHyG->8s z=CfG|18-ZO-CL5UJ0kQp@U|igPZJP`Fwr8BMbmt}k*j;F)okFF>9qdz$LX}PCFR2? zQ_?Qo&e*^$lO-(Adr{b7vyC&*8w};|4{$5Rr>IUKK=&Yxwb2`y%6|B38;$3ZZ0#HA zKilXR8vTEoaT@-ca0H!qp(dygF< z%$)7;4{Yvo+hOw+VkA_W{7PJ3i$A_iB3*fahv}_Av3c6rvq#duWQcb5nBQBrv*$ia z!@K#gXP&Qi?LFA#^cO?0rdxkmV!8gJP3f<@;-VDJRd3(-B*T%V)8WqdwJ|xOn0NpS zr)>N&#PzgMDKoz(rt?s0OJBqF6PzJJl5L zw{1%PTh2c!#T)w&+mxBC8=b2IWc~9?hf&)9Jm-@O0QsFuzhwX@!)~eoF+;-( zuyQE%{HgLo&W-of2~g|C%ZfP(JQF=XcMa9K}~we-;LCg06n z+vZM#LKNDtyug%8;Y?f_iFAx^stW_JH$2i zLsxr(u}e*HpJJgl=jh2eKh$l(t?OZ5)>G zVG=@Q>#LzXi3G^}6?Sz%0f(6t*Potp^InQCtph2`XDo70@&QDp(U}SdKls|%o#%#w z(3M)dR;@Wn-`(2C-0=dgWZ3U1S8^&PyJe1P3M-Kt>hNwNe=EHf5Mekn_l1G8I{xSR zi@?;{Rmb@LzJ2AxOLuY2eTp~0m;~&*6}@q<1E=B|Bf;2h={NZT#t_T%0y$>cNxvw8 z==DtiPijY@!b=h|`7;aA)8+z6=I|;ygdO+7Y&}@b-#&Zh_nlSxJEN&pgP{V629jzY z-JvIPj0Q&2z|tLsnxOaK<0Z4yOwawuza>qc7uztqJ)P@SkRu8`YR|Dm!oEFH&$aqJ zL;n+klB{r)&7vces)G(?A&cQw(wWfZWp}Xj=8hf!zMi?2X_zBXJtbCG*4A25owd?5 z*UMDT6n*_kU2KQN?_JaAKgoW~h4OeOW9iP7Z!kk_HA8Q<$UF@T>VER>=XoQ~8+k6` zxrisWivCaP%GOOXI%T!zYu5espUPxi*>XxgQHWN|QPza{u95Cg@|XbM(M5M>m?bpd zEyOs0e2OeF7d`5iOWgs?UMY#(t3{EFDXXw~^B%t&2^c>ketLXT{IoMpKXuabr@H@XRiSFa$sB$=ukH~}zAd!r5q@zI zmFpj8IFG1*)ysrmM2zvVJFnkcVXxofbw~GlK;)DT2Axg5n|U1Sul_hPr>Va!KW70a zgIvd7=4*{Fat)3>c&$*c5Aj;F6)k()-zYmIFl3*x+#_pfY)A7+Irfrn!u_L8AKvU( zQKpIQ%qzXjs1Gz5bb^GF2536j$r+GD?hg~w)@CztxBB23ZQc=@YO_}LCo@!$Z%)m$ z7WzaodkeSS8$#Zg)4uB8vaMHGq4b#>bJb)xA2w5c+dRD|>xLyW)fel11#LiFCe)Mp ziRx5qwojz(PwInu({_u!6+m1;Tm~akeJu4Nw{!sEKtX8j=#$KZ_f|h>FBxg3Th)ga z+p$i7?rs{(O!Qky*J-+2LEr61XM(>*K%#7df26JzmV2#V^qu)8&HR}85nr@}Fv(PB z_{h%xu9QaZ4kh`=Kx#(^e`}-;==H>f-&MN5s>sPTa`jwj!}|Fbv#2bJS5n)=O0c=b)E2!EWId_AarR-ci zY5qW1{#%oaIXTAD8bx)l&E5g#VuW zVa&(Z1-{Yq9}Dne{$3X=#Uz~g4hs`2$_Woo*hz*kj<%9`<>QuemlAA zNO5sSq%B4k5o%3~i8`_5_}85^I|xFMm+-$TMDG)wZRb=U0aFa>H}qMjMNSmG0&s5= z%1ol^rp5gid%OQn^gIy;a&q^NNgv*QA}ngVMS-~^Fs-%g04QBt8byH5@O(${Nc+nc z^ijXimhsxpQ`YJmQCK`I$5XM45=|I>ADg?S(0D_RR0uWE@n*C2qkaEJ& zkFM$z(YDy9`$NMQE1zMSBQnrEfeALOhJPr~Wt%3x=YmIN+wFfu37F0{N<2_^zq+RV z_7Bu8(yOg|TvIjJ|j5dFvy)Op7JK_)qcVDbZXH$sABY-1#IOO+5{Kfu%3 zo$yDCga6SsdY2{X;yw*MrpAtjJd5l$rgZt5QX79##VOZYZ@k!=NM=ZXL0X-CbYwSl zTl+yEjqc4aOV!=jUT^$}M`liy3Bbqd=7VfQUW`8>ex0R$oWwXgKaa9rvGTh+NNx>#s;YmWpn}^-6nl zdqf9X+;DfQ_#3s`IUj-3ql0T%gB&mgIGl1LE??@0yp1^mY13EyvBpzkM)l%pn5-W>#C{lqxu&veq( zPul(l1C*KFKQYk7{yg>QR{ImBvEqJ^+rz>I>Awb$juWr7B&NwEE4RV$oNkGn8bOQP z0&>Mec*l`mwwtUE?VE>cvXS8`ME^-?cCWkTn`Zy-^q-Vs|F^aJ+K&Q9bMD$eBkD8* zH06`NlT&5va;vH{yAo5 z_}TXNBlA~R3aYln8HLAC6lqRQ+ZBdJc#gUzAt`41=n%(Dmr~2IgSS|n+B!SjN!4P9 zV^)I=RIGE^GHYYq^x_7!$H6pU_`-@po5|0#n*h1(KNm}j_x81;NXz5 z))d0DG5gSnCIzsLBy>LKRjr(3(&d&2dosLB&r<=%?)NCYn<_&=(2>5V=e8ro5nvM< z_1_WT4z(UE6iozV&-rk^HR$HUVb)-2f6||n)nlv|DkV1RL)zWSSpV*a#jzenxt?P^ zThz6&ei1jRtXItxC2SZik-kWC?O%7D`@w(c+>;_oFr5{`gzL9Dy8i&TU{HgWa>vpD zs_tJuu+E^~o`|&lSu+|&YG9xTTC-a{!Zg0D)dOj@xZ{qk^o;G!v{fYdpq{-wLs_w1 z@2Atf2>1!f=YopUaPQcg1h`EW6#Q7KI0DN4D${U-04WXkeoGvwdosmJ6J=$+nS*Lz_2y>t<8jwle@(r?6gUY|KHftSQ1y)EwtJCNy!k_-32nSiwkMk z&RRt)+RwQgKbDV3bk3I3ZU+N1y1%kfaK>~JOBDoWlz=3+PTG&MZP1&f_v2ooPX) z;{r)gPm~at+#RXR-s-6yjO^JKX>9ok`J*`}$4HfYT4sY35Snn&IZD3TdG!u@uGp$; zk_{woL0lWIgZSf5sMVV&7g$=)etl2SwN&D~37W(NXtIL6s4~(@_9W>pme!M`RhHIs zi2il980QHBBwf&|JL)&-R2TJY;KD1yd?F-_@WGut6y|HIo2!&e_&vCrh@V9*GT{r5 zjC!AL5xV|z2fJ@lwW_imz2DZ2MOACYUMxr6B5P>btruSi&miLB_7_7Metpe? zotb*^MKBuBt5bYrhaIK(TK*D?SO)#<(R~iF7u9jHZhY2zzZ5g81*?@zfD*tu0era$ z;EI_w@aTp+r6HvcXgsQ2moZN526zi63&jE*E zNzx)3h3EkEhD4{rcvRm()#s_&8dcLV3jagmZUhytuG>JmmmcP3FgP%STY;usiVwI< zKm6nSRgGFlDdxAIK_4+yiRpU{A2urrOf0++g%oM~xju;Bzz?yXe%=xLhU&=ykoT!%sYfvxG&9% zLF!^Gd?kHI?Vs>53j*Hp4_}ktLq}m`^1h%D9!vhn37QgSGxFCYC{m2{?I$$2pGevH zZM(yKGKDqQ8q~k?_|$RgA*97l_K2_|O$c6@PlHKvpfdgFVSkVlvl1d_-`Pzm&OGiA zOpC`j3EJ06UtO0qGQQ|xNNUit^?Rreoa?F`ZKxoBdIyA-pQ&=;$M54&8nsId=-5Ay z*m67sI*LLqN7*Pn;l`+v6z7n&5I*!^fO#8)0mNZ3s3Wy6tki{Ru(Zwa3OI3hVWA6| zASDGk=pTeyCnqvQ_aO6hPI8FT=2K1cJ()QKPZ*}nDIg$&!96@ZC_*MNgoGjNsba1&Z?Hn*O|M$Eu^ zV+%dAnvak9D0$l8Rfs02j+CH&dO*BEC2x>q+bL)@~ zGRTqnovPCvdb*fObj}$e;Lf;QZ=;zzkl0Eg>I>cAj+*|H)j*dPv#%d#R$JQ>ob2bk z8ii!02QK>VLWEDxcM?Q2K{#kKYJD~t1g2#*Nz_N$`w@&WljiTItA+nqI=B7{6&|rO z3UF!K2;jIi-$H7oQm-ga6)8W!9$VU4mP*(&-$cBAvUM)f#>5fn_BIMy2mgGeb+G*! zawrXu-wMc2%76s?=+x}+66TIeU20URy{ys-(t5ApzK`AAdp&&0U(!p(PwecxQS zQO|PxCkQaL8+xQ35u{G+k-Cqi%F^s@HAXDw-nuTpZ?XI4BaoEaw1Lbd41?yOFFI!n zs}-Va)d%FyyvNs&5kIYbFnB< zFj)xvhxOoDLyZgPzbK;k-Rr@B#{(;_Z~ph|!Pibvt)#67PtkkPe*8aJ4~|jKtq1QR z2yQ+2*EZox>%reDwj-@k%aYKx8skiKN2O~$_!`jvck97#scU8H!9QTS#O?GjGohM> zvNK};$Lqll;-D7k<^QMk;Hy88aFGtS^Tl->cf9n58ksJ|FZSqBjc+0 zzpV$;$IosN0rpx3oTr{@IN57`5AE?`A$o#M6pxc8SDs1O5zT_(C0U9pk)+eFbaC}Y>XW}?%SiJ616 zwk6MDy5Xp{5M3xD2KI@QgUrvqN;Fx&E^tftPzrmKzK>QaM2B0YtgtzfhHI^1LWn1p zfVE7}G5$>SGm{c1)FI#`kX(@(fP99OChX;=GR3vnVUE2xOx`lcntu(ZL2vx(f1VCm{Uh{jJimilb zh3lVPZw|rW`pe0ffm|Xi>b`B<19caZsNG1VFv5=n^wZ0R=57NjeR_lfMzy&?Ygbxp zpWA>f&t1irb}qPo@d-F&(<@XXtaIXlci!MqF1b_2*ZYnYIs4F`Z5($FP~mFhn@n`3 zp~VoLN~?=Ryj-gauzsArxo*C|;D*TX(Tc2v2(g%_@tsWHSvT3957bSx{ObNRSBM(^ z>F}R^MI8|eUz0SRwkH`lpNurHr$MMLK*XRSMBR7W4Nj=0{H^9|P+gf%K~v z-eINBw9>=fz|W6RxY6JBdh(jMG5PF5bUoy2s(&@U7}<@p+QCg1XD=8Rd1!D|{mQwY zkDR%u?B%(ikGXWGM4=<{(6Y>+bS9Bifr`u^j}o)qibwvisvW}2+kvp#wVC19 zjx*-(UIoYTefmj8ky94(5M}!-{dp+9FIW|#{YBAqxS^Px?H^q?)DM)L4u4aR%-CJd(jz=oBaseoOJXuJYL5E1``i%!{G$go!tZ|M?%I|^;-7YK z0X6q5f^L3Ppj;t3egciCJEIMu zE-#INK2*qB{C7{Kwyq#P_GDFZ%mJx3QDU&`%U;4yRlg!qATju*V5P&+V*1}nZ}juX zM^&I?`&v>E=a;LXYV;|#u4@W)OG+mE@GnH}?6BPo)^l{;+PH}V^hNu!ZdB#2Y(H1x zja!2fmr|m2?88(DwG4%kzrIc7qB{zLU%iG$A-X+?fXDn}Da(0K;Vv%Qk$JMU3vZJP21J#EJ0WMAZIsD+Dr4B4aN4uV-_NMK@HptOt zgiGQ|g3B1pR`qmvh1$_*9zZ_ppn?$S!CkFJ^;C6~ z*}?9h>V0^JI;&KP2{sWlUQ#yilX<@`c%R1f8vk)}GFkQL(X*|jvRa12uW$TPzSuJ;H2PBO$@%_QFc$mlCg3Ra zJI1R`#!@Q{2_@U&=G97e7a_1M-~$jNPPCO}`CWh)Ak)6idRF~g7BNg8P6vj1!yJYS z-nEut{WpcA($iL+Npf2^+60oL8$g@MlSFZi2Py8UB|iwpj4f_BS#EXAPWq)?xp);|1xf`b}#|4vegPU&*d>a6;liOQa6#)Fa}6eT<7m@S$! zYCcK9EB{AWeMlamhoKOSDfZ|jf;EnI0%uIMUN z^Q+h1B%H@OoP%1rlDHe0n~pOWJF9%A`iBmgn}bF%sly8ZQAB2NkY1ZTtQ;9n$c!;$ zqGwXXZn~~M#oF!!KJ*&!s1@oIf+a%i>e~LbTF6vCdz}$tSXtYH%i6v%c(2Va-H-A` zLd;RyO9a*rH&LWFA=ay!U%d*Mk6r7K>Hnx|`vLoXzJ0iKy0jc9mtWO{#KMrWz)Ml8#?{U&>0a1?fv(*fzVj-5uMib}SQ{RWw#mK~dj6KtQv$H-Lgg`ma2$lfx-`t)s4ia10}ix}RD z@V=_bztq0$|47orp6pi>iP1zLUHvuf!|89>l!VQ=pIEoA>y13dDPE;kEdKtR`G-t*SN%(A+jTe!a zb@nFRdai#BE#LEE>s!wC{8qER;jF!renX8;>vzolHgJZ>zV*gb;iVF5H@ItN{xd(6 zedXQf*V!E$Qb%K*ExTzn|LMh^Z5f;>bS7GtL73H%tAC77!0Ob_izpV#?~q_U#{5bR zxj*(i9#q58eL{9vBKAS#>cw~{I%IU3_OPF)$fNCv(Z18x$7|!KFL$6`mj}a0A&W}e zCz7%7PvD)@V0Zh1#;^79VR++@A47ADuvdOZ>_3kc4yL)<9|k+|K?C>g8~0WJD52b} z=d(=XP4r@`TK0vfBW?33nPh#fd$cNB`e*-4AT=M(9=^Z&({c{Mo{2P$M-KXvOs&>( zu4J;8Zqk2Nj+=00N3HdWH2%UckgDu;RA{zk7;G>WOF|-1XIQWB8u7LB#MOA! z+LvyO%^|78l#ML0*)?t6@DG~&H@RoRQ?HyBe}o?T4Yk6)A<2yJFvbVp-Jq?VAzOKj zG_FhZeLY{9@K5)20=)hG*?Y(R6-}I8s+A`z@>>uqRVUjtRl5*JhF5^A$pLF zxlBw1+Jmqe-J5*sa^`u$e?B|deE%T#7o}W_uQL9{^9kk06I$PEcvScCaS=pgY&9$G z@yJo@{k4VY{G+;Ap_N%X?~4Khe`!_xFE)jT@J&MeFPNU8_!4XiF8@2~t4k}JG8YzL z8@HAW%WQXdL;NxE3|H%HXzu%ud}XtLHm4q*_PFH0{>|~$a>?<_!fzyZf>KnGWKA$m zaD5=7?3$PP=t2Lh>*kW=vF`NgxZ1g?y-b?ia&)k%=W)veT(F`Yfb;yzYc#u38nWRl zyx5bGtA9dK|JYi;voqVLbwmG8JHhzV&W6RpI1y=F18-`5>qIXeTansXXv1M^H=xE7 zYd0h^gDSIsjKP~bG6JSH?5C&rsTYjcqd=$F6R$77kfI}og(m`>V%f;m2h)EzHjnVC zUf4wGOmrU;J7~WezRNaT{*+IN!<%ifg;8%P+h$S5+7{3W^jS+?%_&l1h{hV``=<>x zyiTubsjPohefsVYbaoEu$33zPHUew=lE76o8!>yva{XEilA7(1>k846)Fa{|DDE-# z>J;|`YJ>Vx6QrO%p-~;T-7?gd05?!B)@1Tp@&0ao8z4FMlWVDG;UnlWr`cbZkl^Kx zoAt?|>rzDa!b6RuV3erJzWgo2nOQ!rhOSk%9Hk?^tFr|rL2etEl<8EFI*?lL3qSRP zTEB9&xPQcIRk1V1|Awk{soL|#IJyC3MXLWJ;(>fkppL6BcJ@FAunl4VJgA zZ9&jxb-eI=?gomZl2fxif6-h6G-{xF)LuwYw!;-HL{HH`^UGf6%AQ3b6{253FR0(I z{#Ac@{yW`8!5E^yq4JLCkK3yw`UVj>6HQ2^$tl=yq+XWix0lW*$`f4sw|<lv%D{GxC0`wZfLkiPnzF1UGbEnqc$D zF7pb7aemvpx!A3VzMTd3Y8?mRF)#K0I?4FN|*`mf0D^2d#B0D1Ts0{#YFzM76bb zw9uUs=+0X0;J65J1s2~bA)D$~U+`DtccBGyB%T0lE&-cB1DdU5%ZBk_LlENEz7WJ%LyH~_c5;Z0bLtRd>m2)m7{|s*Zn>#MzgAs(Fmq|bY|bIHT{W^5 z0DYGgHuz`kqC;k#Z^Ogu@;~hH>1}#7|6S|t(Z&wz^2dtbG2pks51Fzqe>i%A$0#e) z3Np^FVhC3(hct!!9;PI=p|-?4Q$29he{+Po@MODUeZM|R$W3)amGh4lZ);92~Bxa|8s%va~X zVd(Pnnx_8F<+{Ajnz3t5nRu;V6;eACqPx)vnf4Y3zO{mJ@VDjH65@|}plU|GJY4gC z{B6A0xkFMvPBIIUu@h@j)BO`_8~s&{+$$LOJK~+WKG+ZO);IfOLo~*^QsX~eCPQLb z!=k{Dc--sTDWx0t2wm5`pJRLQz`h4vDnSC#i(yY(sNdBNk5F?~H?^p}E1K*^xS3 zXztk?&8Rp0Ya24Jv(Pt@J|YkM32$s8@a{H_y73mqAfCPLpmL=5cg`Gl=4(OC*%l3D zGC;e;WEz+ya~j-QeUF@jO!P}ci>|(&oYZe}z+yuW57w3Pk2?FI=+DTVu~B9}XD%V0 z{z#VfNNp<=n@LA&JD~nxKzv5r7jsP#<5~t>LS(wQWrpXorRgP!hhauft)7gjU~jU2 z3}jy>vZr!+B+vjGfEUFfpg&S@>C(~Kb$10wrglxT64M~vQH$?l+vA_jp2mdB-AtX5 zYhRPXmZ*M=8g#eGx{D=2Psf0snLzmqZB%s@mK9*X%KcU)W=tXaK4sY7+H#R`?sbL> zHcRVr(CkR9gJXUpzYDza8*})5rYSSF*0Qr|U9O=Cvj%tCLv93wh)2za%#! zW)AK^8%D0trZw$H(M0ivV_Uc_ini&kn?4Eu8wr1AggD7FQsWqRCwXH=wM6DfDBx;HQ7LJF3YjOWYIeR=qNd_p=HV?qY zBKKJP-7La=ntyGM{i9`{V7<-rd+~=42!`$?@kc`Z;arb}2CyPE)eg(JeP42y=}fxv zLwuWrBvdRv3vtDIWbRIQhO0=8X%fT#<#m4DH~(;vcm6^f^s%PFc&FISF{6v!yav=; z#AH8r1&{7ou%=Y7kCQAjWK(S2DQe)SmeB;LCw|IKY9LplccsoRKRzvf`lQ^?O)UoJ z>x1!?26HC>{I50FCpTy6IttNx#s~TgSSn4Py92Ulp?fY*C@W52C-O|UTkC!$jybrl z9PaXo)rsNHh&i|pB9U%h%Ql+?7J$-j}Tr&4^~Nn5-j_s&~x^}j+)O6e-UAb`3Dgx zY(0zQIQwuELnpmoGH4&rN|UPfsKKu)FknND@dtGslRmhsxE20ND@>%zsVD}$+mH;5 z0!?rHOX3~6o`XHgNDjt-8255hSpp6(u1gdjh0iz!zkK$7iPnzIMR}QEZ*6mi9hvS> zk(VBX3CnR!3eJ2x0mJu-u?e<#D~R&WtkVxTVozetz`rjU!pQbXqXOQx`Z8&^PmYP# zwhVH`PnPlXEX>>sx|4r_=nE{G&lVHif!V4O{)dRX`d9hA#)H*bR=wgD@+TCR+pi7| z<6e1n+AT;B#gv)mlrHzOOw#Qjx1e=pC{Hd#e+-Jn0N~EZPEBnsdJ#=4ME9~xwGe5` zS@U?Ptdu=r9h9{gY5y(h@WY{0)n^BvrUzdzYj-UWY=5-*IK+wzE$p6WBeVAY&Op|( z;j*d_y_2qRuLG`4lU3$twdlsS0&3l8nIyboPxiQ&=0YkpPvN&pJ*{%%^1P8f3;Dx} zNCT>%WeiakZAh#)gZxOfKu8#XF9wymvZGw_?aKBi?M%7)S7o4UK8+7d^h#3gKpX5m zS!pb&Y}D$4{O(@#HG3sfJw~d&u-7pVfbk>-h*OQx-?^N`2Q9J8pzRLT1y_kkdU4q3 zPLm&I>1?ilwVBpoa4hO$GlRCp)$Ut}-VSCin4MxP#D2nF{N!;Dk4F1crr~nAou^|2 z_qhl)Kj?i%2swPhW};jX~RGuhR{j9|mRJ|;5=5%OYU_?mA0 ziH{fY(iL8Y(;BVsR;2!EgS{=rV6YB`qd-8cT_$ZWbhIx>Jc1ULBA3`~&;+fO1u{(H zWfgcn4aa>pYr=Z)*16De6NedK5 ztF}e}1Jl_M#tbxkY`SspnqndXxNdVH`pW}N-v34})5u4uN{r$C6id9-pQ)4F-!B74 z_S<(U>F4ugxe#U1EPAQnHe{Q_q2C{M{gy`$^Zc1wEHKU9&EcQ$2fa}gcQ=y7PD-@! z%&bxI2wIdpZ}6}T%7?a_fh@jVOHWY%>zRkCF}%|!el-;f7sgmFtkdHrDsNXwgHvJL z@Pv_SihhKT)tgJ!HwEpZ*n-Ks_UFAwV{{LzLztVk(kD!mP~%reNK;{V>>c-kA^qrDA440=v`&x=f%XnnE0 zgL<@Aq6cGYeB^5G<^bHQQo;Sg4M!+ z_*+!SKkX>jKu>72jB?-T?n$vdB6W^FlO287AE4C+^% zGK!X!(KN3LG}|lA-bkIu&wo9x3f!CJi1!d!#+}kvuijCrE@-}1Hnx|(%^u~5PXZH$ z0Es+OG$WWDi~b0ZE%)?bn#$BKX=Eh5gcN%i?Et^P+NNrea?~&Ke{sC#S(9$aPUSGp zjY5PO;92mEg>TKcv~Q&(YrI;%7N`(k-Mix-w5D2JvEsZdPRfq`R6Iy6(*NJk>w1;- ztDkUP`9@EAJ*-rtmtl~;%^v0S8UY@?=@kcLFM8D)y+RuBOmv^eb zq!%)TiSztrB2 zwvA&7c7=aBZ!U;n@1Dl$uPil1G$uycULciiSbw=RxJRiFyQMi@_KK3sWZDNd`uY-?auY?99^vPHf`aA7lqM!6V{4up*Jp)SC4pmhSl*<*_1IDV z?7ph@YRiGLl3JJEZ4LPYiEU=${JJ%#$2x-e_FXFJU!f9oQfFnx*$mnBZ?vq)`mXgV zAzsFqc{TM%;@8!7>DNFneXCV-%fBKBno`;eIu&Pc5Kjo?&3fJ8pCF3NPoI0~%L!e& zLqD~K#Id!-jM`#Gf0xlv%xEZPe6WwuKGxKZK%uc5s25#^=?7(uFJ?UEGEl>=aS(Mi z?r<5Ciy4!P8CSWCX~m3b#f%@hjM0D}t<2KN;fa>vWDw~)c*d2jFuWqfomO*ob_}+Qbr#c$Br#>Q24FrP@$|qCrS;i0xP#hEmG+A=7*nrTJ1~ zGl@mYml8*p5ww&z6o5uXENSdX_vAb_GXaFHwUraMT`GV`uj1@rgrnt0~EfLk(+- zT?VB3r(g`oS7f(glM`uUEC9WY1ASWndRqg1rvsfrCj%C_Qd$3ep5VO!Ji`HOZ~)W2 z0WP90^5leOxjq)Jcq!i~tPeHN2?x4mp8%TKtA6!IpaFU(0iu-;@o$?Ia|W1bQ=GRfVuGiCg0@_}pd*L3d|NDf_N&T68j*XMPRF!{6)jmPL zi&cM8R+Z)FmTlSEQ<1^|)OH?)V!R>RGjI@#W@74BM;esJMC(j63>00AU1I8k%b;lt z(3g*oM9ggnXz>5I|E~G5uK8PK$yxvR{4e_Ns`hbJrxdIHq^zp?zrFRpLF*-~o|I^y zK`0U710!Db!*muR$e!#$gagVz5+yCCg3}L4|GNx!84qB?zy?MBPWY!*bq_pab{q{s z`*&!z+3!n}cbQeez(2l6&~f}==V3H4>-LTls&1)rDoPG=u`kU;_iRk0dN!a}SY9ko zaq`rrvR2}{nG|juogCWXFoaL*!faD?Eyi{LHQP77d?C@NaQxJxUK!jeedX9+qDJsh$K>(94byR^T&GX zgIctJ1%s=PKTl%fGK~%bXPrcN+tW<7^dg&Z=^4Yk^fb&IR)^Cwrt7RYhEGQe;cO%~ zzcEX~M68qsvyiy#Aw6!gm%eyBpGqFBVp{qut22%C8AZD#uusyDnzNODbfIQ>`q8_2 z@CzmL-n_OCiypbYEoZgnDq1GC<|BV^sJIQ)0*&Ud$H z+q;%e_A?yBNu`4@Gx9s}l14c3yT_Tab2;UX4VoEysjcv=2Dg!g=v!6?ikn(4ZCAxQ zk=JPSdUV0l$Ma@LX&M4yI}djy2;c@__Lo*nqsk$_B-|FKa=~pJ)0*s`%@<;{PU+Zr)N!UPNx7m%WUM5 z8GM4l=ym7O;Ur3>qFedpUU?scnZKn(e}wl#LVmRQrTB|r-(io0D#iE>VixiI+sa6X zYbm!Pygh##NLUfBFHRYLvo9G^`jsEARyS&*^Mw#a>_fazG(`gdi3AvVU&GG~%5yC0JU9K&OiWE)LIvcvpJ*S=?u1ROxwp)a$!YuGePf zRZ4?3C(ACGCR1g)1W;+&&M_)WP3*Lew9SG5iC9<54A1}RM2M<@-vLSgJ2kLV`}4Dj zbsgjOr~BI_V$Zdlkl=iDQoDj#vrWX7&3vc6gDKFtAlRRtV1N1u=*0eXKfD>TC4s$5 zWOp!1Ys}PCH3oX>pUD^ki^>*lTK*InsebMqXV+oUA&bCcr@X%k>u`2ld*zd3LM`h` z`|nrI{3n~{-2Rag{J{pF-)@S9F98FtC!vg)UOY?k*Wwk+l3X_*Ta_AMEkh;?OcocJ zBAFN{ckkqnv<(-92x^p$5)9e&6NyFsHE_`lzyOgO$%J(W3BDl1w#wLVb^N%7pXZeg zir-R*T=SG=N}6T9htIJ*8LNu4-K#`*ynr44I_@y)k8o$R|BBlnU0gq7JsBIes&UK$ zwcqo+K{Cb`tVA~z?Yse-mx&zD9_tPq*m;9-JQ=tqP&#k0iyP0_Gk6sR?4;#Y z$=Dn2yg|NS+z2av8>u(|dteZ<&Sx67JRk_(&|NTPk@WWLB_GT86U}>Xu85%9nbv!q zAF!GEv(|s~WeHAO2R^TVpZmOblGNO2GGc?uB0q;}XTk7iyXk`hnZv3V^lywHeLkp2 zLCmY&-5R!Sg!MeIN(TB>PG-`NK7JO$WVx)Q8UE_dY)zxTx^=^x&Rjn{qUk!?>xXYr zk+oVznKtjuLSL$B)w=cVzVez-u-fSE0!5A5rlOTl?Ym}$bQrr>J9Cw666(uFroNr; zvY%-~MeAD?I48A*XcgUJ@uF-?^4CL@@d!xgP#pZh+Ek@hF}yCU4l2hc*v|pD9P}* zjt;Tb&EBg1WYH+z(vyo-K`iAXo~w@o7#9x}!tJLKt`vfVDLVqL7q`RuwtB*$nMVLZ z%luuym>6BmPHnjAGtQ?`zrABlPb0F>l@9MmM)pLyX_TpL9lQ8UkWL+vJ%^rU`>6l# zgYJs(ZKUfTJ=q9V>z{A9>e=i7T8liiF8c!tvqAX|hr_|b0VF&QHKr%lCb1WT>*4tE z1PxFF<|e-+<$uUxX=F_F?o&gdIl6k_@4(L-S-@Jpv;l_i5MLtG!sm1!T7F+fL+wIG-WqH+MR)}6jb%%AHZQ#OQ zvzfaOVd;kW>1S~FM~R0uV9d15Y)1A!WMZVk*3v{i z-EWfpEnl+PM`L){xfyM+&{;9Q$1rU4`#L?+;l31CNhD9WKS?XXuYETZQnM?F%#hVvF% zp1Rb_G}m}5nrZ}}`veTHCcLPojhy%8%kS_G)O&?{&omEl-?{C`YvLqF^G1H!^BuXWBjly16kik68RJZe;dx?3?-41Eq*;^7kftP&;T)ft*y6ivP zOCK8Jq?+2wUP~p2y~k9~uiT3W1B2pKu>%ByY;b+YMlbTm>hSq1|I{tXUZ5!NY`V+X$~ zZJ#~Hee}=9dC9+5RzG_HDcODb6C@U8X)m8S;eRF}#yrhs??oOWR=%M_DV&HyZg@Vs zzgBRM(qQ$MYX{gfH$IrXJv_+V|4`TZdX53XU&&ODM<%O7xf@8!-tM4>vl%Pe-Ou*m zJE!aE1q1W8|C8Zus9lkrfz-=RAtUJX;OxonV;d->D(7o{7#^Vxw{{h{)HR%@0L&-( zpwB=~{F2Wn!&CWNVF(bHc?2M#?AQFUaYqeyE^0=coINId%W)VGGrTF)Yj{D)X7uR0 zYTlUfP4LVaiv}Vz&K?sVk~%3ld|Bk-69!u-weNG0hvWU&0W&w58HD9KxK2LA5dWA# z$ynz_12Clfa#di*MOBm@mVblsLJ^5G2DkpWE;LJT1K`7cJo^}14q5JKbk+K-PQ}$f z4_yYfy~<9XE2Gy*@JX@;W#*2(l9bR@>su<@o=*+RzAojuI{Pw@Vu!_3necZtk5+`Q zQEC1$U!(^nc$s_a8Wez-_g)8t@z)0u`uS=bj!J*^)BD&(G1(4g8@MAM;)~V*KC6rA z^;w9XPhro$B@0|HbHh5N`>TC+ixFDE1J~lcX8g2M$bs9ikQu z13)Q~48o2VkNfPgrzDRXI7i@2uWBY+OI60}o7HJzZ5FcYRp&h}nn})ymZnZQ@%gP0 zsy?=shr-fYk8DBgZA2v7rpwqH7vsLl@pZDPfEmO~%_tCi*Oic`Y&d-|Ao%iw4Q!hvjT$}L#=kA{?ER|XsW%m&U>T+MJs}Okvlps z9^B}MV-vNS(^b)!4v*wD-=DN^@nV7W51ahwh&i=>)qiF~JQMFP9Q}{*JtB+W-sa+g z)aygN&8OI|{|gD>8lLjPP59^9Q3=5X~%k&jnnL5;3*mK!o%XA1e{C$#JtIy$Y0|j4Q5uU)S zu%*gTylVA2JeHTp;(zRT@gOgzECCjoE-lpfgq177`0$!ctad^cg1HINa|H9`hdgNtz+k%;<^G2sU&ii%ub*lY1Nu-0CW+utYcZ`A;$ykhTv zwZE_1-wpQnE&KbP{r!NyPd#;i#d2Ueeb4pXcqOa&`SvUmy46nrsl9JWf-bWC4Bw>L7{O}-=+HOj#iObsF zE|Mwn{U)iEQ)-1pHvd_AKFwdWwSy^8L?0Lkqn7w@Rtk-&Xh|#!9AZjQ22pm@&e5 zmhrQUbBuq&c!hDxH6`E8Xq)0uPWLccjOQ57{zP$(GTP<7%K3lD_#+Mnt#rOlppE9oh8&!TAqnojdv5#?p(PSKFOfk+feuXi|_?L`t zGgkgc)wh-LPR9Ef`xs-4A7M-~exC7bj5)?P8Q)>t^kY>|BjY`cJ&b1D$s-7FjiO0ert3*x#?S^vVhNb%>ktj7b8i^BsU>O?GmunEw55Ynv+JL`WVAvlS zGPEEP0m}$#yESV#6xT+g!9>{50{(~=jfBUv0YeK%{XsB2bqv28K~hV96SI-LI5ySg!pI+OR*a`NO8+4^p#?h!z@&MNPE5QR^{whaw>> zki5n~>ze;^SGTOnn8N(aWQ;b=rR6A>#!E!TCML+Si;#sw7@RHt?$ zCh(D6wTAj*4YpcAV-Qb;K`8`(9*>41JMYj;Bc2Fb#A8LH=(fm^HW)QEKdK--wYX)5 z=$``#?esAE0i7HUgRsO99e5mrL5L&Ay1Q*@r$Z>4ur$LoAuAMtu!kg@-ihK$1B@0j z)*xEbP~X_(cE_P94Ht}}P$X!K?v7(x${}P78K%fHsS52@p?l*2GZeG@17SIi>f>nD z5G0SFU7;9MKIIQ5jJRypNn`AE)C_8Vb0{%_AtMc>^ievXEc0MkC*l^3Wn}_6dBB>N z4?PnRV>G6w!gVG4EiG*L<2*WrKzbyKq%rCbSm7~|mn&_6`Yr!SXj|Xdjz4FDS}YpJ zAd5!);k?%Mjh?u+lQM@wr;NxQvH@&QH9j@j`}CW7LgxY3tb53NQS=UDvIMnG)xFkO z%s^dae%qpcO%m;Xy*@w94QtrYPMb7psJC`$mN|w&5QvV%LNrXMex0YEoWUjfH0od- z!E~T1<5)JaP$5PQxwKWQ%rBnnq|vLg(A_&Zjwdfp*JR?A7eAVobFbFMyzdn z;#X@fO;0T_W>?Z!B;|A{9G2Pz5+*H#Sc3h7gBX{xdwBRK;&L+O^^`LigHP8_p8k0n zk>Z64T~D7PDSt1GsMVxb!N#?msY(>f&)@puxlurua>;~xFdC2vSq~CedL^BP31`^j zYl&}Z91%^)v3phaD-uilj?5nlM+f|2ImLCI6U-v>1L0^qVah6XofAd*(cqFk0VGP~ z+Z_je3P%L#SKP1fv8T{OaZElF%QSWcRUZ0B2M6hVfVrycr!az|CN20&RHC0Y{3o&J z74WM1u&g8kc@6jZgK~&zgJJ)WT75gm4)7#FfqRxlvsQmh!?IW(yAO1={6#R4d5xCV zv+2BQ$}@${K(*WXC|9}?bNNJMAex8-HLNki(V$xMda>L_3?qm=K9a9bi4)hO7p56|T=tYn;CkdHobPy`IxG-%;i1MK9(1xV{aX*0?^qd^_jYaDF?> zyA5d#L2%m6zsl{YMpi$<0++Yz+f|U(kYDp>N}qDX`w>PtpV~`lC+Fi8u1wFde7q8s zX~Fq6|2=FEdb6(~q&aQ3-&c^|kNjs#(iv{QEkDim-2z?>A;zMx>%d_pHasSxzuChEA^AB^{E}vz2)IJSiC(EP$q;w7UPhLLflfObg zg7!USe_P)=PTTzfmZA@yU*l0TE;O2EIJ5Ax+1(nraH0;5-mBPsydH$3Lv);e&vW2T znpC^A0a~qe%Ri)tf=IITdyn8D?hZVc4JFKk5GFkavKFQL$Ya`Jzj;!Nt0!wZJ!9h2 z@@_>>;`vF8_($<1CLg;EJ#0jVtYL9K%OY@lnwq^Wt!?dl`~v~(Z%gTpeVtwV4-n6Q zk$(&nBukf-P!8H;SOKVG8OMA?9&#j#Ef?&b1TG6aI-rvNQR_eC5VuTWWjC zGhexm_BVg>bZQ{IQtfiJNj=?>?wxhaJJ(MPCOeWn$+7g>+2&b)?ykd^dvm_NT;HR) zNBeXAgSo-SbB~)jGfsP>+#vDiR)}?eAa!)!c?)V%$@LQh$rZ`llSh;O`N|qnA>~-8 z+<-lIUow!YPVGtuXX|GZna<3aY<2dwY{$H-HhFa3b$c2rR!x}6jjGJNtD1DbJ9RYW zr!6><+@IQz@}^Fw1GBZWMy5FvoZHT2!C5sCPi{}Pq{>sr)82G^HlIaht)yq(1!YtF z)7A5?P07JjN2(_^mfPve`Hts~KazW7I5#|!8;K}2T=nT=B;bQ7?E6x$TT?CR@^lZ$ zoo&gi&X{vlWv=S>bQ>CDW$w+c$fCe?9o~OeBF+8XZi_@zq(sfd(!)5y;~bV zS(B-r->Rh@^IJbKU$X-zvYJ{FNd4QJE=wPpJ(j6QFKnFam^(UOgN{P?AhILf1z|PN zSN*BolB!Rc>5UlW56^a@qdbznP{0T`%j%lErnMPkuBk*MDwhIl)o3f}0-By;*K$)F zdk)^)(UX=u2gaf?fpwvHOBljSnYa;`8x`a%_g?ZbZQ1o)&=Y=)D#ynkAHxgWXk-3a zhEIn1ePC(*lg}eY@NSK^I@%T+5sEfEq)YdM(!HQ`A1K`eO4t9=wZC-TFJ1GYLeXcL zb}YZ2E#DDphbtpJJjAv#j47 zW0vtUV~&x^?F+pqpXlh!cLE3f@aqWwNH^$psAJ{$%JDkX;YGX~r~&`{1+_KFY_Abl73 z3E%^reci;9{%y<+T3~mKL_ij4TujvA#QVQE-vG9xi^2HT&#t+b6IR<{|dL_A1D60m7>nEMO0^n z*uHDM*!5P8*mEw{LTbZCA>~W~*FO_pK4-UKn;5V+kQE>p&X$0{H^@q9lLwtt-VE zN26GgQ|;O7fL`uO;l>Y}?7Xx~Xx45~7i<=;z9!-HdPMqhA%2SB`~mo!D}?i{J7xNh zo)BUy(V!;f7p4o#IJQ*4CTql&pi^w=TP?PDSBWi6n-(0NmBJHTBg*zcM)?X+-scqk zr-itPkR-W8!xuCt{Vu}uHq9!N>D3s(8Lpr53)4kqpkJX}$3~~v7+fnh_Em_DO;ro) zvTI}8E6PO0TgsN>ko#2x=bx*#6{a`Wq7PnL;ihqe;UgT|D#SML2C>cDvRJpUDOZ!- z5UcKY`5aZNL{)ILs6tz-n%2e~HBM23_Sc~OHK5frxtLY~+8WT1*8|$>m16bTm9Z5N zE_Dj7hYi*V?Md9XAk2_XWS1*WnLYzrM(Vi&eO^jaWtMm_CLm9g$a8F~6x*z|Vp|Yx zfiByi%eE%jo~+Wl8alc}%_m%o>lZ4sQZK6tdaW0}k752Gc&{j16{a1VE5v4RmDt=< zySQvKG~Mm%__Qgm4x#NEMa;r#uw1=-%(RigH-3gLQnEykTojQ<+; zCWHx=U6^)Ie_`xY^sI`hapDx2r-gV8;aid)_CPy|X|PoRkE6~Z>h_@z%N%kHI$TcS zf^A$)m5UWwwL&OcSBkyQ3NekK{RP|q)e4!;g61U}#scLRrVGnBHmw$$&Td$6W!J~n z^;i1VVlCNTTZZ;Jh4a-7!u{()Z29*>{F0Of-9ay;$+_RO!Y$j4@l(;XD&|1@yy&;P z`mo-4*T}U&<*!!xXPn{;=v5f&Rp6=W!8lZF2+MIw`#I!1gD>vZO0gB=V{6|!vDJ%s zPu=3Cg__)ktSh#?rVQhEwJ68jb6*hRGYECIEuasjUqQI%FO@Ay(}iUm+bhL(tdZM; zo5c1$teM^#vHeGCyupmZflyuR6t&hGQHwfjA-}e%dLjF=a?hQ^nWN$J>a0QhPb2=x zf;8L8S50FCHli^?Wik52pMvji5dH_|FMTc{Uj79*$Ihwrdc-#)*!h&+Qjou93)U`- z=`AKK6WiENa!KE#;AwmTypTi4cJ?{&FF@7|Anhy@PE+k7j&)^Xowq`)>sjSnLu2nY zj2o8oFwR|XAow7M(1HKdMRUVZTMj#XMbtXx1npaDUScij!&>4k7d0EOcBm#uePlU| zvCujm))m4kkV=2e(O?*=pZS*dOQ+?iJP7;*7E&VAT#A`}0` z$N}!rfZn5A(1 z>ZC*0v*0}>9piKb%Bil~IAw&D-RP?@O2>x+(n}?XlRqdO=8%3I9&~t~G#*sWSs+Q@ zEIe^$Z5^a3Z7qx?>*IS(h8YULQnC|1h(73}Yw&%z2GS1|}FHIqyTB?!z+wRxjE!2?AB?3#l4|M9sBBakrVJ=Y*`s2eQ zRxnyRqY-!{;&cXk$i9YRhj2CXFudrLLMhZD$CUEJ%ny%4T{ss(5)GB(%7;LlT&l@7 zi&z1g>Js8SBAr~O3p60}K4b<%a558rOZL*~wISM=Ag84UvMTu{Nn=dgTEY^frEQ6s zJRO5Ye67?j(l*_o9)NSFrBjVv(S-Eb`w2=?A5a&D1`?LRz3>b9dZBc0I21r{W(u^X zQTiP!i;)jySdnN^`=KHFvjVguiID+Z{B;B|^3t}hN9YpaODy9+)P%2w5a;b&IZdBj zlGhUo40pn{4Gwg8H;~s(&F0sn-44Lp$2Q!@sMpl2Km(NhaYkO!I#$Y9tj^-_+ zq^$xM_ZXUK&!HcIi^rv8`%g&AIVw|kE=lf943ImBCANWsZ2J)0Mh3&t)5^4m(L#F# zp;6|G#vg*?E@oa_d|S4o%OJO(fT+*65>24)7EeK9X{%_|;_V}8r+hKMtZ0udS&z;8 zB3m0;6)#glU*g5g=`zB2x0+vPJK;oWJ`B&J9%B$I!*3Og0~$+q-_4Rm;G3l8?bwp8 zFtXhP;U%filSZ9AhA*;ivNADk%j$^5=r;81Qn`YWurC@7)N4g!G@qoiv-qNI9a;G+Y`G&*l#7~U z^qRR?8QaE@B~9!hC4Nlx$&rISPN#xYuq5efFvigOoh zN$RLkzMIA_Qj%GTCuTV8;td7Wh!*znI3HW8?NGeWOc-Jd%cIp&$=gZt(rt7tVQh0t2L8h?nn zddK4EV#W1bp(Y35Y}^@*jdjA&7fS%P1@Ws(t;>(5TMI{LrbX*m|oFDu3Fr5p0Pg2X;3FLhl+>yAcGCSvx9 z3D%-9bqnPj51AG`dXGi$FpIVPr{%{Y_*z0RYy|U5$^p3C)4KmkIhuRj?7b^606Ue- ztWSL(LHw8UKEoWrMrau}NxZW1P@EG{^MK(eU48JI$91L{FFTqr%rPHUSK2)y0ki_g zDpcO_Ug?K|Vt)Zuo%KKzn^!^;)uDZG3+#xa0S3iHhocMMF2FXan7cy*Cft$9=yCDf zN}4J3{ev?`FD-fpF$Tqp(6)lZjEE1-6L^&AF?}O zTD#e~a(CFX+p}9WIOSjbkx8N#ezJe4?n_rOQ7gJyd0b126%|`$5$+WKknsx_j5}AKAZnDD&53s z?Rk}cfYUMNZ{~FJ(<*;2r^g+tzJqpJsI;w5j`eBfeAln5@|?_r-T&e``FX~hjQIXo z)e}o9%rTZ0@4rdDVdisj|M(b7<&EAX-vsm7{hhszycF|kpHS`SXDqFE_9pq}m@ju7 zIkw!(H_128d_cl9Rut}&nZjpfIQt)G+U%Z>Qzm@mijwCl*-d6RtYcD!@9RoEMTT&r_Rzv+K+7H5*ufw&8J?jyM4 zMLhG%xIcXoU*5_&(x<*ANyFC+zzb*!T+C4($FI!o zv&3!Bq3aOGnN18L5D(5{;?!rbGQRXm_?lY1IH1_4uqfjybv#do&w(b1?Y(QR$v$I%)9=(!Y2`@ z`9eSRJcmGSA$;Z{-j0FiS)lkL+JZRYiwF~lXMq11LE8L9$VZ^~R^Ty?p8!6~al%QC z&j7#0al$t^eg)W-QFbG=5J)EB1jh+~pW~N+4}D4L>;qm!I0xMbD_>G{!WNEq13&#c zpo1p`d=-Jl5+QyEL&j@>pL-elfrtE#?m!?Nb^)J7_$z8V@Fj!=#PNF>(x>t&;^bRN zz6@7kXF~FGpg1AZP3uY4afQp>~3$MFL6bJ{xDvq+}ki_ z#2fCvZ(YSb_!f~dGJr3eLqH^cZ$rY2+#Sc=+K4~Cdn6Pvqw(mVwHx1ez1trjX*}g= zz&8aVp+US!;=3wU8;QEhmW9MJI{tr6%F6y0>6zJ?%*@~5a!qMd?y0>~zN!AH*wpw`awFLaLb~-n`Fugb}W)9BGzL0(4@(aF;!xv*0 zM=y?FoWM#BzhKloo}NwT($~^d_xR+*- zLv2pfwiERoM13bv-zaKKp~hL%co}tGMV+phx|#Nwy)*qY!!r{zN!jZ8nT46FGvd7S zyz4x*zWu!K{E74Z=SR8#WRxHviou8y1CoJ6@4*R%0!lq4AqgZJNK9q~G$=Gl zoAG8ewP@2;Yieyv`)H+otl|Ni5G3JfMA0A~;;}PMRj?M&s`LJSYv1=|CdBr6zxVy) zyRPqpE7|9@*Is+=wbx#I-8=c$HCvJ_7E3aIA`y#a8$$lek>CIRS4x7#GGfBBBP>r2 z*?;!71ke7nmy}o3I;(4{R@M}+a+VaYUR~vNE-!P|_*OeBRy%XAUhG^|Ra!RPW*eGr zfd2NZyEC7?>w_Zm=f4N9DcXkgoZ}xBGd%FE3g;aETk$iJ-&_1=3EwS#8sWT?A4>kS zcYRni4(W@Je^UIMgdZ5;nMEr7k)*#@QBuyn`j4z2&tfV4;UJ6S>?g|2vZI!BoWl}F zoNw8ToFyvv(FYKwN_+!`ui}Xo%OJ_>3oRKaB~kG&(NctjYGekdrd3YwA&X@#+AMnw z_^@$mg5@MbOM<1r?1V{Yf@KO3*lV$jWBNP(we7}S3Zh1Ztrp7$1FwOaO!tTJRI|F9%&uSJf=NBT#S@KPmX>#!t+@ z9E+uGdbO&k#t4L2uP3GwqNtZMy{5LN1aYAWXk!8GExq`O`IjU0$||c+QD_8ONxk=C zDj|w`^Wt;@zaW*X8denW=4MFZm-Rim2hW@TX&@(J2i8+ed?3xTY*`` zQT!_)(Aef1>Cq>R2MfM17Sk8HYXWlh09gYKwrOmbZG9Ydy_JY;ZbhU->&)94;Xg+% z(we_fs_R`QxC2iB+-U@tA>eXSJo<)Sq&@z;qxnXkzmHbB@htYaoj~$3cWe66t>Lqc zv_81LEy0`UAB+N*{<6oP4?->7ny0saZcm+g$u@x0!D zx_MREV6?M`U_-D7d133w9|Gd;NM)bN{-xG2C5U{1! zivSovbF(h38>H1=nBbkLfnc*ddMS8G_oP=FWak`s^t_{?$H>e2OA*KWza)C7VayOq zW82E?h1R1w#u{}zXfb>+Ujp^|Z@8&FIzfyNt;p(pg!X$Za)+HqQbRjw0Yx-Z(z)f!bTYZpX zph23Rk16_8(;ZtrW(5~{vKhn&TX?OQ(lDm$ahZf3HMPJ@)2(|RvRIx0EQ^+PMT+lY zkA7R4M_--l(g!8Ady`!LK`uSdf@reKpJ&Osyl#Xxb%bw-remB>plN0smWAH#@%sw2 z6Fr*$#${T2?_4dgv_K1t+2h(q`Rke14x)L6wFTc=$UaZzHy-yoT=kb*>g;ZgYKWWQ ziU1DO&(n|PW(}(wB`h`^ydQB~p-SBP;k+!LrEavlew8J`H(X}oJXMyjhlH1JJ_9}Y z*LS(~m+~7U-jR7=h2d_n0=f0u2SIvs#i3S3PAd49?dS0p?f0&Op7G z#-4P|qtCP=)8qH1Ynvh0&CeYgF=_VrWAx)Dc+NXvR^L`6@@<0LmKda{G6Vt8<_E}4zH?G5@E z4kNz>A|QC0;ws_lC27dVu3^mV7+3x5bW6nx9B@#pnWL>SP2jr{^y-C%XH|0rR9 zaHd++rGTy{sim9**4y_HY6q4UareBjh~)C;mAkirBK)@@;&=rOAc+CJ&MqqjOA;$( z9I|B%snsx+ol?AQ`UN9~y!sM5VQ$uiY>0J%hLnFlLxsj8zRCJuNOzB3nZ7Kt3!4R* z9IV2PNApWr=x^UpXbxf)Xys`8kdo-Y0A#padR5fD1)XBT+R$Jw8hnR<8jcnoC-wT zRAejujf`R@*5MDOrW*fY$>j7$9LIl(3e;c(#$Qn4f`pHig-d{oM?da@^g=dg2?8zR zm?PQW=7B9h{0s;{O@A%oc#ZHdyToWEpmD#zDWks5(Xp* zlgO{ekoTa3b#p{#jFb4o0UiRm7ZZB=`>GaLcA#0k4}F>dD-D1UVbDm?J|{O;qCfor ztLFE(^|vFAi`Y)Y)>LVQX}>oH)EEQQEP}#P%G2UNRRR=;b8$?QS5p&fW@aPV)1DvDno7QO@u70i@@Y?3St zWEZl`)wo77u?`_+F1er36<-$$zs0Yl2T-1u8FA_F2_%=a^TW; z31&b=B{`;NPJFx^_z0&?bFN-NDfPWrVKx?hU?)4Z^KaNNd;B$^pG!aCf~u=`Dn|Yc z*$<;sG4gs;Fd11%jxa##N6E#@rNitvF8&Q#>Y^XG^>-pr87M=^&^r)v62N~y>m||< z6$4C&Lnc&B$aFjF`LJpiMIsc z2<%QJN>CauA)`YU>_N%||7(EBnh;0Y=cIDp6pmyTxBppLX??oRLnw{WNNrY*Ij~fe zYQL|#Zv*rA*RPE@MgS%Bm30=oh)Szv0D{@eZ~VmG-~a}~HhWQ3gU}UPL~N9pH}>N! zCsAuWu4KfWvq`?UR0kp$UqB;bz06!w8NmEZ(b62)|L$WcmQ=`@LZq<$spLo`Da-GR z0kv4jp5Mw{fQM2jZ)3Agr$~kJHay~Z0KlL^xY@qBBVPwI&pG&GgPG@SMY&?;z1#*d z$7dl&?`b?@Z(0Drxmk1Lrnm`9uwzDd)sPQfy**7QZ?prrQlcF zHga49AwqY7)irLdRPa1mx2M?~&SI8stpY$dPZ#Um`oh#_k%`>)`ragMTDMmJWooU> zrQejM*`IhLgb`V^_D~WMd$juYWE7{YnWY6BweLhCdc)n8Oq3_>i1ebxIG27!n%4gQ zS=z9k@D2Skv=c|dnryZ%3>Ly7e3_(v4P4YJ!QR-}!9#xdTp`Nos$PF;?7a*aIQc;vN60XK&fSE5y$xg%yx*q1ss=)H0ua2COc^QE5Wk) z-=IoYrmgP$6H3 zX+|FsacsYri4!;vLbtRxG;lZY`Jm|<<|MJxJ{?EnLD3z%PPcF1 zlmzZ?Lg4ZLg#q?};Aj`y9nHp3S7#&dP z+tI=lJx`#8YnxSigM0%;j@`qCN=B|iY5$2jTgEkj&`&`Dqq=hL)Cl4^FBLP&v zknWu*Yj=ICN4Gw~Jbio-aRd*#vtibG66k39wW-H=blJx~)jb2U&oP+=#s3}vCakD} zni2aoj}O6EzmOTCEP+^lJFn$B~&P$c5`Lrn7rr8yJ=Q^`G< zn>D3xct1j$7&7FhuN6a;Y{f8V#-V<_z;}G{x>^YOf#5UqjQPOf<}8J0uyF-AP1$3P zVng-WBaZWk4~X9Q6@*6rrD_FKr764hLt&SS>OQQ}()3w8=88GS-1DJtH`BO^;V>Uc z4s-^n2H5MH)e+dE=fkk=$+j&4rJ-}7{j7ZbLpdbp*zfOTbqw-L?;Wmfyd81tFGn01 zclQlsCH+LfAbX21eRF1G;pj64Emy8qDa^&bId7_Hgn;yql z`=6-TUQ`#FcF+Gb5>aAQw5Z4ZEK?%roG1!c3JQm5W!c;T41S7QCI)kAq8RKY23R1y z=s@ow22ddWK@8@~nJ#KeV0&1WM$vLe5htcYxBgAo5m<9Xia*{Df4767srBe=FknHS)N`Gz9>=>g&P6%Lz=aka3cqgY9@P zw$|~723%6V6jB{EDe@Yosz1N)#fAraJ2y+(>GHCs)10V`Hcy}oywZyfo`025DE9*& zE@w(ditaa|7Yw;4;yA(C5PfigvGbK}Pemj4}0Vuo3c;xpdc{)&m9Ueyb z{%ZeFLGm@3{!anLB+#BDw4gOL?sWDhYQWN<`EN*{6?c&Ckfy$#n00KM%s~h4Zv}(P zru7%bVmGb7Y(YlEF2H;&21vBaqvaN|_Iw~DdsMGfH&p$>$n8I*ONmQV(Z7U> z{uinX3V?+Tq3-L*v=Cw~ZWZ#7i>rCanwwGAiI5W1F=n4`>IT=F<} z%c8b#`ut-Edze`5jX}gBjz0s6IwABR&D#vz-~9Wl48R2kM~%R|&+zE0z`A``6mreL z+ldAK;d0Tzdk9{3gc3jEX0s%{T5dcxqj%w)YS4#3H~}$mWTixb$yG=DhmlCWeoPEN zFxFZvGjOQd>f0Wp*U2D zSZ15VrB3Mjz6|3sFC%m%t7o8kzW#0KGA1rG5*IKr-AFi@xWGt^X5xG!F^Gvvk;rTd zeT4~r;Sv@9l<}D={vP7MBo8UvE%FGde-_%WGUrD#L(j$KgdS7Ld8$I_A(hNA>%SAy zRnA!!^G;|!a$d+s8YVVO0H}}Y2wkA+F<(E4DFDfU zd}yr7UM$T|h6bzT_lzX(USD8>Qy6AqktDtiy^REDz@!0bpUNrd%i#$Ob(q7>q`%*) zysKmL?o)XSr6my~XB-XPf$SHUsRkj8bSzY+D$kM1ApSd{#r<+{bM{W?^8T5aBj%q= zpxJ1Q`8|ViXnl;2FymyL1SZII2JMbX9m$c)WN;|33_(v-)yUI0p(9)IbUO>C;8*xV z35>OH%vcMLuy`_lq2Hmfalde`ihuhx;*(VTbHaflgEz6_UvsLTKk(rI|f z3um(CuUIpUv7aIqaop8F`2|U}8RNm`F`=G87mkv_^NVC#YxJU|4|}kM3_$Ljah<&o z2PN|oTdrhdH_g+3Ky=1fmY+ZQd8hB3CCxxW5nBMmDk*E~WDIeK0e$=IXm}tKCL%yG+C)ulb>g zQR_n3jSzdG_yWis4K1^nT1{QVAvO!OT2QM6VHZLW6ut(mQ;nQ$$iWZ#3toF2d(PjM zc_b<_3yA79VVv=6WHLl__9x(9H>h?|2~M;%{V>t)gA_O3O=x1DqFfTbi^Ams782W| zKbGE;fIdzkTb}B<>(r8Is-)%Al94RIP16{;XN|G1oxF>~;Nd}kGX3a){T)J;fm!7_ zqj8Q0jk^bI+#-z!VvIXc;?bW^??OA?yMo_%I|mJf7q74n?~6FDlMeE`J^G{Ry#vCZ z#To-K_ypAy-12;SaKNVD+{vMXuVAeiQ$I`8R^FP5Ev{DzJG6~EDPFI?u><$@FE}9* z@bd>FG}AweN&gkQB!4y;p_%?&OnRU3j|`Zy^qOn6KxO(faH46o5FzXR&m+j(2^O6T zrVLo~_aN=hn}Fz_Rdh0h4q~ znpPoUzvpvw;?~<}vRk0l2NS#($#2*`PePpu3knNT`?%Vlf10K*NQup`d-Oz?GFZST z03t7MuolQ03|&ME%v`gP*g}cHmH$^F4PnNbz#H)?Tz$^qirGq8e_Of&^v2yj+>_hJ zV!~p3IZdmNB-#TDi1maGs0^os8@?Ir9ic@!SyXp4G#zDxoB;{)Sp(yq^eLWzt%LDS zbcT%*Jhm&mOw-XOm}qa<_g8!bU=Ur$> zyHV#yWHQ|dqt8o;C;mXwVbF93aEqs@zSQmsH2c0(*;RY(NS17e!~FLOTh0mt{7J5b3S9|^DL&($*b1uwXfjUP;J zW3L-`wjwCJ>BbIoHSF7$3;sysX*rb>nX(v5!?nb<)OC&PTC9$3s4oY<5HBv_YQSGb zc{pezEO6=HFs#ng|H&{1=R$_FTq>>QvHY?0764&>M#Qm@Xd3QWLvhmD4)iRcC8@{- z9?<>s=nU41THbYt{OEQ>uy^d&IvdhApepT}IL9Mf^Eaeh$cosu4S>(!G`IaG0!$)6 z*vK{I6vn#E$TurCM=Q>-v*Pm#Qs)jr4^K8TqK+ysS-ABVHUH-H4TNGq^XUJ;X~}~N zdYlM8#tIn)5S*pLz!!+iq zNc4f2lXArI=sKgq9-Q+dj`1Y4oKL>6Ax5(d3?V#z0=&TBVYUH~rO#^s7S>SvrfG=qn%mxZHe)ni{A(>H zXtbm}=|!*~ax!5vkcK%5SxhP%xo}r`Rt&D`D8o!(=y%Ro5#t|Ys{ll`;FOEb_mLKN zyMF#$1(Z6XXTYBeRs_?Zi|sXiQ+f*63B=3S|Jk>Eb;psNEER{aqVF)9_Xw6pN#%w$ zGP2DAOnG(b*5Dd|$eNep!||;qP1CQ7aqiNS;M$ewPs+_YkDgsaxfQ~p?hu;5VjzxP zPo}q$?daU~Wcn6H#ldT{%8qsNB0s{(>m)R6Fb?&HF|?P&`Zd%6Fh)oM#9sN8Nq@+W zH#RPm?6<_R>!D$Q3JQYTm1ma=HSB6<0xh+!9mx)X=DZEO?eYK{7_n05CC&xq+|7ArxL0a4|_6vt>5K=)+N{1sqQM8b~=rpjF& z%xV*6)!qOd1gH@+l_GG{@qy00cWzKq!LlLTUdca23n5=RAt>^&{9@$UY=wg0It@ z_iN(_21y6oo8|e5epIs6V_g z;Gm|V2U^i6tj{D8^ojWta21oLNEY~l^A05Jw@sL^iTk$>(E3G!ULo! z2Wa(k5V=%(`zQTF0)m#c5!Is{*EC6S9L@QPX9jKOFy)Tt6p8k8G&M=E87;)b$!!5d z&WYP&+Nyz*S|IA)s58q5a3N8i7Z zT(YBNBTK|XuGCW;eXeAJmqDK=t5`v?#|Y&2=fyF=)y;i2UQB+2W{M=wJx2foljKLl z!w}+sO6tFiDx_=0!9PK1e_r^R^l=I={D94}XC47!1MtFd;Q;c&8K@V}3)Q5%@IpGe z>&FYzP#u=5Golj%(G=-4XJpPv`s-s$b0qzn@%|JYm@M!ooFBiN1-7roAwW&K(P8L~ z6PWbunOCWCVMl$@IB|lwWQ0eaCGY|L9VW!|OCz&+SEzP|1oUOj!U~l3pW+!T&mMm_ z2+)t9|3vn4LPmPQ?p_r~P`K|+K$#-w2!R{VBZpC+Tj?j>??IN6zR{I6{T#Kj#JHl{ zt|gq9FBz_sy04jvRgCx%m1+Y38&r$VkSv$b1IM{={wL>v9P(^I%*N!x6^g7l;}oTD zPTv5TU|yfN{jO9^SLd(LOP{MNlma;}ECM>R*e=A5X8@54ym&~_);~t;?L~kcT8akS zF;+`xVKnAq3^Lmix{NW1Hhdgp#vmF)^rzU4)eLHW6B^3|SY8rCn7}c_60${O-y*5< zjxh%F0R=i_b|}O&j#p;-Fw;NiOYdeH2Y0jl8Kw!{X#aaN9oig?{fIG;!xCP<{f7+d zBmftLYqqm6T)w@M!A;w5W^nEHO$acMaQ^lRBQ}3~35)W!FJ~}wdm)4A@S6i%*~LgT ztcQyLh{`XF-E-Z-PK`4;?3)A*E?O~44pjCT*t(a?0S&e_Hm6}UH)|?2W_3e+{w>Uf zvAe$%8jXShPat1m1pUviqHDh^1vsLcHcJt0(g9nw zEaV48jTBpV9lvsWQR$-CQVV#tDB^wtdC_OFra=OEYlB;cmy5en1GJCs{;jqjn7KYt{Y%; zx?UNbau&n(y7BMLD1_PdtluG^*|i~k19Jk_SD%EH;t1(pj?^W8K_=qWVsX78eJxC` zFt@_kiW>;Aw*DT?qSn?Ak+9$M0Dc?R)|p~w9fv&~8ZtGger8o%Q1`d1@@gZ1U&^vt z#_8>6StV*XK^msjVUxLr)~-^P(>?4X>xQojVojzP1ZI2}Oggl#HqT5sx_jS;i@(`-cyPxeFHJWb^`24Ly$Yf+zce zMz+(xaOwM8`pz7jr;+yz@^FwrtP`!HhiP#H8ILj^rq>Y~&shpuuN7cU@vH7abJ%jO z7kg5~F@#h!%${hy1DoCpKPE0`0~ZbZZ#NP#P8Hmt+cp0t$=Cd;E@*J4i3<%nhpnkX z#?~Ok9{qyP|HPpv$Dv{T<`8*rBM+{8G|ncWckk7Rd|Zmi6>cB`_ROK_6z6^9aWRvq z3PpRvzeH<}F~;(P3ZA~+lL8om!so%bPC&rXoDN=cK<#ns5Dut9A+U@e#!*<}!#&Dt z%*8w^c3So#d~JIbNC&J+F&J7}U1^&6we8FOzP4S-eraiC*CYCDtfJm4)6jkm+SlmZ ztTKGLrVeBHc+rKynS>FKD(;X)I23Zr#Qfk>$8-C9lSZR980sA_piWR5yA zybl)3BltMypLa@FV^eyMkPmv?XTjm8GwZ0??pCxTE#O%VBT+ZEfz$J{CfC8a2Hs!8 zB8~?I#Oq;v!`7|ob0(pCWyc=kAB?mEX$`j-pwwkR(0e5!XQPSta2;KUIoyO~-`| zj@u$-<7PGsbuqIC|0pvH%SQ00GD)hdcVWfJ#f-#$4l`KjiGjNGS3TL*Wf+o@6uj5j zyu+=(iQK!9E8@c1Qf!I~*gb$Q>=Q(fpUpKbrxphMpsBp(T^RgiER*cno1tzza3f(o zp)fr9I7i(?g#&vfzd?SP-yjdc#5cCBy9Sf^!)i?8PdSMz05|w>6q>yu%n6q}6hgED zC5-u(14DcosMEysG}`25O^owb9fp<|AaMo*$R*F_^bM#ON&uxIj(-qWpzj3upGq%X zR81efTtGJ52RD$2<0f=KHjj03_;V3a!gUi@j1o8$6zBpDt&{DW4*m%-tSn6TYrI$f7hD_5H8IZ0Otq6I!wnajkMRfd+)O4cPJbrHa}{>n{@*h4aKk0ej^flyOhmxU&syT{m=mhd+UQ ze;nga@D21-!=K<;6`f3ff;Hp4!OR;_)4Xp7w!gtzaBaJnf7Nrl9YhY z4N|oFof$X5Kfc?G?Zh^eKmv|zj7jEf)O94dQHO43D7=$}(9$&C9`Ve^n-gcmaR7Bq zmFmaB!`NiAkO_}7Y^QOX_X5_1|6QdRDV9h}9t5B}1ss%t%3dCY=q>cabKfNknHVgnleVKxpf-0TC)j3m{}~x&>Qq_$-k42~?DBR!BGYhD9ieIJ(Ik z0Y{nH)rM2Ly4>t4Z{V(uBU`$nM~8HkYj*W7M0h&*NVJ18)zw(DD+jv5qXVKvd$XY) zaz4BcHNakBFbwdeYq4aG_V*s#NXW<6O$_0o9OwhKmr+Yb*JN~AD_wprGuaWk11a$D zQXDvoz<%J$YN!K-wIWC5ockM~^9gBuftKCw8w>}cr&Czh_BCk|aI6=UAW^*NY#XT; zdH@w-*7JkuYY7V{%?%PhpWcL!m%_^=d_KKE@@>yP%7f^uMA~mX^ay+DLeIXT&!OBB zx_dCdMo+Ql548Mc3%2918!vt3+_5Xu=Iu2zbF8*;GkqR(9Vi$U$@V@6h_+5Ki5d6_=6 zlexGmh3qGes^Yak&i)krT;vuSUrMJ2u%utepLH}kJY{xjr9$URjw)GXqKpB|1l zzR!FRv++W9q&p519m1+M@fwQ)kKTp(9se`VL+~C4s`#y)CL#ZsjCBszv4C|GDqsk~ zN>wAT6-iFiE2vRF=W#T!cqfA4lG#*uI3%#`LJ)B*10tO4-=QwPk6*$X6m0f}`;p_O zTNK3i?VK&}_?dum586pSl>QxyRqG1W)%2OKpl1&ad|0GEeVAEzzvEoC6*FvO#5W1& z4K+d^4xKiE8*u0l3p_$nJ$MNqHUFFpBFT@ucmW8|Q({2RHbL(dbZOyLpp#-iPhhtJ zM@9^2uLAudKndvo#)1JoRtbWj{TBr1x6UMZQh*nLcn>0mCL5)NO=k@(7Z^BgD-j1( zGMI%W2fLo_O(R}}c#Fiv-e~oZy-}weYp>uRmQfajOSD8}Lf?i56;@=h=_1y#KJ!Zi z#Nlfxww_KfMvvd13_ZTUDrkcqtv4IJ6k+u|P;MdMg7+-Tq=SkC)KeWq4XfvhT|zd!mxv6=M+RteZ%e?Uf=V)O|$$2yxO7%{eP>oz0q433?`X5?9j>rv=B zbVNpMTP0PRnvkU@Bz3?Oj8@2}k?Wxor|pvmIlWh-SDe_5ri;+DA6ma%BxqeoXaU;> zh1NQwd!qFlz%kJ}+sJnY@0uKg7G#tqlq+aCE{`HmDF_?@d_h1eb)#lK1STm2o?SlbZGI8#kq8p2kL0RmI_gT zO)aQRWdqw`jv2?`9Iey38*%>Qi8w><+RlqO+O7sR=OE;ez)G-+*^(I#I9?R_0MRrY zbuixnp_HLJoao^xoazKm-&8NoY~!Z~R5;*O2z8z+NTiE42vKZcj{hP9wKwi8LP4y% z@IE>3@@$xTEfjkv46Uh}{?+lER79&&yvdnI!pWNc{WJ`39dN>T;GhQdqfyjoZ=lx< z-PG%MItx3LKWhY+?f4XPxQywD55s$x^fQJxlktFKp*g$@7IF{-0cAko38+4QW&^)U z`ZIge(+ni%9OCvP`WJS*yYh8B9Te2`&-v}h#2)%(THj|++)CUj!O7DE=K{EvrDABw z-YcL>=zD_^sDvTj7Q5^JmEfC!R*%QDI(DA4!k!_mQoNwdST^fovxAsTaaGIi^bLL{ z1F3N8i49vg$thrqvvD$-<-6a zMj{6shq)DTyW8erq$4~EXyudcL@VO>Q$Dk0PQ?o6#gVJuY~cQe-&^nq);AyEfm>5F z{SEkZBGjw zm)PoRO}|a7J-J%_`V@PNU!uQmELyxkfYTNvah!>=T*lWK`Xd3t zkYBCU-6@mAeX?HjBCD;S`rovOz&5u;kg35XRsb?&?)UfsJV!Mag#IK z!etT1?*#>^NO{p6UPPvqfq5~eMHtg7yt}BGA{f6Rv>?X5L75S~GklKuAi@@a4dsGy z)wM=)x15TxS71~;=c+>@-<`q%xiH4LpZ90xQq91`^ zN@@M)nK|NE+=m9_3$p%-j)4wWou)bv5sbGQaLRlLsDYyW2zuI8iZgQJn#I`H;X$qs zZ{u5{aVEca7O)-q*@iU+;d#nvcpl@D)-+zAf5pX$VSoc%<+SNL@E(gijDxo%_M^F0 zc%wVg%Yl`X4mqH$y+Pj9s9&3Au{Wfl8wkq+c1!>4wfypfA7>&r_IaG)dZBt9=N=aI ze;wyytd*rL@>E7z1_u?yxs@Vj{T! za2QMQI^?p{-tY%x`tcR|4zSLY4!Lc0*Pl$SJspR&B-gYqm;H$actnMzux}+|kZ?0_ zw+EBtB_({a@I+Vi$)rU%^W;2$w;%L|rW`TLgDlR^#V%;E`hamak6=VhLAXLa}+^yO3tn(37 zUG@$IUi{f#p+YnLNld!G{FV=cWaF<$laGLps%=!O|0d!ajrVatVS2{O3H2#%mHG(0 ziUhp|j^VPfhG^LpY4*k+$_y+@^<*zjwKuLsN?+v6)%WD;^PMie&D9=CY!4>7>faq) z-#a)ryUVxi_ztIctiIz!cYSY;W@{xd)Xa8t z@d|4!o`px9^X*UU2yVWFhgHae&F%PRYn?o=Ro|IH92;+duybd_i@go-#XlX+i?}W& z$^__v+*T;xhg7ca0~5b|Fq)R9_qa~H;;Mf?$GvecLAcw) ziJqjx_-7GUx=>(4V4Kw#Np#;4vUbyAq_E> zZC#iLh9B>N;I&m;pPoV`pxGxM*c{X$_Jd*E&Ze`ny78vY{BCDWTVVpyv!iyJ(>(cDx(TO<6;DHk2Cz5{5s`Ch>Tl%DM4-t%L>M;aDwF1^$W z5q!{j(I)$C-`j8y0YqPc*hM&a5|6jlj_UbBuf{i^FNPNjYlL5AT|5zNdj)VXfPIiU z9=@ie#*l|5qSl803;0Yxz5i16l4YQ9jWcVWiJBC`*eo##o`Bqh-vX_m7M;H@j#@WB zb>KXu@)J^L?3aLI!dKs6@EMo~(_2A_e7zT!{`rB6+prPX@A&|;%v-)KB+@jf6EN$o zXm;yk-FlrpjqkUBZ=u{S(XwWFJ(S51mO@5+Ja=V^JSBS@ku<)C@h$NLHYvAvPH{e` z7@zgL6s(ZzTICTR>X9i);M6_i)=!IZdr>~8?rHn9_Aiq(|Cp}OZq)6^7VFtz!0|^n zhoeaPxyvN!8cZ)So(;_V-eZnsj|b)nd*H!0fEE@J9g<(97%cJ-Iitv2J|G=pTL*P7ZDgt?cmnRWzMchkgvU(e!@^ z-?*oJp|yXT6n@fVpUvn70}VhtgMG}g4vLOd2ljl5+IEYoxHG=uTRA3C5|vO{i#;SG z#=XzfIu+}d9WXT=Nm8`;(zOV9jP_`kp5Y*cCU)jWLKP!e)Q) z9cU1oLyh*J-{iJ>y>0qhDYq2{pPoHp?)@^U+ zFo3MOajrpsZrvT!bw_!!Tu$$BRsB;`55I(&VJX#~=s)?ick$Aw@P<5im)EN0?9HvG zJUbns+Q=gg`{nE|$i-x(6HSs^pDrrjF2O_2Ch&(cCjJKabmE|S5mBXJ$N2{&HST6g z#9j?It%&1n;I6#;y%%!F0AJwn0E_KO6p%k}&M&zlMZ>3^VB_-O$6#aZ8clnF826=e zG0gx{-I$o?vY!O1s+8wB^v+Uz#0N7Sj;0TorBKk~@Fkb~^0@#oHTY^3AAQ_S^kA_o z=Oq$Gem%DpNJ(|Td2o)=4;9yHclJ^5avl*camnKa?!G7AC!)*fIOMIW^56tD6#q=H ztspChlvA1p(Fcy^9wHGMfx6K3?VGHK&^^iCcpPBlwDnh13 zD4WR8sOQ5W0~t#}ng00rA5|VGm`*3Ew^Ubl60rqiH7n#p>o}R&-66% zmXkh8&L}eQu%rqAT=r`9FXFbSLa^}N&nw@I=gNrf^!kbG%z0{!ZK>{5q82>K}Vi-TOk@ zFOfl+mZM>Z#GyJm<7B9u#o!-|Xwfo$Bn{@vgVaxjF)NK49*&zjarVzgF%2lVa*>;SrJ?zno5@9dGs~~-H?6kxsGcHqwo1J_cFJ?U>Q4(cP0AK zWd?Ul>4@U18Td}X*#Y>1njwWL3ItR9{Ssmt1Ae|1i%B0oLBKf$K} z_!OLq&y8{T6e@fQOnjc0bqYRFdV)J-{xRiS00dKxobMuJPLz;+_(Dw&I2_WI?IS`U z1?I%{7)M|yWPQLf1`WddV=0}3k>#8^GRSKsA>ubk(6cC(o<;rXsoRbLnV2WSJ!p;f}r6eX*TnIlp3E?Jd2UR zvI;=;hZ95DWB)QjQcS7@;PJuW9k>%-I-hiU$Y6d_?!l0RHQa$&FMo}0moKN8Tq>N}hX zHPl_r86Kng@2Jko>hn^TN1|Z{GYg%ug@Xm(t8&c3oY=yTFXCXXlFgA|wb;V_s!(<+ z78b-7ZdHY{;j?gAY~fE-VUgKqQEcIDs<7NFERQYxzABsrh_hHBHMVfJ6zWYNJic{x4&>fMVUhGmB1Ca zO$DAdx85Cj4Qr{fi-R*b!bW47V3u5?E@D&wH z@ZROB|0>yg7DP`vCn?~V!_K&&j=PY;HS`r~@*9s)@@4hwIMh>~H7aV)Pvr-YJQ^=f z3#nsuUMhTpH34E<2aEyoIx2fOg2K|KK!*SYpdU6s!;HTh??NchzUz6*O^KbHc`$rC zu73#d9UdKY+Y|+GNPmDW0x-NNAM{dhy}uUDCDxY;=(81Q?^FZYZUyZPj1cI(0v*M~ z>}mxl#e~8s%ZamcN3TKq?AKI^GN3WT zFFDYGen74j@xD7wyK1-b zUe2u(XV+uQ0wpiXN{W;6FH2S18zf7>aRk)FAD{pZnDvDyf*hag!C&>`g?u;dqCbW% zVC+W|#TEi{9&90Zqb~od?X(ai9`H7K>pdx4I98C@cKwex6V%X&2-m)l~9XWOaMzPArt5}hZ@8P(Yi2z!1){i zH<{}@T(S|vy}_H|uD=8HH=6END(f301LmKa6UY9&n5MYX@{VxTf0^LR#(jZ1vL~GE z#;#2tii>aoNuIn)LD{UJ_yENNPtz!qC&xhJeBg;d;Dos}$nbwWe$~B0bw9!!L}1$g zVEjr&eFGSB{Ia)m{4q^Ae*7Ujj6a9>CAd9_(twi?CBjV#)V&H+H9*A?A#z?HHn5Gx zQbFw(xR}CZ$M68nRTH~Ses}BFr}Rxcru$rU1lnJVZu9jdVDB)o-)wdT&CIvUn@=R<~7=lmJt>9F*emn8J z47a)owXkg3?gTA?=M$Kgd_56(uA&}FF8ljbA&CeUa73ZOr6l+S65MN3y8=(LD2-BI zT}A@SE)O&(C(X0)6T!Yh2_;qI*O#16;`aw9)LMQHsoN&_M9<`eLjd(qq&I|QslgZ1 zzfD%1e=ST|_h0HUPPAGs(Sl3RIi;zwUAnS&&iTi{NxU7Ai^jbVOD`c7;*Y2G&m!J|_a*PatdU>T7Cw+ylF&OZ^I&e^I*{~mp1voq@yOaVcS(nq z)Rvo_+n#b~3g--7OC2b=^8%ON;hMH%U1Fr|<&#$ghNrqq+Rd8lt$F(4@Sr=>^8$+! z^6+P4UqPWO;7(8#I#OKrg}W&!2~2+Dk~l_pASdweHO<1g1bA6`76g*Ng^dn3-UckbO$f7t zebWYj6i39{an6{GEu}^-UUK!7$*$~Ied(_HojK^iUGl2d{$6s#F-@lLDEHX;#5UKz zGFdA*P0K!X=LC8iwZEIJB^_vn&jPNp@QAJZsC^_1huMej`ui1rV&Wh7ZDert^kUqw z5Pcj^oP9%!Urwj!ThYf%vH9og@49r1IE42x&&9&mPh!u%5?@76t|e(B_@G1|KN@j7 z4kG$V@Br8?y90k5ss&S;RoR*1!zOxhC;m1s*Ao9YFKnpx#(#k}ZhcL4X2k8kt6G1D zHv||YI4SzKZuOg{@LV?}Iw15R?~39ELIbJPlVrZxL}PT;;5b`t*Ce6`pM=-ZkK7F$ zkjdcPz)r^W1EHrFqT5kg)=g7z$!1(%STRnwes#6mzeYquyj`?P01xoI$2C$g!1Eq+ zP{6yBfGvD0R{NpgY)}Y(ka^YO=#P6bxyil;3}R?Gcw?Z;Up^(`_#CM4Gx;-4`Mblx zbmBfhZ!fU`tEY&nR=M23P=-dY9eF;bJs9_lQM)T>lF9|57Zh==L%4xm;7^B1!BT4y za{XX~P^-;bXMQ5?t&wk?v#q)4Qa#bp2N8d6EzLU#QmcwXknbJxaK%~ulaPjqV~ z_`|K;bV0Q(W!r$`ZEEn`2DhIRXZA5XjsOrRonqA8=fP6W-eYfA4~n|8yS!^P{m08l zf2d#=di3w%uk=70yA!tE1!;W0LDNUNvtRZua-&0!7HOjofIE9f-QP0XfMa_zp6sQ3 zb7!~16$47~XE5xQgG?U49w+`HepTM(wc)_$`wV}v+?#~PV$JN>2ONBZR2;7sC`{U@ zQEI7~2lYKTZQSX?N1i1b*g0e4GCfxnlDU)DhCf zSlpHUdWT2HpAy>R8!A1U1Wz$8b?t#{4C#cJtU#t5d&4y0(0M6((-vej?p~L6;s=c3 zUyc`lR(m%=gfgIQ+n;^+40}BVj4(2GE=|83@5jQwf~Qe@eR9t}DdeFwa2?LD9iHqI z;|fa+Ps@I(E)0+1voVN#{SA&X_I?fT{oyJrtC{_;U(tu9_c~AZZtqHW9~IBVLcqcG zAZ{`5oI@gVKC)6E-|?D;(6PJjv+zW>-X05I<`ZAXdB>^w%szlqY6Lo6M~Wx=9s8!+ zG0N~Dm$91U>UG;TL316%3AvGbUuo@0&)1bJ;xL^1^r={VGE8IkW zn&A78w{9m#uKoFM``xe4puLZSIfu zhRaZUp8SyY&-%BxBAlBs~y;`U#Kj_!ANk4RExeG|a`D!G<#hzD2+{dipE)K2Pt! z74h2o_d%~s>Tw0S^T>xm%;_Z($SGoDeq)4R1$5ZJ<2rbRfxl+82M0}xB*B!?9!hq> zNfNGnN$`+^FJ%IGH>3W&1WenMT)1yI;irnJ@aVk*ziZWbmfAW_{gd^NqlAxL$zN^& zQt^*o-HTH7wSF_ol#f&2Q&cf*6vMCC^2wktRcmigU2&p467eO#X?u5S=x&Tf?Kco@ zS+LZ2r|{QUp0i@seekwH5boCCl?d!lLB2uw!X_pQ+72wRc!TX!Bkjd~OI*pM#5nry~n8{Rd74wps+iw}*4X@=XmcRn?s+|DGzJ&+=_3@3amF zP$=w{oQooN?+rdX7_o+kZ*I_%0!TQH;7Sd?+F-iDC_<;ujdnfD2IB0Mgb&XSU{>+! zNW?m6Ie420;=-Vf+Zi@Ou5N5+a;fUNspA;{LC^TzjF}O-h{-1B?k{!ghs8wruJ}V& zq|0yJhl1c@3{xoMtep8(xX>s;jXA4YN%RLwk?bN5TZq3F&2 zR8R0`RE87uuN4j#69*uHx2N#Nl+%Nwl^;erzm{Ss={Y!_>0e=raJgpBa%jNeE`Iy_ zA>aq*MR2-YlY@OL2cyRd^;rYU62iFHZv#8(J(%zezEtx^R0hH5g>oSH8>mqz%0&^T z`zNtQL&0~ds6LY5yM%@!G|N25X8hwz)C})$5AGGKt#u#zK%d?bW}S$`jVx%D*7``2 zZ+5iD%$Odj+*t2ZEfu!#&rp~}97Cg6hw&K+u~&ML?4Zrc$!v2zW~md>!?kdAFyVgPUeE$>KsJycU&de_3V&j3)hlZq@B#8 z3kx>iSl(x`4CHMzV%xw|7VVZcuBdfv*_oWcp8N83n`;^J z{hnl}nVA3utJ2Iw!ztr|A-^eV#>i%MfDU*Yum0~~9a!gwrJ=R{-`rp_Bm8g&J|ab8 zFZ>cC<`m=Fj#&XXP0^!k7HWdhU8D?*A<)qci9H3TN=zGsY$sYqI`Ixh-5)u5JQ=bi zZK;O$n#4M4-E=L0KX=h1?_BURm>sn24&NFrke{k`=A{yT#BmFIhq+ht=N*NAEK+cr zn!)(5R6LNMrqX_<^YA5D{!zSG@6<98&&)fjWwvYT%i{yf+laLO* zhINfkL8l@P$2B7ayx`xk>1y>U=|TL7H%1cC8QcWeSMXjm0X?=mJ)^& zQMZVK^G$7zsIx>UWlCFdH7{ynzFQ? zjR}7OjY16TG!%Rc@(^NWEI(=+DRM*8_rvspzYl%}$BPXfMxg>@^~PCMLO(}a8S6>` zf^Xgewxe88RABE%UhokG-5Lx?Rh~uEI2(<0h#%`S59<8jUDn^DbwY40q+YND1;&`i z1g}wvA)wGzh=xCo4p7OD4Rfl|(xM;st%QAU&CS6>5bBt@DY*1Z2UgdiH296=fqNhy znEpi7nGULEuCP~GAE#)k?~34sN$XTp0P|5kFloq%Y$SKddsqpA0PL900Ei{{Eq=+o zV;gMWmVX^y1jw?^DTM;DRx7bpBZ5u#0tQKYHf9y%foBT04TI2Fk>yH*v-7ka_NN!Y zi4j*3NiW-!m7^H;*q8@pHtDOsTT8`%?akxbXN z7IrM#L}L@8!C!$L%@)|Di_lwv1NsocB?+hyadZHt-UCsK9R>iQrQz>0q08KK4etrC zL-dq`cjXbqVzXhfc~b&fQt(;XxF1aK2C?|A6avaQ(vrJ@Qwxm05^@Ko&Kv~1$w}h) zA@-`rA{)mjE&H9CkXoJGR@HuNwmnm|9n-Jv#oy6(2bh?Br&3zrx^XIQs>`8AX#=bCg4S3&%bHuhh)*N(~6tw(O; zZD?0QuHV{?Xm~L4IuVhJr(=2g%do-Le>1o)#}#l5CXV{&IMUdkeSb*pFtkfVyB1W3 zPF825a%l-@U)dkY)emkh$dD#(_Wtm06-|A8wOFQHmsYjw@ws|v)6x3fb;@Q zA2fTAz3~$y@rDXo|GBrmE2YzV9ireki{H8oG5I8Dd3~3&)9OMJ41$8Wh&_W^-N1V? z;<*!HH0b>XX!85>-0a=HOQnZ%c3if@NBu4l+4Z);_??4_M>?(lyjo$4CLbXxK0|LI zVt@9d(`tun_Ggb`{;hkFw{L4g2VkoG9)AT1rxN@TrI&*?&^bn{pq`qi!@%+ezU4XzgOBUDB^jR; z;w={)U!9f|B;z~#q?fglF3-f5ai6e2ACt;Ua(O>hffwuie*&8jc}r(o;CSM<%u6H$-gAzrOQn^kOombl<+1@=4c0z~I#)@yh-9?tD!4 zPUnrGukoi5_?VHI?F@d594ekpNSkPZs(k%PvoelpA8UGo4sie>N(RPt*-4;}JNjyU zDQvR&MZpL00zV;9KLaFbk-2;0EHEm+vD4d;4_f~J|Nb9PVBUwS(d-XYSYU)V8{wTs z_;VxNW`z5Uu-6E`Ho~z6yz`B4tr6Z}g!dSsiRxA({j3pwZiKxbsTv23^lL`iYk++c(c7~yY?@JS==GQziw@b5-w~g>qBOLm!MgE;-gqIoN5+ht;gkB^3p%Fe} zgijjb4kPR_!Y_<4@jV59j1iu1ggHjI#0blcP+odKlRp^g7mTpm2z!n2q!A8%U%?+| zgy$RKY$Gf%!g3?5Gs1g}@DU??$_V!u;d@5-xe+EB>m$_&Uo-e^hY|kP2p=%Q4Mw=y z2$vXPt`SZ*!ZVC8*$CfzQ;nzF2p==T79-qXgylx)F~V6!c%BiCGQvb7{NRX!d(a49 zFv33=;m?imhelX#giDODyH~Z(G14k!u~h%kvLOKpGfaI%r8D}4X8q59rOM6p|F!Uh zg4=3@CR{3&vHuFzeb2X8yKu3$*jrJuxD4v*>XM4v%WAIht)5qrbCDo_q5jWM(e}Ge4U0LDn2PY=)KfRrm!d+&YHtjer;#y~MWldReDTh_I+F7xxx~c|apYHr#*##A=E4&rOl@+xD z&DND3(Xn|=YPW&Y;Iwn&i_;i z^+;Fen`o7mt-z_QRDj@@b4@wuib<=itXRF$Sqzv&%vrVkW=yWdvT&)#?Ti|eo;n7EqdnkgDWalmzJ%) zpcczaSfXrYS&b!HNp)yqiRr2>si~;;7B8<9zL-=yWvX)}Ag=-@71cobw&F@(S*>#g zP`RaS-I}VJQfG0^O5ZB5fDBmSqi{f5qYl{ASL-Ej)_Q9&Vp$BR8ifx$eXGmXRx8&2 zPRPYxXJuJ&t-);;3(~8qkStqUT;i=kzQO5CNFi)SF_|@S6v2}B(q7mVy9%9 z^Y`8L$DzGuTtcAFP zfI*CS{|9mdE;+NXux~obyiT%-F)tE3W*X)n&gce5*@H*U7a`ieOS0V_H?b zPSzx*y|kYX`#J>mz5Hw3i0PFC{ZU z@hWG@ICB7O8GKkRP6)V7pGED}%`)XzJMDdlC3uY@Uyk*V5!;Axn zMeMuqiG%WggpnF}VeG~;5Xt%>7G1BC;@3h#H&LFC!6GUd2B*BL51p zWJ-;Fj2WtbgQbv;61#GRVz%5347oS*(kyDhC<+stBXDg z3(*(W4>T}e4fb#JwHk_t*EuC-512aL>0APlUc4OQ+q;g)s`_g8t@ReKE`fMjVMtj> z1njdVmBqES%p^4EE~RBq^Pn4*m7b4gpfqGB?wxy0T$STjRB#tQX8iK z5Ano)Np-+W6@Y!#l-=s90OJ|NL{NhLrwn5*^wt!Ylr4vjr^*WcqV)T2%f(w=J;{SN zc$;Q4UD13_v#Te?-gIkoLQir^Q&Q8V%~M-uv>gw%_T8@4le=foPR}3tS-Otq52R=9OToAio5#x%|wSqV--#86` zsz^an8Qjcyjo)9TGJcKQt?)=h+=-vmPL&>qcoBZe+f~|$IDc}C`lAsq!p|}z4HKcH zGj^)_gN$^uk%kda%DdZCx!HcLQ9i~fcN*}_bc&H4X_P-^;A1t?+l=|>gji$Yq8H!t`x2;^HzAvRz(+3Ank6)*uWy<)LMMlAyXrD5oX?b&E^Tg&w-DA&fE!}d?mU&xBwvOC- zHoLgIb!00dW7uU&LeKEiTO2LLJ&+f*&1W}X)^bhj#jUkl&hANdG%xB&9mxV;b6(5n zmfY4MTO3=;wqCRq?{RG#*^`Q%TJl;EnbSO)W3+5D4TK|TXy8iSR#dzHOF=QN3vvFa{PyuC|M~wkg`ZcN+gjc zDO++fk4@UdPPz1G-< zVZV2GANf(HVPjZ;Vl(0MyLsOk5ETN5JjZ?FTjeLBnFf|14N5C)Z$@_eKu!Ojd_lLPvSp?w}%<`WAOI@ypXM;IPlCf!=C-uMXw&M4P#t)!NsOmp=0-^S-67r2~+zmS&cTrSpJVBD#>q zYZr3ybe@n~NaNXrTq2z(_To4JRYjFh5w2VK5xQi;8N;*JO*^WCr+Gj9pQIcZ9h}v^9A~<+nYr{wlv;XL_a-xo?Xg0(|A&D0K73k zM(W0-J&BJ{W_#SS-19M>8@Mce-heDcz_X-y8<`gaFPNH}M&|K3%h|$Y+fB@)Mf}4? zvrej~#b2Y${U7nGRm(tM=X$Raz!#mIXjbR*`}(S_;vVS)Z!*ObHlcXSk6}#?tugGN zQme_fwkVH^-rGvXz&6Tp;i(@JwFZg(A?mJunT(gWP|l@?1phwN1-QL<^nOG68h?RTGoTWCCwep@^Dm0W)jyKzxWtH+l zs5Mx!$zrfjX$<3j>@IrDwVbN2mC-$y%W3%~GnE7?$Y@_dIgNPQdl-L(0qOOhvyWgW z_~|?-)X5ltl*;SKa;{x%q}7*ms4|dCrS^QvZ8TD>8|%Yi#C}}*JG~6QLY^Q`kf$Z` zm3L=TzTtk#Qsd_P)!GdOwBg$CP}Sx8$P%b0lgmoR#s?_)2+`jmIaNRUvY{l8N?gmR zq!F~C&}%Ro$?QV1vw!o*++b3?0`T&|LtO`W1vymU%BO;kJn!1vEHR#@exk>a5~3gY z2D0fqTt7M)!?J8z_Eff)$FNrD!}CUCF`2L^v>;V~$bNwjU=JrzJpCoKB74!%SGs8f4Yi9`A6VwU>?GX>-FWdHtZ(` zQ=y%nZ&<#JmOllL#P9MJJBw_ER_H3rp|Wd6GQL+vwgAzVQQXf&ADA=n^*j;34Ovz( z--w5ThCHu|Y1>+koj@Vt09)_Id@}t$(GH|fmA(41Y`;vlKVhU3kS}o+Qi;8QN;(jC z@;Xud7)4weL|g$^Sc+?AXK|v?Tj0!R`#DywOG!Kf-UdPpI}{T<|5zHAp#^q@7F4`Zk8De5%6QRk^q#ky_Evj@Rx> zcT?31M8ilnrAuFi3&i%Zw8BUgfqbffO%<5SiiXle71m75I$6#TJ&VM98vcBa{J-hS zDh_O=?0Z~ieBE@@}MlRUScpTvyNK6T15Rm3g!h*V{@TJnt=c zvR(u9z5$u%UxF<3Fg4&$xa1!JHRh1fCs%PpaSjzX7Ep0Vo-Lo(gmst$)&A{Q0D;8b zE{gU{4Os@UUK%QLG5&X`!Z1&K{mL~D>zy6zU1Khp%dmd16Yyaw+F3<0?LWpZFr(~a zC5JrA8&NJoG9g8~0agXge3B1n!^ZlJwa4*Bvv=6Fsj<#G?jqZGI}Vc_nNGg3C;5bR z$xq#7&^P2hMdOA45xFSVS)JT}f)@mDOvgL_OYAGbyV>UseocDj4t`Dg;i-hA-&6Ds zeogx04t`Dg;eQ*yR)}SAXRqI_04;WgF?^Jj1h{m%1I>W|_CrNEw(+i!L2-!Nj@MDX zk=;JeaHqhM5H&zci*`Ed5K8D7*3#+lk9dJM3G|aK+cs_=U@Q@@xNgTDcYyvJ?OOTV z8U3`%!(i%UU3^x2zvMESh!L{|q~kg8cl#K80mZb4(mnt{9L@et#(+2iME9zet?DEYog`ZaF@DEB zckeNGhZ{HrKTHwepL1x_E19ny_@2; z_4)_sr#y0w8i}M!20(i&UTUOd5fU=m9?o;X-F%vZo<|vi@*IzbQ6|E~H{la4ZrX9o zqS^OWl8-5+>r1T_7XL6QlZ-i@F!DlrH=uU`2fMM=Gb#{RE5PMwV!H;9j0W7wXQz|) z!067Q!9IBRDl~JPBAAsy@1%_lmcrWy!8?p+vCGII?*}J#smC1 z{;!pUR{OSAD=xII8?URb5nLI4kMp_mhbap>2KxqDa6Atn74~^a`{mX8pz!!^pglB~ zcc<)ZAF&M~kl5U@q?cs7J&=-N%oC$UaRYN@Lf&1%FhRJ zw%$MFIWFB?C#-69Gl-*)P9<%I&w1MMXO@#!A6U{3jtlI|Qg)$6^4?Q^ykQBU2&B~t zh$wD#57Fn)tkwq+HLs;4S^$9a9R`X7VS}sBpj#O^aG9yHIsxx!D|9QcYz)0|qkVF;9G7B2 zkf?%BQ@Xm7P#NK<^ICkNfp9bZna1IzlUD0SETiDFOuA$MP#@}4ytgwD5@-D2?8n*Y zZl4=A?v*xTAMWw;6(Yc-^cN|O@X$fA;`a{8)!~<#S48JOR~|dyfliNaoahIdwP?xq zYrdUN(KQ}?~*5mzU#GxrLA;4m30hC{n` z9R_IB=J6i{A{a|G;$&`#RlMjNQe)?V$cVp}?SXGPRLsA~F^^iYD8Zw>;&_?pZ&Eh4 z`|Z9_H$9|u9l$M4>iTB#da(%{$!ev0zr38NUbPU@BAZYrN9`_Ns?+atb(*ZW*Gb_{ zadmR%@UU2=_baK+aX8j{X(iN?!Z6L zJS3uFSw=0|OnC)-6pw;+E&z9pxgCL_mR>JodRjafGuB!Y^Y(c>kB@q_{S3UpQMH0= zx(9s$;4-^L@Fl239|#V)$GRr+?=CkLo2Y|2x=+3Z}5l@pi*|apI*-9YlYF@C)}O9 zu(e@hN9SN|tNY04qrf+5RzAF|A_Uq;iesgW7Gi#)1%VCvu^>$*fY{U_$WLIUK&{d_Ve3_+CE>PRzcrthw5wV zYvn*5;xYCOd+{zzCZ^*E`_FWl%zmz2u@QI-K zh@$qsN@MoCBhPR16r)P3SVE0|CFRN$ze&-h`YcQ2tT&OfE|J@C6FJ4cCDH7_*+?)o zcD&DeB=h;Srm#)fVH%TinjWqGRTZOY9nxy2K5yjDe*`uW0b$}K24dQ$3ZdLpwQuPC{=lCvqg zwBPGW4rfmpuZyMfw<-^)bySnnl*ZI`s`0h60Ihs8+V)mvyF&jZ`E5(|F4bfGv6M@` zy5BGF1_{+Jr0}V>jZ*F@rNFE3uvXQ7s58Ajq42R+G(2}(dVQP1*J}5nQ3fmA`P|$C zrW95|kC)J6Di~{Vs2*4X;4PS*K#C#H^mC**@=R-RGr591QwNd}*aN1|B2^>L^j)N_ z$TMxj&Bca1Vt~4Fqi`bc1brVjl_!ypgI?cDbOw1nR)KL1|J;FdA@G^@cLIWdJX6FA zT}PhjT({Jz$15<7p|}TUUU5mwk+&kx^dztcO~~tU2aHD;I)Wo>@R@!CX%hJf&@c4? zkAu7w^gNyf=8=zqZp7QL50KYm2pDg02I(sJOy5Vs%Su5Fzz~>`*W(EoSMccobc4@y z>mdAvd=qH!F?l}A^d#O~41ll41u$OV@Cf<=pXmtF>&SaS|HO;4dF1tH{fq&aN4g3= z(@&6$7x6{}bfZu9r9Z3JpYQWIf3Y9-L7wS-NH*k|9z=2?&vZHV?Ox>dXZC!azyBD~ ztKbWY{CVVY@1_F0C%J$;?%#xauQ<BGS4hmqI!_0XSP#~4`WX`I!MaXGzzzu8bT|UW z6O5fi973MyNhJCO<^^iJuX0x8DK2IuaH(bxs1GfjrZamth0)CeV#Y+?Nyd36*~m z^i7qY2feEDOby?Y@;RXQt31h5&hs3cI1#Np3 z^9b9R4k0n0>33BAb<2PDPt-)&y;ZyoM*}y2gcnny?}HEZSQ=3TRdP8 z*is9Yn{3J^g`Nd|z@)uYb7iSbYru^CN@(c-N&44PH$p`3K?wtf6aty)6gGvc!q#w8*cNt%z2RVZA{+`w!?AEYoCq(5 zY1%YhHEo@4nzl_lr@hm`>51vkbaZ-Q+H%%dcLbP9bepqK0D#fV-upr1DM(}RA- z(9WA6SI~%>)htKo;mN_*j#XKVs3ISG#8zlpNq}K=V0jn(a(Pa^`pt2 literal 0 HcmV?d00001 diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index b1f861e..3742533 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -1,3 +1,4 @@ +from loguru import logger # ****************************************************************************** # MIT License # @@ -15,33 +16,56 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # ****************************************************************************** -cpdef cpredict_merge_gaps(docs, sentencizer_fun): +cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): cdef list guesses - cdef int s - cdef int t guesses = [] - for doc in docs: + logger.debug(f"cpredict_merge_gaps called: docs={len(docs)}, max_sentence_length={max_sentence_length}") + for doc_idx, doc in enumerate(docs): if len(doc) == 0: guesses.append([]) continue doc_guesses = [False] * len(doc) - sentence_spans = sentencizer_fun(doc.text) - s = 0 + orig_spans = sentencizer_fun(doc.text) + logger.debug(f"[doc {doc_idx}] {len(orig_spans)} spans detected: {[ (span.begin, span.end) for span in orig_spans ]}") t = 0 - while s < len(sentence_spans) and t < len(doc): - span = sentence_spans[s] + s = 0 + sentence_start_t = None + sentence_start_idx = None + sentence_len = 0 + marked_this_span = False + while t < len(doc): token = doc[t] + # Advance to next span if needed + while s < len(orig_spans) and token.idx >= orig_spans[s].end: + s += 1 + marked_this_span = False + if s >= len(orig_spans): + break + span = orig_spans[s] + # Only process tokens within the span + if token.idx < span.begin or token.idx >= span.end: + t += 1 + continue if len(token.text.strip()) == 0: t += 1 continue - if token.idx <= span.begin < token.idx + len(token): + # Mark the first non-whitespace token of the span as sentence start + if not marked_this_span: doc_guesses[t] = True - t += 1 - s += 1 - elif token.idx + len(token) <= span.begin: - t += 1 - else: - s += 1 + logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span start)") + sentence_start_t = t + sentence_start_idx = token.idx + sentence_len = 0 + marked_this_span = True + sentence_len = token.idx + len(token.text) - sentence_start_idx + if max_sentence_length is not None and sentence_len > max_sentence_length: + doc_guesses[t] = True + logger.debug(f"[doc {doc_idx}] Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") + sentence_start_t = t + sentence_start_idx = token.idx + sentence_len = 0 + t += 1 + logger.debug(f"[doc {doc_idx}] Sentence start guesses: {[i for i, v in enumerate(doc_guesses) if v]}") guesses.append(doc_guesses) return guesses diff --git a/tests/test_merge_gaps_max_length.py b/tests/test_merge_gaps_max_length.py new file mode 100644 index 0000000..58c58b1 --- /dev/null +++ b/tests/test_merge_gaps_max_length.py @@ -0,0 +1,85 @@ +import sys +from loguru import logger +logger.remove() +logger.add(sys.stderr, level="DEBUG") + +import pytest +import spacy +from PyRuSH.StaticSentencizerFun import cpredict_merge_gaps + +def dummy_sentencizer(text): + # Dummy sentencizer: splits on periods and newlines + spans = [] + start = 0 + split=False + for i, c in enumerate(text): + if split: + spans.append(type('Span', (), {'begin': start, 'end': i+1})()) + start = i+1 + split=False + if c in '.\n': + split=True + if start < len(text): + spans.append(type('Span', (), {'begin': start, 'end': len(text)})()) + return spans + + +def dummy_sentencizer2(text): + # Dummy sentencizer: splits on periods and newlines + spans = [] + start = 0 + for i, c in enumerate(text): + if c in '.\n': + spans.append(type('Span', (), {'begin': start, 'end': i+1})()) + start = i+1 + if start < len(text): + spans.append(type('Span', (), {'begin': start, 'end': len(text)})()) + return spans + +def test_merge_gaps_basic(): + nlp = spacy.blank('en') + doc = nlp("This is a sentence. This is another one.") + spans = dummy_sentencizer(doc.text) + print("dummy_sentencizer spans:", [(span.begin, span.end, doc.text[span.begin:span.end]) for span in spans]) + print("Tokens:") + for i, token in enumerate(doc): + print(f" idx={i}, text='{token.text}', token.idx={token.idx}") + guesses = cpredict_merge_gaps([doc], dummy_sentencizer) + print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) + print("guesses:", guesses[0]) + assert guesses[0].count(True) == 2 + +def test_merge_gaps_basic2(): + nlp = spacy.blank('en') + doc = nlp("This is a sentence. This is another one.") + spans = dummy_sentencizer2(doc.text) + print("dummy_sentencizer spans:", [(span.begin, span.end, doc.text[span.begin:span.end]) for span in spans]) + print("Tokens:") + for i, token in enumerate(doc): + print(f" idx={i}, text='{token.text}', token.idx={token.idx}") + guesses = cpredict_merge_gaps([doc], dummy_sentencizer2) + print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) + print("guesses:", guesses[0]) + assert guesses[0].count(True) == 2 + + +def test_merge_gaps_max_length(): + nlp = spacy.blank('en') + doc = nlp("A very long sentence that should be split at whitespace before the max length is reached.") + max_len = 20 + spans = dummy_sentencizer(doc.text) + print("dummy_sentencizer spans:", [(span.begin, span.end, doc.text[span.begin:span.end]) for span in spans]) + guesses = cpredict_merge_gaps([doc], dummy_sentencizer, max_sentence_length=max_len) + print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) + # Should split at least once + assert guesses[0].count(True) > 1 + +def test_merge_gaps_whitespace_edge(): + nlp = spacy.blank('en') + doc = nlp("First sentence. Second sentence after spaces.\nThird sentence after newline.") + spans = dummy_sentencizer(doc.text) + print("dummy_sentencizer spans:", [(span.begin, span.end, doc.text[span.begin:span.end]) for span in spans]) + guesses = cpredict_merge_gaps([doc], dummy_sentencizer, max_sentence_length=15) + print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) + # Should split at whitespace/newline before max length + assert guesses[0].count(True) >= 3 From 5cc3f89dc3b612990132856f2cc3ac3f271fe9bc Mon Sep 17 00:00:00 2001 From: jianlins Date: Wed, 27 Aug 2025 21:00:46 -0600 Subject: [PATCH 096/126] Add max_sentence_length parameter to cpredict_split_gaps and implement related tests --- .../StaticSentencizerFun.cp310-win_amd64.pyd | Bin 64512 -> 68096 bytes PyRuSH/StaticSentencizerFun.pyx | 44 ++++-- tests/test_cpredict_split_gaps.py | 141 ++++++++++++++++++ 3 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 tests/test_cpredict_split_gaps.py diff --git a/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd b/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd index 2920bbd8d87379b6a42f84a0e7303b66760852c5..567357204246900897187832d0ce615af1893968 100644 GIT binary patch literal 68096 zcmd?Sdwf*Y)&D;M2BHurUPjS^MhzB46pbi2QZth18J#FzQ0g^SMW{E#Ohp9@P0}(R z$MUplwOUK7R@+)HtrtY038)0TfZ_#fRjjR@anz!%fZ96W_h;=hlS!a``aIv)@AduT z$LmGTIeV|Y_S$Q&z1G@mU#98w1)=_-P-p=E3x!Z<5l{WAx4-|*pNdduyFC_f7kYS` z=l5Jxk$is7Q>IP7qGra-%PyRG?j<$noqOq}m$lS<(j! zo=0uI+tL?bd-t^M`95Uhuctp@&%1)>8PnYN&+Yr9>E~Hp#gOO=ubRRAw%b4SC@E81>iJ?!wY|5}Z1>(T6dGtrJY#k^$Rug;R+QWTYkHg zNuPy6vnbQe^L!N^QxV#%XH7+@vsj6~>nlPHV4d_)C^Ssp|KeZuhX&e{yCxJ`7|_*| z*zCY^_TKT_*|J_Qc*B{SIKT;|5=~Sn_$24`UY^jQmKWJ`c!jD&~vbvRpXiJws z8t?TwI#b&x{Ry*IheAuEbX{)ZA)>8n?;#bTq$p_2dVYMUH$J=#DXHC){)k()s|Yo9 z{HbM|L}5T}E<+*DAAVd_MJONVn|v@R+V-!lNNod>LNsSpK3j-RQ>nJig;bPU>x%H6-!wmWIAuJ) zJ3IM`Vq03?NcxRK6MlWA7Yl!_GZacyCJKu_l?m zV3Lyqiz46keb zkzQuK2?IUP&)`enz4xnl%Vx;TA(Y z_Sec|l#YwE-=@~a!Z8X(+HcUuipF&X{flmTk}q2A`D+l%Nc(~V`9^}0e(QR*D&1I# z6i3=;S$fmD#=4i1{ufFA-Kh(bK58T3|0C&N@}TFRwFtE^uBajW)Gcre1<{$B;`v8G z*0I%*%&IX)$6Dw}A$vy+P5Os)=lAjo6`ubhn7wq<{J6g=nVyU?p)P+}{`=IjYG|S` zu&HBXr2U&;1wo-pnfV<&e-&dZY2zu*|ZKt zP#Nhs9g~od4Bz?$WRWEv2f4WkWG4MNixNJv88UfV=Nd84H3H@0MLaU^ROxhMeX$+y z?JsPv!RER9Pn|k7vPV0ORZ`VExEJ(h!ap9f6JD#Y^ne$=!ZI(mcJ|wJ&n-wV@-5yo z5a4}5F#aQ(k>I4i$@AZK*2e!5vOKZim`b$K>w{!5w$Q0p8*|s2;Kq%p@OQ!IMz;ob zjpv^YC3CCQ<&5jnpq>v{T_+9n5vH?d*{Hs|RgG{1dsd%;y;|e?BC{HsjSX+KM9PF> zJm{<&P~ma^_o*SK#rJlp60;`%kEtP^IK7=qOk&6D(GZdLGe*($^qdD1{_{;8AGTB{ z+Aay9BMQ+!J#I*0>n8)SivA}F{}U?~7|jNlVn%Z!FFi&xZCx>8XFM+H{RvgP&|69W z7>wo^YIjES4sZrW^9vv_n*ZP-G$o_CqltReY=e9)`{XxWS!XooN+<%Oc{*4znm@o) zB>fd;G#^`J#Dg!=K6Afb2J(>bRy*_8LV}Ak#q-x$kB(BYGNbr1D3R+0fq^><_M{@7 z-yxTz2}ht{?XAiHwiP>f_Lj-AbHD2!*twex`+6hzp?yU#ot2{wHzw{NN!Io+7da|D zBXJI?8s+nZ+~4d3(F!BBvGDsOw;az)*JHdiN~BjbGIx{WolBC2W7_CXB8pP8i$@BP zhs@7guYOE7jJtxfa!pdnlQ%T^ z?=wUj#G`+LD;1ZomWUe}-a(+ISd^aHN||p_Ch2F5OWy(had;9cL|=PEs2Xa=B!h1G7J2BtPUGCP znnwS9FOAFSpHL0II=%5H)Xa^V4Lt@19?|V(;uT){nCkp42{fH#;Sr`;((I!_WMlon z3M?No1m}Ia$rr~PHzfUxx+T()G386aVH6aUdV5R2+y%fDq6ez!xQjKk^jB5@-Lym2 zyV)vjzrRL}`nI*z@F(Oj;4C71Ptw$K%>la#(RDbHB=5YRDD3WJhYoOd5lD?ohmngrt)E;1lZL`@^tRz4qt@`kZ6&0by@(Yvx#hN~J ztdmgt;hzdwx+Ckp-T;cP&3wi2qid4syVvucO#ga=J(d6+!ham(+{fJl_i>Q(9tYz+ zn)4WGU?dz<^Pf7JtD2~*x?wO5=Lyw|2T~Kh=Fmj{rfTyyGa@ycohEwSz&>I`Z++Cj z{JXJkqv`8L|2_2vj6Z->3BSApPib&tH_|-+l-eoIGWp?meyirW$<+9xDpPxS{!C=? zbnMhXnW}xamGil(5M2)-7TnC#BlAV$CS36)np`+bb(1Cb_ClYPv07@{8fST3fCL!>gUrhwv4=x@|e^Uq%2i#JW9 zjzk2C{?nr*HZniKZpx!zMcMo2j+RpGk3GM!7isNO>Hw7-2*mdoEhnTLF3-Z^fD6p<``CNj|Z97GoKcFt@&rFxSC&YU5_|m;|?XU*nE7` zA47PNGn0m?Bvd+I`;TNne=;_93@CdwA1{mK%dOE}yuJNES@X^3T>^=v=4bZk#n&|D z)bJ}%EXX1K5PjhvdW~!1vw+c6K6T&Ggh2DoNc;U?u@Vn@{;S#B8HceAB4^Je?7#rT7 zYU2JUC0TifA&rDI0N`ZhH}@Br^+F&k3t))9L9((jGXMJBdXaV}jbfg;>%E{ji3=Qc=GUeSo)jfG3lxJ_A-b2UJ5?{Zmn|9;Q z5gT*vD;yX~N$C+}&Yrmd8^+)6oC9Ym?( z@S;6mqlLxc^@Yh?sX1+AnHY^?(o$6Vm%2oT)RWNMi+N}Zl2iAY81eQudy~pRjI0+E z6e%(9jNU>U1+kd7-}Y+CF+|G{k?3hOY%2{~X#8?Ru2Fb;;%ik~FL5a)io<0`N|dC< z2pb2fMYeuwGFq}@bI1y~5DX^8-Gao}ghwH}FBcG;O}`2T(}WA`E#QKQ$wZ2jxG>oU zQ;Fvh5Q_`7)#Y6H4BuxfT=)@)ihWnul@?&wGB@2~N}}r5lu@`M-JyiUB2=0FwPIM( z9SeCYM31F17aG_sPlQpk0AgvZ5`%dviPyM9W|*Fo;(4k`ZcCox8pWo{<4ToN?}oxL zl+iF&uxheG56vaLa0G-gM6=Hk+b{CGV`ovq#8&fvps~&CP$sR4Ax)D#Z;o`_w^@$x zN54c#{FK-6&e(ReBtM?P--!7QPm2!Sr~Dil>Kv zxQPc?!eCKI`BURg)!7F4YW9vZTv0bWZdPV6J3c~4Gtm?pF6lpGv*TNT;bgAkjYxaQ z$e$Qkj$I&UE0Umo3ns^C)dq!PF-DKeZujWD--5Mlf_wy=lx=7v^tBGru_*9mz?7W+ z1Zvhf{qp$GPhVqI-*6Cy~%&hD%h?!rsskR-oCe2 zy-HRE^;Q89?_DUybTXN?*^SLPSL~#Eii+Y@_`DUyTuCFv5HIO>8c)7WDFrYgE+*m> z0w+3~Z%jd;EGn51X7Vx)$wJwhFKa&pTKJSfo{e#z@1mU?w+cvB7?&IOSVxphKAQ zL3nj29%&$y%9OEZc31*+vqOQ}>=00o1KGzb@MBDkV z8uD?50!BH`Iy3aC^=oO?wm&d+Z@`jPbAJ(Rt+aWX2yAconudC@ssnz819j8#mN_i_ z#1{>K#pa-VkK|q)Xo@&f84K@Ehv2j@t%=;Y9QED;sFqDM{f)l;)1o8)Y-&dfoh-4s z2yq~<{yGfT{Fk>=i-|?x!Vp_&r@ZXhWE>2w5H;gqbX%dd&cRsAha8L#AOY)f@rOo< ziFn{*eD+rqH!jA6ZeDPnLrf~t z{`0Ex;ds^7xv?8;EgJqL1p>d~U67?vxC3ev{@)2!D&A{Gdxvr=q|hC4@BXA$+l#(W z1H_ru|B8zBj;KC4M73=wU;cOnEIkTj4;Z(V{+kBBiWJW>9OelfB#O;VOZb1tAB!+w zxFt>64KQxYWbyIV_XARAC|`acuF&SeB$lnu9AbIlvuIY)t&o*UcT`uXQ21q#@Kxi! zUT|My+}9L!RJx;{FG9E&#i6`mqDCq^{m3|b{Mh61|A7bLAN?Qt4>&jfQFOWFKAcc= zAH4p@%YE?hA{JMM)JH|{LCnd{JE^DJe!4A9?$OI{{a6z%c@Mr#;?U#IlGIpNaP}4! zOP^tHA0}B&N3bxYR3!fnrTXE^cqII{Eq0R_-&mR2E8(YxHu))qoVC`xZ<8cM9>>K7 zx;oF8`r z1UpX;1p(wO{fFa03qMcD>Cq&$hx(7}3!ACWB`o>z)g^0arQSo zETXzQX_gD~5ZY%Z3`8*woXT1ZewbFG_NuVe%0;oi7TIryKcgq2fHmOZws}37nH|7{TJHAg7VlWc`419 z-v*~!P;LTUV9tI9qGWo+avr@Fl$V(-R*f>qSF;x-U0G+(Mk_P0XWNL$ndnhJ6@Z&E zPyLZeiLKA>BhAwj5qcMVThWDQ3JFY@Vv)$A+w;1yt7og#T=16Jv_Z7T*|f4H<->v5 zicK55WwMmz#eWuc%nK(P>h(tQH>g%C#iyxEpg{K|jkVER7|MS5OY4mnlWgr94x?ezk z_%#aI+}?9L@=g#EcH{*MtlE<|r@3d={BPD&|2IpjT2{9$e9WHruvs$O_ijHPdwVARZw=MX9-H@;?d-W9Xk<4Z z_H5>BQ{HZ8zvzNBJ@(5I%k39+%6@e;iBn9j`p$h%F&bGq9p&b})+a|66AxtJl#M?| z_l~kw^XXH2LF$_H+y|*2IA#bl2!!2-@0PLseP*ZH!bX^$_{mvUF1p+ zRp|*NSySjZta_=h4AmGqC)e$05upG32LF@Xp|1Gr7}_#6#L3Uy?y{DWwe-j$W+7>F z8F#A;euS}(%{0im!R5|#`5is^EV?PX!GGUD#2v(WTZ8bNggL;W4)E};0RAxmEFt}X z7y@iJQdOJ(``i?g5S}jMhVecUZor20JWbQ^ciCB-QwaFm38@)g4hx z%H{heywZ~(9839JdJy1<$RoT%*f$miNyf#0E7jXM3RIrTXS(uxsr&&X{rBZxQ0)Sx z_QQCGQwQ=T!bZ4HW$JTR_ql)Yxh;d)9+dFla)-1h;VhRh0tm8Ca+MTt?w2+g z-Aybf+OA(WKj>H>gWkt@&mhrSh+<@mn5wSNecz#a7*v74`=3kf8bO;&xy&E-)ME2OMye8MyxR)|>YMzO)viET6u}J;MibqBI)ACatos4Ly0TJKk1Ff5P`v z+i>0kISK5<2|})9#P2x=c>$0;I>#)9LFD#!bT^W}l~xOhC>)df!q8b6|MUDzC{>>L z>+$^!`^txx4s+FQWHvybl#|t`{SKYLi#T>)`W3#wG1T(BK#y5=(l2TtT74_XliE?J z@RBrQ{@gfloR&`%q7~m$*5vuFlAciV_<-JV#p%vSOK7@RgmD1N5jN*R z_Qn%lXGPtqpngDY(M=!vR{f%0O<(5{Z|Rk|oW#CUk_RlUchB8sX?--^cMKzelU^ui z1gW!nrJio7ES8k_Mxzq@^!z~%u)7!FFazA!GP-OVVx`h-S;<0IpIz7M^L>4C`fA=& z_o4yg=fuyBpA|pzoU_k3Yx#3M|Fo)5Ir&r$zg=AWI49p0+VBX!f@AO1Jw0!d;Kb}ySUc_Bd2xJ>1^`d%;PZs*}p}uZ5(9F&sosPA=mMj`C8+P z-0+IkutKdq)N9LDwCrPlqwJ8tk$u5(kE*7!U2IOuv6pls(?8nMk2gD3lxbu;^GYu> z_9G1jogm?)0hUgFat0#N`y(Z^HQ66Gxzz_(Y11z7RGYP`KAo?Ed~<51tS3^q#C8naosOq4yQk0dpBp&*Z0gK4r4~ zBJICX8`PS1v@dT32p0l6BU5!86nF;Tsb^w=*G|{Z81B>-oBOrHo z^<^fyhthSL?heRz`_UQTZxWCw8{nU)X@%up+ZX+3ze6=wGd|*rb`&L=41CpjeD;hWHHGBIrnkj8o462sO z|A~l}Dz8@MU!ihl@;&=M`_g}KIsM|P-*lu`_gr@7MJjlEZKrCS_tZij=bYV3mL=7m z|5QV|eyEq8M7zAoQ{$5nYAu>K)_v%ur=Xmst&jV>&RQ4u|CvlTuZjC_Cet&z8~sm{ z=~5kA7Ial!wH7*v;LrrRg1y}te*5qsK zWYjBO`bML;$$wf*7A^U2Q`d__i}AU8cBVLIrUJlxgkx^S(AqbY$B=9$`XH-cI_LIi zhocF8z2K#9cQg`tA_!kPZ1sJE(N?3X3+;8nf|o>TSES=+YtqR~&R^1c+o0V2O2`~l z;r)UQn}wG1ytZWnytdCPuRMl9fBA0CoBDU_(CH%Tk&kt59yHX5&m&QDM_d#aokYBt%ZO2ePHCzD3IEo z9-3JFNo2}1h3KxrKo~`(*?+^A@rT&vMN!^+qG1ayD_PsDv(P9qZBO1wLSlQ`KHG^m zMF$trSVd|4?t2pBeCS$)Lyp5 z<#ZS7D?1#qTo86RBAI@;)>aa=ujR>FTQ0O!svBK$?=YX8kGO+a4}9nwt~tNC9BOKQhiy>D48y?qCA;ma(KRU2lLRh}K+gE1ovDtz=jW=~&2pX?3m zUA#RgR+*C9mrN-7ZHLi@hGwh}lg z*%=;Maws2#=;_uKI%k`&CiA@2Ui$i4qze_+9m!0rQh^tXi)9d({rcfbl1Y!W`zivj zXY|#avs(qkd6Bv|{4dzoIgd{psxhnNM(a5*wkDDpTp2*RIfMD(SSeBbV0Cs`S-IAr9QlaH3#?W38eVT-YvmTxeo0UEyA-wcHrJ@yptP9S zcPelfMWr03puLi@2}|bEgKW6D#O;i>1rqea?m!h;^uZ9`W8vk{f@=}?pLZK=1_KiJ zpSF!Q6TT%hcFyEkT_wN(IR$XH!R9`dXYd5^`@Jcr4K`=?Qx#^j0KW-viLbHv8A?b9 z)QQR1=168|8k+R)s$EAyalusQ@7TAH^~b5CYxs942tUMeff~EZJjr{ZQkz|pnJcA& zK3=6yHp6Y9g$^@n9L`Z9pP2oW;@)xA@CtL3w$8eEK=0-?sE*k1n~Xc3!Yz1IAGwj! z7XKvqRSLS(77h01Hi*M94Ax#0@i*wG#Y2O^(POSY1~lQn$)4(yjbVwxie&7ck?TLP zwJI|h8kmdr@+8Krw}T5#Zued_(C&H(Qwa=`-dz=I1A0z^Gy7*7aiqh z7$YD8RoB3v6dLv&L0_@a6{66oA3RXobLJ+mjYcacv~gUc3s$NRw&3pT)(wt(4{hrZ zdBOtu?QVnz8~rvJ)#1(HQEhHfZ*0y!+iTa;#ZD&y9_cV|FZvWka-YH(`DKji=wP53 zLJ9xqt(~lBDx+mRv=k=*Y;2>Scd`1N94r#0vG9{L5UX#)fCKE|0f3Z-!DZJJhXJdU zHVm2ow^hpaH$`pFFc?NT(PX1LI<rnkyo4iPuU-6$&Lqy z33tx~eF|BoOB}CzB=$&^iA`8wyy-Ov-X!_OtAjx>;YfQc4FV`0P7yZWVJjT`&$QaYKcIxQ2qw4h zY6dy{0A+d)!O=&!A$YEE_~Emy&hS80q*u zUP~c*4n+bHF#T6pQ52(p{#};me1ayi^^x`;0o_C8yk%RZa+UGdHTY>!SsKKfw_e~@ zE8v9VMhciroMMTA12tEvJ?i+ulz`a8x4Nw4=~j|qP2WchV6)kYqpWOcNVbrrP!re) zAO(Z!91`us6&gFnyLJW#?$G-KRCP&hk2TDWjdFox#qB(FvF~>>fa@(Rr8risYQL6H zNJFLN(%zfEuOq!!%W%~a3m-^7I7J~DC*fPb6s_^_cdqCq<7kQBQJItRKlB%wrQPW_ zP)>5afx(GG^qLV>ZdxXfcXIt#%9TXbGs$rsu|x<>L=ja!rGrZ8!0H+C&=GU#SCQe(}eKL$`BrzH_J zfz3_+M4AdEnD3w9@;7NhoawE@?hEYyB<69HA`^-zs4DCqxTPfS74^CXNYRkuo)j1B zD@HhMZ_1B85NW3iZ{tHxZG@lY)crkL`L<7 zxP@{T=nYv4<<8&@1P2QNo=2&vYUi!Op~T!+!K>EXsNx$0<#u<4KgFph3MM8s_+Kvh z25?FLZ@JSQZLjstJvYcb!sY7Xjk2=MLGJD@_cn5ubdVPGz@}W-WqiADM%~8T`z9?T zE>u>!yv&O(<9KDvFV9fmgvO6m#&^mxK=xyowVSdQ^kjXSYj+tH%J@Nf#zihe^PnZ) zFV8s1Wvo`le-$%KHa^WA>hgZ8JXQdU?ff)1+-3ZzPsXMkS0;((d1RFF?HtOA=kM8b z#Chep8~wM`EoYHs3-3#A=1tvqrp5S19NGiyf5|P54*X1k|Ine^NmQK7yLCfuo=g3b z3B!_;im4^}&tCv}e^lNHTjnJS(Vtun9t)e$66RLB%+RA-i`NfRf}9SqvTkFM6wv)Z z=#DR<>$S2o^fDC>26Fk5PvKJ4(|i3mPg>u&+-nBcdzE{+!S%jm?*W7Bedoz-q{*e%0$8KUZ)zG_&f)(i*K~a$x8R?dCdUfOp0Bo^jeRx2@ zG(~QvNnpymcvz4-t5@nkrS|FgPf)WSmT88eV8Pl2Q<*W~(Y#`r_tnlKv#)eKPM*S$ z&6d|&B7SUnPu5N`q`f6*p5cU}G*=6Btp-yTM`{iLS_M$vsAGZ*laKO@1C-I{H zCH$vZ`kPM$@w3iezB(BS>HqD)v-tg3+4sL#-~RL$R8Y3Q{pn+TyY=m>aT|F3D(l;a zx!3=9>)WSl#Q(eX?Ii~|KkF6aJ?pr^dTsQZD(KdA|MT_j$26jY_3a<%{eQ8(eG%o{ z`u2&!;MTWqkV5HM-#(#Ok4AJ2OD12_+HNL#q|&v%y%ZneU#)NNpr)0rZ~urf5MShp z0hy{#`N%Fh8lCz-Ti^ckG>ZR6>)Y?h+mh>V>)XT2E1#vxV^scc*SAliZ6*54*0;Zp z)c!~7+sU+7Q(WJUpThF?8nwMp(OCB1oV1&|Qg2FFgZi zd)E3m_l>5faW&`%tbDIw*_+jGi}x_sIA5#SM1J(AS%1cH?~-xHtxmf`wW;kre{>cH zYw3@uDR;p@s*6Qet8V}AasRov{}fY}(Dl-3BZ!=>0-)y7U~e0*`bvI26UY zYFDsoerZ@`>XeiFg-x+aM^3i~54s^~Yh2vVA>wSA+K4(?#cxxwH7cfK(Ei6PU9%bisUicMR?%4PcIdU*a=9}7 z=+(cK!>|%2X9w+}6b%|ygoV=<+?++4XwcWY2p%VGhzzLHvu%5-43g`L9b=>r3?bOm z>k_U0Rz5s{+Io8JQA?cg*WYl`>^qS&a{wfOF!J~X& zOWhLk7fi$Cz%+Z@0LvLAx~gW z;Zodx*$rsU7@^omU38u(_yePwk2)aTs4ku~N>j+4G@7k;x=H@4EvIH5G%-+pEo0np zVAn$UM}Q+PUp%M|nSTp>)3z9H{C-h4em@6~Iy$d9o9?&@>!6MFKnLBi0Q}k0OIRcA zpIWY6AO_}JpcBkL-8$x(hB-37OJ#aO8^3jpbK4!EoqoB|dNX$rt0bHo6}iD3lgAiK z&7c3%L^H(gPivz`ba!x?>^hfO6}vY#!2hD>^j=udi$BXl9ptH zK&I+6*vn4U?CP(fyALizAFzHA{xi zmg|xT4AM(mbWe^2E_Wnb`e)u#YsiS7S-zQ5``@;{pELXukks|;=M4XvZ|Cp+@;-Z& zzk82M{eS20-mi}QcmD44dz%!_@9Xc5I7bElr~a<&X3^ihQ}6!^fA<Tw!qj z?t25lYVdcDZzewzJwfU6cV9UBU;DdzscB{Y?k^bz@kO^vDl%0~RFv(G z{g3_KPtT(Gf8y_6|EZLVY_R#e8keQY*YJ@&j>`Y7zxxf^R-(Vm-~Cx!1^?UqoSCZe zKNErx{ouoM&()vmwS9o~__z@LFEP>gI4QnDEF$XJ(ulhMMBr_o7gEE#bokHu@WWsG z6%Um5Xt@Y&PmMX7t54Rnyhl&$q@tPV1vJHaq87$*pu)k}ZyRKy_bO96JLN!NS4S!$ zU&Fn!Vq)N*d|UmY#iof$G+n<=BbJE&xk_P|;WupM?IJ70#-q-bqo}yqDrQaL2PI@J zlXVSOCOY1-a8aiapb|=c;vhh|9l(wF%Z+7*>vN!V6hwQUi6KMmnYi-!CN)88{Hgd95$(CRr9hpTWpZXiDbBJS>8tnhOZ@iTjTpNGH>Z}-mq1gx~_GXShjoyx#Ce%ER{ zT($YjbJy|Jv!CAf8(Zkb4=-Sln$m5z-Mq-*qZk?m(M%LMyF7?#YL_-j_*Eu1ndp-y z)G+WiYULuYLLs+M0PH7ZTbLprnuf^n(e|)g?D?dHi<0TPEOeu7d={ojR~w-G)#RrLUsai5U5sf?MWjGv2&ba(|txKB&%SAL?HAHA7;9IShU(yv;7 zk3s*`pf7S=GJk)78~ouflh?>Eo=j!Gsrs?*jrihFD8yv7lbh4AjS!PH&lgJ97A~VDTV$L7qkq1^a^nZO~>k*X)q=v-1-sfVv z&PI0IE^0^*pH5ZrdGA7U!OM*94~b8}9+JKYg|_YAa$)4Lum=b`Z>9Gmk9Nl&?@xZ@ z(eS|}FYe|Y2fp6R;Plz-Eff9@fYRAxH2g=(I?xxeWOtg~JyrILlt+vZoib-x~xtzXv0vnX$iA zayop59+?SePSYdo?lTJ?Phv7N_IGOE9<7kdMKD7ekas_FZ%xag@r|AQfJ`BJ8RX{2 zMjktjP0FiC+@od3c-#6tn5~`Q4|s}X@du{b#fei~TP~2Kh!eX3qzwp1xb(@z^nRo__<8m_D+9=m4Fh>0 zo}o%r;;3TnuaTi!4m06Rw~5`^S-S(04sTz?^(X+e1%Royt8jPrR0pxgoqURl8-Zw> zunBI3TJ}bezb0mKkE%VYs!fnjf)HTmPXo3Pz3((ZNRvd`PbJ0Er4Xq5Gu0^=$FG28 zofRh?rGM9{uJnEeJN9nX>{os3XL`JveUuIltm;AB3+Y;x+6RbJtdpY|)Nt|Aj)y?S zy;d{9o_ix~?^9KiZlQQq-~L#4Z&}qY0s7N=q2IL({ikY1CVD^_`jZ^`y@h_ZL%&NA z{l$hpx?-|wp%0d-cse{t^@tNm@(a=GGTt0+C93Wg1fzq{Mb&iP;m)>7Vt}!op=xKb zgZF0MUkcKvF}x=IEwp2Z=P5h)13+H7M&HQnz;S{P|L~_gRPO@=lpQ`&MT1&)mTI}xYJo9Z)N;71<&A}8sV(oKguH$C2rT;{#_H&gh17pG`}}UI&uw;iTY&!P z>&U9Z+Wp8NmDMr|+1(L*P1-WsiHB0-DYP{*=$CzW7jlgIYp1AA#Aa*-lL#f-vUjJF z-KAzslJ){WF{7<)M4W`A1jKZ#vzAr8$x@lw#=F3w)=*gb(Q0NGL?v8GPa8Y~W^E7I zvmkpX$YvmEFY_dbDnticC1$cG*q#Z>Y_c-uGkwKI-7{1xO7=aA`eFkxHaWKNL_kOeizaDinDWvnjbO39@rvHMximhpZ-Dnp5?}9inQ4 zHSemuVH(5jujdlDalC6+O=77>!Q2L#(8JehMn$n_PuCZ}=L0?r!3RGyL2CL5UuIzZ z@QKQixC8Y{@LrQ0OoTsB727_sBV5eAMkRoJ8xZ|aO!~X?#al%zqhG3+Up0tFA$mC# z5;_S=9j{U)F?;?b!*RUeGgUuv#H`U*K+ID=cJCs_3({+{b4RO~Iw!U56P}1U))Dh5 z;C+dir(%B9)nxHIJXC1JSg9^iTq3645wlWNW~x59**J4t88L^I5%WawUX%TJ|1x53 z6){VMmi=Ua^d;sWD&|*xh(TZvu0HxP9!B-hVmTeQplNoB+rI8@Q4oM~;nJSObh4+zNW4%#zt~f==ET-s(w<6r6 zqWnwk$3DMFRIwN1P{K#AL3GugVs|Fm&&DWne-JQmZ8wT37Gl05M?ie%PHHJptC3%U z0nXv%$1?~^$OSEQYXWCZDKV#di9+3T*_CxX=RGxp&-j+-^Aw+uH^uS17X1<<$||lo zT@81zR@*+>*BPUc8?-R)W%Pp{tRf8Mx6}B>*<*rT*%W(bcAg8vI%x>I^8JB{*t$sj zX)54PtZZ9!uUa3;@C!MiO>Op}9a8O|kgA_O^5-k9W93!5#W%GUvpI8uy1oBEdh1P< zUi{8(k+x3Wn*1r%bt{{Q$KLRBKKU6jF5ZM5-N6`l`> zm@{?LyiyIPd$HGBrh9&yaTv9|Ptvciu6v1dZ*#u}o}u!8z44XIVu{5Y-ZiuTn&)@* zmiOFG#S%hD4UKiR?5+;|s~20%36?@vqHP(Bc{XzW)yx^{I`g|N0xXo@F~NOoGTp}^ zb@1$Z9u#9D{IvYEMC_x;^^2LoXy$mP&Iw+u$fQ;pm`Hle9BbldFL$Wkvh$T~AD|LS zTd|UyD?@xzJGo;9>c6(9MlxIf)A3YC4|}w(^z)4olQgqt9gm5B$U%J}I?0l&{;Nq8 zW<8&^C2xusTh+23VjXFp4`hC2+YlOw>z6G`KdajML?RzE3S>U}Tm^84Wf*QKmPo=Pac5+o@Eh^BizU^} zy7jxFCfHo0kT`DxOK^4#vp4FaM*kgt-y^A3Im`YyE%fVaM16gd5#iyi?{shd6JtoX zmCZ=QxN@A9kuXa@wiNI_H`GqrF&mF2_Tk_A*0;CP#JtrY8H3 z32NfTxs$xMFRN$n?`2Lcc>ULUnPYgo0@x_8KVz%=%CHxE?TRXMH($$UzJ}E7?u~jo zkr{BL*ZZR>Eh*1O*FE7bqe%!WWll*X8W_V5-@A1K}-8o~zwy)i(LD1`bR99kCHqn2L*Y){8 zk00ae%0kteiHpI2%a;ATY}*I*Pw2jFE_!Q- zJLpJ+_5=Tw-SbuJzXPBPOqjzA@bS4`^{e z4>vS?BuDLr*GTNe|Lbd84Q^O&l2wF1F|2-kj-7_0W%wRNiRR;h+{hya*8eS`ft+G$`~|UKvOImx&9E^kHqFt zUR9k5pfl0LBo)E{j^W!@$(&2~5T8{&}~^=UF`^Nfe zUT0Lc45)iUZTk99%=R|aM_h-_Mq%wxQn&<3fxfj#G`CIpvT?@L{5m_NI45H!Ix{B{B;Q_Uhc4bJ-PR~ z6fwE*L}MwZ#uAm;K_iW3M)|~Qnl_;2XdS_OHd|m2Ibx_qmTlYzJrNK>6Pkeez|u#-1jj^oR=X5?hoXY;`01S zJ)pD`rWXY7nEoewbxaTGJrkV{29uZ2F?v~^-$6E?C{R$_XO`UCi1z6X-A^Ff`65!d zpTKo$9K!K3j?x_&DXzEZ$Bm`jW;LSItL`Uf&a<`7b+YKCjt_>;1b5 z?BzIiSe##h?R9J%16+4B%A!vm?n+_AZe&13bAsQ9^>L|$fg|J89*-oB<;*xRcSx*a z;mZI`ML&s0s*lo{@$iasNiK|7-zW^>ZOCoxKpU>|B8_3&_fl9t5W4jLMbv%qUC$58 zzI&uISg-0WpJP1#@=9G36dU`I(fV1W{R?$+Y^-jJN^ynXGCRsCThO+l0vhOGPv|9M@0zlQSk7nY4Yo$O}Nu~86w@)_LO(dS;&)A4i1=(6Kdu^L5y#k zB_17YeGnegfP&qqMU;AP{4tfOD|OSQoU;q<2P55G2}y3f>1}!=|8;BZVJ1H7^2Z4lhkqNUF6XV|j)Z6Q z7;7+BK#cRl=;Rg4WhV1`no;=-R@n&wpOqX-`Qb!0<%>e=d*vY<^K&XtD$Z2 zy@*R_4vI8d1p~6PI5gw-ZKPuSP97DS%x0or1vg@~0dnXWx);&9f;zMaqCRnc>kQXZ z%Spx89M|)sN9r@Kxj98H^t0JzpHYj6{|0`kUQ%@nsPd0YU69Q{1pi+Rp2hDKW#7dy z-pBs9`5umOJg`L5xjy@|Cj4MCK3>~D3#lDhFA}e?NETPV1F1>)+wtpZ@h8zsaGgoP zExg!#$=}9{O&^-NI>{hN#!jwI&G1jGY4BGy_!Dd5erLQZ*AIUq-uBKQe2#`#cWTnc zWpX^0)i09K!SQ&?>)$0K9rp-@*R<@QD0$H-dAa12G~%oJ-A$1PmSxXpf?Kx|*}k!h z?Ut7yy_LkhD@9kw#mG-0hH6lsMfRW-c&!+@=|dwO^GPa^IoHTs)__M8_q*cIXk@N; zWOk;`7MXkXB{S-c`qCVJudC2Mkv=ky_=#?ODd_HTrh4cW#Sor-;NWti4{(z|enU$O zbj}y450(vWubCZr#+}!-QD$qF z#!s!rjIZFXH2+xGzD#US<#dn9w$=f!z+}x`#|SS?I!3$rz6Ft~;Yn6@>Sb$cnCsY& zajeejU_ee0mQpC7I)(31bsr@dtz)gZcpkKLU&u3;DSw&uDtA|CMcJ=(zg>?CEJWV~ zrfqXY%S@Bp?;9=nIBm-zvop0e!Lgax#uk3?yXXF+!KLC>%WcL4egs}+VsEA1Vk;#W zQFdbA?X?Tx1b2ZaxhXw!NGB#Ta)UO(X@83biZ=>hq_~ft<+mRGB>bHd{_F^Gl@~OX zz!^Nn8$YfkiP*!|E@SIQ>SeT9EPR8AX8ltmEp>D<sIZ4g}U-_{Yvtv=VTDOK4WkV=*~SwZXdz;fvU(vhoN=3e8{@2nt8TkYKjgPjfX>P zasP6;c<6RnIw;aU55y)Szq0y!L_~KqG{xR)*|&6mQXl!a1{8GNDUy$b78=5e z)CDv`8^9Lut?4z5Ne}p#xllqHDwdyvx`M&EyKMgYHxrt~sK0w%U-r*GR+OCwsDn0! z%rvepHgn%2ip{(M(pw~C-*JG4^#%-;0Q))3GD|MS)+bT*2n-Gewuzs%mKn&E_&q>( zEEX4TbQCz(MpxUaT(~B zb#_vzn5#N4Xnt8Zh4?}+)np{XtdF=D$fs7C#sYES0(vT@|mqK4a22OmuM+&q8XY$DSbs?OvFSs) zi`(w5G+^*L6J^MYjzI9#svq%R;v>4Qk3G+54*Gv2zspTwDL7_*-QxII=9uG|$IsnA z(bl=Ss4o-j)oseK!_%FN^wLA{V7XWVz}fF65cobZIpLOU1p(RFwfaH#OmqUS4D$Po z5sYqc8Xw5Em6uDqLvnn)re%l&|C&6X)d+JR>`wm$sxRypkBTrpJCV#fy093NORykGA(KQh&A1{&K8nQGcbFB@jSM!J@w8 zXrB=T$co;gCu*}^1GGjS$VjP|ksP#6W{&tMzJz@C;T1qD3H*`h4y=CC-_$f}Q4gkySh$@&J+K8iJ%zUz3&i!?;Hw=#sqsVjZr z6e%@+g|PI4o2$s)QlpjV^8)&a93-F*{*Bi#N>B7e*}!rVyfPA0zeE!!h~a!xWyzXX zx9n}Pt%R3IPXb5TQcQghC3M@t;%X*(p4ly`{XVlT^RB36B)o`EGDgHFnOXer>zhP< zlX}&+M)NnP%c*af*p!KWrC8ryz3MCRgFdzXqKj%&B0Uk^)rS%FWsC?qq=!DH6`AOt zWnbC?lsy{pC{TwRe&giRE zuPIeO*z&#kj59C&6MK{s-lW-Rr!?{y@r>YiEP5^|TYl9`Xa=Niqmmk02`jcS-hsZ= z>ZWLsbCfU2f3d&jS(UEK&hY$s(qQygi)Rrz*2*>F(!N1S*XT%{Z$1}aJ-hAiwyN43 zSaIAH2jzof)XX5Y$bZyg*IQjK3mLHZ~5DCgI3=;+I@gFxAb zUp2&~b8Q-1Y}0H2i`|b_DD=zs)0#wjA^n_4FVLgcNcjWsTWUM@lWTpl1_7}W zuKmR9Ca4`{!#mw(`F)6|&x`xfZJA#u4Uw^JTqD_KzHweFgy8RTi;@BlY-b~|5@~;# zRJL+Ga7}PiC%>a9-UMf{x|LgK#FTVPOj)II?qo$a#X|!lLuD+O2U(9&RWtiC! zDlC>m^|Z?{`=E?T#fh*4xz5Z4_robF{8Pdaka~sR?L`I%sAI&j062RWiqRE zs65MXI*9Zgz4R0F#n>QwUzcqi7-ZXfkiD(TUZ@g5w!H`0dD|W38_R=idk?aov+Uz$ z6{%&4?uh!WWw6!3MoXDyb$wJEFR!z|Ox@6S&Bzb}uki3gR`S{Gczks^Z&H)E(BCJy zcmFaTde%!{*RApDWdgv^S;c;p!mFCDbgN-RM=e#sHB0@COSLYpDy81;QmwaoQorL; zt*ao>Fr+vXcbs)YDdiiUU+RNWViSo)&X*F$kyvDCDRFNQ8XNJX@yGGNWz_IS$C+jG z3hRrCyOdTg@BWFzBG?8|b-3mSg%MYbDrc8F&$Y$+SiD-lMsI5}4L&!Wv(y}K`(q@I zhaYKwpJwyMMf7XkTSd#Y@wU$guwlrrIz*k6iT>KAMA(Ky_LE-BeGa6KUG89)2>o?m zfYb+$+Y_q^#M}9^lZV>c7t0ORtVJ#|BvtdE7?i8zx8ai$X`|mj{TWz5{gVLoc82;! zhdPB#1}%DJK;27ug7=2-1cz{pLwHqRg!dZ4{E1DP`dGZ;)qI1fKFm;$cc@$T4X~NN z>R0`d7DC=cuoWpg$00&PIxOn4`m*SES>Qsf$5GIJ9t;Kj8Yl3Qwbcq@FP$w(*27@W zvZ0!~?Z=v8H9N{V!5xH|3B9k$c7Ln^A?=R@n8rIt+erE(At=6?i`uZ+iz5c4<`{DS z1zY=LpZG{xVm6}Xa2Y%1ZB+p~iRYIt=la;ds@_)?>rFN%IflCFlBr~&kq_q>rL94& zVTzNzRl&8uj1bggBtHOHeg|B`p$7O&_FEQzW1JE;GR@t)F+IcE*;6i%TGO+;Ai4J% zo2(|PoBY>lcieAnMJCu4QCSyZFRRo2s?m=48{`;?@Lzv5FyCX& zSnig}SpIJrlW4mJeg{Q0zoY0UYCi2Q7M)pER137*u@$Hyg#)PVKMKWoL$$}?U{=f| z)X$o0UUi?t@+p3*eki9T)U{>U)Cbthr^cY>)(1Q={rG>=epmfySN#oC@7ll5f6#tc z^b;sl`-??qmK9a|cd+)?YrTZklM)Zq2_+`HWz4I($Dxux*^3E#l%XU_TFwP$ACmsp z>FhEYz=uH$it?TCI_c>3&XR0Qu|quM6FKh)o3UIiWh@Q9${%!Azwqmfa!PaK8R zQt3>TCdkFMG!fmiK9T8Jk6mGTu{_1;Qya=!iR%$kq;Xtw?@mV{a#|ZkQWO5wqVsTe zUsPOi0Ce6=!=dv)qq84$ekfl`0y@OeX^O5I zkzRTlZVpSs=~*)rQo-@*Y$2SD=AH;1otsEsQ*Ry;mq(;0HhbwSCh@7{aRSrQUs;)H zq|Yk)E$JuBVy2(4$yWM_g&O7QC+_9JFBZ;w=X?EF^vLyZIlnDe(K4kiH=yO%w%pY% zN6h;yM2=1|RNVv{Ve59q?!ReozI#;L&b559o#9|kJ{^pkk>8bcn|nMaVhfM*sEvz`3=JnG-7-V{2!x zB{QB$R5*a;mGQ{&D`mTc%>J_STjmR>kNfs-`H~vZ#kkf{**!WOKJgRtgkg`s=l&}F zu0ZM-&QtKp(RkE&c2DB`v1N0<;@v|1<6D+j@-x-`Fy=~jyF|xQV#rPY9Cpp}1J=!y zN%(WPL-1wd(g`-q^1qq?(>S-MJ~3%Z9oYAre#c(&8(8kXA8d-nMmp}&oBu-YW_vbG zY4R^-x>i7l@$5wTqZF+eB>H?7!4? zG6cF71pC<&>}MYhpV-gN{pwJh1oyDW9#E84pQ)-UW&)|dCSxcp3R|>k`Ez8X2D*19 zyN;4hc?2Fi>-|-DhjZiFTb~>sYFSs>ufKBkU)f~m_K%eC?_~J-E!S9hDimf-vD=MTn*t!x;-K=t=NpOB2P1uN0RMLVCsW@jSDb0@e13U)qWV(;?_ zth$x2HGoKn^q5Gh$VXBxRYUpm<*2Mw3j++aH5Pl82%?ya2# zqgMCu2Lo~~t6uQGA%gb#s3HY3uXlHK*tQYY^PtKNvq<}O;7Jk>orW@5E*n0p!C%{z zt#0t2<%qEk3H#yiDRV7Loip0r?XPu|Y5ZP~maH{j!nDc`lj`ZMUVqpWL;rHVd0>3r z9}E4N&voSnut%sr`0^%V`cr6b$^SiuCPrV#!6Fp-ok+s6F9r97R?++N!X zjrd$OY~WzY<_cyo*Yw)NQ55ddv#dX3RVz*m9<`o}8TCOkBi#7M`}{9O_LDIZ|t zE;xYNJ6lf3KCum7pI|4lyOM;D@yFfus9MV-`_>TKG_Y@WvIDOn|2HoGPUSBme?>S6 z(taSVKe%w+v)RRicxx}Dj?9KC#rt($+qe*G-|V#-L>84YdwOQE3P{9!)OFPXAmd`A zLimDa!j~eDuw|#T?ieTigv4w$g+n)wgM^y-yP)V7nBQ7|-DiGy1m!zAukGzr7P`~n zA!KAvrkTc>nzjkLUa4Q|(Cme@EZa}*{}6Ilgs&xC|7eL$TGzc)f8FZrw$v7RbY1qF z0JE3&PDjHbq5&d24oaq{)FkmAi$AY+n(f}G$Nl7&X8n&@JdKQx&Oavi>k{2Bb2 zqY8M>SJpEO7Bc*#6T+D(6?xW@YxwrpNz!%O)%^g&FrO9W7qWL_ipgpqi|+nGd6s@l z*JZU8DpH4n?_i46y`*9v!EJ5Ky{RkvtVN;1`@1?@wkyIEqQ78ZVEe*th5o4{N;TG% z)z~ruYCqBVDN@UFtD}r{c+wr!dA5;@dW~_$JtAEnKl>c+7AeWFI*d`bna#}p#|(^A zIC~qt&XER19uSIB$IBhf-3HC{bt%TC1N2wyBH?0E{taFV(TnY~d+{~{LK;gJVQW^J z?W<_}VBF(;0Cx%x+>SwyOa7kU7eW8o+4TJ{_>vDl4o4(SH+kA{p|fUu@2YPE`)7mBV3>snn^yCVD;ZxY@W;jZ9q5i%9>F6z{XdR!$}45(1eJ?Vbcyi-X} zCkNCN6Iphty04f>!TDc-LY*mCK+DE0hY$QNC8uW4=bT^LnbQ7mH-ogUppE-1LN{9s zWV~`{E1s~DGD*A_!sUe1#OZEvW(?h@j2ZeEiN8b-p)?0kvao`CdS>6R&f~W+t?!0T z_PEu}qCMf~CWHKmgMaq+=vx()iY;fia;B-;ThUl8{OqSNx*AbejT`vgqbS1B=^do^ zcielXX{h^NsqaQkf;4ULy4Kftb-dR#Zt&{5yvMaCM~8g8z4@r?sf~5qE;`_7QGu{b z@;0sE^Gbc zyCZZ@6?;vBC-|~|fflc=vNrpkd+A4IoODxN+3!&ZX771{=MNY$h6-7tsANY74%$w2 zog2K!LserynEL2or|TcA$ZwMVQrVMT*+JP~qKERkW@CJ#E-!Y;0}k%Y56WJ^$Ac`v z$)%zUN`G{PDgB!qz9O#EfNLjwNTD6F$GVR}**Gux56a4C43F~-VMRpciFMA#tL7R8V>J|z< zwt+*(@tS|X)Ig^{S|>b6$&mkg>O$fVSAH}53QC=x8_BXvEmZx8-0VLs4e3EY==(zl za{yeen;N++T%`uLbr<+W*YI=z5I*S#eFl2s8a|&1H}bc_2p~@LI7mX-FZpBrjv4G) z)Pxf;b$ocvQ8*B@{!e?~0^L@1rn?nCqS#I>zk>50Pt#y2xwm+xy@D{`bH4{`cdY^B=j{xo{j2Y_dHB z8GnDU-nJPwW1q;yqS0C3SQ=_;x&CDS3p@Zqrhu)7}ps+{Y&@vx58_Jt{i#FFEPW>}LZ!p=^;jDU^ zN2jyA#ElQ;3+)5Dnh!s@s)24u9QdCB+&uV=DadU;{Ivwp2Y-lF5dTkta-jZA&4Yq|`0ylUe|81L96vlo zS+oZ7RR|G<_87dB2zd{l;nk64Kl~uoM7nTd<3xli=Gz4KVxJmE!az-2D}P3!Q%-+MLHbMQU4SKXTX+TavlbPv9Nv+51y{d?Q; zzptV3aN~T^aqvC56gmEj1;JdDn}>3VySW$geGBPu$qqMvoIkv2@a)7YByOC#j~CiB z_+uOeu7sA&hbI5^bk-8YNR883MTpVPj+KBRnt%M)#lb7V^gG;N**=r?OGHm+{Tkm` znuuvx{-G)efI8!^Qg~hRlvm%J`*Bv?zol#C_9Ym5=n}@Uh46*NlD-ON85Z`n)E~+R01!~?!sT*Gj z)$$syI=Sv7y~LWe5#&P)Z^m~67=L^^s~S<#7DcW>l%8g1eF2gD!n;Hjyh#P~=FTHIl`xG~$4*e|q;jA7kdCdo3!IKO_^M|iBU%fEC zjf5|J1+d<*FSq&7&wha9aU@A#Nw9r{$smM#t$|bGV(Q3{`0`^P18B!>X%wF&a_mau2=4U z`vE*N`?Tx>Tz7q1Hid62u_>SA_36xmmz3OBUros`JB|gWVHfsfrFrn3N48&p047zJ*WxLcxM;-eD3 zCGkaxXC=NX(J1AukZ6^7uf%?ddQ+d2>0ybdB%YIaHb>}nMxtIWPo|ejTqALd#BPa9 z;sX+&(CJc8%pl~Sl{hK!g2Zh4)f9Y|ORSW5i^M;X=#|(n@rcAxiO)zpDKRc_O5z2H zIT@lpMPiM_EfRN2^hi7)@y{iWN{mVTM~RaXKbE+N{+SFuio|siTO@W${8NdKN*tE> z_Y&Wf_<_WnEFrf-Vzb0AN$i$*K;mN(pOJV<;#rAP5kSqy%Hah zcueB+63<9{Q{p=k|5f6m`J$eyB(9UVU1GaLP2v%WV-inDd|l!>iRUHGlk;Uv+Sw6_ z{Sy5W+a#`&c#Xux5}Cwz`Ei7?*Cn2k__V~Z#QP+6NNkaKoy1a!=Rf7$%zh&AjKt#- zhb4Yhq9(CJ;&zGaBwiIfTzSi6^A}#ARCKGG>mkei{iK<1dT!^eoi*pOJJu z{YS$q3|x<>L|rZ&T2nu^zE=1H->_42Xm00D6~|$((|teF_E(ucuGxP!*5Q z;R1u(tNB1vDKaC)4@v%jB36^wZuJu1~>xAQUqyPYu+BP!85-5!;wjQIlYF1MFr zME5(KdmQLyJM%jN7|0}D>lXY(35Gg^e)gd12spb@jWgLBxScPQFQ2RZ`kvCxKCRp5 zwFg38%}p)0+jS11^B=GaD$b}*j?D2q*Wbk#|3u9nx`>vK2UD|#( z%7!#W4Fn*|?S-)0IGeo*#f1j27V4}{w5GhQqS9;*LQ^sqSdrW7QhV11F)g_XsaXkEXjK;^_n=*FKUCi5@PyPLZ`K~QZ@(|#QXGMx_Kp8S<*VelZm;D%qha9@P<)*2x&sJw7C71)@%w@>E}z%onbEea!V*+gQ)ZWYpX$YR8a}cggm1}@cub#G?=U>U`<#%!6h99PKe^R6XX zBp!NW0y{y(G{H6T^7i|Lal<=>@|Xt+IfW+^r^f6LP6ND7?` zrY^L*d``WO(UU$WmR?RLbCDUkohA8__Cz!xM;~22zDOkZ9iHFi@pU*ne5%{+G9g)b zzSH9ih621wyIm&I@_nwPK29WN$=3%C`lMO}>6bpPlukJaK#biT#3T%0nZ~Xl%0qu& zXD6KxFlX)deXt;3fEN5DmFW9b#~$o?DZHXSEH5GFjD{N>E^cBT%&pK>2$T%;UxQ}GlkATwd?sPH+vMU&!lH zu-0_@Tw?9piDlQTsxIvD-dW<<#pr44YK2^6X)tNYu=@hEn4tpZ@?kZn9j?crIlG09 z^exs%RiWKq-(>fxfgnmNWv+@cm*VTxPgL0REPP{xP0Is=lEzE&u->Y6HM-Q zOeE^^|45v}>o&=ByL!23+@Z#)+M$uHV#dl*<@NCsoL=LJ--vc+fKNdfKPS=@ zrx5*>NH0gc9U(rMopD`ENx&-C|Y{&dK`p^rVzmguHeH zMe;9_>Ed6C@^mAY^8Hf3LYY=%eR}yjWqz^DzaZsbg|vbYlW9GFOr{GYf9c!l^|eTO zR6mt3{+YAP5~EA)UMtQg54(91COtZj?YK_uCtCd}IblKf5B&GBX+p*wY zi|ewkP$0xufGz{27PI$}dz5XCz#b(iuB_?wjEPUnyXM=2>nGOZ=*5*7zwB1+9@X2W zb+enKECREovdUUrQ(IT>=x}0xOQst)Hf`Fxg?KvDnae;%vh2JP%0avI>+R%qvTT~H zl`i(}Sg_Y%W&VsRvY|p0|2CV=7IUS!%4{`Pn`_Lq<~nn|#cZ)yDlJtOtEJjfW2v>& zS?Vjzm6poN%Bo6hWp!muWo>0$Wqp;o%2HKXRaIrJs;;W3s;#Q4s<)c07Hg%o%4)S% zTWhSf);eo_wYl0-U0GdKZLO}ZuBooAuB)!EG1pjXDr>50tToj&H8r(0bv5<1=2}Z_ zWo=chwYIvprna`WuC~6;TxY4PtgEWC)>YTl)YaD2)z#NS#d=g<57G4~Ru2+2=5CC! z!dsQnPbv2*D2vV^j$qK%NxC+v-%Uu!v)98+bxiS{AWJ_C^h~>wPhL#aPx@+xW7Hris0i#idZW zBjC(7%xaHS0H2g4P$!~%Pr$eCRQev zUq5C^EdN5Hcop{B;u4Zy9m$I9j4mA8HC{Fzm?)ZPoY;{lz7n-(MG@hA5SJ*vcB}-| z6<-~-B#NoU;;ST4vNTc_2}FycjnO;CnoyO@m=t9>YYE)u3W{f%QZlYiR99Qv+&cc{*v1%?oKb(a=6MBSj8*=? ze%|q!xdV8#&F)cl(#IgTtwEDGjL7V5H5@t z)lFnfSmNvNir@96f1}Nngzf0tofu-{qJhqEW4JZk7tN(Eh~Kd7%+9#2E#9^}zPml% z-Wl(_H-2v*9thG|N;l8TH_Aes(Kd#ik%GvYsB5eYJ=`>YAXX5&D%O}Vm4tUBOf>r3 zc8i1|Crkx2U)Du-L>y$)q44HNA#8Vl)CqG@$E(I&usTr|ocRO6@JeoRw@0lo;h8Kd ztA#BI7``R4If{{99PW%XMp`3%@zu7t?e_TXcgOGUj(7LOd%QvoQ(1Hu3BXuOy_Q9) zVYICzcdUARKCGFljOSk+twm$B@eQ$gF%+1qihJVsIO9&p55+?V;s^fIXR8pFGcy2j zr732CvAND%eP&m@70O`<+;O)j?&+CrV4B*?FU0M3O%0}8r_H?$&^pMk(D*sl@AH#= zrf+FoxTzFWgM6cf9nN)M^&TZ|Nk`zmIx#pKC$W*N&#^)h1vAb$Y-D)8o}wie=vJqz2W3A?Jt z3o?r#zXqt&R+-H+Wdse^h;dw7;80)Tb~!Z1s%k2Sv14^q&TMveVzWpfP3P>2rdGH_ zHZ>B@WI7ElFz{nbqiyAU)9EzMcOi{t<9zH#sdd;mpD~Tr#`#R3p{Y_A`mbZ?8_?R9 zfEJ)OFov;abh})nSMBq8v>rNj(Rl5~xJmkf0|#6kv==u2Ue+DZoU67r@_KA(v@h{` z?o6e%3BL9;n&9gOZE8O1=@t6eK4J_#%_>xpK7+K4(xT5R5oM(xBP3~6L^FeyGB#GC z(R0K+_{qq!G`#2#10&!53L&0NFE+4o3|K`+P^^lIfyF4_FB!}Re$7-72nL+1%oY;S z4q5~#X5GsTr#$04X6!Czls<}Zj%cVmJq^Q0d4}>NbEYgy($nfop_8sN zx{;$C0g7Ec^-Ti zO3F`7r2+SAJSz% zGtW%@Mr(j!MGjkGEo3VKOQ%aG7srcZh5mwelg*Hy&GKFISw7mDUzzJS6dPGF+Fy+J z7lT$@X_B-Y&=!D(yjIZWXR!JAXZYtqIM>Ow2z{`SnGfSkg>Zp%qJBANtS0t*7$XCasXI za}_Y-$;?UK-jZxqaz2NdUeATynOOL1@L)&yiIkn1Hqdy%c5+&?{lZQ#2OeYW9R%eA z*&k>}It}wFg~w28V5P7<1AIKl1`Q@7Gof!xm3h-SF|q22zRqJdEPT%*Xe68Z|MeW6 zj)OM9>l68@>C`fY#q-(X`wJ&cu|@uc?RmCbGvt(HqP<3De7%r0JkHp4{{?HGt~=Wr5t)0ZO7*^)7l~^NJx)!sQZCEp{#cbt!(%w+Epv1^Zv<0jL^_4(&NoB$0 zz2MnC0ba->dk zSV!8hj##r;aUs?X-Xi1yc{$kE><{?^5$%4QUpOD)guO`LhtQ8O9%Ss>z_~qR;3`yQ zF|XZ^CmoI+SB;u+Qa5f-}(nCRyU6kZ;**;#{QDj*vfP9QER?q<`K8 ztsk_h6xuQPM^1otvCrrJqBN3>eeN$R`r&hbQPB^d`-_Sd<3HM8GzZJ%Mn_P+O*n+L zk=xIM^JW~xcB)#VrUkI}X4v|6dEMOUf%_nOw*-7WJEc3C1yY)HZvs0P!Wv1+PJ7Vn zhs&DQ$v#55t($!JXhl}-H>(;OLb*-kr-6Dl34a{cLbAxWV!OJ3bB|x^V}Aq6=0L!H zGr3u9?sckudbq!h9*wC%_J+*8!x2Cxc1;7gd!6=H;nGKB?gI5C(64s-WG7V;k?H_ zr2ewg^Ha1jM9yI4)T`w0#XDm}*AhKJTIxM9IzxxCuoq|d3-6m2P`AKYRomd&ZnPLZIOZwQ2QIEjaOU&E8q^{^>eFQv7snCeEk&PA@GB)ZxE0isa@$B>sHo3H{|qK&-ag~L#*L7{BGI}3-lbXmJ+$*5^xaDAv zv(rTho)e|;HQB`~S`L{T`|v!BuOC8Uqt~^QR-7B8mZXlb^0j2UNO5K%o|xeP&Dbxf zMzpY1+Ie5HwynXoKuBdvr94_Kg}l`ykFSF^?zJ{f+jJ7}rmmoz+uZ>Tp3=L#xR}LS z{&ChWFJAL-c~sZTlClM_FwnaHLKd2P(d>qKFu*3^f@~A-cd&oXYEuI}*a$UMC$SeY zwgzRw7ucdYNLL%)L%`!z)}Of}qz3wISY2uN@H){79IH@y<7cIBb+OGURB_e=kuAx9 zCSpL_;AYtvL<3Zcu|~rtysd$4QZTo;I|6X?rcMvCuV>Iqq4zlss5@!VYlaQ7??Bs4 zRC#Efuvx}A2>jp(;B@}ALL+}juQf}@PD&rW`{jD3;ys69a(pV;k9t-H3jZQZunVig05=Uyn`*3bn#tTq|9tMHnF zOHMbv9)nCfjeaS-{(as~H@vh|xa#2Wwcide*?oA8U6d4!)3a3XJ~w>$=|u?K3Qgm{ zaitfxGUQtoS24TVixVmi(P*U1x_ASQIP`bn=&E=gFM?+z;=_~TYFQ`Y z1^PFW{-Q)xl38i7?zaSAk)$?AoGq_+7N7YdeBoJqt1rSQ$(ICO4?G|0r?_Z4?$hs^ z`+2%9a7^kE`-b4t<>>jBLVCPLpZ>zR{R^xjn>8F2@;AzK_?Spn%Cvb*q;Hn#e#u`Y z)6o%;-=OCgi29oKwDfP#^@&S;YGl6hxF|1^GoX*Z=_2_vjF%a){SQ%3{}TcyCC)D1 zewlpTl24K2(=Ktgyxz;?8<2eZc*idyFCzKO|5dcZFL8FgW0%P{A^9dRB1e~d<}&#b zl5hGF@}@46?}FqreRFO*(e*RFCGs!DS1S1?r9AURO~g zLI=bN_adA?ydSt8z7ww^p6Vl!>OYa{TR}b+*FOMXr}F}DLVOM4gqsl7BTl#jp#||& z-wE=mXnhdRH$f-tL>NHa541l7J&7OKH~^ml#BIP|A;b`$25OJsQ6b{tLB_6y-@`e? zQ~e^yKSK9)AdLJu`U<`SgwGd;268<2d z6K+TN8^mqEhrY(xDa2F#63AcSvB%&}1Ulh!2&NCv2B7YnKv?`&=r_;_w;(hiPPiAL z4e@^9FA-dbPXn8I3dbr0$m6+ zX9EcpialjL;kt zaVzkZlUN79Lq1zu5vW})z$X#bgO1;BVP_G{h~xKLxQ}fe;^d!8z9Mzdf$$m`CnR4F zq7(WNP9RP=fRKr@pMU~RFw_PCt^U(na7t;PG!>m0$JihY)Hywu}$cjC^Me6}WPBT+Z# zvXJbhx>=a!_ndK;n;9|cyf4pn2nf5lo9ht!-#F9 zeZ)V~KN22^j*O4QM&curBhw>nw0X4qss5)1o-#eX`f2mibx$`u-TXB9X^*4s{^(dV z9=#Byy8DL)hRzI44P6*A4wnwE9z98#NA~t^{hkfZB|xZ8hp^MtygpzFyQA zL5*Xm@eJymLY=13($TunhSB!X?$LqKFmH8YbaHfRlpQr5H65ka*B!MTz4K`M(cYu| sN6#FcJX(6pe5~%+oyYvgdXM!V3m=Odiyj+4HgPO=ERMbJ|K-pB0cPr0C;$Ke delta 30549 zcmeIbdtB7T_dovHm6eOEi^>bRvmz*n7X(oeQC2};2_ z50sJgAh;{BkM-d`(1B;~NxOg2_t)umvQ8dzZ0|{uV$ayD6qV;Sv#P1{K9;0IGAdQ~ z?Sc~3*!sRCS=9%iaVn}MP)Rm;1CtkSRijHENK#hO_qpDgE{~{c2P!FQ*noc}sm6jt zA0!m@f>rGs=#nQxj8&~tRKtaT2(1Jvc4Ik`ldbmX!Gcd?;#1SdB`dqR==V731!uD< zT4ksh?UHxzLLrjDQ0^tt-XT|fKmGmOP)rx3huh{WbjLL(<^FL zadi*WQPc`&-c+}ixmT=eMqs9z=B1<@lcIJwjOjV_G)JmV#tI=K6JT|wr42F!$NFF<460=<+5uV z6jfU<0QKrOXzLD1N-S3j;&kb1Ri3w^qHSw{1Fxw&SbxvXx*hCA&-j>$PY~{AR*@^$ zlv&V;+v8K0><`k+f{r|U&Ltamu=Ae&w(X!Ty3I|Uen4n!RllqDBBeRCqJp}D=v2c= zv{+Fc)Io|1DGOOCm+O$UoyzRFQQb|2uh4KxF{Gf9sc<=EPGnqi(LedlHloTv7gnMR zO>C0a6y0`q+G~bx8|&^J*@B8yx$tjE%1AtkCOSJm!JhICmN`CaU!u!OBGT16F8Pz~ z>{ahz)Z6Pl48KO7PP%tkDrU}hHr6K;>A60!Cd_`trOQ+Y215M{TiI@(*1E0iqEEE$ z4h<=J4j9wmhAxGc_9Wb}WGidiqz$MBHc9NSPNwp^RvK|va!q7?VftjS&Pe=>_mD-s zoaf6&U26@i!0uV=oX1Ho zI=>$4ZadA*1@C}?n46E2*&H&%xVEY%_}tvPnZ4^9tow_srr*AmUH3Kn!OdZ|l!)0f zW(#X(Y^mFf86xY)ZpIV|@cBc`kTsiGj?sTKEI>2SRJb9Qw5@P@<^bB$qZHl&^MOq~ z>5>26JTuj^Ae8^01_nLPo95)@i&6d| z`C^xRaTDug8mxPZEi`qq9L`zOw|@~ZVoRn z$t;NFGZ3vju!#*$YBBolO;n3FunQREnG~0&Rdp%$gG6656ys$Qk)#~XYl4w7l15Zz zOXl3SpL4b=Wb zK>jSN47tXfJflo0I++W5X2dH6F`WE|H+Ul&b6|~{rg6XXIv@?50B-d8CI(}QizI()9iKa2 zMIuGZPVbHE;}*fX*V*M3!A-yx-bhIa>+~jbwFpQ?-n8m4@>k0!Ro#hm2L<#gd`?F)vMMo< z&;7!3X%M+9Hr5rkiLfFv1Y5MQSXUgXbyKeMV1z0_%R}=^WAr# zgk_&WB7%!tiK5ro?{e^?z>^g*7&FvcVko{ZkSSq}z|o@KvZ$vl>L+QYRmU+jVc~QT zXtv(9!QHrX!qz2aY-~VCD^O&r4~lN*W-03YGK-iG^3Y#MgHN3vquI*=t#q%k0|Ai< zFob6A3t?1>j`rm((VF@$kZ#psB-n4t2Iki?*ap#j0Hpp6VGXpz>ZGKk_Tai|$#KHo!&Y^4w>H%GJpgCzXtPJLIE*?YayWIB2sOQxzwxy-N zFG#KGeCoT&KQmpc(4e=$*iASUgjRJj{G(h^%^tK2M|;BpgKf~p-M|0vZmJbsfFj-< zaXL$_6L?xfVkjMexCRY{V*onuRo)#){lQZ-^wAx>n=?_RwxOdSEwi9WtpkzQv%`V1 zu9`1#WgFP9fx#m4^af@Q3e&yHdIbeP3Z_CSF-9oH6v&j+51`(|O2OuLUJ+v?buv#0 zrMQLru4nHAg|q|w$0;KvbCijbhQ36TOl#n7o0dlT}MEIf5bel-bsGDn0o#JXf(Jh3n8v^}bVJ`&-dt;5j zfOGybg6#@U(!I=_!J#%1s(z0CFLlXXs0HPw;@<;^(0KL-E*?e_0nSh`$P2^vB9S;D zobpm!jjD@zBa-g?h{Y}q4)RwdFu$#8Kg2i9pe;>u)nEfR5#RVyb$i&2pCsuKVq*Xs z+IsxOn_wKvDZg|c8l=_LwjMcDD8ieb#ML3^VWwHt<2<}sw~n0+3DW(@`@ZBA_8=rG zdL0$lhBrN5hW2oh8QrlPfc)(?#Fkxq}V+%rCwg3Zk1vW-m z>KRVnbR8SiM%G^|WgmwI_;70MR9dcM*Fw#@AGp_g0b+PWWL3-g{95Es&EWfy6ksWbMh(C(*e89+xg)5%N z7$??pR;6n>tG~Y$tSZ;CHsMi2){+i}JrH0KK?ILZD3*vv^pMS+w!s7C$mM9ca4k1e z%Wc+j`C6{|_q$&W*Xi`v2C~;8LR+kXGz>RIy(S_DuQlvkL_qXv!XwPEsy|Ty!i=jx zH3%s-t!CclHoDa;-khp`Et_Tcl?P^nh!1?>#(6N3q9JKhLs7RveTn%HrJ=PzoMyz^ zn1A)gzQ=0znYp#^DjEyivvs%*&laXz!y;P6v;!J;dFeU?d;(NRjK`X%Sqow3D{t#yoTjSTkX zg;{}0im~HLRv8&EV5P>eR;MhJ<_%K}?O_YJ87z*bRYz!B2vRJQd|{NYt#B_nr}J7U z>M5-HjCF2pwt=YVim7l|V`|Ijs2r}%2?aypR^Frf_S8j!4ZMmcd6kvC zkB$F)%^5gH_1EfZUgclXRd|)Jh^s?xj=AMN(CE5?#YctfzM=k1j{&V!b-LHOZB0R5 z*Be7%p?cot3bJl*bJy){ny%n;^jZm@qiN8Hdz+Rk*tMvDSgJuD70u+%=G_tqsM|H1 z7Q2T05*8jE;By6qGJRFOugM5;(|9whxM!Q~TPvY6N#L$BQ~kQ;5%l!b zm%ute$2m`2cUf?r37+nt?`{(y>+MUr@G`>~XyJBJ^SF2N(|pe7U;^(%Cu`{Wb6PH5 z%ZAJD57m8Ht}Nat8QJQC`{jFIeR>|Lk93rYtUB&vS?6Y z(pV^mii&-Hex9F8js;N-;%ID85U?Tui2;EYufqR2KU3J5*zVq!h{b8=3WXWlCD~M* zzV)kixhN`9jjw)5eRYol4DDQ_2Gsn_(Fd#^KG#S(b`b)xlqMZOqITWC2A0~pNUPD(oF0=;|pps*8RS`Y8A9I6;ANc4l_mo z=N_`HfWyf|DMpfNc20*R<*r-%u^I&p2};&*Udld?4R~2x?25yyPqZHz z7kYtofdBbp32W3bDv+pD!$n|ue4%J(q=PQ7-W?bAIMAC)wf)bJSSjky`n!w8%%rs* zQ34NUzXKBN_AF-iI(E=4W-*<5XKb7y-2Xru68}<+79I)w&b8q^fR&O`56V4xS zxX0PHTg1|_*W$yGeh@z*6fP8u(sn1tY949Y`jC=^HCEcNH=0&GxGZPN<2 zA|V1q?IzQW$=J?OSZe+{FpuU++v<5F z-HP_ruWS8+M_D#PsV>ZvpfoT-|#3_nZdVpXab&dh~3&56Hx$ z@Cv;XuP6?~7srveXG9?R_z9kDi)70aY*0$RZa%9_iO@SN?Dv$Gy4lRTXP2h0EhHjq zRE*VOd~P8d(6gHj7}NT4cq8#74<9V*zXV5SsDBYuEb2D|b1YguJ45|} z@@W|wU3aT}TNENT9BqI30#vgXMr|e)vC85e(s;Q@k`CIkfT`}s!}e8CJOqP6(-gH2 zEh1*zIO3_rZFEn>*%lJpfyDJDh0YY3s`*tU7qs~V zLD(7HG>Xe7O2xh`Dn^GIov4OOkEJkEJx2DJ?XFbgshl`Wa&hZrRsS$Wq)Bd*Th)0{ zIhpD&6b)f}bd~t0E9wEE7#-OU@-sP(6YmtmKv*Kr2PgS~irTZH#%55|d7lPi6T#0} zY(`SxFc)3ZjAl1Z$?k%qsW1gai0%|D#Urii9Wjbo2|Rf=MHSrv7li|9&2*-siw|4~ zN)YNU&Ldv`g{FEBNO%}p!^K_Flu7dC%D`V|niTBNms5T8NEz8*wP|f^2BJ*$3wZAZ zt*5{T8qepV)CFY*!CZzTl(T!BS6rVcDHrBlBU>m4kI7fyj2uffZ1F1*LEk`hC=5a_ zb|j*LEcGuQ%Hl3kl}{a}#^=-6%)+gV!NMxD8b?0K^~DMZYum^SXdZEKUrRih%YN@; z?mY_vFhUgdu+_dNYA5M`2ba2gqV`epa8+-`u}VWb{7+f-VJVcXafcDbSeukPXLO>Y z2U{2^35B7R2=JS53|(Vz3}TDFpCdWn@Nsbr5h1lU2WJ%IlznO2L9P89M6k?X&l2sK zbo(|wkTFpy$@eh)u;B}oTkVsgvT4{D6&szFQ1vyaPRmIe{FB>(V1j2l!f7;jSJbmv z>V2v>(sVE+2jcExI2?g)1-!sXs7|v6~#jZRXUb)m<2c+usK?RyZ9R)4U(x4P4HJov7v| zKBUV*N~gt`bVP*f>IN&(f?3>MB6#PwBNYd^Nt$tC8iR>o(9k4TW)gd?r&FkV>eM%r zwNg@N!7>k-Sms*H3>Y+1Ju+^;%v-KQhU&U7PJ^BcoAU$H%F%HxHaW3 z)3OKfIB2@qvVZyvQJvw#8dDxeZ3ls&*oe;nQ_+L_@Z)kmm{tFQB+in~a9d;WTB8(a zZv^9T0mb7Hjt5!P2k98*#JXl9f-+c#?tfoBK-P))ff^~=zXRlaK*^p52#biA4)AQd zIgMt)d7!cE122@rv7@o`Nj3hSim7lYO0hyjqJl;J0;34~HRQj2n)1iExst|!Uc08l zmW>aG-DrneS@fBy@O$E07a}dk#*Zg{19b)tNz{uNRaxo}E}J|bD0hpS zat74D{Ot7;&Kq8e1~Sc6tEzdJdQ)u!RmU*HI3Ul7DYT3*DM{Zzx<$QFJNb^0?@UQr z^OP8AN&cWS{h9ygg%%Q`5y%oCRJO!2h+=euO%ruLSSv0bq&M=FjO|f2=UaOohAoEaxkq z++-iFsDo_T^(1tL1j{~*4amX87-n2B_`2=s+psO;s3;|DCz|w-ef|Ra35eC!m!Eiw zoGBmSm>j97qM7GIUBmUo7d&Zwxshvu2OJK%+TndNQ5)W+eBCklfGZh;`_g}(KnB4F z_oWm<97XAW*f{784PG2O!jWaE7C*dWV^-Kwi)_P6y)3GX&#Av^<41b+$`bg9-M2i` zk%njwA!nxD;^z)Kt4sf&s1vs_csa)LjIL zO{p5ofOHU6ma2DA3M-*hZQD#q%uA_y1En~dRkf)llp2Gdn)wG2jY>81I6yzuJbMs< zy;L*p;AjG44?a#{#6j!SrkPZI*BbDZIo&|4zJ1J90ae)XeSZxOhXyH zP*~4luLZ8|EX}6fITvukhG#U-a9qP z`>+zUCms7aifHj358l4Cs(g{b?twh)NSfw3!oJs*U`=7e_Yira!09=sBh=JX!&BS;kq$TE1+0vtI*;z#=O@-@FiTna~U#8;EkTET%b(W4NFnn4o1x{UVb3`P%JWOaD1%TB^ zzJ1eEGMkbPK)3_Nku;t9h2Hm69RBzCRkam`=*xLgF+^^5mUp1xJ$r^yU~}nBj`tx7 z+sow9_p@`GDy|cBm^1GtpOCoduQ5_ScOFi9n(-btUWRSZs4L{Y!89J{G$(jE9ReKk zX-eZRe?>PB zhZP+*!^It<>eZ7Qqaa0o3G6NEpBD9uMLn8^H=%$VfVzCOl27uK7W!+qy%D4icHP96 zwg4{%f$I=ASDIv0Qk}TP0J=K2a~_Ov1YqSGb(ib|w@!YI4unv@1=U~3ej3v*wE%>4 zZM7$=3Vbk*=j;Ta8?JtI+|-^|>@b;-*glgiQ{kILnPRv(fejoRpx-=!O&R-)e#u03 zaqKgb24a0saHodYOu@ky#x@Qjctnhau`?B_a*ucU{S2n9^QUmaXmMA0Bc$ui=ti7+ zS(KA2_Jx9Ih2%RE+17DwlaGw2nueo00BY5Jo>xhI7s%W}P_Z9~a{JLv_{1^oh|FU= z^T~;9T!HoGP$5gmd0byPo-NNAp#La`UCD`P@p2BamEYVhRQ%ML&5YxNy`KUe_d|!q zv-t7;w%J%aao>tg1MUxGYBwPK&L|UN`39tT9$y-G$n$0HaA!^gai{PzH(oo3JB>)= zao!a9qiy7{i%Zc)cmy25xX#zWXv@&1qVgBVqTeV6KePm|UV@3}_!0T{@s)@kk^PJbp1z+%u;cgD8TC96rKuy}XZGlwKp z(7U^<`tl{S@YugZbrnSXOh((uM2urEOz5uvVJv%ZLaZ$q4&soVtEx4mCKv7VAgt!~ z1eRL;D;BLo1@&lg08^8JV?B~gi#K|k8rg=om20xQ57(qQfA~zHE*hD{D z+*qoH4N8Mezf9PN7Hl1G4FRN(trw4Xth= z3ZhW23XQa?+qCmGdI1;fBVy?bquG&3U1E;C461q;0>?HZuU!aC*OGK0Fm($q1Z?`J zbNG$G{MNV;2+;rVIKL8jX(H9I@+*Of6LBTb)s_O3b|=t^3bi|dx2QYn?gZMptMT;* z2boPtUx(b1@U%iKNBH)??gYL?S^YbK>F5y4J{xtX!)R;_NzU0RxD)8CfB!M&J+-Tq zdZ(741k}!ddXSv|$E`qQeHry`1^O^`YK-mg^`t@cCm_PgKU^vJ1rzvpmpqtTUBL?g zOGbeLL3nNk#v;AXsubLF<%KGC!zRk$+WIX3%$g45uL_RFkHyPAmAOB{6rYo5ha3$( zxhcLMNlTuf;orvC9P%_Og-cb)ozX&0FN~XAav7prw+qzT zC-4)3I9`cf)6eY#$!0DYanPf+%yNg(dvp%@Z$8T65Mc#EE8>~1-V{8~0h56>DWjhA z*gukeJ*}1Pw-KZjy7dFlK+0jf~Z@$t|X>Tg0Qyes*)t@;oI+-~_o+!8MC-eb%<-K^02c_PS6gX71bdLCgXuA{VZg_j6C6U(bN`cRRP zj{%1~kIP0+i~{R@WTWZ=wr6^>%@b8!G8bq!?0Ow_sCjwbNML|EjL)E=OFmBuQNgg< zf+@t&ZdigUF8Muod3$%cJRId%)}|6m)mZi>rykF#kYgEQGqm^Q<~zyx`jv=5wM}(L>50Iz;+cDO~(gwj-gRA>F(z(&BDbIUg$dH z#}4I+jwl152740zGZ>;BlyJ$O)KI%2&0XyD6va(=afwzO>n=WtV)V}}Vv2ygOp2u1 zAyn*04W+QkVVF!7vKyKLX>}yoCbFMqgzGzvV4hEf+peKvjjV~SF`D*{z#D3~+0Gt@ zDx_U^&c8yjBSdITvibf$lCT9QuroUDWlipsS4a)>*HnupX;k%Jgramzc%OLL4YC{m z1Mul;jZ2OjP9X+Ji!J~KcQ%(Osfyk$em@HE9T2)3;bie343%u}_V*=P$AEId6p4x^)(W|dJD6b|xukpXQo^oG0yEWkd zmC%v+zfbLO$iHGXxY;+?*nbBW)#D6ge|zdh;z#@kasKj>!NeHduz>2@4JSwtnX(hg zab`^{L@*u3YZxa}Q!(IiKWdCc8umFwl`utcbz=OIijmGidZkED!@e#14_5X}fbD~B zRBRaG7PHa=dDY;eykL3(X)v9{nbMY2HOvGf;_?*|hTK@TJQ+TH|bR zbMCHj-c*nC!u99_u=c6Px~oV-UqgZr^9O82Vn#^y#AsVx=k2b**$r>M3P5)H46Z_n zjS$P1#LX!`K}^-tPJ1xP)6Qql5n*q&!*G-(VpW;KNvSM14rBD%D->6rgLB71Y*y&} z;ZI@w*l^;G`f@ zNMTG(bnMTZH^%dIQiJeX=!}v7TAgAz7m%x<7-obOd!=lc9UtDIT)UjeO?4lu9^rLt zeDx3E^JgXGk}9!d@G>cHP6aTTWGQ|Pv7hvc#pll&ThYRez+!)w7lF~^q-Hu~T0gVY zqr43=imLx*=vW-Qk7q>skn6#(tPC!0jknD2&~)z{Dn zkchG_Tn%^kwV|A9cnu8Ur1vpuf+2_ufN_TC%n(K;NXH=kx=5RyCpu8IyL9fC=krku z7`Xa4AEAbUprNle?MsTOJFp1iUYzQw#tZ96>r_aD(xO%>)v6s#>jn~`A(-eL#wRsx zUKLVFn-Z{aYvT`mXUdqJJ%l=38-=t<=Gw^PLBqcxHICrI4-aH>o(r~xX>#5oZVuz8 zD|xG9>dEo1C#O`T&CWyZNj=(VqtTdHQuRS}p`9)^ONzTjpm+QwDE z__hGY51mMenUi%SG8vON3+!+sj8spgcu)YIGr?0SNSy|$w9_w04M3rk+cE>^gUzjo zlpV%sI7_Vi&h;I5t9p^Gu)j{BGbrVRp&eDlzr1$%GTI6mk+|3=Bg0hC+V}tpU}Rt8 zbI`j9K1v1=40mT83(f1_ba5hb3y;o3r&-1vVYzuD^<@Luxx8%M0v7T7W$!quuBOh& zV)_@dJ1j!oJb&%AVB7*C#E-$Q;^-rwq0c|HS1<6sITrQb1~A(TvUfa}W}KDD*1wRb z>&ec)&^_RtOwd2HM_rrAB4)SLFUVxwXAjmhg{ia0>khCRv%BcBSmcW%LMJXpCEEGH zHCxrMtZ?9uSk=RdBW%kkw)(}@O%sSK_tS}IobR*lbHa3M+3-2N^u_(ynmO<3*Y{_c zb8~g?u|snM_5J#@OLN<|D$76}haAh5M2zW&e7*b66&Wmg-f*w#0W?tN_G1O}qGN~l zC*`l?!OY1#shN3LKhEh8NV)eImd?)4>#G0jCFVWf+_W`O(v@kZm-Av<)@^=_@di~8 zsMpzy`A@|(c?p;zSDrsvR(zB(f)vCbA$Wo&CCu!S|4nDQ1)*uzs6bQWC{Jo?RHXy$ zkTa=AX!}ythZ09>nj@CUqQAC)`%WytU-e~U7Ie`2^kvHybnyvY=yLT{3m4|w*r^2( zAuUkHqCOI8QTqfchp&e#O)BUOCRg-53-ho=PF+AUQVfGYmM7n`nEd+q-7<|d{6t~3 zE4ti55)IV=8AT^E@q(8r!w^DhxT@M<=F;=~0u8$JMuDe1Kbxi>rrR0t2t3a$tUdUS zG3gv{%HO8KZ!p@C-v9}DSj|3B4m*1nmgnKUt55J!7tWxSM%bEK{7eO>iBe5J99WId zoBy$e*go$|h)XotXW&(DaIVBGwcwpB93R2I(8>Vw6!A8ksNPKVaRN9~DOeCF%K}i|x_bgmK)M#6)ZC>Li*X1aI7n#pux+>$wWV)*803RVnE{?`%HSVTEGYe)1 zN~Xg0lvE9AX^0zc(+)jNN$%s7hekc(M_y;D;;`tHEp$SbYgGykd$1h~y}gGm!0Q@? zOXCYz`N9si{=B0u)eNBVQ*`o0Z@f%3@ls7sd~pYm#2Zim0b1|?lb<+3+q#-o{B;Cx z)TSwjZW7Dp+r3G>e_VNkaSVipllA+-Roe*9kM8g`Iq*QIB2=-VS4ZL#!d6@j<5oz* zjF^b&=Keb8r(Tps+iFI0gPwJUp+UGQ){y-RKBSJ9=HufcxwT3{&N|=ulPGB9d=5-2=%o)&W19$%!*KF)qzOmz=^<7f0%Eq_FXe)AaA8us0XS>dShtbBjaz z{FB1TtuUXJfcRZ& zEo2^zl{uLWFOJclOJ@1S7QLFxjucO@e4Nbb{9r~%b(1D{cS~JPM^oq!;~I^ye?vmQ z?re&Eu0F8``_7&fRY)`#4lQ{QbUHKcoO1Z@Mz|Nsw5OkyYOn~NN@hcsuGfbovp<%$ zZ;=Es)K9ezU{6<;pvLK!bY{a#2=EKl;P~b}n%E^dhl338PF#YgetoyRm z(1B2TT+Dr`wY2p={VDu$XwwPib!18whFe`EhhhXL=MFzBFkgq5> zYQ_yB-5&DGL##&0G^$+DkDv`_&fu>r&U}NK>IFi z>GDk7Y4-W@gceN`i1}SA!j!>wqbQLDy_BvuC9rWXJ=^}3&fIUlO%pQyBeWT7$&EY& z`^LYiB0jdG!AG6&SZDU{OIW2@k}&<{c;>sJlRh(%rLGuZ&IXkh z{-BPr4@q#yb7|C(PH%K#dsbwdt2)&u`ivMmRg!w+uHD~bxGyHm5rae2Y!9jXJCsw(#wQo>IRN^X|Ix^SFK>c4GSP<(J zaa0Q!|I!4109NjMMhN&Ro=s)ufH9~`(Z*={8tABb92QP_RtL78wQL>_^`USpt{G~S zbYREX6Z-c%u+FPO^yTf@z*X%cR<-APAJb?98p_z)iLG5_4(I?q9+qKg&(5xD*<2#k z9r8jN-qh@z_U!hmC-g<_+4$8V`jR-dV0A0~Q*rF|)h(O1r{*~13^(nvICg4vNBu8x z?B41Q`mFXWcFiQ?$1jMtdtLIXIQH_I_Q4OHrwKHYDmPf;8^^J#HLd;EQ3D#BG=JKT zwOkt$wW%HHw3vqg_~;v#Abq(#!@iG0pJ25POKis`t{op@=7a;x)HQhJey-U)@I2eG zYinDE{2EJ)1}N@w#a;eIENfQUa>@a%9OF={KhjXlLnEX59_BY)88+uH1G%DE+gGA(6`aVp0(?;AGEFhsaAv96Nb%OHz9y^VRp$i=$AY#g{$kBPuwyFriaN8h#?a)DccK-Xnm} z1da%Q)hV0LfajxDu8-D#9L3tMe=+ELQc^XhYIGZDlF=+V*qVK~J~HSrtyC|#xJwtc zW`C~l(CP|zCB_4!kwZ41ls@ySwO^Oktj~tALEmdssT$P=nvPsTq$WWt)oY~_-KC2n z*`FH{TK&o;8uzy55@;$nkkBcT^?z+p=s*<1`agj()sD+gl3uI4B+-_h^NVS9VB-Svz>wOSxkrn6(^U|7G8HX0ty7E;|TMk!y zSPnZISqiQp4<@->6 z!Z2e3>$Nql$v|N6zJ$wAG@9AB_DatfO_gm%ZP`qNkBuL9#8qK8v`2ySQH-Odmx|#H z3sU!w#~+By>*|~eDNa2KoHx9P@oVE*lXo6#w|Nv^x1dY4AE2kBj|(qgy?_B@D5s{v zK^hz*bv0fXltFCyI}>E9hR$t@FcN=2hWyvbKxWt$9o)Qr1wRyvzn`=;kfm>X%Enrd z>9A*Agalst;Mt1(-X2o9dx6L&9D2bEyVdR=wuC+|ITtv@>7M-0teuU8ZRm#Dxdnyv z`UVy@;Rh}B{1n1Xw{JU<*Qjtc;Hdcn6_Gqjit1ELtvIE<^uAmfT&i~(0@OwSlP_>=q!7hWPX*tO22KBcVa+gFf1ZkRlZ zJ@#Ik=IaN#TzGYGqOlSQOeM!aw&J}N{wt~f5x9;zlJ+RtYR*qx@;K|dE86=p()xs9 z`QvQTt~UD5jBMGixX`DJpjOvAsp*c@moQr}X3_Pq2@fcm9-s5F4Z z@39*14}j==P@ORRJ%HKwbPq@wiJ4zep-Zlu^%g9vvC1A$m{yk`aYgU<#$1X%%72gZUoRiRT;}Po`R`T!Tg89B<-fo1 z-(S6%dGB(s0G{#kW*_YBslV*I`_H|;dgj;2Y5Z6Kwp733)oY@#|3;^ zz(oRX5OAM>=LGy-z+iGF_!lprmc!lCXn`0aV6K3x1YGpDMmk$SQBgV|%1;USwScz; z3>5S`MZSy3TO07PNz%-l8p}ljmI%06zz+mGBjC3J-V?Ct?^^wE0pkTs7jUS669jx# zz?!W&!sLSSjF-0^SzT_m(CwRKRuu_7ZTYfHMVLAYh4rTLnA>Nb;phf%sWK z`ctFyFW?Uib`-FufI|eFBj8d2-x6@YfL8^)E1=Jx8oxjR+x@A@m--3B1OameTqIzr zfNu%7Pryn6zZdYXfWCief?@>hA>eQUrwBOvFW!E{AOi8SfI9>{B;aQP{w&}F0sX{m zi4rhfz>xyZ5U^0dRRV4laF?IZV<3gz+3^x z3YaEfdjW0DMdJRi8sp0XRtUIJz-0o?7I3$2V3dH(1$=NrpMP93pKSsVs%BLm64R`L<@|6!kH~mWBS+k47yghZiUYY0agpc3y@SHq* z{KVWDFYdm0q?LZx@njF(G1lYMRGlw-umkN5U*Svi66|Fw4fM%MF-wS~PA(;h13lQYcBl1l}-v0}Y z0j*C|o-G767kRS~=&s*gl>3YF?t)(=@@7681;S0>6fJ5dDpUyuZh`IV<;|e+Kd&i7 zi*F8|*`l0UOnEO+js=Y8_X+-3pQIc<8wG*8LQi24gmBzsWg_pkKz6fkfwZ@Xn~u-QNh5i;7uXGDCk!ShHeX)MHjjGIR!sI zK|f98QGWs#SS}b+3(a_91Vidm%KM8hsudv0X-ANQ$NZJn1#X1{MV@!bidHR{`CP2- z*zQNpnRL3?-E%%2u4DMCSY%)Nx4YAY&U(Lm-Ay0KfDer?d3Y6j7SCd1F0^AU&IcN* zyaG#gEd6|YQ#Hnf4bX;@Hr0TcuVvJs)ji1NU+^$fXf6OX6em_NKSaS0WO;}l#bOG zlFjM78sLUyp$;@{pv@ z10-o^kkk;5E+KpaTF9%H*EQBljfd!ST7`Q_;oo~mLAeI0*-&rE&;_lWhCaqq&SfBv zatYEqF;H?BVYxOt%@J&K3^I4;-ddoP}!1KdttT@OP& zjMM7W(ZGvx_h!@O1fJ)1)rXKDpK>INuHjho@K+*X3DwRY1p` zOC{+to-@1{ilCDk;-F+L4VzHau~*tMNqQ5H`3KRS&wY5l9JnrogSDuJR2f4MJ5Q3n}G7coPaI=HXwlbkbZ-K z>bMO>sMm2D+y8AsZP5rUL)Yh(f*Y)T&0GAJ3X+7L!o&VO0U^Joz`^Q&6EzbKz6=y~J_ zT*+FkJWZ76>$F1+#hu4Ch_uTT`AP_pVbgEOEg#s zNINS3oc~oYO%(Wv7H0$U8ngdbL9r;`F|An?h^%-8fB<& z#lwh80#W6ra6XK0ey@cA<2=Jpt`2UYLZ>b_wnXzb^Vm)JWTc0eFt00>N>3E+I{D5*{bg1ZUy7i*)_i zpY{aLz6fg}uqU_}PZy+10Bv)iDAM_Wz!wFrEXuG=PP+lRJGaLon0 zjS{`^gxn~cyBSP^)F zQ}Ap?8s1W>z;gg;cudK#LfdKht$^7FAVURkj1pWPR|qaof|uiIfcW*sf8e18OKV^mJO}81nSo1@7Ow>pq_K2Jk?YWX zNMjk2rr@E)HU6-jJk4cmh#S0vNXm7C^dglcjCY+#4{01#iPLpdq*#&orb7zKLfx z(mMfNn=x3C_S=Hp4xY_O(;to>|L9d8Hvm?Q9$r@ zk=_V+Mx+T|7wPW-(|2l?BbbYam=Y`&X@Z}K^ijY;?`cYB13K}nf^ts!FI9baX$1t6 zMA{0tb~o@KDh2!;4-F`SxKrV2f50stKta&Z$xkdEQldTJGCaRf>j8J+sX`ieFZ}G$ zi8LKt(mpl82~$&ocCnNuNIO$X6Qr%^3{)UE4bPsRcCXm?j1Gsjv(`3ktV{7Q>!cMp zrrw*=SogXvs2nQ5{$(3CZ``?Y-^TKdl^bI=#cz@-*9Lp(rulE)R=%xbTlV&8+h=Z{ Sy*+<>F*= prev_span_end: + if prev_span_end is not None and span_begin >= prev_span_end: # Always mark the first token after prev_span_end, even if whitespace for gap_t in range(t, len(doc)): gap_token = doc[gap_t] if gap_token.idx >= prev_span_end: doc_guesses[gap_t] = True t = gap_t + # Reset sentence tracking for new sentence + sentence_start_t = gap_t + sentence_start_idx = gap_token.idx + sentence_len = 0 break prev_span_end = None continue # Mark the first token of the span - if token.idx <= span.begin < token.idx + len(token): + if token.idx <= span_begin < token.idx + len(token): doc_guesses[t] = True - prev_span_end = span.end + prev_span_end = span_end + sentence_start_t = t + sentence_start_idx = token.idx + sentence_len = 0 t += 1 s += 1 - elif token.idx + len(token) <= span.begin: + continue + elif token.idx + len(token) <= span_begin: t += 1 + continue else: - prev_span_end = span.end + prev_span_end = span_end s += 1 + continue else: # After all spans, handle any trailing tokens after last span if prev_span_end is not None and token.idx > prev_span_end: doc_guesses[t] = True prev_span_end = None + sentence_start_t = t + sentence_start_idx = token.idx + sentence_len = 0 t += 1 continue - t += 1 + # Sentence length logic + if sentence_start_idx is not None: + sentence_len = token.idx + len(token.text) - sentence_start_idx + if max_sentence_length is not None and sentence_len > max_sentence_length: + doc_guesses[t] = True + sentence_start_t = t + sentence_start_idx = token.idx + sentence_len = 0 + t += 1 guesses.append(doc_guesses) return guesses diff --git a/tests/test_cpredict_split_gaps.py b/tests/test_cpredict_split_gaps.py new file mode 100644 index 0000000..0371309 --- /dev/null +++ b/tests/test_cpredict_split_gaps.py @@ -0,0 +1,141 @@ +import pytest +from PyRuSH.StaticSentencizerFun import cpredict_split_gaps +import spacy +from loguru import logger +nlp = spacy.blank("en") + + +def dummy_sentencizer_fun(text): + # For testing, split sentences at every period + spans = [] + start = 0 + for i, c in enumerate(text): + if c == ".": + spans.append((start, i+1)) + start = i+1 + if start < len(text): + spans.append((start, len(text))) + return spans + +def make_doc_from_text(text): + # Use spaCy's default tokenizer + return nlp(text) + +def test_split_gaps_single_token(): + doc = make_doc_from_text("Hello") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + assert starts == [0] + +def test_split_gaps_single_period(): + doc = make_doc_from_text(".") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + assert starts == [0] + +def test_split_gaps_consecutive_periods(): + doc = make_doc_from_text("Hello..World.") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + # Should mark the first token and after each period + assert starts[0] == 0 + assert len(starts) >= 2 + +def test_split_gaps_long_sentence_no_period(): + doc = make_doc_from_text("A " * 100) + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, 20) + starts = [i for i, v in enumerate(guesses[0]) if v] + # Should split every ~10 tokens (since each token is 1 char + 1 space) + assert starts[0] == 0 + assert len(starts) > 1 + +def test_split_gaps_non_ascii(): + doc = make_doc_from_text("Hello 世界 . World .") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + # Get sentences by splitting at sentence start indices + sentences = [] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentences.append(" ".join([doc[i].text for i in range(start, end)])) + logger.info(f"[test_split_gaps_non_ascii] Split sentences: {sentences}") + # Expect sentences to be 'Hello 世界 .' and 'World .' + assert any("世界" in s for s in sentences) + assert any("World" in s for s in sentences) + +def test_split_gaps_punctuation_only(): + doc = make_doc_from_text("!!! . ??? .") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + assert starts[0] == 0 + assert len(starts) > 1 + +def test_split_gaps_basic(): + doc = make_doc_from_text("This is a sentence. This is another one.") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + sentences = [] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentences.append(" ".join([doc[i].text for i in range(start, end)])) + logger.info(f"[test_split_gaps_basic] Split sentences: {sentences}") + assert "This is a sentence ." in sentences + assert "This is another one ." in sentences + +def test_split_gaps_max_length_none(): + doc = make_doc_from_text("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z.") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, None) + starts = [i for i, v in enumerate(guesses[0]) if v] + assert starts == [0] + +def test_split_gaps_max_length_set(): + doc = make_doc_from_text("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z.") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, 10) + starts = [i for i, v in enumerate(guesses[0]) if v] + assert starts[0] == 0 + assert len(starts) > 1 + +def test_split_gaps_empty_doc(): + doc = make_doc_from_text("") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + assert guesses == [[]] + +def test_split_gaps_whitespace_none(): + doc = make_doc_from_text(" . .") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, None) + starts = [i for i, v in enumerate(guesses[0]) if v] + sentences = [] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentences.append(" ".join([doc[i].text for i in range(start, end)])) + sentences = [s.strip() for s in sentences] + logger.info(f"[test_split_gaps_whitespace_none] Split sentences: {sentences}") + # Should have two sentences, each with a single period + assert sentences == [".", "."] + +def test_split_gaps_whitespace_set(): + doc = make_doc_from_text(" . .") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, 5) + starts = [i for i, v in enumerate(guesses[0]) if v] + sentences = [] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentences.append(" ".join([doc[i].text for i in range(start, end)])) + sentences = [s.strip() for s in sentences] + logger.info(f"[test_split_gaps_whitespace_set] Split sentences: {sentences}") + assert sentences == [".", "."] + +def test_split_gaps_mixed_whitespace_and_text(): + doc = make_doc_from_text(" . Hello . . World .") + guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) + starts = [i for i, v in enumerate(guesses[0]) if v] + sentences = [] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentences.append(" ".join([doc[i].text for i in range(start, end)])) + sentences = [s.strip() for s in sentences] + logger.info(f"[test_split_gaps_mixed_whitespace_and_text] Split sentences: {sentences}") + # Should have sentences: '.', 'Hello .', 'World .' + assert "." in sentences + assert "Hello ." in sentences + assert "World ." in sentences \ No newline at end of file From 538ca5917f3f4bcbc65beb23e4c7b5de164d7bca Mon Sep 17 00:00:00 2001 From: jianlins Date: Wed, 27 Aug 2025 21:10:46 -0600 Subject: [PATCH 097/126] Add max_sentence_length parameter to PyRuSHSentencizer and update related methods --- PyRuSH/PyRuSHSentencizer.py | 70 +++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/PyRuSH/PyRuSHSentencizer.py b/PyRuSH/PyRuSHSentencizer.py index cf68a3b..dd64ff5 100644 --- a/PyRuSH/PyRuSHSentencizer.py +++ b/PyRuSH/PyRuSHSentencizer.py @@ -25,16 +25,22 @@ @Language.factory("medspacy_pyrush") class PyRuSHSentencizer(Sentencizer): def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str = '', max_repeat: int = 50, - auto_fix_gaps: bool = True, merge_gaps: bool = False) -> Sentencizer: + auto_fix_gaps: bool = True, merge_gaps: bool = False, max_sentence_length: int = None) -> Sentencizer: """ + Initialize the PyRuSH sentencizer component. - @param rules_path: The string of the rule file path or rules themselves. By default, it will look for - rush_rules.tsv in the site_packages/conf folder. - @param max_repeat: Total number of replicates that allows to be handled by "+" wildcard. - @param auto_fix_gaps: If gaps are caused by malcrafted rules, try to fix them. - However, this has no control of sentence end, - @param merge_gaps: When True, gaps between sentences are merged into the preceding sentence. - When False, gaps are split into separate sentences. + Args: + nlp (Language): The spaCy language pipeline. + name (str): Name of the component. Default is "medspacy_pyrush". + rules_path (str): Path to the rule file or rules themselves. If empty, defaults to 'conf/rush_rules.tsv'. + max_repeat (int): Maximum number of repeats allowed for the '+' wildcard in rules. + auto_fix_gaps (bool): If True, attempts to fix gaps caused by malformed rules. + merge_gaps (bool): If True, merges gaps between sentences into the preceding sentence. If False, splits gaps into separate sentences. + max_sentence_length (int or None): Maximum allowed sentence length in characters. If set, sentences longer than this will be split. + + Notes: + - Setting merge_gaps controls whether gaps are merged or split. + - max_sentence_length applies to both merge and split modes. """ self.nlp = nlp self.name = name @@ -45,32 +51,66 @@ def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str self.rules_path = rules_path self.rush = RuSH(rules=rules_path, max_repeat=max_repeat, auto_fix_gaps=auto_fix_gaps) self.merge_gaps = merge_gaps + self.max_sentence_length = max_sentence_length @classmethod def from_nlp(cls, nlp, **cfg): + """ + Create a PyRuSHSentencizer instance from a spaCy nlp object and configuration. + + Args: + nlp (Language): The spaCy language pipeline. + **cfg: Additional configuration parameters for initialization. + + Returns: + PyRuSHSentencizer: An initialized sentencizer instance. + """ return cls(**cfg) def __call__(self, doc): + """ + Apply sentence boundary detection to a spaCy Doc and set sentence start annotations. + + Args: + doc (Doc): The spaCy Doc to process. + + Returns: + Doc: The processed Doc with sentence boundaries set. + """ tags = self.predict([doc]) cset_annotations([doc], tags) return doc def predict(self, docs): - """Apply the pipeline's model to a batch of docs, without - modifying them. + """ + Predict sentence boundaries for a batch of spaCy Docs. + + Args: + docs (list of Doc): List of spaCy Docs to process. + + Returns: + list of list of bool: Sentence start guesses for each Doc. + + Notes: + - Does not modify the Docs; only returns sentence start predictions. """ if self.merge_gaps: from .StaticSentencizerFun import cpredict_ww - guesses = cpredict_merge_gaps(docs, self.rush.segToSentenceSpans) + guesses = cpredict_merge_gaps(docs, self.rush.segToSentenceSpans, self.max_sentence_length) else: - guesses = cpredict_split_gaps(docs, self.rush.segToSentenceSpans) + guesses = cpredict_split_gaps(docs, self.rush.segToSentenceSpans, self.max_sentence_length) return guesses def set_annotations(self, docs, batch_tag_ids, tensors=None): """ - This function overwrite spacy's Sentencizer. + Set sentence boundary annotations on spaCy Docs. + + Args: + docs (list of Doc): List of spaCy Docs to annotate. + batch_tag_ids (list of list of bool): Sentence start tags for each Doc. + tensors: Placeholder for future extensions (optional). - @param batch_tag_ids: a list of doc's tags (a list of boolean values) - @param tensors: a place holder for future extensions + Notes: + - This method overwrites spaCy's Sentencizer annotations. """ cset_annotations(docs, batch_tag_ids, tensors) From 6136a6a6632763dfe9fa2d99683a5b4025c83b00 Mon Sep 17 00:00:00 2001 From: jianlins Date: Wed, 27 Aug 2025 21:12:30 -0600 Subject: [PATCH 098/126] Remove obsolete StaticSentencizerFun binary files for Python 3.10 and 3.13 --- PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd | Bin 68096 -> 0 bytes PyRuSH/StaticSentencizerFun.cp313-win_amd64.pyd | Bin 58368 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd delete mode 100644 PyRuSH/StaticSentencizerFun.cp313-win_amd64.pyd diff --git a/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd b/PyRuSH/StaticSentencizerFun.cp310-win_amd64.pyd deleted file mode 100644 index 567357204246900897187832d0ce615af1893968..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 68096 zcmd?Sdwf*Y)&D;M2BHurUPjS^MhzB46pbi2QZth18J#FzQ0g^SMW{E#Ohp9@P0}(R z$MUplwOUK7R@+)HtrtY038)0TfZ_#fRjjR@anz!%fZ96W_h;=hlS!a``aIv)@AduT z$LmGTIeV|Y_S$Q&z1G@mU#98w1)=_-P-p=E3x!Z<5l{WAx4-|*pNdduyFC_f7kYS` z=l5Jxk$is7Q>IP7qGra-%PyRG?j<$noqOq}m$lS<(j! zo=0uI+tL?bd-t^M`95Uhuctp@&%1)>8PnYN&+Yr9>E~Hp#gOO=ubRRAw%b4SC@E81>iJ?!wY|5}Z1>(T6dGtrJY#k^$Rug;R+QWTYkHg zNuPy6vnbQe^L!N^QxV#%XH7+@vsj6~>nlPHV4d_)C^Ssp|KeZuhX&e{yCxJ`7|_*| z*zCY^_TKT_*|J_Qc*B{SIKT;|5=~Sn_$24`UY^jQmKWJ`c!jD&~vbvRpXiJws z8t?TwI#b&x{Ry*IheAuEbX{)ZA)>8n?;#bTq$p_2dVYMUH$J=#DXHC){)k()s|Yo9 z{HbM|L}5T}E<+*DAAVd_MJONVn|v@R+V-!lNNod>LNsSpK3j-RQ>nJig;bPU>x%H6-!wmWIAuJ) zJ3IM`Vq03?NcxRK6MlWA7Yl!_GZacyCJKu_l?m zV3Lyqiz46keb zkzQuK2?IUP&)`enz4xnl%Vx;TA(Y z_Sec|l#YwE-=@~a!Z8X(+HcUuipF&X{flmTk}q2A`D+l%Nc(~V`9^}0e(QR*D&1I# z6i3=;S$fmD#=4i1{ufFA-Kh(bK58T3|0C&N@}TFRwFtE^uBajW)Gcre1<{$B;`v8G z*0I%*%&IX)$6Dw}A$vy+P5Os)=lAjo6`ubhn7wq<{J6g=nVyU?p)P+}{`=IjYG|S` zu&HBXr2U&;1wo-pnfV<&e-&dZY2zu*|ZKt zP#Nhs9g~od4Bz?$WRWEv2f4WkWG4MNixNJv88UfV=Nd84H3H@0MLaU^ROxhMeX$+y z?JsPv!RER9Pn|k7vPV0ORZ`VExEJ(h!ap9f6JD#Y^ne$=!ZI(mcJ|wJ&n-wV@-5yo z5a4}5F#aQ(k>I4i$@AZK*2e!5vOKZim`b$K>w{!5w$Q0p8*|s2;Kq%p@OQ!IMz;ob zjpv^YC3CCQ<&5jnpq>v{T_+9n5vH?d*{Hs|RgG{1dsd%;y;|e?BC{HsjSX+KM9PF> zJm{<&P~ma^_o*SK#rJlp60;`%kEtP^IK7=qOk&6D(GZdLGe*($^qdD1{_{;8AGTB{ z+Aay9BMQ+!J#I*0>n8)SivA}F{}U?~7|jNlVn%Z!FFi&xZCx>8XFM+H{RvgP&|69W z7>wo^YIjES4sZrW^9vv_n*ZP-G$o_CqltReY=e9)`{XxWS!XooN+<%Oc{*4znm@o) zB>fd;G#^`J#Dg!=K6Afb2J(>bRy*_8LV}Ak#q-x$kB(BYGNbr1D3R+0fq^><_M{@7 z-yxTz2}ht{?XAiHwiP>f_Lj-AbHD2!*twex`+6hzp?yU#ot2{wHzw{NN!Io+7da|D zBXJI?8s+nZ+~4d3(F!BBvGDsOw;az)*JHdiN~BjbGIx{WolBC2W7_CXB8pP8i$@BP zhs@7guYOE7jJtxfa!pdnlQ%T^ z?=wUj#G`+LD;1ZomWUe}-a(+ISd^aHN||p_Ch2F5OWy(had;9cL|=PEs2Xa=B!h1G7J2BtPUGCP znnwS9FOAFSpHL0II=%5H)Xa^V4Lt@19?|V(;uT){nCkp42{fH#;Sr`;((I!_WMlon z3M?No1m}Ia$rr~PHzfUxx+T()G386aVH6aUdV5R2+y%fDq6ez!xQjKk^jB5@-Lym2 zyV)vjzrRL}`nI*z@F(Oj;4C71Ptw$K%>la#(RDbHB=5YRDD3WJhYoOd5lD?ohmngrt)E;1lZL`@^tRz4qt@`kZ6&0by@(Yvx#hN~J ztdmgt;hzdwx+Ckp-T;cP&3wi2qid4syVvucO#ga=J(d6+!ham(+{fJl_i>Q(9tYz+ zn)4WGU?dz<^Pf7JtD2~*x?wO5=Lyw|2T~Kh=Fmj{rfTyyGa@ycohEwSz&>I`Z++Cj z{JXJkqv`8L|2_2vj6Z->3BSApPib&tH_|-+l-eoIGWp?meyirW$<+9xDpPxS{!C=? zbnMhXnW}xamGil(5M2)-7TnC#BlAV$CS36)np`+bb(1Cb_ClYPv07@{8fST3fCL!>gUrhwv4=x@|e^Uq%2i#JW9 zjzk2C{?nr*HZniKZpx!zMcMo2j+RpGk3GM!7isNO>Hw7-2*mdoEhnTLF3-Z^fD6p<``CNj|Z97GoKcFt@&rFxSC&YU5_|m;|?XU*nE7` zA47PNGn0m?Bvd+I`;TNne=;_93@CdwA1{mK%dOE}yuJNES@X^3T>^=v=4bZk#n&|D z)bJ}%EXX1K5PjhvdW~!1vw+c6K6T&Ggh2DoNc;U?u@Vn@{;S#B8HceAB4^Je?7#rT7 zYU2JUC0TifA&rDI0N`ZhH}@Br^+F&k3t))9L9((jGXMJBdXaV}jbfg;>%E{ji3=Qc=GUeSo)jfG3lxJ_A-b2UJ5?{Zmn|9;Q z5gT*vD;yX~N$C+}&Yrmd8^+)6oC9Ym?( z@S;6mqlLxc^@Yh?sX1+AnHY^?(o$6Vm%2oT)RWNMi+N}Zl2iAY81eQudy~pRjI0+E z6e%(9jNU>U1+kd7-}Y+CF+|G{k?3hOY%2{~X#8?Ru2Fb;;%ik~FL5a)io<0`N|dC< z2pb2fMYeuwGFq}@bI1y~5DX^8-Gao}ghwH}FBcG;O}`2T(}WA`E#QKQ$wZ2jxG>oU zQ;Fvh5Q_`7)#Y6H4BuxfT=)@)ihWnul@?&wGB@2~N}}r5lu@`M-JyiUB2=0FwPIM( z9SeCYM31F17aG_sPlQpk0AgvZ5`%dviPyM9W|*Fo;(4k`ZcCox8pWo{<4ToN?}oxL zl+iF&uxheG56vaLa0G-gM6=Hk+b{CGV`ovq#8&fvps~&CP$sR4Ax)D#Z;o`_w^@$x zN54c#{FK-6&e(ReBtM?P--!7QPm2!Sr~Dil>Kv zxQPc?!eCKI`BURg)!7F4YW9vZTv0bWZdPV6J3c~4Gtm?pF6lpGv*TNT;bgAkjYxaQ z$e$Qkj$I&UE0Umo3ns^C)dq!PF-DKeZujWD--5Mlf_wy=lx=7v^tBGru_*9mz?7W+ z1Zvhf{qp$GPhVqI-*6Cy~%&hD%h?!rsskR-oCe2 zy-HRE^;Q89?_DUybTXN?*^SLPSL~#Eii+Y@_`DUyTuCFv5HIO>8c)7WDFrYgE+*m> z0w+3~Z%jd;EGn51X7Vx)$wJwhFKa&pTKJSfo{e#z@1mU?w+cvB7?&IOSVxphKAQ zL3nj29%&$y%9OEZc31*+vqOQ}>=00o1KGzb@MBDkV z8uD?50!BH`Iy3aC^=oO?wm&d+Z@`jPbAJ(Rt+aWX2yAconudC@ssnz819j8#mN_i_ z#1{>K#pa-VkK|q)Xo@&f84K@Ehv2j@t%=;Y9QED;sFqDM{f)l;)1o8)Y-&dfoh-4s z2yq~<{yGfT{Fk>=i-|?x!Vp_&r@ZXhWE>2w5H;gqbX%dd&cRsAha8L#AOY)f@rOo< ziFn{*eD+rqH!jA6ZeDPnLrf~t z{`0Ex;ds^7xv?8;EgJqL1p>d~U67?vxC3ev{@)2!D&A{Gdxvr=q|hC4@BXA$+l#(W z1H_ru|B8zBj;KC4M73=wU;cOnEIkTj4;Z(V{+kBBiWJW>9OelfB#O;VOZb1tAB!+w zxFt>64KQxYWbyIV_XARAC|`acuF&SeB$lnu9AbIlvuIY)t&o*UcT`uXQ21q#@Kxi! zUT|My+}9L!RJx;{FG9E&#i6`mqDCq^{m3|b{Mh61|A7bLAN?Qt4>&jfQFOWFKAcc= zAH4p@%YE?hA{JMM)JH|{LCnd{JE^DJe!4A9?$OI{{a6z%c@Mr#;?U#IlGIpNaP}4! zOP^tHA0}B&N3bxYR3!fnrTXE^cqII{Eq0R_-&mR2E8(YxHu))qoVC`xZ<8cM9>>K7 zx;oF8`r z1UpX;1p(wO{fFa03qMcD>Cq&$hx(7}3!ACWB`o>z)g^0arQSo zETXzQX_gD~5ZY%Z3`8*woXT1ZewbFG_NuVe%0;oi7TIryKcgq2fHmOZws}37nH|7{TJHAg7VlWc`419 z-v*~!P;LTUV9tI9qGWo+avr@Fl$V(-R*f>qSF;x-U0G+(Mk_P0XWNL$ndnhJ6@Z&E zPyLZeiLKA>BhAwj5qcMVThWDQ3JFY@Vv)$A+w;1yt7og#T=16Jv_Z7T*|f4H<->v5 zicK55WwMmz#eWuc%nK(P>h(tQH>g%C#iyxEpg{K|jkVER7|MS5OY4mnlWgr94x?ezk z_%#aI+}?9L@=g#EcH{*MtlE<|r@3d={BPD&|2IpjT2{9$e9WHruvs$O_ijHPdwVARZw=MX9-H@;?d-W9Xk<4Z z_H5>BQ{HZ8zvzNBJ@(5I%k39+%6@e;iBn9j`p$h%F&bGq9p&b})+a|66AxtJl#M?| z_l~kw^XXH2LF$_H+y|*2IA#bl2!!2-@0PLseP*ZH!bX^$_{mvUF1p+ zRp|*NSySjZta_=h4AmGqC)e$05upG32LF@Xp|1Gr7}_#6#L3Uy?y{DWwe-j$W+7>F z8F#A;euS}(%{0im!R5|#`5is^EV?PX!GGUD#2v(WTZ8bNggL;W4)E};0RAxmEFt}X z7y@iJQdOJ(``i?g5S}jMhVecUZor20JWbQ^ciCB-QwaFm38@)g4hx z%H{heywZ~(9839JdJy1<$RoT%*f$miNyf#0E7jXM3RIrTXS(uxsr&&X{rBZxQ0)Sx z_QQCGQwQ=T!bZ4HW$JTR_ql)Yxh;d)9+dFla)-1h;VhRh0tm8Ca+MTt?w2+g z-Aybf+OA(WKj>H>gWkt@&mhrSh+<@mn5wSNecz#a7*v74`=3kf8bO;&xy&E-)ME2OMye8MyxR)|>YMzO)viET6u}J;MibqBI)ACatos4Ly0TJKk1Ff5P`v z+i>0kISK5<2|})9#P2x=c>$0;I>#)9LFD#!bT^W}l~xOhC>)df!q8b6|MUDzC{>>L z>+$^!`^txx4s+FQWHvybl#|t`{SKYLi#T>)`W3#wG1T(BK#y5=(l2TtT74_XliE?J z@RBrQ{@gfloR&`%q7~m$*5vuFlAciV_<-JV#p%vSOK7@RgmD1N5jN*R z_Qn%lXGPtqpngDY(M=!vR{f%0O<(5{Z|Rk|oW#CUk_RlUchB8sX?--^cMKzelU^ui z1gW!nrJio7ES8k_Mxzq@^!z~%u)7!FFazA!GP-OVVx`h-S;<0IpIz7M^L>4C`fA=& z_o4yg=fuyBpA|pzoU_k3Yx#3M|Fo)5Ir&r$zg=AWI49p0+VBX!f@AO1Jw0!d;Kb}ySUc_Bd2xJ>1^`d%;PZs*}p}uZ5(9F&sosPA=mMj`C8+P z-0+IkutKdq)N9LDwCrPlqwJ8tk$u5(kE*7!U2IOuv6pls(?8nMk2gD3lxbu;^GYu> z_9G1jogm?)0hUgFat0#N`y(Z^HQ66Gxzz_(Y11z7RGYP`KAo?Ed~<51tS3^q#C8naosOq4yQk0dpBp&*Z0gK4r4~ zBJICX8`PS1v@dT32p0l6BU5!86nF;Tsb^w=*G|{Z81B>-oBOrHo z^<^fyhthSL?heRz`_UQTZxWCw8{nU)X@%up+ZX+3ze6=wGd|*rb`&L=41CpjeD;hWHHGBIrnkj8o462sO z|A~l}Dz8@MU!ihl@;&=M`_g}KIsM|P-*lu`_gr@7MJjlEZKrCS_tZij=bYV3mL=7m z|5QV|eyEq8M7zAoQ{$5nYAu>K)_v%ur=Xmst&jV>&RQ4u|CvlTuZjC_Cet&z8~sm{ z=~5kA7Ial!wH7*v;LrrRg1y}te*5qsK zWYjBO`bML;$$wf*7A^U2Q`d__i}AU8cBVLIrUJlxgkx^S(AqbY$B=9$`XH-cI_LIi zhocF8z2K#9cQg`tA_!kPZ1sJE(N?3X3+;8nf|o>TSES=+YtqR~&R^1c+o0V2O2`~l z;r)UQn}wG1ytZWnytdCPuRMl9fBA0CoBDU_(CH%Tk&kt59yHX5&m&QDM_d#aokYBt%ZO2ePHCzD3IEo z9-3JFNo2}1h3KxrKo~`(*?+^A@rT&vMN!^+qG1ayD_PsDv(P9qZBO1wLSlQ`KHG^m zMF$trSVd|4?t2pBeCS$)Lyp5 z<#ZS7D?1#qTo86RBAI@;)>aa=ujR>FTQ0O!svBK$?=YX8kGO+a4}9nwt~tNC9BOKQhiy>D48y?qCA;ma(KRU2lLRh}K+gE1ovDtz=jW=~&2pX?3m zUA#RgR+*C9mrN-7ZHLi@hGwh}lg z*%=;Maws2#=;_uKI%k`&CiA@2Ui$i4qze_+9m!0rQh^tXi)9d({rcfbl1Y!W`zivj zXY|#avs(qkd6Bv|{4dzoIgd{psxhnNM(a5*wkDDpTp2*RIfMD(SSeBbV0Cs`S-IAr9QlaH3#?W38eVT-YvmTxeo0UEyA-wcHrJ@yptP9S zcPelfMWr03puLi@2}|bEgKW6D#O;i>1rqea?m!h;^uZ9`W8vk{f@=}?pLZK=1_KiJ zpSF!Q6TT%hcFyEkT_wN(IR$XH!R9`dXYd5^`@Jcr4K`=?Qx#^j0KW-viLbHv8A?b9 z)QQR1=168|8k+R)s$EAyalusQ@7TAH^~b5CYxs942tUMeff~EZJjr{ZQkz|pnJcA& zK3=6yHp6Y9g$^@n9L`Z9pP2oW;@)xA@CtL3w$8eEK=0-?sE*k1n~Xc3!Yz1IAGwj! z7XKvqRSLS(77h01Hi*M94Ax#0@i*wG#Y2O^(POSY1~lQn$)4(yjbVwxie&7ck?TLP zwJI|h8kmdr@+8Krw}T5#Zued_(C&H(Qwa=`-dz=I1A0z^Gy7*7aiqh z7$YD8RoB3v6dLv&L0_@a6{66oA3RXobLJ+mjYcacv~gUc3s$NRw&3pT)(wt(4{hrZ zdBOtu?QVnz8~rvJ)#1(HQEhHfZ*0y!+iTa;#ZD&y9_cV|FZvWka-YH(`DKji=wP53 zLJ9xqt(~lBDx+mRv=k=*Y;2>Scd`1N94r#0vG9{L5UX#)fCKE|0f3Z-!DZJJhXJdU zHVm2ow^hpaH$`pFFc?NT(PX1LI<rnkyo4iPuU-6$&Lqy z33tx~eF|BoOB}CzB=$&^iA`8wyy-Ov-X!_OtAjx>;YfQc4FV`0P7yZWVJjT`&$QaYKcIxQ2qw4h zY6dy{0A+d)!O=&!A$YEE_~Emy&hS80q*u zUP~c*4n+bHF#T6pQ52(p{#};me1ayi^^x`;0o_C8yk%RZa+UGdHTY>!SsKKfw_e~@ zE8v9VMhciroMMTA12tEvJ?i+ulz`a8x4Nw4=~j|qP2WchV6)kYqpWOcNVbrrP!re) zAO(Z!91`us6&gFnyLJW#?$G-KRCP&hk2TDWjdFox#qB(FvF~>>fa@(Rr8risYQL6H zNJFLN(%zfEuOq!!%W%~a3m-^7I7J~DC*fPb6s_^_cdqCq<7kQBQJItRKlB%wrQPW_ zP)>5afx(GG^qLV>ZdxXfcXIt#%9TXbGs$rsu|x<>L=ja!rGrZ8!0H+C&=GU#SCQe(}eKL$`BrzH_J zfz3_+M4AdEnD3w9@;7NhoawE@?hEYyB<69HA`^-zs4DCqxTPfS74^CXNYRkuo)j1B zD@HhMZ_1B85NW3iZ{tHxZG@lY)crkL`L<7 zxP@{T=nYv4<<8&@1P2QNo=2&vYUi!Op~T!+!K>EXsNx$0<#u<4KgFph3MM8s_+Kvh z25?FLZ@JSQZLjstJvYcb!sY7Xjk2=MLGJD@_cn5ubdVPGz@}W-WqiADM%~8T`z9?T zE>u>!yv&O(<9KDvFV9fmgvO6m#&^mxK=xyowVSdQ^kjXSYj+tH%J@Nf#zihe^PnZ) zFV8s1Wvo`le-$%KHa^WA>hgZ8JXQdU?ff)1+-3ZzPsXMkS0;((d1RFF?HtOA=kM8b z#Chep8~wM`EoYHs3-3#A=1tvqrp5S19NGiyf5|P54*X1k|Ine^NmQK7yLCfuo=g3b z3B!_;im4^}&tCv}e^lNHTjnJS(Vtun9t)e$66RLB%+RA-i`NfRf}9SqvTkFM6wv)Z z=#DR<>$S2o^fDC>26Fk5PvKJ4(|i3mPg>u&+-nBcdzE{+!S%jm?*W7Bedoz-q{*e%0$8KUZ)zG_&f)(i*K~a$x8R?dCdUfOp0Bo^jeRx2@ zG(~QvNnpymcvz4-t5@nkrS|FgPf)WSmT88eV8Pl2Q<*W~(Y#`r_tnlKv#)eKPM*S$ z&6d|&B7SUnPu5N`q`f6*p5cU}G*=6Btp-yTM`{iLS_M$vsAGZ*laKO@1C-I{H zCH$vZ`kPM$@w3iezB(BS>HqD)v-tg3+4sL#-~RL$R8Y3Q{pn+TyY=m>aT|F3D(l;a zx!3=9>)WSl#Q(eX?Ii~|KkF6aJ?pr^dTsQZD(KdA|MT_j$26jY_3a<%{eQ8(eG%o{ z`u2&!;MTWqkV5HM-#(#Ok4AJ2OD12_+HNL#q|&v%y%ZneU#)NNpr)0rZ~urf5MShp z0hy{#`N%Fh8lCz-Ti^ckG>ZR6>)Y?h+mh>V>)XT2E1#vxV^scc*SAliZ6*54*0;Zp z)c!~7+sU+7Q(WJUpThF?8nwMp(OCB1oV1&|Qg2FFgZi zd)E3m_l>5faW&`%tbDIw*_+jGi}x_sIA5#SM1J(AS%1cH?~-xHtxmf`wW;kre{>cH zYw3@uDR;p@s*6Qet8V}AasRov{}fY}(Dl-3BZ!=>0-)y7U~e0*`bvI26UY zYFDsoerZ@`>XeiFg-x+aM^3i~54s^~Yh2vVA>wSA+K4(?#cxxwH7cfK(Ei6PU9%bisUicMR?%4PcIdU*a=9}7 z=+(cK!>|%2X9w+}6b%|ygoV=<+?++4XwcWY2p%VGhzzLHvu%5-43g`L9b=>r3?bOm z>k_U0Rz5s{+Io8JQA?cg*WYl`>^qS&a{wfOF!J~X& zOWhLk7fi$Cz%+Z@0LvLAx~gW z;Zodx*$rsU7@^omU38u(_yePwk2)aTs4ku~N>j+4G@7k;x=H@4EvIH5G%-+pEo0np zVAn$UM}Q+PUp%M|nSTp>)3z9H{C-h4em@6~Iy$d9o9?&@>!6MFKnLBi0Q}k0OIRcA zpIWY6AO_}JpcBkL-8$x(hB-37OJ#aO8^3jpbK4!EoqoB|dNX$rt0bHo6}iD3lgAiK z&7c3%L^H(gPivz`ba!x?>^hfO6}vY#!2hD>^j=udi$BXl9ptH zK&I+6*vn4U?CP(fyALizAFzHA{xi zmg|xT4AM(mbWe^2E_Wnb`e)u#YsiS7S-zQ5``@;{pELXukks|;=M4XvZ|Cp+@;-Z& zzk82M{eS20-mi}QcmD44dz%!_@9Xc5I7bElr~a<&X3^ihQ}6!^fA<Tw!qj z?t25lYVdcDZzewzJwfU6cV9UBU;DdzscB{Y?k^bz@kO^vDl%0~RFv(G z{g3_KPtT(Gf8y_6|EZLVY_R#e8keQY*YJ@&j>`Y7zxxf^R-(Vm-~Cx!1^?UqoSCZe zKNErx{ouoM&()vmwS9o~__z@LFEP>gI4QnDEF$XJ(ulhMMBr_o7gEE#bokHu@WWsG z6%Um5Xt@Y&PmMX7t54Rnyhl&$q@tPV1vJHaq87$*pu)k}ZyRKy_bO96JLN!NS4S!$ zU&Fn!Vq)N*d|UmY#iof$G+n<=BbJE&xk_P|;WupM?IJ70#-q-bqo}yqDrQaL2PI@J zlXVSOCOY1-a8aiapb|=c;vhh|9l(wF%Z+7*>vN!V6hwQUi6KMmnYi-!CN)88{Hgd95$(CRr9hpTWpZXiDbBJS>8tnhOZ@iTjTpNGH>Z}-mq1gx~_GXShjoyx#Ce%ER{ zT($YjbJy|Jv!CAf8(Zkb4=-Sln$m5z-Mq-*qZk?m(M%LMyF7?#YL_-j_*Eu1ndp-y z)G+WiYULuYLLs+M0PH7ZTbLprnuf^n(e|)g?D?dHi<0TPEOeu7d={ojR~w-G)#RrLUsai5U5sf?MWjGv2&ba(|txKB&%SAL?HAHA7;9IShU(yv;7 zk3s*`pf7S=GJk)78~ouflh?>Eo=j!Gsrs?*jrihFD8yv7lbh4AjS!PH&lgJ97A~VDTV$L7qkq1^a^nZO~>k*X)q=v-1-sfVv z&PI0IE^0^*pH5ZrdGA7U!OM*94~b8}9+JKYg|_YAa$)4Lum=b`Z>9Gmk9Nl&?@xZ@ z(eS|}FYe|Y2fp6R;Plz-Eff9@fYRAxH2g=(I?xxeWOtg~JyrILlt+vZoib-x~xtzXv0vnX$iA zayop59+?SePSYdo?lTJ?Phv7N_IGOE9<7kdMKD7ekas_FZ%xag@r|AQfJ`BJ8RX{2 zMjktjP0FiC+@od3c-#6tn5~`Q4|s}X@du{b#fei~TP~2Kh!eX3qzwp1xb(@z^nRo__<8m_D+9=m4Fh>0 zo}o%r;;3TnuaTi!4m06Rw~5`^S-S(04sTz?^(X+e1%Royt8jPrR0pxgoqURl8-Zw> zunBI3TJ}bezb0mKkE%VYs!fnjf)HTmPXo3Pz3((ZNRvd`PbJ0Er4Xq5Gu0^=$FG28 zofRh?rGM9{uJnEeJN9nX>{os3XL`JveUuIltm;AB3+Y;x+6RbJtdpY|)Nt|Aj)y?S zy;d{9o_ix~?^9KiZlQQq-~L#4Z&}qY0s7N=q2IL({ikY1CVD^_`jZ^`y@h_ZL%&NA z{l$hpx?-|wp%0d-cse{t^@tNm@(a=GGTt0+C93Wg1fzq{Mb&iP;m)>7Vt}!op=xKb zgZF0MUkcKvF}x=IEwp2Z=P5h)13+H7M&HQnz;S{P|L~_gRPO@=lpQ`&MT1&)mTI}xYJo9Z)N;71<&A}8sV(oKguH$C2rT;{#_H&gh17pG`}}UI&uw;iTY&!P z>&U9Z+Wp8NmDMr|+1(L*P1-WsiHB0-DYP{*=$CzW7jlgIYp1AA#Aa*-lL#f-vUjJF z-KAzslJ){WF{7<)M4W`A1jKZ#vzAr8$x@lw#=F3w)=*gb(Q0NGL?v8GPa8Y~W^E7I zvmkpX$YvmEFY_dbDnticC1$cG*q#Z>Y_c-uGkwKI-7{1xO7=aA`eFkxHaWKNL_kOeizaDinDWvnjbO39@rvHMximhpZ-Dnp5?}9inQ4 zHSemuVH(5jujdlDalC6+O=77>!Q2L#(8JehMn$n_PuCZ}=L0?r!3RGyL2CL5UuIzZ z@QKQixC8Y{@LrQ0OoTsB727_sBV5eAMkRoJ8xZ|aO!~X?#al%zqhG3+Up0tFA$mC# z5;_S=9j{U)F?;?b!*RUeGgUuv#H`U*K+ID=cJCs_3({+{b4RO~Iw!U56P}1U))Dh5 z;C+dir(%B9)nxHIJXC1JSg9^iTq3645wlWNW~x59**J4t88L^I5%WawUX%TJ|1x53 z6){VMmi=Ua^d;sWD&|*xh(TZvu0HxP9!B-hVmTeQplNoB+rI8@Q4oM~;nJSObh4+zNW4%#zt~f==ET-s(w<6r6 zqWnwk$3DMFRIwN1P{K#AL3GugVs|Fm&&DWne-JQmZ8wT37Gl05M?ie%PHHJptC3%U z0nXv%$1?~^$OSEQYXWCZDKV#di9+3T*_CxX=RGxp&-j+-^Aw+uH^uS17X1<<$||lo zT@81zR@*+>*BPUc8?-R)W%Pp{tRf8Mx6}B>*<*rT*%W(bcAg8vI%x>I^8JB{*t$sj zX)54PtZZ9!uUa3;@C!MiO>Op}9a8O|kgA_O^5-k9W93!5#W%GUvpI8uy1oBEdh1P< zUi{8(k+x3Wn*1r%bt{{Q$KLRBKKU6jF5ZM5-N6`l`> zm@{?LyiyIPd$HGBrh9&yaTv9|Ptvciu6v1dZ*#u}o}u!8z44XIVu{5Y-ZiuTn&)@* zmiOFG#S%hD4UKiR?5+;|s~20%36?@vqHP(Bc{XzW)yx^{I`g|N0xXo@F~NOoGTp}^ zb@1$Z9u#9D{IvYEMC_x;^^2LoXy$mP&Iw+u$fQ;pm`Hle9BbldFL$Wkvh$T~AD|LS zTd|UyD?@xzJGo;9>c6(9MlxIf)A3YC4|}w(^z)4olQgqt9gm5B$U%J}I?0l&{;Nq8 zW<8&^C2xusTh+23VjXFp4`hC2+YlOw>z6G`KdajML?RzE3S>U}Tm^84Wf*QKmPo=Pac5+o@Eh^BizU^} zy7jxFCfHo0kT`DxOK^4#vp4FaM*kgt-y^A3Im`YyE%fVaM16gd5#iyi?{shd6JtoX zmCZ=QxN@A9kuXa@wiNI_H`GqrF&mF2_Tk_A*0;CP#JtrY8H3 z32NfTxs$xMFRN$n?`2Lcc>ULUnPYgo0@x_8KVz%=%CHxE?TRXMH($$UzJ}E7?u~jo zkr{BL*ZZR>Eh*1O*FE7bqe%!WWll*X8W_V5-@A1K}-8o~zwy)i(LD1`bR99kCHqn2L*Y){8 zk00ae%0kteiHpI2%a;ATY}*I*Pw2jFE_!Q- zJLpJ+_5=Tw-SbuJzXPBPOqjzA@bS4`^{e z4>vS?BuDLr*GTNe|Lbd84Q^O&l2wF1F|2-kj-7_0W%wRNiRR;h+{hya*8eS`ft+G$`~|UKvOImx&9E^kHqFt zUR9k5pfl0LBo)E{j^W!@$(&2~5T8{&}~^=UF`^Nfe zUT0Lc45)iUZTk99%=R|aM_h-_Mq%wxQn&<3fxfj#G`CIpvT?@L{5m_NI45H!Ix{B{B;Q_Uhc4bJ-PR~ z6fwE*L}MwZ#uAm;K_iW3M)|~Qnl_;2XdS_OHd|m2Ibx_qmTlYzJrNK>6Pkeez|u#-1jj^oR=X5?hoXY;`01S zJ)pD`rWXY7nEoewbxaTGJrkV{29uZ2F?v~^-$6E?C{R$_XO`UCi1z6X-A^Ff`65!d zpTKo$9K!K3j?x_&DXzEZ$Bm`jW;LSItL`Uf&a<`7b+YKCjt_>;1b5 z?BzIiSe##h?R9J%16+4B%A!vm?n+_AZe&13bAsQ9^>L|$fg|J89*-oB<;*xRcSx*a z;mZI`ML&s0s*lo{@$iasNiK|7-zW^>ZOCoxKpU>|B8_3&_fl9t5W4jLMbv%qUC$58 zzI&uISg-0WpJP1#@=9G36dU`I(fV1W{R?$+Y^-jJN^ynXGCRsCThO+l0vhOGPv|9M@0zlQSk7nY4Yo$O}Nu~86w@)_LO(dS;&)A4i1=(6Kdu^L5y#k zB_17YeGnegfP&qqMU;AP{4tfOD|OSQoU;q<2P55G2}y3f>1}!=|8;BZVJ1H7^2Z4lhkqNUF6XV|j)Z6Q z7;7+BK#cRl=;Rg4WhV1`no;=-R@n&wpOqX-`Qb!0<%>e=d*vY<^K&XtD$Z2 zy@*R_4vI8d1p~6PI5gw-ZKPuSP97DS%x0or1vg@~0dnXWx);&9f;zMaqCRnc>kQXZ z%Spx89M|)sN9r@Kxj98H^t0JzpHYj6{|0`kUQ%@nsPd0YU69Q{1pi+Rp2hDKW#7dy z-pBs9`5umOJg`L5xjy@|Cj4MCK3>~D3#lDhFA}e?NETPV1F1>)+wtpZ@h8zsaGgoP zExg!#$=}9{O&^-NI>{hN#!jwI&G1jGY4BGy_!Dd5erLQZ*AIUq-uBKQe2#`#cWTnc zWpX^0)i09K!SQ&?>)$0K9rp-@*R<@QD0$H-dAa12G~%oJ-A$1PmSxXpf?Kx|*}k!h z?Ut7yy_LkhD@9kw#mG-0hH6lsMfRW-c&!+@=|dwO^GPa^IoHTs)__M8_q*cIXk@N; zWOk;`7MXkXB{S-c`qCVJudC2Mkv=ky_=#?ODd_HTrh4cW#Sor-;NWti4{(z|enU$O zbj}y450(vWubCZr#+}!-QD$qF z#!s!rjIZFXH2+xGzD#US<#dn9w$=f!z+}x`#|SS?I!3$rz6Ft~;Yn6@>Sb$cnCsY& zajeejU_ee0mQpC7I)(31bsr@dtz)gZcpkKLU&u3;DSw&uDtA|CMcJ=(zg>?CEJWV~ zrfqXY%S@Bp?;9=nIBm-zvop0e!Lgax#uk3?yXXF+!KLC>%WcL4egs}+VsEA1Vk;#W zQFdbA?X?Tx1b2ZaxhXw!NGB#Ta)UO(X@83biZ=>hq_~ft<+mRGB>bHd{_F^Gl@~OX zz!^Nn8$YfkiP*!|E@SIQ>SeT9EPR8AX8ltmEp>D<sIZ4g}U-_{Yvtv=VTDOK4WkV=*~SwZXdz;fvU(vhoN=3e8{@2nt8TkYKjgPjfX>P zasP6;c<6RnIw;aU55y)Szq0y!L_~KqG{xR)*|&6mQXl!a1{8GNDUy$b78=5e z)CDv`8^9Lut?4z5Ne}p#xllqHDwdyvx`M&EyKMgYHxrt~sK0w%U-r*GR+OCwsDn0! z%rvepHgn%2ip{(M(pw~C-*JG4^#%-;0Q))3GD|MS)+bT*2n-Gewuzs%mKn&E_&q>( zEEX4TbQCz(MpxUaT(~B zb#_vzn5#N4Xnt8Zh4?}+)np{XtdF=D$fs7C#sYES0(vT@|mqK4a22OmuM+&q8XY$DSbs?OvFSs) zi`(w5G+^*L6J^MYjzI9#svq%R;v>4Qk3G+54*Gv2zspTwDL7_*-QxII=9uG|$IsnA z(bl=Ss4o-j)oseK!_%FN^wLA{V7XWVz}fF65cobZIpLOU1p(RFwfaH#OmqUS4D$Po z5sYqc8Xw5Em6uDqLvnn)re%l&|C&6X)d+JR>`wm$sxRypkBTrpJCV#fy093NORykGA(KQh&A1{&K8nQGcbFB@jSM!J@w8 zXrB=T$co;gCu*}^1GGjS$VjP|ksP#6W{&tMzJz@C;T1qD3H*`h4y=CC-_$f}Q4gkySh$@&J+K8iJ%zUz3&i!?;Hw=#sqsVjZr z6e%@+g|PI4o2$s)QlpjV^8)&a93-F*{*Bi#N>B7e*}!rVyfPA0zeE!!h~a!xWyzXX zx9n}Pt%R3IPXb5TQcQghC3M@t;%X*(p4ly`{XVlT^RB36B)o`EGDgHFnOXer>zhP< zlX}&+M)NnP%c*af*p!KWrC8ryz3MCRgFdzXqKj%&B0Uk^)rS%FWsC?qq=!DH6`AOt zWnbC?lsy{pC{TwRe&giRE zuPIeO*z&#kj59C&6MK{s-lW-Rr!?{y@r>YiEP5^|TYl9`Xa=Niqmmk02`jcS-hsZ= z>ZWLsbCfU2f3d&jS(UEK&hY$s(qQygi)Rrz*2*>F(!N1S*XT%{Z$1}aJ-hAiwyN43 zSaIAH2jzof)XX5Y$bZyg*IQjK3mLHZ~5DCgI3=;+I@gFxAb zUp2&~b8Q-1Y}0H2i`|b_DD=zs)0#wjA^n_4FVLgcNcjWsTWUM@lWTpl1_7}W zuKmR9Ca4`{!#mw(`F)6|&x`xfZJA#u4Uw^JTqD_KzHweFgy8RTi;@BlY-b~|5@~;# zRJL+Ga7}PiC%>a9-UMf{x|LgK#FTVPOj)II?qo$a#X|!lLuD+O2U(9&RWtiC! zDlC>m^|Z?{`=E?T#fh*4xz5Z4_robF{8Pdaka~sR?L`I%sAI&j062RWiqRE zs65MXI*9Zgz4R0F#n>QwUzcqi7-ZXfkiD(TUZ@g5w!H`0dD|W38_R=idk?aov+Uz$ z6{%&4?uh!WWw6!3MoXDyb$wJEFR!z|Ox@6S&Bzb}uki3gR`S{Gczks^Z&H)E(BCJy zcmFaTde%!{*RApDWdgv^S;c;p!mFCDbgN-RM=e#sHB0@COSLYpDy81;QmwaoQorL; zt*ao>Fr+vXcbs)YDdiiUU+RNWViSo)&X*F$kyvDCDRFNQ8XNJX@yGGNWz_IS$C+jG z3hRrCyOdTg@BWFzBG?8|b-3mSg%MYbDrc8F&$Y$+SiD-lMsI5}4L&!Wv(y}K`(q@I zhaYKwpJwyMMf7XkTSd#Y@wU$guwlrrIz*k6iT>KAMA(Ky_LE-BeGa6KUG89)2>o?m zfYb+$+Y_q^#M}9^lZV>c7t0ORtVJ#|BvtdE7?i8zx8ai$X`|mj{TWz5{gVLoc82;! zhdPB#1}%DJK;27ug7=2-1cz{pLwHqRg!dZ4{E1DP`dGZ;)qI1fKFm;$cc@$T4X~NN z>R0`d7DC=cuoWpg$00&PIxOn4`m*SES>Qsf$5GIJ9t;Kj8Yl3Qwbcq@FP$w(*27@W zvZ0!~?Z=v8H9N{V!5xH|3B9k$c7Ln^A?=R@n8rIt+erE(At=6?i`uZ+iz5c4<`{DS z1zY=LpZG{xVm6}Xa2Y%1ZB+p~iRYIt=la;ds@_)?>rFN%IflCFlBr~&kq_q>rL94& zVTzNzRl&8uj1bggBtHOHeg|B`p$7O&_FEQzW1JE;GR@t)F+IcE*;6i%TGO+;Ai4J% zo2(|PoBY>lcieAnMJCu4QCSyZFRRo2s?m=48{`;?@Lzv5FyCX& zSnig}SpIJrlW4mJeg{Q0zoY0UYCi2Q7M)pER137*u@$Hyg#)PVKMKWoL$$}?U{=f| z)X$o0UUi?t@+p3*eki9T)U{>U)Cbthr^cY>)(1Q={rG>=epmfySN#oC@7ll5f6#tc z^b;sl`-??qmK9a|cd+)?YrTZklM)Zq2_+`HWz4I($Dxux*^3E#l%XU_TFwP$ACmsp z>FhEYz=uH$it?TCI_c>3&XR0Qu|quM6FKh)o3UIiWh@Q9${%!Azwqmfa!PaK8R zQt3>TCdkFMG!fmiK9T8Jk6mGTu{_1;Qya=!iR%$kq;Xtw?@mV{a#|ZkQWO5wqVsTe zUsPOi0Ce6=!=dv)qq84$ekfl`0y@OeX^O5I zkzRTlZVpSs=~*)rQo-@*Y$2SD=AH;1otsEsQ*Ry;mq(;0HhbwSCh@7{aRSrQUs;)H zq|Yk)E$JuBVy2(4$yWM_g&O7QC+_9JFBZ;w=X?EF^vLyZIlnDe(K4kiH=yO%w%pY% zN6h;yM2=1|RNVv{Ve59q?!ReozI#;L&b559o#9|kJ{^pkk>8bcn|nMaVhfM*sEvz`3=JnG-7-V{2!x zB{QB$R5*a;mGQ{&D`mTc%>J_STjmR>kNfs-`H~vZ#kkf{**!WOKJgRtgkg`s=l&}F zu0ZM-&QtKp(RkE&c2DB`v1N0<;@v|1<6D+j@-x-`Fy=~jyF|xQV#rPY9Cpp}1J=!y zN%(WPL-1wd(g`-q^1qq?(>S-MJ~3%Z9oYAre#c(&8(8kXA8d-nMmp}&oBu-YW_vbG zY4R^-x>i7l@$5wTqZF+eB>H?7!4? zG6cF71pC<&>}MYhpV-gN{pwJh1oyDW9#E84pQ)-UW&)|dCSxcp3R|>k`Ez8X2D*19 zyN;4hc?2Fi>-|-DhjZiFTb~>sYFSs>ufKBkU)f~m_K%eC?_~J-E!S9hDimf-vD=MTn*t!x;-K=t=NpOB2P1uN0RMLVCsW@jSDb0@e13U)qWV(;?_ zth$x2HGoKn^q5Gh$VXBxRYUpm<*2Mw3j++aH5Pl82%?ya2# zqgMCu2Lo~~t6uQGA%gb#s3HY3uXlHK*tQYY^PtKNvq<}O;7Jk>orW@5E*n0p!C%{z zt#0t2<%qEk3H#yiDRV7Loip0r?XPu|Y5ZP~maH{j!nDc`lj`ZMUVqpWL;rHVd0>3r z9}E4N&voSnut%sr`0^%V`cr6b$^SiuCPrV#!6Fp-ok+s6F9r97R?++N!X zjrd$OY~WzY<_cyo*Yw)NQ55ddv#dX3RVz*m9<`o}8TCOkBi#7M`}{9O_LDIZ|t zE;xYNJ6lf3KCum7pI|4lyOM;D@yFfus9MV-`_>TKG_Y@WvIDOn|2HoGPUSBme?>S6 z(taSVKe%w+v)RRicxx}Dj?9KC#rt($+qe*G-|V#-L>84YdwOQE3P{9!)OFPXAmd`A zLimDa!j~eDuw|#T?ieTigv4w$g+n)wgM^y-yP)V7nBQ7|-DiGy1m!zAukGzr7P`~n zA!KAvrkTc>nzjkLUa4Q|(Cme@EZa}*{}6Ilgs&xC|7eL$TGzc)f8FZrw$v7RbY1qF z0JE3&PDjHbq5&d24oaq{)FkmAi$AY+n(f}G$Nl7&X8n&@JdKQx&Oavi>k{2Bb2 zqY8M>SJpEO7Bc*#6T+D(6?xW@YxwrpNz!%O)%^g&FrO9W7qWL_ipgpqi|+nGd6s@l z*JZU8DpH4n?_i46y`*9v!EJ5Ky{RkvtVN;1`@1?@wkyIEqQ78ZVEe*th5o4{N;TG% z)z~ruYCqBVDN@UFtD}r{c+wr!dA5;@dW~_$JtAEnKl>c+7AeWFI*d`bna#}p#|(^A zIC~qt&XER19uSIB$IBhf-3HC{bt%TC1N2wyBH?0E{taFV(TnY~d+{~{LK;gJVQW^J z?W<_}VBF(;0Cx%x+>SwyOa7kU7eW8o+4TJ{_>vDl4o4(SH+kA{p|fUu@2YPE`)7mBV3>snn^yCVD;ZxY@W;jZ9q5i%9>F6z{XdR!$}45(1eJ?Vbcyi-X} zCkNCN6Iphty04f>!TDc-LY*mCK+DE0hY$QNC8uW4=bT^LnbQ7mH-ogUppE-1LN{9s zWV~`{E1s~DGD*A_!sUe1#OZEvW(?h@j2ZeEiN8b-p)?0kvao`CdS>6R&f~W+t?!0T z_PEu}qCMf~CWHKmgMaq+=vx()iY;fia;B-;ThUl8{OqSNx*AbejT`vgqbS1B=^do^ zcielXX{h^NsqaQkf;4ULy4Kftb-dR#Zt&{5yvMaCM~8g8z4@r?sf~5qE;`_7QGu{b z@;0sE^Gbc zyCZZ@6?;vBC-|~|fflc=vNrpkd+A4IoODxN+3!&ZX771{=MNY$h6-7tsANY74%$w2 zog2K!LserynEL2or|TcA$ZwMVQrVMT*+JP~qKERkW@CJ#E-!Y;0}k%Y56WJ^$Ac`v z$)%zUN`G{PDgB!qz9O#EfNLjwNTD6F$GVR}**Gux56a4C43F~-VMRpciFMA#tL7R8V>J|z< zwt+*(@tS|X)Ig^{S|>b6$&mkg>O$fVSAH}53QC=x8_BXvEmZx8-0VLs4e3EY==(zl za{yeen;N++T%`uLbr<+W*YI=z5I*S#eFl2s8a|&1H}bc_2p~@LI7mX-FZpBrjv4G) z)Pxf;b$ocvQ8*B@{!e?~0^L@1rn?nCqS#I>zk>50Pt#y2xwm+xy@D{`bH4{`cdY^B=j{xo{j2Y_dHB z8GnDU-nJPwW1q;yqS0C3SQ=_;x&CDS3p@Zqrhu)7}ps+{Y&@vx58_Jt{i#FFEPW>}LZ!p=^;jDU^ zN2jyA#ElQ;3+)5Dnh!s@s)24u9QdCB+&uV=DadU;{Ivwp2Y-lF5dTkta-jZA&4Yq|`0ylUe|81L96vlo zS+oZ7RR|G<_87dB2zd{l;nk64Kl~uoM7nTd<3xli=Gz4KVxJmE!az-2D}P3!Q%-+MLHbMQU4SKXTX+TavlbPv9Nv+51y{d?Q; zzptV3aN~T^aqvC56gmEj1;JdDn}>3VySW$geGBPu$qqMvoIkv2@a)7YByOC#j~CiB z_+uOeu7sA&hbI5^bk-8YNR883MTpVPj+KBRnt%M)#lb7V^gG;N**=r?OGHm+{Tkm` znuuvx{-G)efI8!^Qg~hRlvm%J`*Bv?zol#C_9Ym5=n}@Uh46*NlD-ON85Z`n)E~+R01!~?!sT*Gj z)$$syI=Sv7y~LWe5#&P)Z^m~67=L^^s~S<#7DcW>l%8g1eF2gD!n;Hjyh#P~=FTHIl`xG~$4*e|q;jA7kdCdo3!IKO_^M|iBU%fEC zjf5|J1+d<*FSq&7&wha9aU@A#Nw9r{$smM#t$|bGV(Q3{`0`^P18B!>X%wF&a_mau2=4U z`vE*N`?Tx>Tz7q1Hid62u_>SA_36xmmz3OBUros`JB|gWVHfsfrFrn3N48&p047zJ*WxLcxM;-eD3 zCGkaxXC=NX(J1AukZ6^7uf%?ddQ+d2>0ybdB%YIaHb>}nMxtIWPo|ejTqALd#BPa9 z;sX+&(CJc8%pl~Sl{hK!g2Zh4)f9Y|ORSW5i^M;X=#|(n@rcAxiO)zpDKRc_O5z2H zIT@lpMPiM_EfRN2^hi7)@y{iWN{mVTM~RaXKbE+N{+SFuio|siTO@W${8NdKN*tE> z_Y&Wf_<_WnEFrf-Vzb0AN$i$*K;mN(pOJV<;#rAP5kSqy%Hah zcueB+63<9{Q{p=k|5f6m`J$eyB(9UVU1GaLP2v%WV-inDd|l!>iRUHGlk;Uv+Sw6_ z{Sy5W+a#`&c#Xux5}Cwz`Ei7?*Cn2k__V~Z#QP+6NNkaKoy1a!=Rf7$%zh&AjKt#- zhb4Yhq9(CJ;&zGaBwiIfTzSi6^A}#ARCKGG>mkei{iK<1dT!^eoi*pOJJu z{YS$q3|x<>L|rZ&T2nu^zE=1H->_42Xm00D6~|$((|teF_E(ucuGxP!*5Q z;R1u(tNB1vDKaC)4@v%jB36^wZuJu1~>xAQUqyPYu+BP!85-5!;wjQIlYF1MFr zME5(KdmQLyJM%jN7|0}D>lXY(35Gg^e)gd12spb@jWgLBxScPQFQ2RZ`kvCxKCRp5 zwFg38%}p)0+jS11^B=GaD$b}*j?D2q*Wbk#|3u9nx`>vK2UD|#( z%7!#W4Fn*|?S-)0IGeo*#f1j27V4}{w5GhQqS9;*LQ^sqSdrW7QhV11F)g_XsaXkEXjK;^_n=*FKUCi5@PyPLZ`K~QZ@(|#QXGMx_Kp8S<*VelZm;D%qha9@P<)*2x&sJw7C71)@%w@>E}z%onbEea!V*+gQ)ZWYpX$YR8a}cggm1}@cub#G?=U>U`<#%!6h99PKe^R6XX zBp!NW0y{y(G{H6T^7i|Lal<=>@|Xt+IfW+^r^f6LP6ND7?` zrY^L*d``WO(UU$WmR?RLbCDUkohA8__Cz!xM;~22zDOkZ9iHFi@pU*ne5%{+G9g)b zzSH9ih621wyIm&I@_nwPK29WN$=3%C`lMO}>6bpPlukJaK#biT#3T%0nZ~Xl%0qu& zXD6KxFlX)deXt;3fEN5DmFW9b#~$o?DZHXSEH5GFjD{N>E^cBT%&pK>2$T%;UxQ}GlkATwd?sPH+vMU&!lH zu-0_@Tw?9piDlQTsxIvD-dW<<#pr44YK2^6X)tNYu=@hEn4tpZ@?kZn9j?crIlG09 z^exs%RiWKq-(>fxfgnmNWv+@cm*VTxPgL0REPP{xP0Is=lEzE&u->Y6HM-Q zOeE^^|45v}>o&=ByL!23+@Z#)+M$uHV#dl*<@NCsoL=LJ--vc+fKNdfKPS=@ zrx5*>NH0gc9U(rMopD`ENx&-C|Y{&dK`p^rVzmguHeH zMe;9_>Ed6C@^mAY^8Hf3LYY=%eR}yjWqz^DzaZsbg|vbYlW9GFOr{GYf9c!l^|eTO zR6mt3{+YAP5~EA)UMtQg54(91COtZj?YK_uCtCd}IblKf5B&GBX+p*wY zi|ewkP$0xufGz{27PI$}dz5XCz#b(iuB_?wjEPUnyXM=2>nGOZ=*5*7zwB1+9@X2W zb+enKECREovdUUrQ(IT>=x}0xOQst)Hf`Fxg?KvDnae;%vh2JP%0avI>+R%qvTT~H zl`i(}Sg_Y%W&VsRvY|p0|2CV=7IUS!%4{`Pn`_Lq<~nn|#cZ)yDlJtOtEJjfW2v>& zS?Vjzm6poN%Bo6hWp!muWo>0$Wqp;o%2HKXRaIrJs;;W3s;#Q4s<)c07Hg%o%4)S% zTWhSf);eo_wYl0-U0GdKZLO}ZuBooAuB)!EG1pjXDr>50tToj&H8r(0bv5<1=2}Z_ zWo=chwYIvprna`WuC~6;TxY4PtgEWC)>YTl)YaD2)z#NS#d=g<57G4~Ru2+2=5CC! z!dsQnPbv2*D2vV^j$qK%NxC+v-%Uu!v)98+bxiS{AWJ_C^h~>wPhL#aPx@+xW7Hris0i#idZW zBjC(7%xaHS0H2g4P$!~%Pr$eCRQev zUq5C^EdN5Hcop{B;u4Zy9m$I9j4mA8HC{Fzm?)ZPoY;{lz7n-(MG@hA5SJ*vcB}-| z6<-~-B#NoU;;ST4vNTc_2}FycjnO;CnoyO@m=t9>YYE)u3W{f%QZlYiR99Qv+&cc{*v1%?oKb(a=6MBSj8*=? ze%|q!xdV8#&F)cl(#IgTtwEDGjL7V5H5@t z)lFnfSmNvNir@96f1}Nngzf0tofu-{qJhqEW4JZk7tN(Eh~Kd7%+9#2E#9^}zPml% z-Wl(_H-2v*9thG|N;l8TH_Aes(Kd#ik%GvYsB5eYJ=`>YAXX5&D%O}Vm4tUBOf>r3 zc8i1|Crkx2U)Du-L>y$)q44HNA#8Vl)CqG@$E(I&usTr|ocRO6@JeoRw@0lo;h8Kd ztA#BI7``R4If{{99PW%XMp`3%@zu7t?e_TXcgOGUj(7LOd%QvoQ(1Hu3BXuOy_Q9) zVYICzcdUARKCGFljOSk+twm$B@eQ$gF%+1qihJVsIO9&p55+?V;s^fIXR8pFGcy2j zr732CvAND%eP&m@70O`<+;O)j?&+CrV4B*?FU0M3O%0}8r_H?$&^pMk(D*sl@AH#= zrf+FoxTzFWgM6cf9nN)M^&TZ|Nk`zmIx#pKC$W*N&#^)h1vAb$Y-D)8o}wie=vJqz2W3A?Jt z3o?r#zXqt&R+-H+Wdse^h;dw7;80)Tb~!Z1s%k2Sv14^q&TMveVzWpfP3P>2rdGH_ zHZ>B@WI7ElFz{nbqiyAU)9EzMcOi{t<9zH#sdd;mpD~Tr#`#R3p{Y_A`mbZ?8_?R9 zfEJ)OFov;abh})nSMBq8v>rNj(Rl5~xJmkf0|#6kv==u2Ue+DZoU67r@_KA(v@h{` z?o6e%3BL9;n&9gOZE8O1=@t6eK4J_#%_>xpK7+K4(xT5R5oM(xBP3~6L^FeyGB#GC z(R0K+_{qq!G`#2#10&!53L&0NFE+4o3|K`+P^^lIfyF4_FB!}Re$7-72nL+1%oY;S z4q5~#X5GsTr#$04X6!Czls<}Zj%cVmJq^Q0d4}>NbEYgy($nfop_8sN zx{;$C0g7Ec^-Ti zO3F`7r2+SAJSz% zGtW%@Mr(j!MGjkGEo3VKOQ%aG7srcZh5mwelg*Hy&GKFISw7mDUzzJS6dPGF+Fy+J z7lT$@X_B-Y&=!D(yjIZWXR!JAXZYtqIM>Ow2z{`SnGfSkg>Zp%qJBANtS0t*7$XCasXI za}_Y-$;?UK-jZxqaz2NdUeATynOOL1@L)&yiIkn1Hqdy%c5+&?{lZQ#2OeYW9R%eA z*&k>}It}wFg~w28V5P7<1AIKl1`Q@7Gof!xm3h-SF|q22zRqJdEPT%*Xe68Z|MeW6 zj)OM9>l68@>C`fY#q-(X`wJ&cu|@uc?RmCbGvt(HqP<3De7%r0JkHp4{{?HGt~=Wr5t)0ZO7*^)7l~^NJx)!sQZCEp{#cbt!(%w+Epv1^Zv<0jL^_4(&NoB$0 zz2MnC0ba->dk zSV!8hj##r;aUs?X-Xi1yc{$kE><{?^5$%4QUpOD)guO`LhtQ8O9%Ss>z_~qR;3`yQ zF|XZ^CmoI+SB;u+Qa5f-}(nCRyU6kZ;**;#{QDj*vfP9QER?q<`K8 ztsk_h6xuQPM^1otvCrrJqBN3>eeN$R`r&hbQPB^d`-_Sd<3HM8GzZJ%Mn_P+O*n+L zk=xIM^JW~xcB)#VrUkI}X4v|6dEMOUf%_nOw*-7WJEc3C1yY)HZvs0P!Wv1+PJ7Vn zhs&DQ$v#55t($!JXhl}-H>(;OLb*-kr-6Dl34a{cLbAxWV!OJ3bB|x^V}Aq6=0L!H zGr3u9?sckudbq!h9*wC%_J+*8!x2Cxc1;7gd!6=H;nGKB?gI5C(64s-WG7V;k?H_ zr2ewg^Ha1jM9yI4)T`w0#XDm}*AhKJTIxM9IzxxCuoq|d3-6m2P`AKYRomd&ZnPLZIOZwQ2QIEjaOU&E8q^{^>eFQv7snCeEk&PA@GB)ZxE0isa@$B>sHo3H{|qK&-ag~L#*L7{BGI}3-lbXmJ+$*5^xaDAv zv(rTho)e|;HQB`~S`L{T`|v!BuOC8Uqt~^QR-7B8mZXlb^0j2UNO5K%o|xeP&Dbxf zMzpY1+Ie5HwynXoKuBdvr94_Kg}l`ykFSF^?zJ{f+jJ7}rmmoz+uZ>Tp3=L#xR}LS z{&ChWFJAL-c~sZTlClM_FwnaHLKd2P(d>qKFu*3^f@~A-cd&oXYEuI}*a$UMC$SeY zwgzRw7ucdYNLL%)L%`!z)}Of}qz3wISY2uN@H){79IH@y<7cIBb+OGURB_e=kuAx9 zCSpL_;AYtvL<3Zcu|~rtysd$4QZTo;I|6X?rcMvCuV>Iqq4zlss5@!VYlaQ7??Bs4 zRC#Efuvx}A2>jp(;B@}ALL+}juQf}@PD&rW`{jD3;ys69a(pV;k9t-H3jZQZunVig05=Uyn`*3bn#tTq|9tMHnF zOHMbv9)nCfjeaS-{(as~H@vh|xa#2Wwcide*?oA8U6d4!)3a3XJ~w>$=|u?K3Qgm{ zaitfxGUQtoS24TVixVmi(P*U1x_ASQIP`bn=&E=gFM?+z;=_~TYFQ`Y z1^PFW{-Q)xl38i7?zaSAk)$?AoGq_+7N7YdeBoJqt1rSQ$(ICO4?G|0r?_Z4?$hs^ z`+2%9a7^kE`-b4t<>>jBLVCPLpZ>zR{R^xjn>8F2@;AzK_?Spn%Cvb*q;Hn#e#u`Y z)6o%;-=OCgi29oKwDfP#^@&S;YGl6hxF|1^GoX*Z=_2_vjF%a){SQ%3{}TcyCC)D1 zewlpTl24K2(=Ktgyxz;?8<2eZc*idyFCzKO|5dcZFL8FgW0%P{A^9dRB1e~d<}&#b zl5hGF@}@46?}FqreRFO*(e*RFCGs!DS1S1?r9AURO~g zLI=bN_adA?ydSt8z7ww^p6Vl!>OYa{TR}b+*FOMXr}F}DLVOM4gqsl7BTl#jp#||& z-wE=mXnhdRH$f-tL>NHa541l7J&7OKH~^ml#BIP|A;b`$25OJsQ6b{tLB_6y-@`e? zQ~e^yKSK9)AdLJu`U<`SgwGd;268<2d z6K+TN8^mqEhrY(xDa2F#63AcSvB%&}1Ulh!2&NCv2B7YnKv?`&=r_;_w;(hiPPiAL z4e@^9FA-dbPXn8I3dbr0$m6+ zX9EcpialjL;kt zaVzkZlUN79Lq1zu5vW})z$X#bgO1;BVP_G{h~xKLxQ}fe;^d!8z9Mzdf$$m`CnR4F zq7(WNP9RP=fRKr@pMU~RFw_PCt^U(na7t;PG!>m0$JihY)Hywu}$cjC^Me6}WPBT+Z# zvXJbhx>=a!_ndK;n;9|cyf4pn2nf5lo9ht!-#F9 zeZ)V~KN22^j*O4QM&curBhw>nw0X4qss5)1o-#eX`f2mibx$`u-TXB9X^*4s{^(dV z9=#Byy8DL)hRzI44P6*A4wnwE9z98#NA~t^{hkfZB|xZ8hp^MtygpzFyQA zL5*Xm@eJymLY=13($TunhSB!X?$LqKFmH8YbaHfRlpQr5H65ka*B!MTz4K`M(cYu| sN6#FcJX(6pe5~%+oyYvgdXM!V3m=Odiyj+4HgPO=ERMbJ|K-pB0cPr0C;$Ke diff --git a/PyRuSH/StaticSentencizerFun.cp313-win_amd64.pyd b/PyRuSH/StaticSentencizerFun.cp313-win_amd64.pyd deleted file mode 100644 index 0285e8772de1f88abf9dbee8fb593dd92646bbbf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 58368 zcmeFadw7)9)i*pj41~il;V>8#WRxHviou8y1CoJ6@4*R%0!lq4AqgZJNK9q~G$=Gl zoAG8ewP@2;Yieyv`)H+otl|Ni5G3JfMA0A~;;}PMRj?M&s`LJSYv1=|CdBr6zxVy) zyRPqpE7|9@*Is+=wbx#I-8=c$HCvJ_7E3aIA`y#a8$$lek>CIRS4x7#GGfBBBP>r2 z*?;!71ke7nmy}o3I;(4{R@M}+a+VaYUR~vNE-!P|_*OeBRy%XAUhG^|Ra!RPW*eGr zfd2NZyEC7?>w_Zm=f4N9DcXkgoZ}xBGd%FE3g;aETk$iJ-&_1=3EwS#8sWT?A4>kS zcYRni4(W@Je^UIMgdZ5;nMEr7k)*#@QBuyn`j4z2&tfV4;UJ6S>?g|2vZI!BoWl}F zoNw8ToFyvv(FYKwN_+!`ui}Xo%OJ_>3oRKaB~kG&(NctjYGekdrd3YwA&X@#+AMnw z_^@$mg5@MbOM<1r?1V{Yf@KO3*lV$jWBNP(we7}S3Zh1Ztrp7$1FwOaO!tTJRI|F9%&uSJf=NBT#S@KPmX>#!t+@ z9E+uGdbO&k#t4L2uP3GwqNtZMy{5LN1aYAWXk!8GExq`O`IjU0$||c+QD_8ONxk=C zDj|w`^Wt;@zaW*X8denW=4MFZm-Rim2hW@TX&@(J2i8+ed?3xTY*`` zQT!_)(Aef1>Cq>R2MfM17Sk8HYXWlh09gYKwrOmbZG9Ydy_JY;ZbhU->&)94;Xg+% z(we_fs_R`QxC2iB+-U@tA>eXSJo<)Sq&@z;qxnXkzmHbB@htYaoj~$3cWe66t>Lqc zv_81LEy0`UAB+N*{<6oP4?->7ny0saZcm+g$u@x0!D zx_MREV6?M`U_-D7d133w9|Gd;NM)bN{-xG2C5U{1! zivSovbF(h38>H1=nBbkLfnc*ddMS8G_oP=FWak`s^t_{?$H>e2OA*KWza)C7VayOq zW82E?h1R1w#u{}zXfb>+Ujp^|Z@8&FIzfyNt;p(pg!X$Za)+HqQbRjw0Yx-Z(z)f!bTYZpX zph23Rk16_8(;ZtrW(5~{vKhn&TX?OQ(lDm$ahZf3HMPJ@)2(|RvRIx0EQ^+PMT+lY zkA7R4M_--l(g!8Ady`!LK`uSdf@reKpJ&Osyl#Xxb%bw-remB>plN0smWAH#@%sw2 z6Fr*$#${T2?_4dgv_K1t+2h(q`Rke14x)L6wFTc=$UaZzHy-yoT=kb*>g;ZgYKWWQ ziU1DO&(n|PW(}(wB`h`^ydQB~p-SBP;k+!LrEavlew8J`H(X}oJXMyjhlH1JJ_9}Y z*LS(~m+~7U-jR7=h2d_n0=f0u2SIvs#i3S3PAd49?dS0p?f0&Op7G z#-4P|qtCP=)8qH1Ynvh0&CeYgF=_VrWAx)Dc+NXvR^L`6@@<0LmKda{G6Vt8<_E}4zH?G5@E z4kNz>A|QC0;ws_lC27dVu3^mV7+3x5bW6nx9B@#pnWL>SP2jr{^y-C%XH|0rR9 zaHd++rGTy{sim9**4y_HY6q4UareBjh~)C;mAkirBK)@@;&=rOAc+CJ&MqqjOA;$( z9I|B%snsx+ol?AQ`UN9~y!sM5VQ$uiY>0J%hLnFlLxsj8zRCJuNOzB3nZ7Kt3!4R* z9IV2PNApWr=x^UpXbxf)Xys`8kdo-Y0A#padR5fD1)XBT+R$Jw8hnR<8jcnoC-wT zRAejujf`R@*5MDOrW*fY$>j7$9LIl(3e;c(#$Qn4f`pHig-d{oM?da@^g=dg2?8zR zm?PQW=7B9h{0s;{O@A%oc#ZHdyToWEpmD#zDWks5(Xp* zlgO{ekoTa3b#p{#jFb4o0UiRm7ZZB=`>GaLcA#0k4}F>dD-D1UVbDm?J|{O;qCfor ztLFE(^|vFAi`Y)Y)>LVQX}>oH)EEQQEP}#P%G2UNRRR=;b8$?QS5p&fW@aPV)1DvDno7QO@u70i@@Y?3St zWEZl`)wo77u?`_+F1er36<-$$zs0Yl2T-1u8FA_F2_%=a^TW; z31&b=B{`;NPJFx^_z0&?bFN-NDfPWrVKx?hU?)4Z^KaNNd;B$^pG!aCf~u=`Dn|Yc z*$<;sG4gs;Fd11%jxa##N6E#@rNitvF8&Q#>Y^XG^>-pr87M=^&^r)v62N~y>m||< z6$4C&Lnc&B$aFjF`LJpiMIsc z2<%QJN>CauA)`YU>_N%||7(EBnh;0Y=cIDp6pmyTxBppLX??oRLnw{WNNrY*Ij~fe zYQL|#Zv*rA*RPE@MgS%Bm30=oh)Szv0D{@eZ~VmG-~a}~HhWQ3gU}UPL~N9pH}>N! zCsAuWu4KfWvq`?UR0kp$UqB;bz06!w8NmEZ(b62)|L$WcmQ=`@LZq<$spLo`Da-GR z0kv4jp5Mw{fQM2jZ)3Agr$~kJHay~Z0KlL^xY@qBBVPwI&pG&GgPG@SMY&?;z1#*d z$7dl&?`b?@Z(0Drxmk1Lrnm`9uwzDd)sPQfy**7QZ?prrQlcF zHga49AwqY7)irLdRPa1mx2M?~&SI8stpY$dPZ#Um`oh#_k%`>)`ragMTDMmJWooU> zrQejM*`IhLgb`V^_D~WMd$juYWE7{YnWY6BweLhCdc)n8Oq3_>i1ebxIG27!n%4gQ zS=z9k@D2Skv=c|dnryZ%3>Ly7e3_(v4P4YJ!QR-}!9#xdTp`Nos$PF;?7a*aIQc;vN60XK&fSE5y$xg%yx*q1ss=)H0ua2COc^QE5Wk) z-=IoYrmgP$6H3 zX+|FsacsYri4!;vLbtRxG;lZY`Jm|<<|MJxJ{?EnLD3z%PPcF1 zlmzZ?Lg4ZLg#q?};Aj`y9nHp3S7#&dP z+tI=lJx`#8YnxSigM0%;j@`qCN=B|iY5$2jTgEkj&`&`Dqq=hL)Cl4^FBLP&v zknWu*Yj=ICN4Gw~Jbio-aRd*#vtibG66k39wW-H=blJx~)jb2U&oP+=#s3}vCakD} zni2aoj}O6EzmOTCEP+^lJFn$B~&P$c5`Lrn7rr8yJ=Q^`G< zn>D3xct1j$7&7FhuN6a;Y{f8V#-V<_z;}G{x>^YOf#5UqjQPOf<}8J0uyF-AP1$3P zVng-WBaZWk4~X9Q6@*6rrD_FKr764hLt&SS>OQQ}()3w8=88GS-1DJtH`BO^;V>Uc z4s-^n2H5MH)e+dE=fkk=$+j&4rJ-}7{j7ZbLpdbp*zfOTbqw-L?;Wmfyd81tFGn01 zclQlsCH+LfAbX21eRF1G;pj64Emy8qDa^&bId7_Hgn;yql z`=6-TUQ`#FcF+Gb5>aAQw5Z4ZEK?%roG1!c3JQm5W!c;T41S7QCI)kAq8RKY23R1y z=s@ow22ddWK@8@~nJ#KeV0&1WM$vLe5htcYxBgAo5m<9Xia*{Df4767srBe=FknHS)N`Gz9>=>g&P6%Lz=aka3cqgY9@P zw$|~723%6V6jB{EDe@Yosz1N)#fAraJ2y+(>GHCs)10V`Hcy}oywZyfo`025DE9*& zE@w(ditaa|7Yw;4;yA(C5PfigvGbK}Pemj4}0Vuo3c;xpdc{)&m9Ueyb z{%ZeFLGm@3{!anLB+#BDw4gOL?sWDhYQWN<`EN*{6?c&Ckfy$#n00KM%s~h4Zv}(P zru7%bVmGb7Y(YlEF2H;&21vBaqvaN|_Iw~DdsMGfH&p$>$n8I*ONmQV(Z7U> z{uinX3V?+Tq3-L*v=Cw~ZWZ#7i>rCanwwGAiI5W1F=n4`>IT=F<} z%c8b#`ut-Edze`5jX}gBjz0s6IwABR&D#vz-~9Wl48R2kM~%R|&+zE0z`A``6mreL z+ldAK;d0Tzdk9{3gc3jEX0s%{T5dcxqj%w)YS4#3H~}$mWTixb$yG=DhmlCWeoPEN zFxFZvGjOQd>f0Wp*U2D zSZ15VrB3Mjz6|3sFC%m%t7o8kzW#0KGA1rG5*IKr-AFi@xWGt^X5xG!F^Gvvk;rTd zeT4~r;Sv@9l<}D={vP7MBo8UvE%FGde-_%WGUrD#L(j$KgdS7Ld8$I_A(hNA>%SAy zRnA!!^G;|!a$d+s8YVVO0H}}Y2wkA+F<(E4DFDfU zd}yr7UM$T|h6bzT_lzX(USD8>Qy6AqktDtiy^REDz@!0bpUNrd%i#$Ob(q7>q`%*) zysKmL?o)XSr6my~XB-XPf$SHUsRkj8bSzY+D$kM1ApSd{#r<+{bM{W?^8T5aBj%q= zpxJ1Q`8|ViXnl;2FymyL1SZII2JMbX9m$c)WN;|33_(v-)yUI0p(9)IbUO>C;8*xV z35>OH%vcMLuy`_lq2Hmfalde`ihuhx;*(VTbHaflgEz6_UvsLTKk(rI|f z3um(CuUIpUv7aIqaop8F`2|U}8RNm`F`=G87mkv_^NVC#YxJU|4|}kM3_$Ljah<&o z2PN|oTdrhdH_g+3Ky=1fmY+ZQd8hB3CCxxW5nBMmDk*E~WDIeK0e$=IXm}tKCL%yG+C)ulb>g zQR_n3jSzdG_yWis4K1^nT1{QVAvO!OT2QM6VHZLW6ut(mQ;nQ$$iWZ#3toF2d(PjM zc_b<_3yA79VVv=6WHLl__9x(9H>h?|2~M;%{V>t)gA_O3O=x1DqFfTbi^Ams782W| zKbGE;fIdzkTb}B<>(r8Is-)%Al94RIP16{;XN|G1oxF>~;Nd}kGX3a){T)J;fm!7_ zqj8Q0jk^bI+#-z!VvIXc;?bW^??OA?yMo_%I|mJf7q74n?~6FDlMeE`J^G{Ry#vCZ z#To-K_ypAy-12;SaKNVD+{vMXuVAeiQ$I`8R^FP5Ev{DzJG6~EDPFI?u><$@FE}9* z@bd>FG}AweN&gkQB!4y;p_%?&OnRU3j|`Zy^qOn6KxO(faH46o5FzXR&m+j(2^O6T zrVLo~_aN=hn}Fz_Rdh0h4q~ znpPoUzvpvw;?~<}vRk0l2NS#($#2*`PePpu3knNT`?%Vlf10K*NQup`d-Oz?GFZST z03t7MuolQ03|&ME%v`gP*g}cHmH$^F4PnNbz#H)?Tz$^qirGq8e_Of&^v2yj+>_hJ zV!~p3IZdmNB-#TDi1maGs0^os8@?Ir9ic@!SyXp4G#zDxoB;{)Sp(yq^eLWzt%LDS zbcT%*Jhm&mOw-XOm}qa<_g8!bU=Ur$> zyHV#yWHQ|dqt8o;C;mXwVbF93aEqs@zSQmsH2c0(*;RY(NS17e!~FLOTh0mt{7J5b3S9|^DL&($*b1uwXfjUP;J zW3L-`wjwCJ>BbIoHSF7$3;sysX*rb>nX(v5!?nb<)OC&PTC9$3s4oY<5HBv_YQSGb zc{pezEO6=HFs#ng|H&{1=R$_FTq>>QvHY?0764&>M#Qm@Xd3QWLvhmD4)iRcC8@{- z9?<>s=nU41THbYt{OEQ>uy^d&IvdhApepT}IL9Mf^Eaeh$cosu4S>(!G`IaG0!$)6 z*vK{I6vn#E$TurCM=Q>-v*Pm#Qs)jr4^K8TqK+ysS-ABVHUH-H4TNGq^XUJ;X~}~N zdYlM8#tIn)5S*pLz!!+iq zNc4f2lXArI=sKgq9-Q+dj`1Y4oKL>6Ax5(d3?V#z0=&TBVYUH~rO#^s7S>SvrfG=qn%mxZHe)ni{A(>H zXtbm}=|!*~ax!5vkcK%5SxhP%xo}r`Rt&D`D8o!(=y%Ro5#t|Ys{ll`;FOEb_mLKN zyMF#$1(Z6XXTYBeRs_?Zi|sXiQ+f*63B=3S|Jk>Eb;psNEER{aqVF)9_Xw6pN#%w$ zGP2DAOnG(b*5Dd|$eNep!||;qP1CQ7aqiNS;M$ewPs+_YkDgsaxfQ~p?hu;5VjzxP zPo}q$?daU~Wcn6H#ldT{%8qsNB0s{(>m)R6Fb?&HF|?P&`Zd%6Fh)oM#9sN8Nq@+W zH#RPm?6<_R>!D$Q3JQYTm1ma=HSB6<0xh+!9mx)X=DZEO?eYK{7_n05CC&xq+|7ArxL0a4|_6vt>5K=)+N{1sqQM8b~=rpjF& z%xV*6)!qOd1gH@+l_GG{@qy00cWzKq!LlLTUdca23n5=RAt>^&{9@$UY=wg0It@ z_iN(_21y6oo8|e5epIs6V_g z;Gm|V2U^i6tj{D8^ojWta21oLNEY~l^A05Jw@sL^iTk$>(E3G!ULo! z2Wa(k5V=%(`zQTF0)m#c5!Is{*EC6S9L@QPX9jKOFy)Tt6p8k8G&M=E87;)b$!!5d z&WYP&+Nyz*S|IA)s58q5a3N8i7Z zT(YBNBTK|XuGCW;eXeAJmqDK=t5`v?#|Y&2=fyF=)y;i2UQB+2W{M=wJx2foljKLl z!w}+sO6tFiDx_=0!9PK1e_r^R^l=I={D94}XC47!1MtFd;Q;c&8K@V}3)Q5%@IpGe z>&FYzP#u=5Golj%(G=-4XJpPv`s-s$b0qzn@%|JYm@M!ooFBiN1-7roAwW&K(P8L~ z6PWbunOCWCVMl$@IB|lwWQ0eaCGY|L9VW!|OCz&+SEzP|1oUOj!U~l3pW+!T&mMm_ z2+)t9|3vn4LPmPQ?p_r~P`K|+K$#-w2!R{VBZpC+Tj?j>??IN6zR{I6{T#Kj#JHl{ zt|gq9FBz_sy04jvRgCx%m1+Y38&r$VkSv$b1IM{={wL>v9P(^I%*N!x6^g7l;}oTD zPTv5TU|yfN{jO9^SLd(LOP{MNlma;}ECM>R*e=A5X8@54ym&~_);~t;?L~kcT8akS zF;+`xVKnAq3^Lmix{NW1Hhdgp#vmF)^rzU4)eLHW6B^3|SY8rCn7}c_60${O-y*5< zjxh%F0R=i_b|}O&j#p;-Fw;NiOYdeH2Y0jl8Kw!{X#aaN9oig?{fIG;!xCP<{f7+d zBmftLYqqm6T)w@M!A;w5W^nEHO$acMaQ^lRBQ}3~35)W!FJ~}wdm)4A@S6i%*~LgT ztcQyLh{`XF-E-Z-PK`4;?3)A*E?O~44pjCT*t(a?0S&e_Hm6}UH)|?2W_3e+{w>Uf zvAe$%8jXShPat1m1pUviqHDh^1vsLcHcJt0(g9nw zEaV48jTBpV9lvsWQR$-CQVV#tDB^wtdC_OFra=OEYlB;cmy5en1GJCs{;jqjn7KYt{Y%; zx?UNbau&n(y7BMLD1_PdtluG^*|i~k19Jk_SD%EH;t1(pj?^W8K_=qWVsX78eJxC` zFt@_kiW>;Aw*DT?qSn?Ak+9$M0Dc?R)|p~w9fv&~8ZtGger8o%Q1`d1@@gZ1U&^vt z#_8>6StV*XK^msjVUxLr)~-^P(>?4X>xQojVojzP1ZI2}Oggl#HqT5sx_jS;i@(`-cyPxeFHJWb^`24Ly$Yf+zce zMz+(xaOwM8`pz7jr;+yz@^FwrtP`!HhiP#H8ILj^rq>Y~&shpuuN7cU@vH7abJ%jO z7kg5~F@#h!%${hy1DoCpKPE0`0~ZbZZ#NP#P8Hmt+cp0t$=Cd;E@*J4i3<%nhpnkX z#?~Ok9{qyP|HPpv$Dv{T<`8*rBM+{8G|ncWckk7Rd|Zmi6>cB`_ROK_6z6^9aWRvq z3PpRvzeH<}F~;(P3ZA~+lL8om!so%bPC&rXoDN=cK<#ns5Dut9A+U@e#!*<}!#&Dt z%*8w^c3So#d~JIbNC&J+F&J7}U1^&6we8FOzP4S-eraiC*CYCDtfJm4)6jkm+SlmZ ztTKGLrVeBHc+rKynS>FKD(;X)I23Zr#Qfk>$8-C9lSZR980sA_piWR5yA zybl)3BltMypLa@FV^eyMkPmv?XTjm8GwZ0??pCxTE#O%VBT+ZEfz$J{CfC8a2Hs!8 zB8~?I#Oq;v!`7|ob0(pCWyc=kAB?mEX$`j-pwwkR(0e5!XQPSta2;KUIoyO~-`| zj@u$-<7PGsbuqIC|0pvH%SQ00GD)hdcVWfJ#f-#$4l`KjiGjNGS3TL*Wf+o@6uj5j zyu+=(iQK!9E8@c1Qf!I~*gb$Q>=Q(fpUpKbrxphMpsBp(T^RgiER*cno1tzza3f(o zp)fr9I7i(?g#&vfzd?SP-yjdc#5cCBy9Sf^!)i?8PdSMz05|w>6q>yu%n6q}6hgED zC5-u(14DcosMEysG}`25O^owb9fp<|AaMo*$R*F_^bM#ON&uxIj(-qWpzj3upGq%X zR81efTtGJ52RD$2<0f=KHjj03_;V3a!gUi@j1o8$6zBpDt&{DW4*m%-tSn6TYrI$f7hD_5H8IZ0Otq6I!wnajkMRfd+)O4cPJbrHa}{>n{@*h4aKk0ej^flyOhmxU&syT{m=mhd+UQ ze;nga@D21-!=K<;6`f3ff;Hp4!OR;_)4Xp7w!gtzaBaJnf7Nrl9YhY z4N|oFof$X5Kfc?G?Zh^eKmv|zj7jEf)O94dQHO43D7=$}(9$&C9`Ve^n-gcmaR7Bq zmFmaB!`NiAkO_}7Y^QOX_X5_1|6QdRDV9h}9t5B}1ss%t%3dCY=q>cabKfNknHVgnleVKxpf-0TC)j3m{}~x&>Qq_$-k42~?DBR!BGYhD9ieIJ(Ik z0Y{nH)rM2Ly4>t4Z{V(uBU`$nM~8HkYj*W7M0h&*NVJ18)zw(DD+jv5qXVKvd$XY) zaz4BcHNakBFbwdeYq4aG_V*s#NXW<6O$_0o9OwhKmr+Yb*JN~AD_wprGuaWk11a$D zQXDvoz<%J$YN!K-wIWC5ockM~^9gBuftKCw8w>}cr&Czh_BCk|aI6=UAW^*NY#XT; zdH@w-*7JkuYY7V{%?%PhpWcL!m%_^=d_KKE@@>yP%7f^uMA~mX^ay+DLeIXT&!OBB zx_dCdMo+Ql548Mc3%2918!vt3+_5Xu=Iu2zbF8*;GkqR(9Vi$U$@V@6h_+5Ki5d6_=6 zlexGmh3qGes^Yak&i)krT;vuSUrMJ2u%utepLH}kJY{xjr9$URjw)GXqKpB|1l zzR!FRv++W9q&p519m1+M@fwQ)kKTp(9se`VL+~C4s`#y)CL#ZsjCBszv4C|GDqsk~ zN>wAT6-iFiE2vRF=W#T!cqfA4lG#*uI3%#`LJ)B*10tO4-=QwPk6*$X6m0f}`;p_O zTNK3i?VK&}_?dum586pSl>QxyRqG1W)%2OKpl1&ad|0GEeVAEzzvEoC6*FvO#5W1& z4K+d^4xKiE8*u0l3p_$nJ$MNqHUFFpBFT@ucmW8|Q({2RHbL(dbZOyLpp#-iPhhtJ zM@9^2uLAudKndvo#)1JoRtbWj{TBr1x6UMZQh*nLcn>0mCL5)NO=k@(7Z^BgD-j1( zGMI%W2fLo_O(R}}c#Fiv-e~oZy-}weYp>uRmQfajOSD8}Lf?i56;@=h=_1y#KJ!Zi z#Nlfxww_KfMvvd13_ZTUDrkcqtv4IJ6k+u|P;MdMg7+-Tq=SkC)KeWq4XfvhT|zd!mxv6=M+RteZ%e?Uf=V)O|$$2yxO7%{eP>oz0q433?`X5?9j>rv=B zbVNpMTP0PRnvkU@Bz3?Oj8@2}k?Wxor|pvmIlWh-SDe_5ri;+DA6ma%BxqeoXaU;> zh1NQwd!qFlz%kJ}+sJnY@0uKg7G#tqlq+aCE{`HmDF_?@d_h1eb)#lK1STm2o?SlbZGI8#kq8p2kL0RmI_gT zO)aQRWdqw`jv2?`9Iey38*%>Qi8w><+RlqO+O7sR=OE;ez)G-+*^(I#I9?R_0MRrY zbuixnp_HLJoao^xoazKm-&8NoY~!Z~R5;*O2z8z+NTiE42vKZcj{hP9wKwi8LP4y% z@IE>3@@$xTEfjkv46Uh}{?+lER79&&yvdnI!pWNc{WJ`39dN>T;GhQdqfyjoZ=lx< z-PG%MItx3LKWhY+?f4XPxQywD55s$x^fQJxlktFKp*g$@7IF{-0cAko38+4QW&^)U z`ZIge(+ni%9OCvP`WJS*yYh8B9Te2`&-v}h#2)%(THj|++)CUj!O7DE=K{EvrDABw z-YcL>=zD_^sDvTj7Q5^JmEfC!R*%QDI(DA4!k!_mQoNwdST^fovxAsTaaGIi^bLL{ z1F3N8i49vg$thrqvvD$-<-6a zMj{6shq)DTyW8erq$4~EXyudcL@VO>Q$Dk0PQ?o6#gVJuY~cQe-&^nq);AyEfm>5F z{SEkZBGjw zm)PoRO}|a7J-J%_`V@PNU!uQmELyxkfYTNvah!>=T*lWK`Xd3t zkYBCU-6@mAeX?HjBCD;S`rovOz&5u;kg35XRsb?&?)UfsJV!Mag#IK z!etT1?*#>^NO{p6UPPvqfq5~eMHtg7yt}BGA{f6Rv>?X5L75S~GklKuAi@@a4dsGy z)wM=)x15TxS71~;=c+>@-<`q%xiH4LpZ90xQq91`^ zN@@M)nK|NE+=m9_3$p%-j)4wWou)bv5sbGQaLRlLsDYyW2zuI8iZgQJn#I`H;X$qs zZ{u5{aVEca7O)-q*@iU+;d#nvcpl@D)-+zAf5pX$VSoc%<+SNL@E(gijDxo%_M^F0 zc%wVg%Yl`X4mqH$y+Pj9s9&3Au{Wfl8wkq+c1!>4wfypfA7>&r_IaG)dZBt9=N=aI ze;wyytd*rL@>E7z1_u?yxs@Vj{T! za2QMQI^?p{-tY%x`tcR|4zSLY4!Lc0*Pl$SJspR&B-gYqm;H$actnMzux}+|kZ?0_ zw+EBtB_({a@I+Vi$)rU%^W;2$w;%L|rW`TLgDlR^#V%;E`hamak6=VhLAXLa}+^yO3tn(37 zUG@$IUi{f#p+YnLNld!G{FV=cWaF<$laGLps%=!O|0d!ajrVatVS2{O3H2#%mHG(0 ziUhp|j^VPfhG^LpY4*k+$_y+@^<*zjwKuLsN?+v6)%WD;^PMie&D9=CY!4>7>faq) z-#a)ryUVxi_ztIctiIz!cYSY;W@{xd)Xa8t z@d|4!o`px9^X*UU2yVWFhgHae&F%PRYn?o=Ro|IH92;+duybd_i@go-#XlX+i?}W& z$^__v+*T;xhg7ca0~5b|Fq)R9_qa~H;;Mf?$GvecLAcw) ziJqjx_-7GUx=>(4V4Kw#Np#;4vUbyAq_E> zZC#iLh9B>N;I&m;pPoV`pxGxM*c{X$_Jd*E&Ze`ny78vY{BCDWTVVpyv!iyJ(>(cDx(TO<6;DHk2Cz5{5s`Ch>Tl%DM4-t%L>M;aDwF1^$W z5q!{j(I)$C-`j8y0YqPc*hM&a5|6jlj_UbBuf{i^FNPNjYlL5AT|5zNdj)VXfPIiU z9=@ie#*l|5qSl803;0Yxz5i16l4YQ9jWcVWiJBC`*eo##o`Bqh-vX_m7M;H@j#@WB zb>KXu@)J^L?3aLI!dKs6@EMo~(_2A_e7zT!{`rB6+prPX@A&|;%v-)KB+@jf6EN$o zXm;yk-FlrpjqkUBZ=u{S(XwWFJ(S51mO@5+Ja=V^JSBS@ku<)C@h$NLHYvAvPH{e` z7@zgL6s(ZzTICTR>X9i);M6_i)=!IZdr>~8?rHn9_Aiq(|Cp}OZq)6^7VFtz!0|^n zhoeaPxyvN!8cZ)So(;_V-eZnsj|b)nd*H!0fEE@J9g<(97%cJ-Iitv2J|G=pTL*P7ZDgt?cmnRWzMchkgvU(e!@^ z-?*oJp|yXT6n@fVpUvn70}VhtgMG}g4vLOd2ljl5+IEYoxHG=uTRA3C5|vO{i#;SG z#=XzfIu+}d9WXT=Nm8`;(zOV9jP_`kp5Y*cCU)jWLKP!e)Q) z9cU1oLyh*J-{iJ>y>0qhDYq2{pPoHp?)@^U+ zFo3MOajrpsZrvT!bw_!!Tu$$BRsB;`55I(&VJX#~=s)?ick$Aw@P<5im)EN0?9HvG zJUbns+Q=gg`{nE|$i-x(6HSs^pDrrjF2O_2Ch&(cCjJKabmE|S5mBXJ$N2{&HST6g z#9j?It%&1n;I6#;y%%!F0AJwn0E_KO6p%k}&M&zlMZ>3^VB_-O$6#aZ8clnF826=e zG0gx{-I$o?vY!O1s+8wB^v+Uz#0N7Sj;0TorBKk~@Fkb~^0@#oHTY^3AAQ_S^kA_o z=Oq$Gem%DpNJ(|Td2o)=4;9yHclJ^5avl*camnKa?!G7AC!)*fIOMIW^56tD6#q=H ztspChlvA1p(Fcy^9wHGMfx6K3?VGHK&^^iCcpPBlwDnh13 zD4WR8sOQ5W0~t#}ng00rA5|VGm`*3Ew^Ubl60rqiH7n#p>o}R&-66% zmXkh8&L}eQu%rqAT=r`9FXFbSLa^}N&nw@I=gNrf^!kbG%z0{!ZK>{5q82>K}Vi-TOk@ zFOfl+mZM>Z#GyJm<7B9u#o!-|Xwfo$Bn{@vgVaxjF)NK49*&zjarVzgF%2lVa*>;SrJ?zno5@9dGs~~-H?6kxsGcHqwo1J_cFJ?U>Q4(cP0AK zWd?Ul>4@U18Td}X*#Y>1njwWL3ItR9{Ssmt1Ae|1i%B0oLBKf$K} z_!OLq&y8{T6e@fQOnjc0bqYRFdV)J-{xRiS00dKxobMuJPLz;+_(Dw&I2_WI?IS`U z1?I%{7)M|yWPQLf1`WddV=0}3k>#8^GRSKsA>ubk(6cC(o<;rXsoRbLnV2WSJ!p;f}r6eX*TnIlp3E?Jd2UR zvI;=;hZ95DWB)QjQcS7@;PJuW9k>%-I-hiU$Y6d_?!l0RHQa$&FMo}0moKN8Tq>N}hX zHPl_r86Kng@2Jko>hn^TN1|Z{GYg%ug@Xm(t8&c3oY=yTFXCXXlFgA|wb;V_s!(<+ z78b-7ZdHY{;j?gAY~fE-VUgKqQEcIDs<7NFERQYxzABsrh_hHBHMVfJ6zWYNJic{x4&>fMVUhGmB1Ca zO$DAdx85Cj4Qr{fi-R*b!bW47V3u5?E@D&wH z@ZROB|0>yg7DP`vCn?~V!_K&&j=PY;HS`r~@*9s)@@4hwIMh>~H7aV)Pvr-YJQ^=f z3#nsuUMhTpH34E<2aEyoIx2fOg2K|KK!*SYpdU6s!;HTh??NchzUz6*O^KbHc`$rC zu73#d9UdKY+Y|+GNPmDW0x-NNAM{dhy}uUDCDxY;=(81Q?^FZYZUyZPj1cI(0v*M~ z>}mxl#e~8s%ZamcN3TKq?AKI^GN3WT zFFDYGen74j@xD7wyK1-b zUe2u(XV+uQ0wpiXN{W;6FH2S18zf7>aRk)FAD{pZnDvDyf*hag!C&>`g?u;dqCbW% zVC+W|#TEi{9&90Zqb~od?X(ai9`H7K>pdx4I98C@cKwex6V%X&2-m)l~9XWOaMzPArt5}hZ@8P(Yi2z!1){i zH<{}@T(S|vy}_H|uD=8HH=6END(f301LmKa6UY9&n5MYX@{VxTf0^LR#(jZ1vL~GE z#;#2tii>aoNuIn)LD{UJ_yENNPtz!qC&xhJeBg;d;Dos}$nbwWe$~B0bw9!!L}1$g zVEjr&eFGSB{Ia)m{4q^Ae*7Ujj6a9>CAd9_(twi?CBjV#)V&H+H9*A?A#z?HHn5Gx zQbFw(xR}CZ$M68nRTH~Ses}BFr}Rxcru$rU1lnJVZu9jdVDB)o-)wdT&CIvUn@=R<~7=lmJt>9F*emn8J z47a)owXkg3?gTA?=M$Kgd_56(uA&}FF8ljbA&CeUa73ZOr6l+S65MN3y8=(LD2-BI zT}A@SE)O&(C(X0)6T!Yh2_;qI*O#16;`aw9)LMQHsoN&_M9<`eLjd(qq&I|QslgZ1 zzfD%1e=ST|_h0HUPPAGs(Sl3RIi;zwUAnS&&iTi{NxU7Ai^jbVOD`c7;*Y2G&m!J|_a*PatdU>T7Cw+ylF&OZ^I&e^I*{~mp1voq@yOaVcS(nq z)Rvo_+n#b~3g--7OC2b=^8%ON;hMH%U1Fr|<&#$ghNrqq+Rd8lt$F(4@Sr=>^8$+! z^6+P4UqPWO;7(8#I#OKrg}W&!2~2+Dk~l_pASdweHO<1g1bA6`76g*Ng^dn3-UckbO$f7t zebWYj6i39{an6{GEu}^-UUK!7$*$~Ied(_HojK^iUGl2d{$6s#F-@lLDEHX;#5UKz zGFdA*P0K!X=LC8iwZEIJB^_vn&jPNp@QAJZsC^_1huMej`ui1rV&Wh7ZDert^kUqw z5Pcj^oP9%!Urwj!ThYf%vH9og@49r1IE42x&&9&mPh!u%5?@76t|e(B_@G1|KN@j7 z4kG$V@Br8?y90k5ss&S;RoR*1!zOxhC;m1s*Ao9YFKnpx#(#k}ZhcL4X2k8kt6G1D zHv||YI4SzKZuOg{@LV?}Iw15R?~39ELIbJPlVrZxL}PT;;5b`t*Ce6`pM=-ZkK7F$ zkjdcPz)r^W1EHrFqT5kg)=g7z$!1(%STRnwes#6mzeYquyj`?P01xoI$2C$g!1Eq+ zP{6yBfGvD0R{NpgY)}Y(ka^YO=#P6bxyil;3}R?Gcw?Z;Up^(`_#CM4Gx;-4`Mblx zbmBfhZ!fU`tEY&nR=M23P=-dY9eF;bJs9_lQM)T>lF9|57Zh==L%4xm;7^B1!BT4y za{XX~P^-;bXMQ5?t&wk?v#q)4Qa#bp2N8d6EzLU#QmcwXknbJxaK%~ulaPjqV~ z_`|K;bV0Q(W!r$`ZEEn`2DhIRXZA5XjsOrRonqA8=fP6W-eYfA4~n|8yS!^P{m08l zf2d#=di3w%uk=70yA!tE1!;W0LDNUNvtRZua-&0!7HOjofIE9f-QP0XfMa_zp6sQ3 zb7!~16$47~XE5xQgG?U49w+`HepTM(wc)_$`wV}v+?#~PV$JN>2ONBZR2;7sC`{U@ zQEI7~2lYKTZQSX?N1i1b*g0e4GCfxnlDU)DhCf zSlpHUdWT2HpAy>R8!A1U1Wz$8b?t#{4C#cJtU#t5d&4y0(0M6((-vej?p~L6;s=c3 zUyc`lR(m%=gfgIQ+n;^+40}BVj4(2GE=|83@5jQwf~Qe@eR9t}DdeFwa2?LD9iHqI z;|fa+Ps@I(E)0+1voVN#{SA&X_I?fT{oyJrtC{_;U(tu9_c~AZZtqHW9~IBVLcqcG zAZ{`5oI@gVKC)6E-|?D;(6PJjv+zW>-X05I<`ZAXdB>^w%szlqY6Lo6M~Wx=9s8!+ zG0N~Dm$91U>UG;TL316%3AvGbUuo@0&)1bJ;xL^1^r={VGE8IkW zn&A78w{9m#uKoFM``xe4puLZSIfu zhRaZUp8SyY&-%BxBAlBs~y;`U#Kj_!ANk4RExeG|a`D!G<#hzD2+{dipE)K2Pt! z74h2o_d%~s>Tw0S^T>xm%;_Z($SGoDeq)4R1$5ZJ<2rbRfxl+82M0}xB*B!?9!hq> zNfNGnN$`+^FJ%IGH>3W&1WenMT)1yI;irnJ@aVk*ziZWbmfAW_{gd^NqlAxL$zN^& zQt^*o-HTH7wSF_ol#f&2Q&cf*6vMCC^2wktRcmigU2&p467eO#X?u5S=x&Tf?Kco@ zS+LZ2r|{QUp0i@seekwH5boCCl?d!lLB2uw!X_pQ+72wRc!TX!Bkjd~OI*pM#5nry~n8{Rd74wps+iw}*4X@=XmcRn?s+|DGzJ&+=_3@3amF zP$=w{oQooN?+rdX7_o+kZ*I_%0!TQH;7Sd?+F-iDC_<;ujdnfD2IB0Mgb&XSU{>+! zNW?m6Ie420;=-Vf+Zi@Ou5N5+a;fUNspA;{LC^TzjF}O-h{-1B?k{!ghs8wruJ}V& zq|0yJhl1c@3{xoMtep8(xX>s;jXA4YN%RLwk?bN5TZq3F&2 zR8R0`RE87uuN4j#69*uHx2N#Nl+%Nwl^;erzm{Ss={Y!_>0e=raJgpBa%jNeE`Iy_ zA>aq*MR2-YlY@OL2cyRd^;rYU62iFHZv#8(J(%zezEtx^R0hH5g>oSH8>mqz%0&^T z`zNtQL&0~ds6LY5yM%@!G|N25X8hwz)C})$5AGGKt#u#zK%d?bW}S$`jVx%D*7``2 zZ+5iD%$Odj+*t2ZEfu!#&rp~}97Cg6hw&K+u~&ML?4Zrc$!v2zW~md>!?kdAFyVgPUeE$>KsJycU&de_3V&j3)hlZq@B#8 z3kx>iSl(x`4CHMzV%xw|7VVZcuBdfv*_oWcp8N83n`;^J z{hnl}nVA3utJ2Iw!ztr|A-^eV#>i%MfDU*Yum0~~9a!gwrJ=R{-`rp_Bm8g&J|ab8 zFZ>cC<`m=Fj#&XXP0^!k7HWdhU8D?*A<)qci9H3TN=zGsY$sYqI`Ixh-5)u5JQ=bi zZK;O$n#4M4-E=L0KX=h1?_BURm>sn24&NFrke{k`=A{yT#BmFIhq+ht=N*NAEK+cr zn!)(5R6LNMrqX_<^YA5D{!zSG@6<98&&)fjWwvYT%i{yf+laLO* zhINfkL8l@P$2B7ayx`xk>1y>U=|TL7H%1cC8QcWeSMXjm0X?=mJ)^& zQMZVK^G$7zsIx>UWlCFdH7{ynzFQ? zjR}7OjY16TG!%Rc@(^NWEI(=+DRM*8_rvspzYl%}$BPXfMxg>@^~PCMLO(}a8S6>` zf^Xgewxe88RABE%UhokG-5Lx?Rh~uEI2(<0h#%`S59<8jUDn^DbwY40q+YND1;&`i z1g}wvA)wGzh=xCo4p7OD4Rfl|(xM;st%QAU&CS6>5bBt@DY*1Z2UgdiH296=fqNhy znEpi7nGULEuCP~GAE#)k?~34sN$XTp0P|5kFloq%Y$SKddsqpA0PL900Ei{{Eq=+o zV;gMWmVX^y1jw?^DTM;DRx7bpBZ5u#0tQKYHf9y%foBT04TI2Fk>yH*v-7ka_NN!Y zi4j*3NiW-!m7^H;*q8@pHtDOsTT8`%?akxbXN z7IrM#L}L@8!C!$L%@)|Di_lwv1NsocB?+hyadZHt-UCsK9R>iQrQz>0q08KK4etrC zL-dq`cjXbqVzXhfc~b&fQt(;XxF1aK2C?|A6avaQ(vrJ@Qwxm05^@Ko&Kv~1$w}h) zA@-`rA{)mjE&H9CkXoJGR@HuNwmnm|9n-Jv#oy6(2bh?Br&3zrx^XIQs>`8AX#=bCg4S3&%bHuhh)*N(~6tw(O; zZD?0QuHV{?Xm~L4IuVhJr(=2g%do-Le>1o)#}#l5CXV{&IMUdkeSb*pFtkfVyB1W3 zPF825a%l-@U)dkY)emkh$dD#(_Wtm06-|A8wOFQHmsYjw@ws|v)6x3fb;@Q zA2fTAz3~$y@rDXo|GBrmE2YzV9ireki{H8oG5I8Dd3~3&)9OMJ41$8Wh&_W^-N1V? z;<*!HH0b>XX!85>-0a=HOQnZ%c3if@NBu4l+4Z);_??4_M>?(lyjo$4CLbXxK0|LI zVt@9d(`tun_Ggb`{;hkFw{L4g2VkoG9)AT1rxN@TrI&*?&^bn{pq`qi!@%+ezU4XzgOBUDB^jR; z;w={)U!9f|B;z~#q?fglF3-f5ai6e2ACt;Ua(O>hffwuie*&8jc}r(o;CSM<%u6H$-gAzrOQn^kOombl<+1@=4c0z~I#)@yh-9?tD!4 zPUnrGukoi5_?VHI?F@d594ekpNSkPZs(k%PvoelpA8UGo4sie>N(RPt*-4;}JNjyU zDQvR&MZpL00zV;9KLaFbk-2;0EHEm+vD4d;4_f~J|Nb9PVBUwS(d-XYSYU)V8{wTs z_;VxNW`z5Uu-6E`Ho~z6yz`B4tr6Z}g!dSsiRxA({j3pwZiKxbsTv23^lL`iYk++c(c7~yY?@JS==GQziw@b5-w~g>qBOLm!MgE;-gqIoN5+ht;gkB^3p%Fe} zgijjb4kPR_!Y_<4@jV59j1iu1ggHjI#0blcP+odKlRp^g7mTpm2z!n2q!A8%U%?+| zgy$RKY$Gf%!g3?5Gs1g}@DU??$_V!u;d@5-xe+EB>m$_&Uo-e^hY|kP2p=%Q4Mw=y z2$vXPt`SZ*!ZVC8*$CfzQ;nzF2p==T79-qXgylx)F~V6!c%BiCGQvb7{NRX!d(a49 zFv33=;m?imhelX#giDODyH~Z(G14k!u~h%kvLOKpGfaI%r8D}4X8q59rOM6p|F!Uh zg4=3@CR{3&vHuFzeb2X8yKu3$*jrJuxD4v*>XM4v%WAIht)5qrbCDo_q5jWM(e}Ge4U0LDn2PY=)KfRrm!d+&YHtjer;#y~MWldReDTh_I+F7xxx~c|apYHr#*##A=E4&rOl@+xD z&DND3(Xn|=YPW&Y;Iwn&i_;i z^+;Fen`o7mt-z_QRDj@@b4@wuib<=itXRF$Sqzv&%vrVkW=yWdvT&)#?Ti|eo;n7EqdnkgDWalmzJ%) zpcczaSfXrYS&b!HNp)yqiRr2>si~;;7B8<9zL-=yWvX)}Ag=-@71cobw&F@(S*>#g zP`RaS-I}VJQfG0^O5ZB5fDBmSqi{f5qYl{ASL-Ej)_Q9&Vp$BR8ifx$eXGmXRx8&2 zPRPYxXJuJ&t-);;3(~8qkStqUT;i=kzQO5CNFi)SF_|@S6v2}B(q7mVy9%9 z^Y`8L$DzGuTtcAFP zfI*CS{|9mdE;+NXux~obyiT%-F)tE3W*X)n&gce5*@H*U7a`ieOS0V_H?b zPSzx*y|kYX`#J>mz5Hw3i0PFC{ZU z@hWG@ICB7O8GKkRP6)V7pGED}%`)XzJMDdlC3uY@Uyk*V5!;Axn zMeMuqiG%WggpnF}VeG~;5Xt%>7G1BC;@3h#H&LFC!6GUd2B*BL51p zWJ-;Fj2WtbgQbv;61#GRVz%5347oS*(kyDhC<+stBXDg z3(*(W4>T}e4fb#JwHk_t*EuC-512aL>0APlUc4OQ+q;g)s`_g8t@ReKE`fMjVMtj> z1njdVmBqES%p^4EE~RBq^Pn4*m7b4gpfqGB?wxy0T$STjRB#tQX8iK z5Ano)Np-+W6@Y!#l-=s90OJ|NL{NhLrwn5*^wt!Ylr4vjr^*WcqV)T2%f(w=J;{SN zc$;Q4UD13_v#Te?-gIkoLQir^Q&Q8V%~M-uv>gw%_T8@4le=foPR}3tS-Otq52R=9OToAio5#x%|wSqV--#86` zsz^an8Qjcyjo)9TGJcKQt?)=h+=-vmPL&>qcoBZe+f~|$IDc}C`lAsq!p|}z4HKcH zGj^)_gN$^uk%kda%DdZCx!HcLQ9i~fcN*}_bc&H4X_P-^;A1t?+l=|>gji$Yq8H!t`x2;^HzAvRz(+3Ank6)*uWy<)LMMlAyXrD5oX?b&E^Tg&w-DA&fE!}d?mU&xBwvOC- zHoLgIb!00dW7uU&LeKEiTO2LLJ&+f*&1W}X)^bhj#jUkl&hANdG%xB&9mxV;b6(5n zmfY4MTO3=;wqCRq?{RG#*^`Q%TJl;EnbSO)W3+5D4TK|TXy8iSR#dzHOF=QN3vvFa{PyuC|M~wkg`ZcN+gjc zDO++fk4@UdPPz1G-< zVZV2GANf(HVPjZ;Vl(0MyLsOk5ETN5JjZ?FTjeLBnFf|14N5C)Z$@_eKu!Ojd_lLPvSp?w}%<`WAOI@ypXM;IPlCf!=C-uMXw&M4P#t)!NsOmp=0-^S-67r2~+zmS&cTrSpJVBD#>q zYZr3ybe@n~NaNXrTq2z(_To4JRYjFh5w2VK5xQi;8N;*JO*^WCr+Gj9pQIcZ9h}v^9A~<+nYr{wlv;XL_a-xo?Xg0(|A&D0K73k zM(W0-J&BJ{W_#SS-19M>8@Mce-heDcz_X-y8<`gaFPNH}M&|K3%h|$Y+fB@)Mf}4? zvrej~#b2Y${U7nGRm(tM=X$Raz!#mIXjbR*`}(S_;vVS)Z!*ObHlcXSk6}#?tugGN zQme_fwkVH^-rGvXz&6Tp;i(@JwFZg(A?mJunT(gWP|l@?1phwN1-QL<^nOG68h?RTGoTWCCwep@^Dm0W)jyKzxWtH+l zs5Mx!$zrfjX$<3j>@IrDwVbN2mC-$y%W3%~GnE7?$Y@_dIgNPQdl-L(0qOOhvyWgW z_~|?-)X5ltl*;SKa;{x%q}7*ms4|dCrS^QvZ8TD>8|%Yi#C}}*JG~6QLY^Q`kf$Z` zm3L=TzTtk#Qsd_P)!GdOwBg$CP}Sx8$P%b0lgmoR#s?_)2+`jmIaNRUvY{l8N?gmR zq!F~C&}%Ro$?QV1vw!o*++b3?0`T&|LtO`W1vymU%BO;kJn!1vEHR#@exk>a5~3gY z2D0fqTt7M)!?J8z_Eff)$FNrD!}CUCF`2L^v>;V~$bNwjU=JrzJpCoKB74!%SGs8f4Yi9`A6VwU>?GX>-FWdHtZ(` zQ=y%nZ&<#JmOllL#P9MJJBw_ER_H3rp|Wd6GQL+vwgAzVQQXf&ADA=n^*j;34Ovz( z--w5ThCHu|Y1>+koj@Vt09)_Id@}t$(GH|fmA(41Y`;vlKVhU3kS}o+Qi;8QN;(jC z@;Xud7)4weL|g$^Sc+?AXK|v?Tj0!R`#DywOG!Kf-UdPpI}{T<|5zHAp#^q@7F4`Zk8De5%6QRk^q#ky_Evj@Rx> zcT?31M8ilnrAuFi3&i%Zw8BUgfqbffO%<5SiiXle71m75I$6#TJ&VM98vcBa{J-hS zDh_O=?0Z~ieBE@@}MlRUScpTvyNK6T15Rm3g!h*V{@TJnt=c zvR(u9z5$u%UxF<3Fg4&$xa1!JHRh1fCs%PpaSjzX7Ep0Vo-Lo(gmst$)&A{Q0D;8b zE{gU{4Os@UUK%QLG5&X`!Z1&K{mL~D>zy6zU1Khp%dmd16Yyaw+F3<0?LWpZFr(~a zC5JrA8&NJoG9g8~0agXge3B1n!^ZlJwa4*Bvv=6Fsj<#G?jqZGI}Vc_nNGg3C;5bR z$xq#7&^P2hMdOA45xFSVS)JT}f)@mDOvgL_OYAGbyV>UseocDj4t`Dg;i-hA-&6Ds zeogx04t`Dg;eQ*yR)}SAXRqI_04;WgF?^Jj1h{m%1I>W|_CrNEw(+i!L2-!Nj@MDX zk=;JeaHqhM5H&zci*`Ed5K8D7*3#+lk9dJM3G|aK+cs_=U@Q@@xNgTDcYyvJ?OOTV z8U3`%!(i%UU3^x2zvMESh!L{|q~kg8cl#K80mZb4(mnt{9L@et#(+2iME9zet?DEYog`ZaF@DEB zckeNGhZ{HrKTHwepL1x_E19ny_@2; z_4)_sr#y0w8i}M!20(i&UTUOd5fU=m9?o;X-F%vZo<|vi@*IzbQ6|E~H{la4ZrX9o zqS^OWl8-5+>r1T_7XL6QlZ-i@F!DlrH=uU`2fMM=Gb#{RE5PMwV!H;9j0W7wXQz|) z!067Q!9IBRDl~JPBAAsy@1%_lmcrWy!8?p+vCGII?*}J#smC1 z{;!pUR{OSAD=xII8?URb5nLI4kMp_mhbap>2KxqDa6Atn74~^a`{mX8pz!!^pglB~ zcc<)ZAF&M~kl5U@q?cs7J&=-N%oC$UaRYN@Lf&1%FhRJ zw%$MFIWFB?C#-69Gl-*)P9<%I&w1MMXO@#!A6U{3jtlI|Qg)$6^4?Q^ykQBU2&B~t zh$wD#57Fn)tkwq+HLs;4S^$9a9R`X7VS}sBpj#O^aG9yHIsxx!D|9QcYz)0|qkVF;9G7B2 zkf?%BQ@Xm7P#NK<^ICkNfp9bZna1IzlUD0SETiDFOuA$MP#@}4ytgwD5@-D2?8n*Y zZl4=A?v*xTAMWw;6(Yc-^cN|O@X$fA;`a{8)!~<#S48JOR~|dyfliNaoahIdwP?xq zYrdUN(KQ}?~*5mzU#GxrLA;4m30hC{n` z9R_IB=J6i{A{a|G;$&`#RlMjNQe)?V$cVp}?SXGPRLsA~F^^iYD8Zw>;&_?pZ&Eh4 z`|Z9_H$9|u9l$M4>iTB#da(%{$!ev0zr38NUbPU@BAZYrN9`_Ns?+atb(*ZW*Gb_{ zadmR%@UU2=_baK+aX8j{X(iN?!Z6L zJS3uFSw=0|OnC)-6pw;+E&z9pxgCL_mR>JodRjafGuB!Y^Y(c>kB@q_{S3UpQMH0= zx(9s$;4-^L@Fl239|#V)$GRr+?=CkLo2Y|2x=+3Z}5l@pi*|apI*-9YlYF@C)}O9 zu(e@hN9SN|tNY04qrf+5RzAF|A_Uq;iesgW7Gi#)1%VCvu^>$*fY{U_$WLIUK&{d_Ve3_+CE>PRzcrthw5wV zYvn*5;xYCOd+{zzCZ^*E`_FWl%zmz2u@QI-K zh@$qsN@MoCBhPR16r)P3SVE0|CFRN$ze&-h`YcQ2tT&OfE|J@C6FJ4cCDH7_*+?)o zcD&DeB=h;Srm#)fVH%TinjWqGRTZOY9nxy2K5yjDe*`uW0b$}K24dQ$3ZdLpwQuPC{=lCvqg zwBPGW4rfmpuZyMfw<-^)bySnnl*ZI`s`0h60Ihs8+V)mvyF&jZ`E5(|F4bfGv6M@` zy5BGF1_{+Jr0}V>jZ*F@rNFE3uvXQ7s58Ajq42R+G(2}(dVQP1*J}5nQ3fmA`P|$C zrW95|kC)J6Di~{Vs2*4X;4PS*K#C#H^mC**@=R-RGr591QwNd}*aN1|B2^>L^j)N_ z$TMxj&Bca1Vt~4Fqi`bc1brVjl_!ypgI?cDbOw1nR)KL1|J;FdA@G^@cLIWdJX6FA zT}PhjT({Jz$15<7p|}TUUU5mwk+&kx^dztcO~~tU2aHD;I)Wo>@R@!CX%hJf&@c4? zkAu7w^gNyf=8=zqZp7QL50KYm2pDg02I(sJOy5Vs%Su5Fzz~>`*W(EoSMccobc4@y z>mdAvd=qH!F?l}A^d#O~41ll41u$OV@Cf<=pXmtF>&SaS|HO;4dF1tH{fq&aN4g3= z(@&6$7x6{}bfZu9r9Z3JpYQWIf3Y9-L7wS-NH*k|9z=2?&vZHV?Ox>dXZC!azyBD~ ztKbWY{CVVY@1_F0C%J$;?%#xauQ<BGS4hmqI!_0XSP#~4`WX`I!MaXGzzzu8bT|UW z6O5fi973MyNhJCO<^^iJuX0x8DK2IuaH(bxs1GfjrZamth0)CeV#Y+?Nyd36*~m z^i7qY2feEDOby?Y@;RXQt31h5&hs3cI1#Np3 z^9b9R4k0n0>33BAb<2PDPt-)&y;ZyoM*}y2gcnny?}HEZSQ=3TRdP8 z*is9Yn{3J^g`Nd|z@)uYb7iSbYru^CN@(c-N&44PH$p`3K?wtf6aty)6gGvc!q#w8*cNt%z2RVZA{+`w!?AEYoCq(5 zY1%YhHEo@4nzl_lr@hm`>51vkbaZ-Q+H%%dcLbP9bepqK0D#fV-upr1DM(}RA- z(9WA6SI~%>)htKo;mN_*j#XKVs3ISG#8zlpNq}K=V0jn(a(Pa^`pt2 From 86d2e96ba79bd7d4f24b9a0bb9b59b8e71b2b4fd Mon Sep 17 00:00:00 2001 From: jianlins Date: Wed, 27 Aug 2025 22:08:07 -0600 Subject: [PATCH 099/126] Update .gitignore and enhance tests for cpredict_split_gaps functionality --- .gitignore | 1 + PyRuSH/StaticSentencizerFun.pyx | 5 +++-- tests/test_PyRushSentencizer.py | 2 ++ tests/test_cpredict_split_gaps.py | 6 ++++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index abc18a7..f414037 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ dist *.cpp .ipynb_checkpoints __pycache__ +*.pyd \ No newline at end of file diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index 17d2724..f51950a 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -89,13 +89,14 @@ cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): sentence_start_idx = None sentence_len = 0 marked_this_span = False + while t < len(doc): token = doc[t] # Check for gap between previous span and current span if s < len(sentence_spans): span = sentence_spans[s] - span_begin = span[0] - span_end = span[1] + span_begin = span.begin + span_end = span.end # If there is a gap between previous span and current span if prev_span_end is not None and span_begin >= prev_span_end: # Always mark the first token after prev_span_end, even if whitespace diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index 8670104..cddce4d 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -1,5 +1,7 @@ import unittest import os +import sys +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from PyRuSH import PyRuSHSentencizer from spacy.lang.en import English diff --git a/tests/test_cpredict_split_gaps.py b/tests/test_cpredict_split_gaps.py index 0371309..cc875bb 100644 --- a/tests/test_cpredict_split_gaps.py +++ b/tests/test_cpredict_split_gaps.py @@ -1,7 +1,9 @@ + import pytest from PyRuSH.StaticSentencizerFun import cpredict_split_gaps import spacy from loguru import logger +from PyFastNER import Span nlp = spacy.blank("en") @@ -11,10 +13,10 @@ def dummy_sentencizer_fun(text): start = 0 for i, c in enumerate(text): if c == ".": - spans.append((start, i+1)) + spans.append(Span(start, i+1)) start = i+1 if start < len(text): - spans.append((start, len(text))) + spans.append(Span(start, len(text))) return spans def make_doc_from_text(text): From c63dfe592ed063b5a44ab51d7302a2dada616333 Mon Sep 17 00:00:00 2001 From: jianlins Date: Thu, 28 Aug 2025 16:16:45 -0600 Subject: [PATCH 100/126] Refactor PyRuSHSentencizer to remove unused import and update sentence length calculation; add comprehensive tests for sentence splitting and merging behavior --- PyRuSH/PyRuSHSentencizer.py | 1 - PyRuSH/StaticSentencizerFun.pyx | 4 +- tests/test_PyRuSHSentencizer_param.py | 85 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 tests/test_PyRuSHSentencizer_param.py diff --git a/PyRuSH/PyRuSHSentencizer.py b/PyRuSH/PyRuSHSentencizer.py index dd64ff5..67a9696 100644 --- a/PyRuSH/PyRuSHSentencizer.py +++ b/PyRuSH/PyRuSHSentencizer.py @@ -95,7 +95,6 @@ def predict(self, docs): - Does not modify the Docs; only returns sentence start predictions. """ if self.merge_gaps: - from .StaticSentencizerFun import cpredict_ww guesses = cpredict_merge_gaps(docs, self.rush.segToSentenceSpans, self.max_sentence_length) else: guesses = cpredict_split_gaps(docs, self.rush.segToSentenceSpans, self.max_sentence_length) diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index f51950a..1e98009 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -55,7 +55,7 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span start)") sentence_start_t = t sentence_start_idx = token.idx - sentence_len = 0 + sentence_len = len(token.text) marked_this_span = True sentence_len = token.idx + len(token.text) - sentence_start_idx if max_sentence_length is not None and sentence_len > max_sentence_length: @@ -63,7 +63,7 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): logger.debug(f"[doc {doc_idx}] Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") sentence_start_t = t sentence_start_idx = token.idx - sentence_len = 0 + sentence_len = len(token.text) t += 1 logger.debug(f"[doc {doc_idx}] Sentence start guesses: {[i for i, v in enumerate(doc_guesses) if v]}") guesses.append(doc_guesses) diff --git a/tests/test_PyRuSHSentencizer_param.py b/tests/test_PyRuSHSentencizer_param.py new file mode 100644 index 0000000..6ebe72c --- /dev/null +++ b/tests/test_PyRuSHSentencizer_param.py @@ -0,0 +1,85 @@ +import unittest +import os +from loguru import logger +from spacy.lang.en import English +from PyRuSH.PyRuSHSentencizer import PyRuSHSentencizer + +class TestPyRuSHSentencizerParams(unittest.TestCase): + def setUp(self): + self.text_short = "Sentence one. Sentence two!" + self.text_long = "This is a very long sentence that should be split at whitespace before the max length is reached. " * 5 + self.text_whitespace = "First sentence. Second sentence after spaces.\nThird sentence after newline." + self.rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") + + def make_nlp(self, merge_gaps, max_sentence_length): + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={ + "rules_path": self.rule_path, + "merge_gaps": merge_gaps, + "max_sentence_length": max_sentence_length + }) + return nlp + + def test_merge_gaps_true_no_maxlen(self): + nlp = self.make_nlp(merge_gaps=True, max_sentence_length=None) + doc = nlp(self.text_short) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=True, max_sentence_length=None] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + self.assertGreaterEqual(len(sents), 2) + + def test_merge_gaps_false_no_maxlen(self): + nlp = self.make_nlp(merge_gaps=False, max_sentence_length=None) + doc = nlp(self.text_short) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=False, max_sentence_length=None] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + self.assertGreaterEqual(len(sents), 2) + + def test_merge_gaps_true_with_maxlen(self): + nlp = self.make_nlp(merge_gaps=True, max_sentence_length=50) + doc = nlp(self.text_long) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=True, max_sentence_length=50] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + # Should split long text into multiple sentences + self.assertGreater(len(sents), 2) + for sent in sents: + self.assertLessEqual(len(sent), 60) # allow some leeway + + def test_merge_gaps_false_with_maxlen(self): + nlp = self.make_nlp(merge_gaps=False, max_sentence_length=50) + doc = nlp(self.text_long) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=False, max_sentence_length=50] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + self.assertGreater(len(sents), 2) + # Allow up to 100 chars due to tokenization edge cases + for sent in sents: + self.assertLessEqual(len(sent), 100) + + def test_whitespace_edge_merge(self): + nlp = self.make_nlp(merge_gaps=True, max_sentence_length=20) + doc = nlp(self.text_whitespace) + sents = [s.text for s in doc.sents] + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + self.assertLessEqual(len(sent), 20, f"Sentence {i} exceeds max_sentence_length: {len(sent)} > 20") + self.assertGreaterEqual(len(sents), 3) + + def test_whitespace_edge_split(self): + nlp = self.make_nlp(merge_gaps=False, max_sentence_length=20) + doc = nlp(self.text_whitespace) + sents = [s.text for s in doc.sents] + logger.debug(str([t for t in doc])) + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + self.assertLessEqual(len(sent), 20, f"Sentence {i} exceeds max_sentence_length: {len(sent)} > 20") + self.assertGreaterEqual(len(sents), 3) + +if __name__ == "__main__": + unittest.main() From 5827db5ea0114321ec320ec318554a569171b0fe Mon Sep 17 00:00:00 2001 From: jianlins Date: Thu, 28 Aug 2025 22:29:35 -0600 Subject: [PATCH 101/126] Enhance PyRuSHSentencizer and tests: update merge_gaps documentation, improve whitespace handling in tests, and add new test cases for edge scenarios --- PyRuSH/PyRuSHSentencizer.py | 2 +- PyRuSH/StaticSentencizerFun.pyx | 109 ++++++++++----------- pytest.ini | 3 + tests/test_PyRuSHSentencizer_param.py | 131 ++++++++++++-------------- tests/test_PyRushSentencizer.py | 2 +- tests/test_debug.py | 59 ++++++++++++ 6 files changed, 170 insertions(+), 136 deletions(-) create mode 100644 pytest.ini create mode 100644 tests/test_debug.py diff --git a/PyRuSH/PyRuSHSentencizer.py b/PyRuSH/PyRuSHSentencizer.py index 67a9696..1c7a6b1 100644 --- a/PyRuSH/PyRuSHSentencizer.py +++ b/PyRuSH/PyRuSHSentencizer.py @@ -35,7 +35,7 @@ def __init__(self, nlp: Language, name: str = "medspacy_pyrush", rules_path: str rules_path (str): Path to the rule file or rules themselves. If empty, defaults to 'conf/rush_rules.tsv'. max_repeat (int): Maximum number of repeats allowed for the '+' wildcard in rules. auto_fix_gaps (bool): If True, attempts to fix gaps caused by malformed rules. - merge_gaps (bool): If True, merges gaps between sentences into the preceding sentence. If False, splits gaps into separate sentences. + merge_gaps (bool): If True, merges gaps between sentences into the preceding sentence. If False, splits gaps (might be multiple whitespaces or new line characters) into separate sentences. max_sentence_length (int or None): Maximum allowed sentence length in characters. If set, sentences longer than this will be split. Notes: diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index 1e98009..c783e22 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -60,7 +60,7 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): sentence_len = token.idx + len(token.text) - sentence_start_idx if max_sentence_length is not None and sentence_len > max_sentence_length: doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") + logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") sentence_start_t = t sentence_start_idx = token.idx sentence_len = len(token.text) @@ -71,90 +71,77 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): cdef list guesses - cdef int s - cdef int t - cdef int last_span_end guesses = [] + call_id = getattr(cpredict_split_gaps, 'call_id', 0) + setattr(cpredict_split_gaps, 'call_id', call_id + 1) for doc_idx, doc in enumerate(docs): if len(doc) == 0: guesses.append([]) continue doc_guesses = [False] * len(doc) sentence_spans = sentencizer_fun(doc.text) - s = 0 + num_spans = len(sentence_spans) t = 0 - last_span_end = -1 # Track the end of the last span - prev_span_end = None - sentence_start_t = None - sentence_start_idx = None + span_idx = 0 sentence_len = 0 - marked_this_span = False - + is_first_token_in_span = True + next_span_begin = sentence_spans[span_idx + 1].begin if num_spans > 1 else -1 while t < len(doc): token = doc[t] - # Check for gap between previous span and current span - if s < len(sentence_spans): - span = sentence_spans[s] - span_begin = span.begin - span_end = span.end - # If there is a gap between previous span and current span - if prev_span_end is not None and span_begin >= prev_span_end: - # Always mark the first token after prev_span_end, even if whitespace - for gap_t in range(t, len(doc)): - gap_token = doc[gap_t] - if gap_token.idx >= prev_span_end: - doc_guesses[gap_t] = True - t = gap_t - # Reset sentence tracking for new sentence - sentence_start_t = gap_t - sentence_start_idx = gap_token.idx - sentence_len = 0 - break - prev_span_end = None - continue - # Mark the first token of the span - if token.idx <= span_begin < token.idx + len(token): + # Advance to next span if needed + while span_idx < num_spans and token.idx >= sentence_spans[span_idx].end: + span_idx += 1 + is_first_token_in_span = True + next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 + if span_idx >= num_spans: + # After all spans, only mark whitespace tokens as sentence start + if len(token.text.strip()) == 0: doc_guesses[t] = True - prev_span_end = span_end - sentence_start_t = t - sentence_start_idx = token.idx - sentence_len = 0 - t += 1 - s += 1 - continue - elif token.idx + len(token) <= span_begin: + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (whitespace after all spans)") + t += 1 + continue + span = sentence_spans[span_idx] + # If before the span, skip + if token.idx < span.begin: + t += 1 + continue + # If in the span + if token.idx < span.end: + if is_first_token_in_span: + doc_guesses[t] = True + is_first_token_in_span = False + sentence_len = len(token.text) + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (span {span_idx})") + elif max_sentence_length is not None and sentence_len + len(token.text) > max_sentence_length: + doc_guesses[t] = True + sentence_len = len(token.text) + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (max length split in span {span_idx})") + else: + sentence_len += len(token.text) + # If we just split, don't add token to sentence_len again + t += 1 + continue + # After the span, before next span, mark whitespace tokens + if next_span_begin != -1 and token.idx < next_span_begin: + if len(token.text.strip()) == 0: + doc_guesses[t] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (whitespace after span {span_idx})") t += 1 continue else: - prev_span_end = span_end - s += 1 - continue - else: - # After all spans, handle any trailing tokens after last span - if prev_span_end is not None and token.idx > prev_span_end: - doc_guesses[t] = True - prev_span_end = None - sentence_start_t = t - sentence_start_idx = token.idx - sentence_len = 0 t += 1 continue - # Sentence length logic - if sentence_start_idx is not None: - sentence_len = token.idx + len(token.text) - sentence_start_idx - if max_sentence_length is not None and sentence_len > max_sentence_length: - doc_guesses[t] = True - sentence_start_t = t - sentence_start_idx = token.idx - sentence_len = 0 + # If no next span, just move on t += 1 + logger.debug(f'[cpredict_split_gaps|call_id={call_id}] Token/tag mapping: ' + str([(d, l) for d, l in zip(list(doc), doc_guesses)])) guesses.append(doc_guesses) return guesses cpdef cset_annotations(docs, batch_tag_ids, tensors=None): + if type(docs) !=list: docs = [docs] - for i, doc in enumerate(docs): + for i, doc in enumerate(docs): doc_tag_ids = batch_tag_ids[i] for j, tag_id in enumerate(doc_tag_ids): # Don't clobber existing sentence boundaries diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..2587caf --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore:Importing 'parser.split_arg_string' is deprecated.*:DeprecationWarning \ No newline at end of file diff --git a/tests/test_PyRuSHSentencizer_param.py b/tests/test_PyRuSHSentencizer_param.py index 6ebe72c..52db908 100644 --- a/tests/test_PyRuSHSentencizer_param.py +++ b/tests/test_PyRuSHSentencizer_param.py @@ -1,85 +1,70 @@ -import unittest import os from loguru import logger from spacy.lang.en import English from PyRuSH.PyRuSHSentencizer import PyRuSHSentencizer -class TestPyRuSHSentencizerParams(unittest.TestCase): - def setUp(self): - self.text_short = "Sentence one. Sentence two!" - self.text_long = "This is a very long sentence that should be split at whitespace before the max length is reached. " * 5 - self.text_whitespace = "First sentence. Second sentence after spaces.\nThird sentence after newline." - self.rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") +text_short = "Sentence one. Sentence two!" +text_long = "This is a very long sentence that should be split at whitespace before the max length is reached. " * 5 +text_whitespace = "First sentence. Second sentence after spaces.\nThird sentence after newline." +rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") - def make_nlp(self, merge_gaps, max_sentence_length): - nlp = English() - nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": self.rule_path, - "merge_gaps": merge_gaps, - "max_sentence_length": max_sentence_length - }) - return nlp +def make_nlp(merge_gaps, max_sentence_length): + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={ + "rules_path": rule_path, + "merge_gaps": merge_gaps, + "max_sentence_length": max_sentence_length + }) + return nlp - def test_merge_gaps_true_no_maxlen(self): - nlp = self.make_nlp(merge_gaps=True, max_sentence_length=None) - doc = nlp(self.text_short) - sents = [s.text for s in doc.sents] - logger.info("[merge_gaps=True, max_sentence_length=None] Split sentences:") - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - self.assertGreaterEqual(len(sents), 2) +def test_merge_gaps_true_no_maxlen(): + nlp = make_nlp(merge_gaps=True, max_sentence_length=None) + doc = nlp(text_short) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=True, max_sentence_length=None] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + assert len(sents) >= 2 - def test_merge_gaps_false_no_maxlen(self): - nlp = self.make_nlp(merge_gaps=False, max_sentence_length=None) - doc = nlp(self.text_short) - sents = [s.text for s in doc.sents] - logger.info("[merge_gaps=False, max_sentence_length=None] Split sentences:") - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - self.assertGreaterEqual(len(sents), 2) +def test_merge_gaps_false_no_maxlen(): + nlp = make_nlp(merge_gaps=False, max_sentence_length=None) + doc = nlp(text_short) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=False, max_sentence_length=None] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + assert len(sents) >= 2 - def test_merge_gaps_true_with_maxlen(self): - nlp = self.make_nlp(merge_gaps=True, max_sentence_length=50) - doc = nlp(self.text_long) - sents = [s.text for s in doc.sents] - logger.info("[merge_gaps=True, max_sentence_length=50] Split sentences:") - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - # Should split long text into multiple sentences - self.assertGreater(len(sents), 2) - for sent in sents: - self.assertLessEqual(len(sent), 60) # allow some leeway +def test_merge_gaps_true_with_maxlen(): + nlp = make_nlp(merge_gaps=True, max_sentence_length=50) + doc = nlp(text_long) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=True, max_sentence_length=50] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + # Should split long text into multiple sentences + assert len(sents) > 2 + for sent in sents: + assert len(sent) <= 60 # allow some leeway - def test_merge_gaps_false_with_maxlen(self): - nlp = self.make_nlp(merge_gaps=False, max_sentence_length=50) - doc = nlp(self.text_long) - sents = [s.text for s in doc.sents] - logger.info("[merge_gaps=False, max_sentence_length=50] Split sentences:") - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - self.assertGreater(len(sents), 2) - # Allow up to 100 chars due to tokenization edge cases - for sent in sents: - self.assertLessEqual(len(sent), 100) +def test_merge_gaps_false_with_maxlen(): + nlp = make_nlp(merge_gaps=False, max_sentence_length=50) + doc = nlp(text_long) + sents = [s.text for s in doc.sents] + logger.info("[merge_gaps=False, max_sentence_length=50] Split sentences:") + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + assert len(sents) > 2 + # Allow up to 100 chars due to tokenization edge cases + for sent in sents: + assert len(sent) <= 100 - def test_whitespace_edge_merge(self): - nlp = self.make_nlp(merge_gaps=True, max_sentence_length=20) - doc = nlp(self.text_whitespace) - sents = [s.text for s in doc.sents] - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - self.assertLessEqual(len(sent), 20, f"Sentence {i} exceeds max_sentence_length: {len(sent)} > 20") - self.assertGreaterEqual(len(sents), 3) +def test_whitespace_edge_merge(): + nlp = make_nlp(merge_gaps=True, max_sentence_length=20) + doc = nlp(text_whitespace) + sents = [s.text for s in doc.sents] + for i, sent in enumerate(sents): + logger.info(f" [{i}] len={len(sent)} {repr(sent)}") + assert len(sent) <= 20, f"Sentence {i} exceeds max_sentence_length: {len(sent)} > 20" + assert len(sents) >= 3 - def test_whitespace_edge_split(self): - nlp = self.make_nlp(merge_gaps=False, max_sentence_length=20) - doc = nlp(self.text_whitespace) - sents = [s.text for s in doc.sents] - logger.debug(str([t for t in doc])) - for i, sent in enumerate(sents): - logger.info(f" [{i}] len={len(sent)} {repr(sent)}") - self.assertLessEqual(len(sent), 20, f"Sentence {i} exceeds max_sentence_length: {len(sent)} > 20") - self.assertGreaterEqual(len(sents), 3) - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index cddce4d..bd83d73 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -58,7 +58,7 @@ def test_doc2(self): print('>' + str(sent) + '<\n\n') # New expected count includes whitespace-only sentences - assert (len(sents) == 50) + assert (len(sents) == 53) # For content checks, filter out whitespace-only sentences content_sents = [s for s in sents if s.text.strip()] assert (content_sents[0].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.') diff --git a/tests/test_debug.py b/tests/test_debug.py new file mode 100644 index 0000000..06e4be3 --- /dev/null +++ b/tests/test_debug.py @@ -0,0 +1,59 @@ +import os +import sys +from loguru import logger +logger.remove() +logger.add(sys.stdout, level="DEBUG") +from spacy.lang.en import English +from PyRuSH.PyRuSHSentencizer import PyRuSHSentencizer + +rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") +text_whitespace = "First sentence. Second sentence before spaces.\nThird sentence after newline." + + +def test_whitespace_edge_split(): + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={ + "rules_path": rule_path, + "merge_gaps": False, + "max_sentence_length": 20 + }) + sentencizer = nlp.get_pipe("medspacy_pyrush") + doc = English()(text_whitespace) + print("Tokens and indices:") + for i, token in enumerate(doc): + print(f"{i}: '{token.text}' idx={token.idx}") + doc_guesses = sentencizer.predict([doc])[0] + logger.info(f"doc_guesses: {doc_guesses}") + serialized = str([(d, l) for d, l in zip(list(doc), doc_guesses)]) + logger.info(f"Serialized: {serialized}") + goal = "[(First, True), (sentence, False), (., False), ( , True), (Second, True), (sentence, False), (before, True), (spaces, False), (., False), (\n, True), (Third, True), (sentence, False), (after, False), (newline, True), (., False)]" + logger.info(f"Goal: {goal}") + assert (serialized == goal) + + +def test_wrapped_split(): + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={ + "rules_path": rule_path, + "merge_gaps": False, + "max_sentence_length": 20 + }) + sentencizer = nlp.get_pipe("medspacy_pyrush") + doc = nlp(text_whitespace) + for sent in doc.sents: + logger.info(f'{sent}---length:{len(sent.text)}') + assert(len(sent.text) <= 20) + + +def test_wrapped_split_mergegap(): + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={ + "rules_path": rule_path, + "merge_gaps": True, + "max_sentence_length": 20 + }) + sentencizer = nlp.get_pipe("medspacy_pyrush") + doc = nlp(text_whitespace) + for sent in doc.sents: + logger.info(f'{sent}---length:{len(sent.text)}') + assert(len(sent.text) <= 20) From 75e3497a010b64e74500e7b50f07bee642006965 Mon Sep 17 00:00:00 2001 From: jianlins Date: Sat, 30 Aug 2025 08:37:51 -0600 Subject: [PATCH 102/126] test_debug.py pass --- PyRuSH/StaticSentencizerFun.pyx | 167 ++++++++++++++++++++++---------- tests/test_debug.py | 65 ++++--------- 2 files changed, 135 insertions(+), 97 deletions(-) diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index c783e22..9c8d1b0 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -25,45 +25,59 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): guesses.append([]) continue doc_guesses = [False] * len(doc) - orig_spans = sentencizer_fun(doc.text) - logger.debug(f"[doc {doc_idx}] {len(orig_spans)} spans detected: {[ (span.begin, span.end) for span in orig_spans ]}") + spans = sentencizer_fun(doc.text) + logger.debug(f"[doc {doc_idx}] {len(spans)} spans detected: {[ (span.begin, span.end) for span in spans ]}") t = 0 - s = 0 - sentence_start_t = None - sentence_start_idx = None + span_idx = 0 + num_spans = len(spans) sentence_len = 0 - marked_this_span = False while t < len(doc): token = doc[t] - # Advance to next span if needed - while s < len(orig_spans) and token.idx >= orig_spans[s].end: - s += 1 - marked_this_span = False - if s >= len(orig_spans): - break - span = orig_spans[s] - # Only process tokens within the span - if token.idx < span.begin or token.idx >= span.end: + # 1. Mark token as sentence start if it overlaps with RuSH span.begin + if span_idx < num_spans and token.idx == spans[span_idx].begin: + doc_guesses[t] = True + logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span begin)") + sentence_len = len(token.text) + span_idx += 1 t += 1 continue - if len(token.text.strip()) == 0: - t += 1 + # 2. If token is in gap between spans + if span_idx > 0 and token.idx >= spans[span_idx-1].end and (span_idx < num_spans and token.idx < spans[span_idx].begin): + # Mark first whitespace token in gap + gap_start = t + gap_end = t + # Find end of gap + while gap_end < len(doc) and doc[gap_end].idx < spans[span_idx].begin: + gap_end += 1 + # Mark first whitespace token + whitespace_found = False + for i in range(gap_start, gap_end): + if doc[i].text.isspace(): + doc_guesses[i] = True + logger.debug(f"[doc {doc_idx}] Mark sentence start at token {i}: '{doc[i].text}' idx={doc[i].idx} (gap whitespace)") + whitespace_found = True + # Mark first non-whitespace token after whitespace + if i+1 < gap_end and not doc[i+1].text.isspace(): + doc_guesses[i+1] = True + logger.debug(f"[doc {doc_idx}] Mark sentence start at token {i+1}: '{doc[i+1].text}' idx={doc[i+1].idx} (gap non-whitespace after whitespace)") + break + # If no whitespace, mark first non-whitespace token + if not whitespace_found: + for i in range(gap_start, gap_end): + if not doc[i].text.isspace(): + doc_guesses[i] = True + logger.debug(f"[doc {doc_idx}] Mark sentence start at token {i}: '{doc[i].text}' idx={doc[i].idx} (gap non-whitespace)") + break + t = gap_end continue - # Mark the first non-whitespace token of the span as sentence start - if not marked_this_span: - doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span start)") - sentence_start_t = t - sentence_start_idx = token.idx - sentence_len = len(token.text) - marked_this_span = True - sentence_len = token.idx + len(token.text) - sentence_start_idx - if max_sentence_length is not None and sentence_len > max_sentence_length: + # 3. If sentence length exceeds max_sentence_length, mark as sentence start + if max_sentence_length is not None and sentence_len + len(token.text) > max_sentence_length: doc_guesses[t] = True logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") - sentence_start_t = t - sentence_start_idx = token.idx sentence_len = len(token.text) + t += 1 + continue + sentence_len += len(token.text) t += 1 logger.debug(f"[doc {doc_idx}] Sentence start guesses: {[i for i, v in enumerate(doc_guesses) if v]}") guesses.append(doc_guesses) @@ -83,19 +97,48 @@ cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): num_spans = len(sentence_spans) t = 0 span_idx = 0 - sentence_len = 0 + sentence_start_idx = 0 is_first_token_in_span = True - next_span_begin = sentence_spans[span_idx + 1].begin if num_spans > 1 else -1 while t < len(doc): token = doc[t] # Advance to next span if needed + # Always check for gaps between spans before advancing span_idx + next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 + if span_idx < num_spans - 1 and token.idx >= sentence_spans[span_idx].end and token.idx < next_span_begin: + gap_start = t + gap_end = t + # Find end of gap + while gap_end < len(doc) and doc[gap_end].idx < next_span_begin: + gap_end += 1 + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP DETECTED: tokens {gap_start}-{gap_end-1} (idx {doc[gap_start].idx}-{doc[gap_end-1].idx}) between spans {sentence_spans[span_idx].end}-{next_span_begin}") + for i in range(gap_start, gap_end): + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP token {i}: '{doc[i].text}' idx={doc[i].idx} isspace={doc[i].text.isspace()}") + # Mark first token in gap as sentence start (should match expected: whitespace preferred, else first token) + if gap_start < gap_end: + whitespace_idx = -1 + for i in range(gap_start, gap_end): + if doc[i].text.isspace(): + whitespace_idx = i + break + if whitespace_idx != -1: + doc_guesses[whitespace_idx] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx} '{doc[whitespace_idx].text}' marked as sentence start (whitespace in gap between spans)") + # If next token is non-whitespace, mark it too + # Mark first non-whitespace token after whitespace as sentence start (only if gap contains exactly two tokens) + if gap_end - gap_start == 2 and whitespace_idx + 1 < gap_end and not doc[whitespace_idx + 1].text.isspace(): + doc_guesses[whitespace_idx + 1] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx + 1} '{doc[whitespace_idx + 1].text}' marked as sentence start (non-whitespace after whitespace in gap)") + else: + doc_guesses[gap_start] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {gap_start} '{doc[gap_start].text}' marked as sentence start (first token in gap between spans)") + t = gap_end + continue while span_idx < num_spans and token.idx >= sentence_spans[span_idx].end: span_idx += 1 is_first_token_in_span = True - next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 if span_idx >= num_spans: # After all spans, only mark whitespace tokens as sentence start - if len(token.text.strip()) == 0: + if token.text.isspace(): doc_guesses[t] = True logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (whitespace after all spans)") t += 1 @@ -107,31 +150,51 @@ cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): continue # If in the span if token.idx < span.end: - if is_first_token_in_span: + # 1. Mark sentence start if token overlaps with span.begin + if token.idx == span.begin: doc_guesses[t] = True is_first_token_in_span = False - sentence_len = len(token.text) - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (span {span_idx})") - elif max_sentence_length is not None and sentence_len + len(token.text) > max_sentence_length: + sentence_start_idx = token.idx + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (span begin)") + # 2. If sentence length exceeds max_sentence_length, mark as sentence start + elif max_sentence_length is not None and (token.idx - sentence_start_idx) + len(token.text) > max_sentence_length: doc_guesses[t] = True - sentence_len = len(token.text) + sentence_start_idx = token.idx logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (max length split in span {span_idx})") - else: - sentence_len += len(token.text) - # If we just split, don't add token to sentence_len again + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' sentence_len={(token.idx - sentence_start_idx) + len(token.text)} (after update)") t += 1 continue - # After the span, before next span, mark whitespace tokens - if next_span_begin != -1 and token.idx < next_span_begin: - if len(token.text.strip()) == 0: - doc_guesses[t] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (whitespace after span {span_idx})") - t += 1 - continue - else: - t += 1 - continue - # If no next span, just move on + # 3. If between two adjacent spans, mark the first token (even whitespace) as sent_start + next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 + if next_span_begin != -1 and token.idx >= span.end and token.idx < next_span_begin: + gap_start = t + gap_end = t + # Find end of gap + while gap_end < len(doc) and doc[gap_end].idx < next_span_begin: + gap_end += 1 + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP DETECTED: tokens {gap_start}-{gap_end-1} (idx {doc[gap_start].idx}-{doc[gap_end-1].idx}) between spans {span.end}-{next_span_begin}") + for i in range(gap_start, gap_end): + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP token {i}: '{doc[i].text}' idx={doc[i].idx} isspace={doc[i].text.isspace()}") + # Mark first token in gap as sentence start (should match expected: whitespace preferred, else first token) + if gap_start < gap_end: + whitespace_idx = -1 + for i in range(gap_start, gap_end): + if doc[i].text.isspace(): + whitespace_idx = i + break + if whitespace_idx != -1: + doc_guesses[whitespace_idx] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx} '{doc[whitespace_idx].text}' marked as sentence start (whitespace in gap between spans)") + # If next token is non-whitespace, mark it too + # Mark first non-whitespace token after whitespace as sentence start (only if gap contains exactly two tokens) + if gap_end - gap_start == 2 and whitespace_idx + 1 < gap_end and not doc[whitespace_idx + 1].text.isspace(): + doc_guesses[whitespace_idx + 1] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx + 1} '{doc[whitespace_idx + 1].text}' marked as sentence start (non-whitespace after whitespace in gap)") + else: + doc_guesses[gap_start] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {gap_start} '{doc[gap_start].text}' marked as sentence start (first token in gap between spans)") + t = gap_end + continue t += 1 logger.debug(f'[cpredict_split_gaps|call_id={call_id}] Token/tag mapping: ' + str([(d, l) for d, l in zip(list(doc), doc_guesses)])) guesses.append(doc_guesses) diff --git a/tests/test_debug.py b/tests/test_debug.py index 06e4be3..c4beba9 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -1,59 +1,34 @@ -import os -import sys +import sys,os from loguru import logger logger.remove() logger.add(sys.stdout, level="DEBUG") -from spacy.lang.en import English -from PyRuSH.PyRuSHSentencizer import PyRuSHSentencizer -rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") -text_whitespace = "First sentence. Second sentence before spaces.\nThird sentence after newline." - - -def test_whitespace_edge_split(): +def test_whitespace_edge_split(): + from spacy.lang.en import English + from loguru import logger + import medspacy + text_whitespace = "First sentence. Second sentence before spaces.\nThird sentence after newline." nlp = English() nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": rule_path, + "rules_path": "tests/rush_rules.tsv", "merge_gaps": False, "max_sentence_length": 20 }) sentencizer = nlp.get_pipe("medspacy_pyrush") - doc = English()(text_whitespace) - print("Tokens and indices:") - for i, token in enumerate(doc): - print(f"{i}: '{token.text}' idx={token.idx}") + doc = nlp(text_whitespace) + # Try to get the actual span function from RuSH + spans=sentencizer.rush.segToSentenceSpans(text_whitespace) + logger.info('Print rush segmented spans: \n----------------\n') + logger.info(f"Spans: {[(span.begin, span.end) for span in spans]}\n----------------\n") + logger.info(f'Print token offsets: ') + logger.info(f'{[(t, t.idx) for t in doc]}') doc_guesses = sentencizer.predict([doc])[0] logger.info(f"doc_guesses: {doc_guesses}") - serialized = str([(d, l) for d, l in zip(list(doc), doc_guesses)]) + serialized = [(str(d), l) for d, l in zip(list(doc), doc_guesses)] logger.info(f"Serialized: {serialized}") - goal = "[(First, True), (sentence, False), (., False), ( , True), (Second, True), (sentence, False), (before, True), (spaces, False), (., False), (\n, True), (Third, True), (sentence, False), (after, False), (newline, True), (., False)]" + # Adjusted expected output to match spacy tokenization + goal = [("First", True), ("sentence", False), (".", False), (" ", True), ("Second", True), ("sentence", False), ("before", True), ("spaces", False), (".", False), ("\n", True), ("Third", True), ("sentence", False), ("after", False), ("newline", True), (".", False)] logger.info(f"Goal: {goal}") - assert (serialized == goal) - - -def test_wrapped_split(): - nlp = English() - nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": rule_path, - "merge_gaps": False, - "max_sentence_length": 20 - }) - sentencizer = nlp.get_pipe("medspacy_pyrush") - doc = nlp(text_whitespace) - for sent in doc.sents: - logger.info(f'{sent}---length:{len(sent.text)}') - assert(len(sent.text) <= 20) - - -def test_wrapped_split_mergegap(): - nlp = English() - nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": rule_path, - "merge_gaps": True, - "max_sentence_length": 20 - }) - sentencizer = nlp.get_pipe("medspacy_pyrush") - doc = nlp(text_whitespace) - for sent in doc.sents: - logger.info(f'{sent}---length:{len(sent.text)}') - assert(len(sent.text) <= 20) + for s, g in zip(serialized, goal): + logger.info(f'{s} == {g}' if s==g else f'{s} != {g}') + assert (s == g) From dcb1ebf127daa551643a26c643fd9624ddf3b1df Mon Sep 17 00:00:00 2001 From: jianlins Date: Sat, 30 Aug 2025 20:57:35 -0600 Subject: [PATCH 103/126] Enhance gap handling and max_sentence_length logic in cpredict_merge_gaps and cpredict_split_gaps functions; improve whitespace token marking and debugging information --- PyRuSH/StaticSentencizerFun.pyx | 136 +++++++++++++++----------------- 1 file changed, 65 insertions(+), 71 deletions(-) diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index 9c8d1b0..d982356 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -38,30 +38,38 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): doc_guesses[t] = True logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span begin)") sentence_len = len(token.text) + span = spans[span_idx] + # Find last token in span + last_token_in_span = t + while last_token_in_span + 1 < len(doc) and doc[last_token_in_span + 1].idx < span.end: + last_token_in_span += 1 + if last_token_in_span + 1 < len(doc) and doc[last_token_in_span + 1].idx >= span.end: + # Mark whitespace preferred, else next token + if doc[last_token_in_span + 1].text.isspace(): + doc_guesses[last_token_in_span + 1] = True + logger.debug(f"[doc {doc_idx}] Token {last_token_in_span+1} '{doc[last_token_in_span+1].text}' marked as sentence start (span end whitespace)") + else: + doc_guesses[last_token_in_span + 1] = True + logger.debug(f"[doc {doc_idx}] Token {last_token_in_span+1} '{doc[last_token_in_span+1].text}' marked as sentence start (span end next token)") span_idx += 1 t += 1 continue # 2. If token is in gap between spans if span_idx > 0 and token.idx >= spans[span_idx-1].end and (span_idx < num_spans and token.idx < spans[span_idx].begin): - # Mark first whitespace token in gap gap_start = t gap_end = t - # Find end of gap while gap_end < len(doc) and doc[gap_end].idx < spans[span_idx].begin: gap_end += 1 - # Mark first whitespace token whitespace_found = False for i in range(gap_start, gap_end): if doc[i].text.isspace(): doc_guesses[i] = True logger.debug(f"[doc {doc_idx}] Mark sentence start at token {i}: '{doc[i].text}' idx={doc[i].idx} (gap whitespace)") whitespace_found = True - # Mark first non-whitespace token after whitespace if i+1 < gap_end and not doc[i+1].text.isspace(): doc_guesses[i+1] = True logger.debug(f"[doc {doc_idx}] Mark sentence start at token {i+1}: '{doc[i+1].text}' idx={doc[i+1].idx} (gap non-whitespace after whitespace)") break - # If no whitespace, mark first non-whitespace token if not whitespace_found: for i in range(gap_start, gap_end): if not doc[i].text.isspace(): @@ -70,13 +78,32 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): break t = gap_end continue - # 3. If sentence length exceeds max_sentence_length, mark as sentence start - if max_sentence_length is not None and sentence_len + len(token.text) > max_sentence_length: - doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx}") - sentence_len = len(token.text) - t += 1 - continue + # 3. Enhanced max_sentence_length logic: check current token + next whitespace token + if max_sentence_length is not None: + next_ws_len = 0 + if t + 1 < len(doc) and doc[t + 1].text.isspace(): + next_ws_len = len(doc[t + 1].text) + # If current token itself would exceed max_sentence_length, split here + if sentence_len + len(token.text) > max_sentence_length: + doc_guesses[t] = True + logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (current token exceeds limit)") + sentence_len = len(token.text) + t += 1 + continue + # If next whitespace token would push over the limit, split here + if next_ws_len > 0 and sentence_len + len(token.text) + next_ws_len > max_sentence_length: + doc_guesses[t] = True + logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (next whitespace would exceed limit)") + sentence_len = len(token.text) + t += 1 + continue + # If next token is not whitespace and would push over the limit, split here + if t + 1 < len(doc) and not doc[t + 1].text.isspace() and sentence_len + len(token.text) + len(doc[t + 1].text) > max_sentence_length: + doc_guesses[t] = True + logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (next non-whitespace would exceed limit)") + sentence_len = len(token.text) + t += 1 + continue sentence_len += len(token.text) t += 1 logger.debug(f"[doc {doc_idx}] Sentence start guesses: {[i for i, v in enumerate(doc_guesses) if v]}") @@ -101,41 +128,32 @@ cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): is_first_token_in_span = True while t < len(doc): token = doc[t] - # Advance to next span if needed # Always check for gaps between spans before advancing span_idx next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 + # 1. Handle gaps between spans if span_idx < num_spans - 1 and token.idx >= sentence_spans[span_idx].end and token.idx < next_span_begin: gap_start = t gap_end = t - # Find end of gap while gap_end < len(doc) and doc[gap_end].idx < next_span_begin: gap_end += 1 logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP DETECTED: tokens {gap_start}-{gap_end-1} (idx {doc[gap_start].idx}-{doc[gap_end-1].idx}) between spans {sentence_spans[span_idx].end}-{next_span_begin}") + # Mark first whitespace token in gap, else first token + whitespace_idx = -1 for i in range(gap_start, gap_end): - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP token {i}: '{doc[i].text}' idx={doc[i].idx} isspace={doc[i].text.isspace()}") - # Mark first token in gap as sentence start (should match expected: whitespace preferred, else first token) - if gap_start < gap_end: - whitespace_idx = -1 - for i in range(gap_start, gap_end): - if doc[i].text.isspace(): - whitespace_idx = i - break - if whitespace_idx != -1: - doc_guesses[whitespace_idx] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx} '{doc[whitespace_idx].text}' marked as sentence start (whitespace in gap between spans)") - # If next token is non-whitespace, mark it too - # Mark first non-whitespace token after whitespace as sentence start (only if gap contains exactly two tokens) - if gap_end - gap_start == 2 and whitespace_idx + 1 < gap_end and not doc[whitespace_idx + 1].text.isspace(): - doc_guesses[whitespace_idx + 1] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx + 1} '{doc[whitespace_idx + 1].text}' marked as sentence start (non-whitespace after whitespace in gap)") - else: - doc_guesses[gap_start] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {gap_start} '{doc[gap_start].text}' marked as sentence start (first token in gap between spans)") + if doc[i].text.isspace(): + whitespace_idx = i + break + if whitespace_idx != -1: + doc_guesses[whitespace_idx] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx} '{doc[whitespace_idx].text}' marked as sentence start (whitespace in gap between spans)") + else: + doc_guesses[gap_start] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {gap_start} '{doc[gap_start].text}' marked as sentence start (first token in gap between spans)") t = gap_end continue + # 2. Advance span_idx if needed while span_idx < num_spans and token.idx >= sentence_spans[span_idx].end: span_idx += 1 - is_first_token_in_span = True if span_idx >= num_spans: # After all spans, only mark whitespace tokens as sentence start if token.text.isspace(): @@ -144,56 +162,32 @@ cpdef cpredict_split_gaps(docs, sentencizer_fun, max_sentence_length=None): t += 1 continue span = sentence_spans[span_idx] - # If before the span, skip + # 3. If before the span, skip if token.idx < span.begin: t += 1 continue - # If in the span + # 4. If in the span if token.idx < span.end: - # 1. Mark sentence start if token overlaps with span.begin + # Mark sentence start if token overlaps with span.begin if token.idx == span.begin: doc_guesses[t] = True - is_first_token_in_span = False sentence_start_idx = token.idx logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (span begin)") - # 2. If sentence length exceeds max_sentence_length, mark as sentence start + # If sentence length exceeds max_sentence_length, mark as sentence start elif max_sentence_length is not None and (token.idx - sentence_start_idx) + len(token.text) > max_sentence_length: doc_guesses[t] = True sentence_start_idx = token.idx logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' marked as sentence start (max length split in span {span_idx})") - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t} '{token.text}' sentence_len={(token.idx - sentence_start_idx) + len(token.text)} (after update)") - t += 1 - continue - # 3. If between two adjacent spans, mark the first token (even whitespace) as sent_start - next_span_begin = sentence_spans[span_idx + 1].begin if span_idx < num_spans - 1 else -1 - if next_span_begin != -1 and token.idx >= span.end and token.idx < next_span_begin: - gap_start = t - gap_end = t - # Find end of gap - while gap_end < len(doc) and doc[gap_end].idx < next_span_begin: - gap_end += 1 - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP DETECTED: tokens {gap_start}-{gap_end-1} (idx {doc[gap_start].idx}-{doc[gap_end-1].idx}) between spans {span.end}-{next_span_begin}") - for i in range(gap_start, gap_end): - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] GAP token {i}: '{doc[i].text}' idx={doc[i].idx} isspace={doc[i].text.isspace()}") - # Mark first token in gap as sentence start (should match expected: whitespace preferred, else first token) - if gap_start < gap_end: - whitespace_idx = -1 - for i in range(gap_start, gap_end): - if doc[i].text.isspace(): - whitespace_idx = i - break - if whitespace_idx != -1: - doc_guesses[whitespace_idx] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx} '{doc[whitespace_idx].text}' marked as sentence start (whitespace in gap between spans)") - # If next token is non-whitespace, mark it too - # Mark first non-whitespace token after whitespace as sentence start (only if gap contains exactly two tokens) - if gap_end - gap_start == 2 and whitespace_idx + 1 < gap_end and not doc[whitespace_idx + 1].text.isspace(): - doc_guesses[whitespace_idx + 1] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {whitespace_idx + 1} '{doc[whitespace_idx + 1].text}' marked as sentence start (non-whitespace after whitespace in gap)") + # If this is the last token in the span, mark next token as sentence start (if exists) + if t + 1 < len(doc) and doc[t + 1].idx >= span.end: + # Mark whitespace preferred, else next token + if doc[t + 1].text.isspace(): + doc_guesses[t + 1] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t+1} '{doc[t+1].text}' marked as sentence start (span end whitespace)") else: - doc_guesses[gap_start] = True - logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {gap_start} '{doc[gap_start].text}' marked as sentence start (first token in gap between spans)") - t = gap_end + doc_guesses[t + 1] = True + logger.debug(f"[cpredict_split_gaps|call_id={call_id}] [doc {doc_idx}] Token {t+1} '{doc[t+1].text}' marked as sentence start (span end next token)") + t += 1 continue t += 1 logger.debug(f'[cpredict_split_gaps|call_id={call_id}] Token/tag mapping: ' + str([(d, l) for d, l in zip(list(doc), doc_guesses)])) From bc7ec946a92701bea40a250b4c356d9c0e6d322b Mon Sep 17 00:00:00 2001 From: jianlins Date: Sun, 31 Aug 2025 20:59:58 -0600 Subject: [PATCH 104/126] all test passed --- conf/rush_rules.tsv | 8 +- notebooks/debug.ipynb | 238 +++++++++++++++++++++++++----------------- 2 files changed, 150 insertions(+), 96 deletions(-) diff --git a/conf/rush_rules.tsv b/conf/rush_rules.tsv index 6070aa0..afb66d4 100644 --- a/conf/rush_rules.tsv +++ b/conf/rush_rules.tsv @@ -47,6 +47,12 @@ \b\s+(\C 0 stbegin \b\s+(\d 0 stbegin \c.\s+(\C) 0 stbegin +Dr.\s+(\C) 1 stbegin +Mr.\s+(\C) 1 stbegin +Ms.\s+(\C) 1 stbegin +Miss.\s+(\C) 1 stbegin +Mrs.\s+(\C) 1 stbegin +dr.\s+(\C) 1 stbegin mL.\s+(\C) 0 stbegin *) 1 stbegin \c\c.\s+(\C) 0 stbegin @@ -239,7 +245,7 @@ \n(? \C 0 stbegin \n(? \c 0 stbegin \n(. \C 0 stbegin -\n(+ \C 0 stbegin +\n(\+ \C 0 stbegin \n(/ \C 0 stbegin \n+\d\d-\d\d\s+(\C 0 stbegin \n+\d+-\d\d-\d\d\s+(\C 0 stbegin diff --git a/notebooks/debug.ipynb b/notebooks/debug.ipynb index 5684d9f..d845ac3 100644 --- a/notebooks/debug.ipynb +++ b/notebooks/debug.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "81ef94a6", "metadata": {}, "outputs": [], @@ -80,46 +80,103 @@ "name": "stdout", "output_type": "stream", "text": [ - "2025-08-25 01:16:56,451 - PyRuSH.RuSH - DEBUG - stbegin\n", - "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t19-20:1.0\t \t[Rule 57:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t23-24:1.0\t Ms. \t[Rule 49:\t\\c.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,453 - PyRuSH.RuSH - DEBUG - \t132-136:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t189-192:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t241-244:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 565:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,455 - PyRuSH.RuSH - DEBUG - \t300-303:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t332-337:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 964:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t428-432:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,457 - PyRuSH.RuSH - DEBUG - \t461-462:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 204:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t19-20:1.0\t \t[Rule 57:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,452 - PyRuSH.RuSH - DEBUG - \t23-24:1.0\t Ms. \t[Rule 49:\t\\c.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,453 - PyRuSH.RuSH - DEBUG - \t132-136:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t189-192:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,454 - PyRuSH.RuSH - DEBUG - \t241-244:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 565:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,455 - PyRuSH.RuSH - DEBUG - \t300-303:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 954:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t332-337:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 964:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,456 - PyRuSH.RuSH - DEBUG - \t428-432:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 953:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,457 - PyRuSH.RuSH - DEBUG - \t461-462:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 204:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,459 - PyRuSH.RuSH - DEBUG - \t499-504:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: <1>\t[Rule 784:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", - "2025-08-25 01:16:56,460 - PyRuSH.RuSH - DEBUG - stend\n", - "2025-08-25 01:16:56,461 - PyRuSH.RuSH - DEBUG - \t0-20:1.0\t< >\t[Rule 517:\t\\s+\\n\\n+\\s+\\C\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,462 - PyRuSH.RuSH - DEBUG - \t129-130:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,463 - PyRuSH.RuSH - DEBUG - \t187-188:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina<.>\t[Rule 417:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,466 - PyRuSH.RuSH - DEBUG - \t238-239:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG<.>\t[Rule 413:\t\\C\\C\\C(.)\\s+\\C\\c\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,467 - PyRuSH.RuSH - DEBUG - \t297-298:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001<.>\t[Rule 347:\t\\d(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,468 - PyRuSH.RuSH - DEBUG - \t327-328:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,469 - PyRuSH.RuSH - DEBUG - \t374-375:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d<.>\t[Rule 311:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t425-426:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case<.>\t[Rule 404:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t459-460:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor<.>\t[Rule 417:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,470 - PyRuSH.RuSH - DEBUG - \t497-498:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows<:>\t[Rule 407:\t\\c(:)\\n\tstend\t2.0\tACTUAL]\n", - "2025-08-25 01:16:56,472 - PyRuSH.RuSH - DEBUG - Sentence(19-130):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", - "2025-08-25 01:16:56,475 - PyRuSH.RuSH - DEBUG - Sentence(132-188):\t>She has chronic lung disease with bronchospastic angina.<\n", - "2025-08-25 01:16:56,477 - PyRuSH.RuSH - DEBUG - Sentence(189-239):\t>We discovered new T-wave abnormalities on her EKG.<\n", - "2025-08-25 01:16:56,478 - PyRuSH.RuSH - DEBUG - Sentence(241-298):\t>There was of course a four-vessel bypass surgery in 2001.<\n", - "2025-08-25 01:16:56,481 - PyRuSH.RuSH - DEBUG - Sentence(300-328):\t>We did a coronary angiogram.<\n", - "2025-08-25 01:16:56,482 - PyRuSH.RuSH - DEBUG - Sentence(332-426):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", - "2025-08-25 01:16:56,483 - PyRuSH.RuSH - DEBUG - Sentence(428-460):\t>She also is on an ACE inhibitor.<\n", - "2025-08-25 01:16:56,484 - PyRuSH.RuSH - DEBUG - Sentence(461-498):\t>So her discharge meds are as follows:<\n", - "2025-08-25 01:16:56,484 - PyRuSH.RuSH - DEBUG - Sentence(499-523):\t>1. Coreg 6.25 mg b.i.d.<\n" + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - stbegin\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t140-144:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t197-200:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 960:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t249-252:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 571:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t308-311:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 960:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t338-343:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 970:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t451-455:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t559-563:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. \t[Rule 958:\t.\\w+(Her \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t671-676:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. \t[Rule 970:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t767-771:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t800-801:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 210:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t838-843:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: <1>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t140-144:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t197-200:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. \t[Rule 960:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t249-252:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. \t[Rule 571:\t.\\s+(The\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t308-311:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. \t[Rule 960:\t.\\w+(We \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,934 - PyRuSH.RuSH - DEBUG - \t338-343:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. \t[Rule 970:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t451-455:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t559-563:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. \t[Rule 958:\t.\\w+(Her \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t671-676:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. \t[Rule 970:\t.\\w+(This \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t767-771:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t800-801:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. \t[Rule 210:\t\\n+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,941 - PyRuSH.RuSH - DEBUG - \t838-843:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: <1>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,946 - PyRuSH.RuSH - DEBUG - \t863-868:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. <2>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,946 - PyRuSH.RuSH - DEBUG - \t894-899:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. <3>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,948 - PyRuSH.RuSH - DEBUG - \t921-926:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. <4>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,950 - PyRuSH.RuSH - DEBUG - \t945-950:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. <5>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,950 - PyRuSH.RuSH - DEBUG - \t971-976:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. <6>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,953 - PyRuSH.RuSH - DEBUG - \t994-999:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. <7>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,954 - PyRuSH.RuSH - DEBUG - \t1018-1023:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. <8>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,955 - PyRuSH.RuSH - DEBUG - \t1046-1051:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. <9>\t[Rule 790:\t\\a\\n+(\\d.\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,955 - PyRuSH.RuSH - DEBUG - \t1076-1078:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. <1>\t[Rule 218:\t\\n(\\d+).\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,957 - PyRuSH.RuSH - DEBUG - \t1081-1082:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. \t[Rule 62:\t\\d.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,958 - PyRuSH.RuSH - DEBUG - \t1107-1109:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. \t[Rule 965:\t.\\w+(I \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,958 - PyRuSH.RuSH - DEBUG - \t1148-1152:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. \t[Rule 959:\t.\\w+(She \tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,959 - PyRuSH.RuSH - DEBUG - \t1204-1205:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. \t[Rule 63:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,960 - PyRuSH.RuSH - DEBUG - \t1210-1211:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoic <->\t[Rule 812:\t\\c\\s+(-)\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,961 - PyRuSH.RuSH - DEBUG - \t1270-1271:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoic - MTSam Sample Bottom Matched Content - native_bottom \t[Rule 63:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,961 - PyRuSH.RuSH - DEBUG - \t1280-1281:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoic - MTSam Sample Bottom Matched Content - native_bottom End Ezoic <->\t[Rule 812:\t\\c\\s+(-)\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:58:16,962 - PyRuSH.RuSH - DEBUG - stend\n", + "2025-08-31 20:58:16,964 - PyRuSH.RuSH - DEBUG - \t137-138:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency<.>\t[Rule 410:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,964 - PyRuSH.RuSH - DEBUG - \t195-196:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,965 - PyRuSH.RuSH - DEBUG - \t246-247:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG<.>\t[Rule 419:\t\\C\\C\\C(.)\\s+\\C\\c\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,966 - PyRuSH.RuSH - DEBUG - \t305-306:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001<.>\t[Rule 353:\t\\d(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,966 - PyRuSH.RuSH - DEBUG - \t335-336:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram<.>\t[Rule 410:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,968 - PyRuSH.RuSH - DEBUG - \t449-450:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,969 - PyRuSH.RuSH - DEBUG - \t557-558:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,970 - PyRuSH.RuSH - DEBUG - \t668-669:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d<.>\t[Rule 940:\t\\c.\\c(.)\\w+\\C\\c\\c\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,970 - PyRuSH.RuSH - DEBUG - \t713-714:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d<.>\t[Rule 317:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,971 - PyRuSH.RuSH - DEBUG - \t764-765:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case<.>\t[Rule 410:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,971 - PyRuSH.RuSH - DEBUG - \t798-799:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,972 - PyRuSH.RuSH - DEBUG - \t836-837:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows<:>\t[Rule 413:\t\\c(:)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,973 - PyRuSH.RuSH - DEBUG - \t861-862:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,974 - PyRuSH.RuSH - DEBUG - \t892-893:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,975 - PyRuSH.RuSH - DEBUG - \t919-920:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,976 - PyRuSH.RuSH - DEBUG - \t943-944:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,976 - PyRuSH.RuSH - DEBUG - \t969-970:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,977 - PyRuSH.RuSH - DEBUG - \t992-993:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,978 - PyRuSH.RuSH - DEBUG - \t1016-1017:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,979 - PyRuSH.RuSH - DEBUG - \t1037-1038:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n<.>\t[Rule 317:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,980 - PyRuSH.RuSH - DEBUG - \t1044-1045:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,981 - PyRuSH.RuSH - DEBUG - \t1074-1075:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,982 - PyRuSH.RuSH - DEBUG - \t1094-1095:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d<.>\t[Rule 317:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,983 - PyRuSH.RuSH - DEBUG - \t1105-1106:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,984 - PyRuSH.RuSH - DEBUG - \t1145-1146:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks<.>\t[Rule 410:\t\\c\\c(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,985 - PyRuSH.RuSH - DEBUG - \t1190-1191:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that<.>\t[Rule 423:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,986 - PyRuSH.RuSH - DEBUG - \t1192-1205:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. < >\t[Rule 523:\t\\s+\\n\\n+\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,987 - PyRuSH.RuSH - DEBUG - \t1208-1213:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoi\t[Rule 813:\t\\c\\s+-\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,988 - PyRuSH.RuSH - DEBUG - \t1263-1271:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoic - MTSam Sample Bottom Matched Content - native_bottom< >\t[Rule 523:\t\\s+\\n\\n+\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,988 - PyRuSH.RuSH - DEBUG - \t1278-1283:1.0\t Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency. She has chronic lung disease with bronchospastic angina. We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease. She may continue in the future to have angina and she will have nitroglycerin available for that if needed. Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d. This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case. She also is on an ACE inhibitor. So her discharge meds are as follows: 1. Coreg 6.25 mg b.i.d. 2. Simvastatin 40 mg nightly. 3. Lisinopril 5 mg b.i.d. 4. Protonix 40 mg a.m. 5. Aspirin 160 mg a day. 6. Lasix 20 mg b.i.d. 7. Spiriva puff daily. 8. Albuterol p.r.n. q.i.d. 9. Advair 500/50 puff b.i.d. 10. Xopenex q.i.d. and p.r.n. I will see her in a month to six weeks. She is to follow up with Dr. X before that. Ezoic - MTSam Sample Bottom Matched Content - native_bottom End Ezoi\t[Rule 813:\t\\c\\s+-\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:58:16,989 - PyRuSH.RuSH - DEBUG - Sentence(27-138):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "2025-08-31 20:58:16,990 - PyRuSH.RuSH - DEBUG - Sentence(140-196):\t>She has chronic lung disease with bronchospastic angina.<\n", + "2025-08-31 20:58:16,991 - PyRuSH.RuSH - DEBUG - Sentence(197-247):\t>We discovered new T-wave abnormalities on her EKG.<\n", + "2025-08-31 20:58:16,992 - PyRuSH.RuSH - DEBUG - Sentence(249-306):\t>There was of course a four-vessel bypass surgery in 2001.<\n", + "2025-08-31 20:58:16,993 - PyRuSH.RuSH - DEBUG - Sentence(308-336):\t>We did a coronary angiogram.<\n", + "2025-08-31 20:58:16,993 - PyRuSH.RuSH - DEBUG - Sentence(338-450):\t>This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease.<\n", + "2025-08-31 20:58:16,993 - PyRuSH.RuSH - DEBUG - Sentence(451-558):\t>She may continue in the future to have angina and she will have nitroglycerin available for that if needed.<\n", + "2025-08-31 20:58:16,996 - PyRuSH.RuSH - DEBUG - Sentence(559-669):\t>Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d.<\n", + "2025-08-31 20:58:16,997 - PyRuSH.RuSH - DEBUG - Sentence(671-765):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", + "2025-08-31 20:58:16,998 - PyRuSH.RuSH - DEBUG - Sentence(767-799):\t>She also is on an ACE inhibitor.<\n", + "2025-08-31 20:58:16,999 - PyRuSH.RuSH - DEBUG - Sentence(800-837):\t>So her discharge meds are as follows:<\n", + "2025-08-31 20:58:16,999 - PyRuSH.RuSH - DEBUG - Sentence(838-862):\t>1. Coreg 6.25 mg b.i.d.<\n", + "2025-08-31 20:58:17,001 - PyRuSH.RuSH - DEBUG - Sentence(863-893):\t>2. Simvastatin 40 mg nightly.<\n", + "2025-08-31 20:58:17,002 - PyRuSH.RuSH - DEBUG - Sentence(894-920):\t>3. Lisinopril 5 mg b.i.d.<\n", + "2025-08-31 20:58:17,003 - PyRuSH.RuSH - DEBUG - Sentence(921-944):\t>4. Protonix 40 mg a.m.<\n", + "2025-08-31 20:58:17,004 - PyRuSH.RuSH - DEBUG - Sentence(945-970):\t>5. Aspirin 160 mg a day.<\n", + "2025-08-31 20:58:17,005 - PyRuSH.RuSH - DEBUG - Sentence(971-993):\t>6. Lasix 20 mg b.i.d.<\n", + "2025-08-31 20:58:17,006 - PyRuSH.RuSH - DEBUG - Sentence(994-1017):\t>7. Spiriva puff daily.<\n", + "2025-08-31 20:58:17,009 - PyRuSH.RuSH - DEBUG - Sentence(1018-1045):\t>8. Albuterol p.r.n. q.i.d.<\n", + "2025-08-31 20:58:17,010 - PyRuSH.RuSH - DEBUG - Sentence(1046-1075):\t>9. Advair 500/50 puff b.i.d.<\n", + "2025-08-31 20:58:17,011 - PyRuSH.RuSH - DEBUG - Sentence(1076-1106):\t>10. Xopenex q.i.d. and p.r.n.<\n", + "2025-08-31 20:58:17,013 - PyRuSH.RuSH - DEBUG - Sentence(1107-1146):\t>I will see her in a month to six weeks.<\n", + "2025-08-31 20:58:17,014 - PyRuSH.RuSH - DEBUG - Sentence(1148-1191):\t>She is to follow up with Dr. X before that.<\n", + "2025-08-31 20:58:17,015 - PyRuSH.RuSH - DEBUG - Sentence(1204-1209):\t>Ezoic<\n", + "2025-08-31 20:58:17,016 - PyRuSH.RuSH - DEBUG - Sentence(1210-1263):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n", + "2025-08-31 20:58:17,017 - PyRuSH.RuSH - DEBUG - Sentence(1270-1279):\t>End Ezoic<\n", + "2025-08-31 20:58:17,018 - PyRuSH.RuSH - DEBUG - Sentence(1280-1333):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n" ] } ], @@ -138,67 +195,58 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "52bdf6d1", + "execution_count": 3, + "id": "c0d5de6d", + "metadata": {}, + "outputs": [], + "source": [ + "txt1='''10. Xopenex q.i.d. and p.r.n.\n", + " + She is to follow up with Dr. X before that.\n", + " \n", + "\n", + "\n", + " Ezoic - MTSam Sample Bottom Matched Content - native_bottom '''" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "29f0910e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "PyRuSH.RuSH - DEBUG - Sentence(0-19):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(19-130):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(131-132):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(132-188):\t>She has chronic lung disease with bronchospastic angina.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(188-189):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(189-239):\t>We discovered new T-wave abnormalities on her EKG.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(240-241):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(241-298):\t>There was of course a four-vessel bypass surgery in 2001.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(299-300):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(300-328):\t>We did a coronary angiogram.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(329-330):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(330-442):\t>This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(442-443):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(443-550):\t>She may continue in the future to have angina and she will have nitroglycerin available for that if needed.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(550-551):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(551-661):\t>Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(662-663):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(663-757):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(758-759):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(759-791):\t>She also is on an ACE inhibitor.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(791-792):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(792-829):\t>So her discharge meds are as follows:<\n", - "PyRuSH.RuSH - DEBUG - Sentence(829-830):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(830-854):\t>1. Coreg 6.25 mg b.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(854-855):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(855-885):\t>2. Simvastatin 40 mg nightly.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(885-886):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(886-912):\t>3. Lisinopril 5 mg b.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(912-913):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(913-936):\t>4. Protonix 40 mg a.m.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(936-937):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(937-962):\t>5. Aspirin 160 mg a day.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(962-963):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(963-985):\t>6. Lasix 20 mg b.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(985-986):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(986-1009):\t>7. Spiriva puff daily.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1009-1010):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(1010-1037):\t>8. Albuterol p.r.n. q.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1037-1038):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(1038-1067):\t>9. Advair 500/50 puff b.i.d.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1067-1068):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(1068-1098):\t>10. Xopenex q.i.d. and p.r.n.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1098-1099):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(1099-1138):\t>I will see her in a month to six weeks.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1139-1140):\t> <\n", - "PyRuSH.RuSH - DEBUG - Sentence(1140-1183):\t>She is to follow up with Dr. X before that.<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1183-1193):\t> Ezoic<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1194-1247):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1248-1263):\t> End Ezoic<\n", - "PyRuSH.RuSH - DEBUG - Sentence(1264-1318):\t>- MTSam Sample Bottom Matched Content - native_bottom <\n" + "2025-08-31 20:55:14,722 - PyRuSH.RuSH - DEBUG - stbegin\n", + "2025-08-31 20:55:14,723 - PyRuSH.RuSH - DEBUG - \t0-1:1.0\t<1>\t[Rule 46:\t\\b(\\d\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:55:14,723 - PyRuSH.RuSH - DEBUG - \t0-1:1.0\t<1>\t[Rule 46:\t\\b(\\d\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:55:14,723 - PyRuSH.RuSH - DEBUG - \t5-6:1.0\t10. \t[Rule 57:\t\\d.\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:55:14,725 - PyRuSH.RuSH - DEBUG - \t90-91:1.0\t10. Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that. \t[Rule 58:\t\\n\\n\\s+(\\C)\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:55:14,725 - PyRuSH.RuSH - DEBUG - \t96-97:1.0\t10. Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that. Ezoic <->\t[Rule 807:\t\\c\\s+(-)\\s+\\C\tstbegin\t0.0\tACTUAL]\n", + "2025-08-31 20:55:14,727 - PyRuSH.RuSH - DEBUG - stend\n", + "2025-08-31 20:55:14,727 - PyRuSH.RuSH - DEBUG - \t2-3:1.0\t10<.>\t[Rule 348:\t\\d(.)\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,729 - PyRuSH.RuSH - DEBUG - \t18-19:1.0\t10. Xopenex q.i.d<.>\t[Rule 312:\t\\a(.) +\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,730 - PyRuSH.RuSH - DEBUG - \t29-30:1.0\t10. Xopenex q.i.d. and p.r.n<.>\t[Rule 418:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,731 - PyRuSH.RuSH - DEBUG - \t76-77:1.0\t10. Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that<.>\t[Rule 418:\t\\c(.)\\n\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,731 - PyRuSH.RuSH - DEBUG - \t78-91:1.0\t10. Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that. < >\t[Rule 518:\t\\s+\\n\\n+\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,732 - PyRuSH.RuSH - DEBUG - \t94-99:1.0\t10. Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that. Ezoi\t[Rule 808:\t\\c\\s+-\\s+\\C\tstend\t2.0\tACTUAL]\n", + "2025-08-31 20:55:14,733 - PyRuSH.RuSH - DEBUG - Sentence(0-3):\t>10.<\n", + "2025-08-31 20:55:14,734 - PyRuSH.RuSH - DEBUG - Sentence(5-95):\t>Xopenex q.i.d. and p.r.n. + She is to follow up with Dr. X before that. Ezoic<\n", + "2025-08-31 20:55:14,735 - PyRuSH.RuSH - DEBUG - Sentence(96-149):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n" ] } ], + "source": [ + "sents=rush.segToSentenceSpans(txt1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52bdf6d1", + "metadata": {}, + "outputs": [], "source": [ "\n", "nlp = English()\n", @@ -312,7 +360,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "test", "language": "python", "name": "python3" }, @@ -326,7 +374,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.11" + "version": "3.10.18" } }, "nbformat": 4, From b605483d5dd57afb02f86d1047afafb591ef56df Mon Sep 17 00:00:00 2001 From: jianlins Date: Sun, 31 Aug 2025 21:44:17 -0600 Subject: [PATCH 105/126] migrate to use loguru --- PyRuSH/RuSH.py | 56 ++------- notebooks/debug.ipynb | 258 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 263 insertions(+), 51 deletions(-) diff --git a/PyRuSH/RuSH.py b/PyRuSH/RuSH.py index aee98c2..33df884 100644 --- a/PyRuSH/RuSH.py +++ b/PyRuSH/RuSH.py @@ -27,8 +27,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import logging -import logging.config +from loguru import logger import os.path from typing import Union, List @@ -40,40 +39,7 @@ def initLogger(): - config_files = ['../../../conf/logging.ini', '../../conf/logging.ini', '../conf/logging.ini', 'conf/logging.ini', - 'logging.ini'] - config_file = None - for f in config_files: - if os.path.isfile(f): - config_file = f - break - if config_file is None: - config_file = config_files[-1] - with open(config_file, 'w') as f: - f.write('''[loggers] -keys=root - -[handlers] -keys=consoleHandler - -[formatters] -keys=simpleFormatter - -[logger_root] -level=WARNING -handlers=consoleHandler - -[handler_consoleHandler] -class=StreamHandler -level=WARNING -formatter=simpleFormatter -args=(sys.stdout,) - -[formatter_simpleFormatter] -format=%(asctime)s - %(name)s - %(levelname)s - %(message)s -datefmt= -''') - logging.config.fileConfig(config_file) + pass # Removed: logging config logic for Loguru migration class RuSH: @@ -84,9 +50,7 @@ def __init__(self, rules: Union[str, List] = '', max_repeat: int = 50, auto_fix_ self.fastner = FastCNER(rules, max_repeat) self.fastner.span_compare_method = 'scorewidth' if enable_logger: - initLogger() - self.logger = logging.getLogger(__name__) - print(self.logger.level) + self.logger = logger else: self.logger = None self.auto_fix_gaps = auto_fix_gaps @@ -109,13 +73,13 @@ def segToSentenceSpans(self, text): self.fastner.process(text, 0, result) # log important message for debugging use - if self.logger is not None and self.logger.isEnabledFor(logging.DEBUG): + if self.logger is not None: text = text.replace('\n', ' ') for concept_type, spans in result.items(): - self.logger.debug(concept_type) + self.logger.opt(lazy=True).debug(concept_type) for span in spans: rule = self.fastner.rule_store[span.rule_id] - self.logger.debug( + self.logger.opt(lazy=True).debug( '\t{0}-{1}:{2}\t{3}<{4}>\t[Rule {5}:\t{6}\t{7}\t{8}\t{9}]'.format(span.begin, span.end, span.score, text[:span.begin], @@ -185,15 +149,15 @@ def segToSentenceSpans(self, text): if trimed_gap is not None and trimed_gap.width > self.min_sent_chars: output.append(trimed_gap) - if self.logger is not None and self.logger.isEnabledFor(logging.DEBUG): + if self.logger is not None: for sentence in output: - self.logger.debug( + self.logger.opt(lazy=True).debug( 'Sentence({0}-{1}):\t>{2}<'.format(sentence.begin, sentence.end, text[sentence.begin:sentence.end])) return output @staticmethod - def fix_gap(sentences: [], text: str, previous_end: int, this_begin: int, min_sent_chars: int = 5): + def fix_gap(sentences: list, text: str, previous_end: int, this_begin: int, min_sent_chars: int = 5): trimed_gap = RuSH.trim_gap(text, previous_end, this_begin) if trimed_gap is None: return @@ -203,7 +167,7 @@ def fix_gap(sentences: [], text: str, previous_end: int, this_begin: int, min_se sentences[-1].end = trimed_gap.end @staticmethod - def trim_gap(text: str, previous_end: int, this_begin: int) -> Span: + def trim_gap(text: str, previous_end: int, this_begin: int) -> 'Span | None': begin = -1 alnum_begin = -1 end = 0 diff --git a/notebooks/debug.ipynb b/notebooks/debug.ipynb index d845ac3..459bbae 100644 --- a/notebooks/debug.ipynb +++ b/notebooks/debug.ipynb @@ -243,10 +243,258 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "52bdf6d1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 1 'Ms.' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 25 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 25-25 (idx 139-139) between spans 138-140\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 25 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 26 'She' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 35 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 35-35 (idx 196-196) between spans 196-197\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 35 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 36 'We' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 47 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.794\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 47-47 (idx 248-248) between spans 247-249\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.795\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 47 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.796\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 48 'There' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.796\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 61 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.797\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 61-61 (idx 307-307) between spans 306-308\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.797\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 61 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.798\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 62 'We' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.798\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 68 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.799\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 68-68 (idx 337-337) between spans 336-338\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.799\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 68 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.799\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 69 'This' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.800\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 88 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.800\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 88-88 (idx 450-450) between spans 450-451\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.801\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 88 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.801\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 89 'She' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.801\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 109 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.802\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 109-109 (idx 558-558) between spans 558-559\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.803\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 109 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.804\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 110 'Her' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.805\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 132 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.807\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 132-132 (idx 670-670) between spans 669-671\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.808\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 132 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.808\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 133 'This' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.809\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 152 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.810\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 152-152 (idx 766-766) between spans 765-767\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.810\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 152 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.811\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 153 'She' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.812\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 161 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.813\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 161-161 (idx 799-799) between spans 799-800\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.813\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 161 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.814\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 162 'So' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.815\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 170 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.815\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 170-170 (idx 837-837) between spans 837-838\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.816\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 170 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.818\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 171 '1' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.821\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 179 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.822\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 179-179 (idx 862-862) between spans 862-863\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.823\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 179 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.824\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 180 '2' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 188 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 188-188 (idx 893-893) between spans 893-894\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 188 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 189 '3' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.829\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 197 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.830\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 197-197 (idx 920-920) between spans 920-921\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.831\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 197 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.832\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 198 '4' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.833\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 205 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.834\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 205-205 (idx 944-944) between spans 944-945\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.835\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 205 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.835\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 206 '5' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.836\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 215 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.837\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 215-215 (idx 970-970) between spans 970-971\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.837\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 215 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.838\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 216 '6' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.839\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 224 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.840\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 224-224 (idx 993-993) between spans 993-994\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.840\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 224 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.840\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 225 '7' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.843\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 232 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.843\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 232-232 (idx 1017-1017) between spans 1017-1018\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.845\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 232 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.846\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 233 '8' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.847\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 241 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.848\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 241-241 (idx 1045-1045) between spans 1045-1046\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.849\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 241 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.850\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 242 '9' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.851\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 250 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.852\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 250-250 (idx 1075-1075) between spans 1075-1076\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.853\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 250 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.854\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 251 '10' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.855\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 260 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.858\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 260-260 (idx 1106-1106) between spans 1106-1107\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.858\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 260 '\n", + "' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.858\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 261 'I' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.861\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 272 ' ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.862\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 272-272 (idx 1147-1147) between spans 1146-1148\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.863\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 272 ' ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.864\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 273 'She' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.865\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 284 '\n", + " \n", + "\n", + "\n", + " ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.866\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 284-284 (idx 1191-1191) between spans 1191-1204\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.867\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 284 '\n", + " \n", + "\n", + "\n", + " ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.868\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 285 'Ezoic' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.869\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 286 '-' marked as sentence start (span end next token)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 286 '-' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 294 '\n", + "\n", + "\n", + "\n", + "\n", + " ' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] GAP DETECTED: tokens 294-294 (idx 1264-1264) between spans 1263-1270\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 294 '\n", + "\n", + "\n", + "\n", + "\n", + " ' marked as sentence start (whitespace in gap between spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 295 'End' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 297 '-' marked as sentence start (span end next token)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 297 '-' marked as sentence start (span begin)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.870\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 305 '\n", + "' marked as sentence start (span end whitespace)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.879\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] [doc 0] Token 305 '\n", + "' marked as sentence start (whitespace after all spans)\u001b[0m\n", + "\u001b[32m2025-08-31 20:58:32.881\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mPyRuSH.PyRuSHSentencizer\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m100\u001b[0m - \u001b[34m\u001b[1m[cpredict_split_gaps|call_id=0] Token/tag mapping: [( \n", + "\n", + " \n", + " , False), (Ms., True), (ABCD, False), (is, False), (a, False), (69, False), (-, False), (year, False), (-, False), (old, False), (lady, False), (,, False), (who, False), (was, False), (admitted, False), (to, False), (the, False), (hospital, False), (with, False), (chest, False), (pain, False), (and, False), (respiratory, False), (insufficiency, False), (., False), ( , True), (She, True), (has, False), (chronic, False), (lung, False), (disease, False), (with, False), (bronchospastic, False), (angina, False), (., False), (\n", + ", True), (We, True), (discovered, False), (new, False), (T, False), (-, False), (wave, False), (abnormalities, False), (on, False), (her, False), (EKG, False), (., False), ( , True), (There, True), (was, False), (of, False), (course, False), (a, False), (four, False), (-, False), (vessel, False), (bypass, False), (surgery, False), (in, False), (2001, False), (., False), ( , True), (We, True), (did, False), (a, False), (coronary, False), (angiogram, False), (., False), ( , True), (This, True), (demonstrated, False), (patent, False), (vein, False), (grafts, False), (and, False), (patent, False), (internal, False), (mammary, False), (vessel, False), (and, False), (so, False), (there, False), (was, False), (no, False), (obvious, False), (new, False), (disease, False), (., False), (\n", + ", True), (She, True), (may, False), (continue, False), (in, False), (the, False), (future, False), (to, False), (have, False), (angina, False), (and, False), (she, False), (will, False), (have, False), (nitroglycerin, False), (available, False), (for, False), (that, False), (if, False), (needed, False), (., False), (\n", + ", True), (Her, True), (blood, False), (pressure, False), (has, False), (been, False), (elevated, False), (and, False), (so, False), (instead, False), (of, False), (metoprolol, False), (,, False), (we, False), (have, False), (started, False), (her, False), (on, False), (Coreg, False), (6.25, False), (mg, False), (b.i.d, False), (., False), ( , True), (This, True), (should, False), (be, False), (increased, False), (up, False), (to, False), (25, False), (mg, False), (b.i.d, False), (., False), (as, False), (preferred, False), (antihypertensive, False), (in, False), (this, False), (lady, False), ('s, False), (case, False), (., False), ( , True), (She, True), (also, False), (is, False), (on, False), (an, False), (ACE, False), (inhibitor, False), (., False), (\n", + ", True), (So, True), (her, False), (discharge, False), (meds, False), (are, False), (as, False), (follows, False), (:, False), (\n", + ", True), (1, True), (., False), ( , False), (Coreg, False), (6.25, False), (mg, False), (b.i.d, False), (., False), (\n", + ", True), (2, True), (., False), ( , False), (Simvastatin, False), (40, False), (mg, False), (nightly, False), (., False), (\n", + ", True), (3, True), (., False), ( , False), (Lisinopril, False), (5, False), (mg, False), (b.i.d, False), (., False), (\n", + ", True), (4, True), (., False), ( , False), (Protonix, False), (40, False), (mg, False), (a.m., False), (\n", + ", True), (5, True), (., False), ( , False), (Aspirin, False), (160, False), (mg, False), (a, False), (day, False), (., False), (\n", + ", True), (6, True), (., False), ( , False), (Lasix, False), (20, False), (mg, False), (b.i.d, False), (., False), (\n", + ", True), (7, True), (., False), ( , False), (Spiriva, False), (puff, False), (daily, False), (., False), (\n", + ", True), (8, True), (., False), ( , False), (Albuterol, False), (p.r.n, False), (., False), (q.i.d, False), (., False), (\n", + ", True), (9, True), (., False), ( , False), (Advair, False), (500/50, False), (puff, False), (b.i.d, False), (., False), (\n", + ", True), (10, True), (., False), ( , False), (Xopenex, False), (q.i.d, False), (., False), (and, False), (p.r.n, False), (., False), (\n", + ", True), (I, True), (will, False), (see, False), (her, False), (in, False), (a, False), (month, False), (to, False), (six, False), (weeks, False), (., False), ( , True), (She, True), (is, False), (to, False), (follow, False), (up, False), (with, False), (Dr., False), (X, False), (before, False), (that, False), (., False), (\n", + " \n", + "\n", + "\n", + " , True), (Ezoic, True), (-, True), (MTSam, False), (Sample, False), (Bottom, False), (Matched, False), (Content, False), (-, False), (native_bottom, False), (\n", + "\n", + "\n", + "\n", + "\n", + " , True), (End, True), (Ezoic, False), (-, True), (MTSam, False), (Sample, False), (Bottom, False), (Matched, False), (Content, False), (-, False), (native_bottom, False), (\n", + ", True)]\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PyRuSH.RuSH - DEBUG - Sentence(0-27):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(27-138):\t>Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(139-140):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(140-196):\t>She has chronic lung disease with bronchospastic angina.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(196-197):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(197-247):\t>We discovered new T-wave abnormalities on her EKG.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(248-249):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(249-306):\t>There was of course a four-vessel bypass surgery in 2001.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(307-308):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(308-336):\t>We did a coronary angiogram.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(337-338):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(338-450):\t>This demonstrated patent vein grafts and patent internal mammary vessel and so there was no obvious new disease.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(450-451):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(451-558):\t>She may continue in the future to have angina and she will have nitroglycerin available for that if needed.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(558-559):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(559-669):\t>Her blood pressure has been elevated and so instead of metoprolol, we have started her on Coreg 6.25 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(670-671):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(671-765):\t>This should be increased up to 25 mg b.i.d. as preferred antihypertensive in this lady's case.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(766-767):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(767-799):\t>She also is on an ACE inhibitor.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(799-800):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(800-837):\t>So her discharge meds are as follows:<\n", + "PyRuSH.RuSH - DEBUG - Sentence(837-838):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(838-862):\t>1. Coreg 6.25 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(862-863):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(863-893):\t>2. Simvastatin 40 mg nightly.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(893-894):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(894-920):\t>3. Lisinopril 5 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(920-921):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(921-944):\t>4. Protonix 40 mg a.m.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(944-945):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(945-970):\t>5. Aspirin 160 mg a day.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(970-971):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(971-993):\t>6. Lasix 20 mg b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(993-994):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(994-1017):\t>7. Spiriva puff daily.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1017-1018):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1018-1045):\t>8. Albuterol p.r.n. q.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1045-1046):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1046-1075):\t>9. Advair 500/50 puff b.i.d.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1075-1076):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1076-1106):\t>10. Xopenex q.i.d. and p.r.n.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1106-1107):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1107-1146):\t>I will see her in a month to six weeks.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1147-1148):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1148-1191):\t>She is to follow up with Dr. X before that.<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1191-1204):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1204-1209):\t>Ezoic<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1210-1263):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1264-1270):\t> <\n", + "PyRuSH.RuSH - DEBUG - Sentence(1270-1279):\t>End Ezoic<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1280-1333):\t>- MTSam Sample Bottom Matched Content - native_bottom<\n", + "PyRuSH.RuSH - DEBUG - Sentence(1333-1334):\t> <\n" + ] + } + ], "source": [ "\n", "nlp = English()\n", @@ -260,17 +508,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "e5f9fe60", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "50" + "53" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } From a4e1f3f6f07669a21f81e27a9ec712f164bb445b Mon Sep 17 00:00:00 2001 From: jianlins Date: Sun, 31 Aug 2025 21:48:56 -0600 Subject: [PATCH 106/126] Refactor logging in tests to use debug level for improved verbosity and consistency --- tests/test_PyRushSentencizer.py | 18 +++++++++--------- tests/test_cpredict_split_gaps.py | 10 +++++----- tests/test_debug.py | 18 +++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index bd83d73..1e71caa 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -71,14 +71,14 @@ def test_doc3(self): We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. ''' - from PyRuSH.RuSH import initLogger - initLogger() + from loguru import logger + logger.add(sys.stdout, level="DEBUG") nlp = English() nlp.add_pipe("medspacy_pyrush") doc = nlp(input_str) sents = [s for s in doc.sents] for sent in sents: - print('>' + str(sent) + '<\n\n') + logger.debug('>' + str(sent) + '<\n\n') # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. assert (sents[1].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' @@ -92,18 +92,18 @@ def test_customized_rules(self): We discovered new T-wave abnormalities on her EKG. There was of course a four-vessel bypass surgery in 2001. We did a coronary angiogram. ''' - from PyRuSH.RuSH import initLogger - initLogger() + from loguru import logger + logger.add(sys.stdout, level="DEBUG") from PyRuSH import RuSH pwd = os.path.dirname(os.path.abspath(__file__)) rush = RuSH(str(os.path.join(pwd, 'rush_rules.tsv')), enable_logger=True) sentences = rush.segToSentenceSpans(input_str) # for i in range(0, len(sentences)): # sentence = sentences[i] - # print('assert (sentences[' + str(i) + '].begin == ' + str(sentence.begin) + ' and sentences[' + str( - # i) + '].end == ' + str(sentence.end) + ')') + # logger.debug('assert (sentences[' + str(i) + '].begin == ' + str(sentence.begin) + ' and sentences[' + str( + # i) + '].end == ' + str(sentence.end + ')') # self.printDetails(sentences, input_str) - # print('\n\n'.join(['>{}<'.format(input_str[s.begin:s.end]) for s in sentences])) + # logger.debug('\n\n'.join(['>{}<'.format(input_str[s.begin:s.end]) for s in sentences])) nlp = English() @@ -112,7 +112,7 @@ def test_customized_rules(self): doc = nlp(input_str) sents = [s for s in doc.sents] for sent in sents: - print('>' + str(sent) + '<\n\n') + logger.debug('>' + str(sent) + '<\n\n') # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. assert (sents[1].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' diff --git a/tests/test_cpredict_split_gaps.py b/tests/test_cpredict_split_gaps.py index cc875bb..c3149fc 100644 --- a/tests/test_cpredict_split_gaps.py +++ b/tests/test_cpredict_split_gaps.py @@ -60,7 +60,7 @@ def test_split_gaps_non_ascii(): for idx, start in enumerate(starts): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) sentences.append(" ".join([doc[i].text for i in range(start, end)])) - logger.info(f"[test_split_gaps_non_ascii] Split sentences: {sentences}") + logger.debug(f"[test_split_gaps_non_ascii] Split sentences: {sentences}") # Expect sentences to be 'Hello 世界 .' and 'World .' assert any("世界" in s for s in sentences) assert any("World" in s for s in sentences) @@ -80,7 +80,7 @@ def test_split_gaps_basic(): for idx, start in enumerate(starts): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) sentences.append(" ".join([doc[i].text for i in range(start, end)])) - logger.info(f"[test_split_gaps_basic] Split sentences: {sentences}") + logger.debug(f"[test_split_gaps_basic] Split sentences: {sentences}") assert "This is a sentence ." in sentences assert "This is another one ." in sentences @@ -111,7 +111,7 @@ def test_split_gaps_whitespace_none(): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) sentences.append(" ".join([doc[i].text for i in range(start, end)])) sentences = [s.strip() for s in sentences] - logger.info(f"[test_split_gaps_whitespace_none] Split sentences: {sentences}") + logger.debug(f"[test_split_gaps_whitespace_none] Split sentences: {sentences}") # Should have two sentences, each with a single period assert sentences == [".", "."] @@ -124,7 +124,7 @@ def test_split_gaps_whitespace_set(): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) sentences.append(" ".join([doc[i].text for i in range(start, end)])) sentences = [s.strip() for s in sentences] - logger.info(f"[test_split_gaps_whitespace_set] Split sentences: {sentences}") + logger.debug(f"[test_split_gaps_whitespace_set] Split sentences: {sentences}") assert sentences == [".", "."] def test_split_gaps_mixed_whitespace_and_text(): @@ -136,7 +136,7 @@ def test_split_gaps_mixed_whitespace_and_text(): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) sentences.append(" ".join([doc[i].text for i in range(start, end)])) sentences = [s.strip() for s in sentences] - logger.info(f"[test_split_gaps_mixed_whitespace_and_text] Split sentences: {sentences}") + logger.debug(f"[test_split_gaps_mixed_whitespace_and_text] Split sentences: {sentences}") # Should have sentences: '.', 'Hello .', 'World .' assert "." in sentences assert "Hello ." in sentences diff --git a/tests/test_debug.py b/tests/test_debug.py index c4beba9..1f7d40a 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -18,17 +18,17 @@ def test_whitespace_edge_split(): doc = nlp(text_whitespace) # Try to get the actual span function from RuSH spans=sentencizer.rush.segToSentenceSpans(text_whitespace) - logger.info('Print rush segmented spans: \n----------------\n') - logger.info(f"Spans: {[(span.begin, span.end) for span in spans]}\n----------------\n") - logger.info(f'Print token offsets: ') - logger.info(f'{[(t, t.idx) for t in doc]}') + logger.debug('Print rush segmented spans: \n----------------\n') + logger.debug(f"Spans: {[(span.begin, span.end) for span in spans]}\n----------------\n") + logger.debug(f'Print token offsets: ') + logger.debug(f'{[(t, t.idx) for t in doc]}') doc_guesses = sentencizer.predict([doc])[0] - logger.info(f"doc_guesses: {doc_guesses}") + logger.debug(f"doc_guesses: {doc_guesses}") serialized = [(str(d), l) for d, l in zip(list(doc), doc_guesses)] - logger.info(f"Serialized: {serialized}") + logger.debug(f"Serialized: {serialized}") # Adjusted expected output to match spacy tokenization goal = [("First", True), ("sentence", False), (".", False), (" ", True), ("Second", True), ("sentence", False), ("before", True), ("spaces", False), (".", False), ("\n", True), ("Third", True), ("sentence", False), ("after", False), ("newline", True), (".", False)] - logger.info(f"Goal: {goal}") + logger.debug(f"Goal: {goal}") for s, g in zip(serialized, goal): - logger.info(f'{s} == {g}' if s==g else f'{s} != {g}') - assert (s == g) + logger.debug(f'{s} == {g}' if s == g else f'{s} != {g}') + assert s == g From 81448b9e92e233d71d23b1c0fb0e06ba3538f960 Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 09:29:26 -0600 Subject: [PATCH 107/126] Add length verification for split sentences in gap handling tests --- tests/test_cpredict_split_gaps.py | 57 ++++++++++++++++++++++++++++- tests/test_merge_gaps_max_length.py | 25 +++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/tests/test_cpredict_split_gaps.py b/tests/test_cpredict_split_gaps.py index c3149fc..809ef79 100644 --- a/tests/test_cpredict_split_gaps.py +++ b/tests/test_cpredict_split_gaps.py @@ -28,12 +28,21 @@ def test_split_gaps_single_token(): guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) starts = [i for i, v in enumerate(guesses[0]) if v] assert starts == [0] + # Verify split sentence length + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_single_period(): doc = make_doc_from_text(".") guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun) starts = [i for i, v in enumerate(guesses[0]) if v] assert starts == [0] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_consecutive_periods(): doc = make_doc_from_text("Hello..World.") @@ -42,6 +51,10 @@ def test_split_gaps_consecutive_periods(): # Should mark the first token and after each period assert starts[0] == 0 assert len(starts) >= 2 + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_long_sentence_no_period(): doc = make_doc_from_text("A " * 100) @@ -50,6 +63,11 @@ def test_split_gaps_long_sentence_no_period(): # Should split every ~10 tokens (since each token is 1 char + 1 space) assert starts[0] == 0 assert len(starts) > 1 + # Check each split sentence is <= 20 tokens + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len <= 20, f"Sentence from {start} to {end} has length {sentence_len} > 20" def test_split_gaps_non_ascii(): doc = make_doc_from_text("Hello 世界 . World .") @@ -64,6 +82,11 @@ def test_split_gaps_non_ascii(): # Expect sentences to be 'Hello 世界 .' and 'World .' assert any("世界" in s for s in sentences) assert any("World" in s for s in sentences) + # Verify split sentence length + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_punctuation_only(): doc = make_doc_from_text("!!! . ??? .") @@ -71,6 +94,10 @@ def test_split_gaps_punctuation_only(): starts = [i for i, v in enumerate(guesses[0]) if v] assert starts[0] == 0 assert len(starts) > 1 + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_basic(): doc = make_doc_from_text("This is a sentence. This is another one.") @@ -83,12 +110,21 @@ def test_split_gaps_basic(): logger.debug(f"[test_split_gaps_basic] Split sentences: {sentences}") assert "This is a sentence ." in sentences assert "This is another one ." in sentences + # Verify split sentence length + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_max_length_none(): doc = make_doc_from_text("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z.") guesses = cpredict_split_gaps([doc], dummy_sentencizer_fun, None) starts = [i for i, v in enumerate(guesses[0]) if v] assert starts == [0] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_max_length_set(): doc = make_doc_from_text("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z.") @@ -96,6 +132,13 @@ def test_split_gaps_max_length_set(): starts = [i for i, v in enumerate(guesses[0]) if v] assert starts[0] == 0 assert len(starts) > 1 + # Check each split sentence is <= 10 characters + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + char_len = len(sentence_text) + logger.debug(f"[test_split_gaps_max_length_set] Sentence from {start} to {end} has char length {char_len}") + assert char_len <= 10, f"Sentence from {start} to {end} has char length {char_len} > 10" def test_split_gaps_empty_doc(): doc = make_doc_from_text("") @@ -114,6 +157,10 @@ def test_split_gaps_whitespace_none(): logger.debug(f"[test_split_gaps_whitespace_none] Split sentences: {sentences}") # Should have two sentences, each with a single period assert sentences == [".", "."] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_whitespace_set(): doc = make_doc_from_text(" . .") @@ -126,6 +173,10 @@ def test_split_gaps_whitespace_set(): sentences = [s.strip() for s in sentences] logger.debug(f"[test_split_gaps_whitespace_set] Split sentences: {sentences}") assert sentences == [".", "."] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 def test_split_gaps_mixed_whitespace_and_text(): doc = make_doc_from_text(" . Hello . . World .") @@ -140,4 +191,8 @@ def test_split_gaps_mixed_whitespace_and_text(): # Should have sentences: '.', 'Hello .', 'World .' assert "." in sentences assert "Hello ." in sentences - assert "World ." in sentences \ No newline at end of file + assert "World ." in sentences + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_len = end - start + assert sentence_len > 0 \ No newline at end of file diff --git a/tests/test_merge_gaps_max_length.py b/tests/test_merge_gaps_max_length.py index 58c58b1..aadf594 100644 --- a/tests/test_merge_gaps_max_length.py +++ b/tests/test_merge_gaps_max_length.py @@ -48,6 +48,13 @@ def test_merge_gaps_basic(): print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) print("guesses:", guesses[0]) assert guesses[0].count(True) == 2 + # Verify split sentence character length is non-zero + starts = [i for i, v in enumerate(guesses[0]) if v] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + char_len = len(sentence_text) + assert char_len > 0 def test_merge_gaps_basic2(): nlp = spacy.blank('en') @@ -61,6 +68,12 @@ def test_merge_gaps_basic2(): print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) print("guesses:", guesses[0]) assert guesses[0].count(True) == 2 + starts = [i for i, v in enumerate(guesses[0]) if v] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + char_len = len(sentence_text) + assert char_len > 0 def test_merge_gaps_max_length(): @@ -73,6 +86,12 @@ def test_merge_gaps_max_length(): print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) # Should split at least once assert guesses[0].count(True) > 1 + starts = [i for i, v in enumerate(guesses[0]) if v] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + char_len = len(sentence_text) + assert char_len <= max_len, f"Sentence from {start} to {end} has char length {char_len} > {max_len}" def test_merge_gaps_whitespace_edge(): nlp = spacy.blank('en') @@ -83,3 +102,9 @@ def test_merge_gaps_whitespace_edge(): print("cpredict_merge_gaps sentence starts:", [(i, token.text) for i, token in enumerate(doc) if guesses[0][i]]) # Should split at whitespace/newline before max length assert guesses[0].count(True) >= 3 + starts = [i for i, v in enumerate(guesses[0]) if v] + for idx, start in enumerate(starts): + end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) + sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + char_len = len(sentence_text) + assert char_len <= 15, f"Sentence from {start} to {end} has char length {char_len} > 15" From ccd23afe66934c88cebebfec57bd049941fa552e Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 09:30:16 -0600 Subject: [PATCH 108/126] Bump version to 1.0.11 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 6031715..7696b31 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.10' +__version__ = '1.0.11' From 09b248e2e2ebd54d356e6eac3897ce417a97540a Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 09:50:22 -0600 Subject: [PATCH 109/126] Add loguru to requirements for enhanced logging capabilities --- dev-requirements.txt | 3 ++- requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 7813532..1e13a64 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -6,4 +6,5 @@ PyFastNER>=1.0.8 quicksectx>=0.3.5 pytest numpy -wheel \ No newline at end of file +wheel +loguru \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ebd3a61..0432f76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ setuptools spacy<3.8; python_version < "3.12" spacy>=3.8; python_version >= "3.12" PyFastNER>=1.0.8 -quicksectx>=0.3.5 \ No newline at end of file +quicksectx>=0.3.5 +loguru \ No newline at end of file From 5712f7f8719399c01a1d65dcb00c0800644bcbd3 Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 09:57:54 -0600 Subject: [PATCH 110/126] Add medspacy to development requirements --- dev-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 1e13a64..1ffb0d2 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -7,4 +7,5 @@ quicksectx>=0.3.5 pytest numpy wheel -loguru \ No newline at end of file +loguru +medspacy \ No newline at end of file From 417264a7644f62556cf95e8fda87505084c475c5 Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 13:19:49 -0600 Subject: [PATCH 111/126] Refactor max_sentence_length handling in gap merging logic for improved accuracy and clarity in sentence segmentation --- PyRuSH/StaticSentencizerFun.pyx | 49 ++++----- notebooks/debug.ipynb | 152 +++++++++++++++++++++++++++- tests/test_merge_gaps_max_length.py | 19 +++- 3 files changed, 187 insertions(+), 33 deletions(-) diff --git a/PyRuSH/StaticSentencizerFun.pyx b/PyRuSH/StaticSentencizerFun.pyx index d982356..e250f02 100644 --- a/PyRuSH/StaticSentencizerFun.pyx +++ b/PyRuSH/StaticSentencizerFun.pyx @@ -30,14 +30,16 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): t = 0 span_idx = 0 num_spans = len(spans) - sentence_len = 0 + sentence_start_t = 0 + last_ws_token = None while t < len(doc): token = doc[t] # 1. Mark token as sentence start if it overlaps with RuSH span.begin if span_idx < num_spans and token.idx == spans[span_idx].begin: doc_guesses[t] = True logger.debug(f"[doc {doc_idx}] Mark sentence start at token {t}: '{token.text}' idx={token.idx} (span begin)") - sentence_len = len(token.text) + sentence_start_t = t + last_ws_token = None span = spans[span_idx] # Find last token in span last_token_in_span = t @@ -78,33 +80,26 @@ cpdef cpredict_merge_gaps(docs, sentencizer_fun, max_sentence_length=None): break t = gap_end continue - # 3. Enhanced max_sentence_length logic: check current token + next whitespace token + # 3. Split at last whitespace or previous token BEFORE exceeding max length if max_sentence_length is not None: - next_ws_len = 0 - if t + 1 < len(doc) and doc[t + 1].text.isspace(): - next_ws_len = len(doc[t + 1].text) - # If current token itself would exceed max_sentence_length, split here - if sentence_len + len(token.text) > max_sentence_length: - doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (current token exceeds limit)") - sentence_len = len(token.text) - t += 1 - continue - # If next whitespace token would push over the limit, split here - if next_ws_len > 0 and sentence_len + len(token.text) + next_ws_len > max_sentence_length: - doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (next whitespace would exceed limit)") - sentence_len = len(token.text) - t += 1 - continue - # If next token is not whitespace and would push over the limit, split here - if t + 1 < len(doc) and not doc[t + 1].text.isspace() and sentence_len + len(token.text) + len(doc[t + 1].text) > max_sentence_length: - doc_guesses[t] = True - logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {t}: '{token.text}' idx={token.idx} (next non-whitespace would exceed limit)") - sentence_len = len(token.text) - t += 1 + sentence_len = 0 + last_ws_token = -1 + for k in range(sentence_start_t, t): + sentence_len += len(doc[k].text) + if doc[k].text.isspace(): + last_ws_token = k + current_token_len = len(token.text) + # If adding current token would exceed max length + if sentence_len + current_token_len > max_sentence_length: + # Find split point: last whitespace before limit, else previous token + split_token = last_ws_token if last_ws_token >= sentence_start_t else t-1 if t > sentence_start_t else t + # Prevent split_token from being the same as sentence_start_t (in case no whitespace and only one token) + if split_token == sentence_start_t and t > sentence_start_t: + split_token = t-1 + doc_guesses[split_token] = True + logger.debug(f"[doc {doc_idx}] Mark/Split due to max_sentence_length at token {split_token}: '{doc[split_token].text}' idx={doc[split_token].idx} (split before exceeding limit)") + sentence_start_t = split_token continue - sentence_len += len(token.text) t += 1 logger.debug(f"[doc {doc_idx}] Sentence start guesses: {[i for i, v in enumerate(doc_guesses) if v]}") guesses.append(doc_guesses) diff --git a/notebooks/debug.ipynb b/notebooks/debug.ipynb index 459bbae..13f896f 100644 --- a/notebooks/debug.ipynb +++ b/notebooks/debug.ipynb @@ -597,10 +597,160 @@ " print('>' + str(sent) + '<\\n----\\n')" ] }, + { + "cell_type": "markdown", + "id": "5cf31051", + "metadata": {}, + "source": [ + "## Test dummy sentencizer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a30b61a6", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from loguru import logger\n", + "logger.remove()\n", + "logger.add(sys.stderr, level=\"DEBUG\")\n", + "\n", + "import pytest\n", + "import spacy\n", + "from PyRuSH.StaticSentencizerFun import cpredict_merge_gaps\n", + "\n", + "def dummy_sentencizer(text):\n", + " # Dummy sentencizer: splits on periods and newlines\n", + " spans = []\n", + " start = 0\n", + " split=False\n", + " for i, c in enumerate(text):\n", + " if split:\n", + " spans.append(type('Span', (), {'begin': start, 'end': i+1})())\n", + " start = i+1\n", + " split=False\n", + " if c in '.\\n':\n", + " split=True \n", + " if start < len(text):\n", + " spans.append(type('Span', (), {'begin': start, 'end': len(text)})())\n", + " return spans" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "71159489", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-09-02 12:51:47.767\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1mcpredict_merge_gaps called: docs=1, max_sentence_length=20\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.768\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] 1 spans detected: [(0, 89)]\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.770\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] Mark sentence start at token 0: 'A' idx=0 (span begin)\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.772\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] Mark/Split due to max_sentence_length at token 4: 'that' idx=21 (split before exceeding limit)\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.773\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] Mark/Split due to max_sentence_length at token 9: 'whitespace' idx=45 (split before exceeding limit)\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.774\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] Mark/Split due to max_sentence_length at token 12: 'max' idx=67 (split before exceeding limit)\u001b[0m\n", + "\u001b[32m2025-09-02 12:51:47.775\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[34m\u001b[1m[doc 0] Sentence start guesses: [0, 4, 9, 12]\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dummy_sentencizer spans: [(0, 89, 'A very long sentence that should be split at whitespace before the max length is reached.')]\n" + ] + } + ], + "source": [ + "nlp = spacy.blank('en')\n", + "doc = nlp(\"A very long sentence that should be split at whitespace before the max length is reached.\")\n", + "max_len = 20\n", + "spans = dummy_sentencizer(doc.text)\n", + "print(\"dummy_sentencizer spans:\", [(span.begin, span.end, doc.text[span.begin:span.end]) for span in spans])\n", + "guesses = cpredict_merge_gaps([doc], dummy_sentencizer, max_sentence_length=max_len)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2f54d05a", + "metadata": {}, + "outputs": [], + "source": [ + "sys.path.append('../PyRuSH')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ea7241fd", + "metadata": {}, + "outputs": [], + "source": [ + "from StaticSentencizerFun import cpredict_merge_gaps,cpredict_split_gaps, cset_annotations\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "790f0b46", + "metadata": {}, + "outputs": [], + "source": [ + "cset_annotations(doc, guesses)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ab9d18a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(list(doc.sents))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8c5bdcf7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">A very long sentence<\n", + ">that should be split at<\n", + ">whitespace before the<\n", + ">max length is reached.<\n" + ] + } + ], + "source": [ + "for s in doc.sents:\n", + " print(f'>{s.text}<')" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "5cf31051", + "id": "9c4b8f95", "metadata": {}, "outputs": [], "source": [] diff --git a/tests/test_merge_gaps_max_length.py b/tests/test_merge_gaps_max_length.py index aadf594..902ddf1 100644 --- a/tests/test_merge_gaps_max_length.py +++ b/tests/test_merge_gaps_max_length.py @@ -88,10 +88,12 @@ def test_merge_gaps_max_length(): assert guesses[0].count(True) > 1 starts = [i for i, v in enumerate(guesses[0]) if v] for idx, start in enumerate(starts): - end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) - sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + last_token_idx = starts[idx + 1] - 1 if idx + 1 < len(starts) else len(doc) - 1 + end_offset = doc[last_token_idx].idx + len(doc[last_token_idx]) + sentence_text = doc.text[doc[start].idx:end_offset] char_len = len(sentence_text) - assert char_len <= max_len, f"Sentence from {start} to {end} has char length {char_len} > {max_len}" + logger.debug(f'{sentence_text} --- length: {char_len}') + assert char_len <= max_len, f"Sentence from {start} to {last_token_idx} has char length {char_len} > {max_len}" def test_merge_gaps_whitespace_edge(): nlp = spacy.blank('en') @@ -105,6 +107,13 @@ def test_merge_gaps_whitespace_edge(): starts = [i for i, v in enumerate(guesses[0]) if v] for idx, start in enumerate(starts): end = starts[idx + 1] if idx + 1 < len(starts) else len(doc) - sentence_text = "".join([doc[i].text_with_ws for i in range(start, end)]) + # Find last non-whitespace token in the chunk + last_token_idx = end - 1 + while last_token_idx >= start and doc[last_token_idx].text.isspace(): + last_token_idx -= 1 + if last_token_idx < start: + continue # skip empty chunk + end_offset = doc[last_token_idx].idx + len(doc[last_token_idx]) + sentence_text = doc.text[doc[start].idx:end_offset] char_len = len(sentence_text) - assert char_len <= 15, f"Sentence from {start} to {end} has char length {char_len} > 15" + assert char_len <= 15, f"Sentence from {start} to {last_token_idx} has char length {char_len} > 15" From b670a40a1cb44fe39e476fb8062620f8bfc97dd0 Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 13:27:29 -0600 Subject: [PATCH 112/126] Update rules_path in medspacy configuration to use the correct directory --- tests/test_debug.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_debug.py b/tests/test_debug.py index 1f7d40a..34fe8a6 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -10,7 +10,7 @@ def test_whitespace_edge_split(): text_whitespace = "First sentence. Second sentence before spaces.\nThird sentence after newline." nlp = English() nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": "tests/rush_rules.tsv", + "rules_path": "conf/rush_rules.tsv", "merge_gaps": False, "max_sentence_length": 20 }) From 0009be1bc5bb610e7459fd80ba886e367a7b38fd Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 13:36:18 -0600 Subject: [PATCH 113/126] Update rules_path in test_whitespace_edge_split to use dynamic path resolution --- tests/test_debug.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_debug.py b/tests/test_debug.py index 34fe8a6..e5130d6 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -9,8 +9,9 @@ def test_whitespace_edge_split(): import medspacy text_whitespace = "First sentence. Second sentence before spaces.\nThird sentence after newline." nlp = English() + rule_path=os.path.join(os.path.dirname(__file__), 'rush_rules.tsv') nlp.add_pipe("medspacy_pyrush", config={ - "rules_path": "conf/rush_rules.tsv", + "rules_path": rule_path, "merge_gaps": False, "max_sentence_length": 20 }) From 5680f0a227c22fce0980a5ab5e698a9e5084a75a Mon Sep 17 00:00:00 2001 From: jianlins Date: Tue, 2 Sep 2025 15:19:42 -0600 Subject: [PATCH 114/126] Update README to include revision history for version 1.0.11 with improved sentence splitting logic and edge case handling --- README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.rst b/README.rst index 1464a03..0bb2dc0 100644 --- a/README.rst +++ b/README.rst @@ -52,3 +52,16 @@ Start from version 1.0.3, PyRuSH adds Spacy compatible Sentencizer component: Py A Colab Notebook Demo --------------------------- Feel free to try this runnable `Colab notebook Demo `_ + +Revision History +---------------- + +**1.0.11 (2025-09-02)** + +- Improved sentence splitting logic: Sentences are now split at the last token before exceeding the max length, ensuring no chunk exceeds the specified limit. +- Edge case handling: Trailing whitespaces (caused by spacy sentence labeling mechanism) can be optionally split into a separate sentence (merge_gaps=False) to avoid necessarily long sentences. + +**1.0.9 (2024-10-27)** + +- Initial release with spaCy 3.x compatibility and core RuSH logic. +- Added Spacy-compatible PyRuSHSentencizer component. From 634bb93f592747add299b30b87a4ccee6e26fa16 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 18:12:12 +0000 Subject: [PATCH 115/126] add additional test cases, revert rules --- conf/rush_rules.tsv | 10 ++----- tests/test_PyRushSentencizer.py | 2 +- tests/test_PyRushSentencizer2.py | 45 ++++++++++++++++++++++++++++++++ tests/test_Rush.py | 44 ++++++++++++++++++++++++++----- 4 files changed, 86 insertions(+), 15 deletions(-) create mode 100644 tests/test_PyRushSentencizer2.py diff --git a/conf/rush_rules.tsv b/conf/rush_rules.tsv index afb66d4..0460155 100644 --- a/conf/rush_rules.tsv +++ b/conf/rush_rules.tsv @@ -20,7 +20,7 @@ #stbegin is the marker for sentence begin, the span of sentence will start at the begin of the captured group #stbegin has two scores 0, 1: 0 for true sentence begin clues, 1 for false sentence begin clues which will overwrite 0-scored rules when they are overlapping. -#stend is the marker for sentence end, the span of sentence will end at the end of the captured group +#stend is the marker for sentence begin, the span of sentence will end at the end of the captured group #stend also has two scores 2, 3: 2 for true sentence end clues, 3 for false sentence end clues which will overwrite 2-scored rules when they are overlapping # \b the begin of an input @@ -47,12 +47,6 @@ \b\s+(\C 0 stbegin \b\s+(\d 0 stbegin \c.\s+(\C) 0 stbegin -Dr.\s+(\C) 1 stbegin -Mr.\s+(\C) 1 stbegin -Ms.\s+(\C) 1 stbegin -Miss.\s+(\C) 1 stbegin -Mrs.\s+(\C) 1 stbegin -dr.\s+(\C) 1 stbegin mL.\s+(\C) 0 stbegin *) 1 stbegin \c\c.\s+(\C) 0 stbegin @@ -245,7 +239,7 @@ dr.\s+(\C) 1 stbegin \n(? \C 0 stbegin \n(? \c 0 stbegin \n(. \C 0 stbegin -\n(\+ \C 0 stbegin +\n(+ \C 0 stbegin \n(/ \C 0 stbegin \n+\d\d-\d\d\s+(\C 0 stbegin \n+\d+-\d\d-\d\d\s+(\C 0 stbegin diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index 1e71caa..ff136d9 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -58,7 +58,7 @@ def test_doc2(self): print('>' + str(sent) + '<\n\n') # New expected count includes whitespace-only sentences - assert (len(sents) == 53) + assert (len(sents) == 51) # For content checks, filter out whitespace-only sentences content_sents = [s for s in sents if s.text.strip()] assert (content_sents[0].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with chest pain and respiratory insufficiency.') diff --git a/tests/test_PyRushSentencizer2.py b/tests/test_PyRushSentencizer2.py new file mode 100644 index 0000000..8f3c1b7 --- /dev/null +++ b/tests/test_PyRushSentencizer2.py @@ -0,0 +1,45 @@ +import unittest +import os +import sys +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from PyRuSH import PyRuSHSentencizer +from spacy.lang.en import English + + +class TestRuSH(unittest.TestCase): + + def setUp(self): + self.pwd = os.path.dirname(os.path.abspath(__file__)) + + # def test_doc(self): + # nlp = English() + # nlp.add_pipe("medspacy_pyrush") + # doc = nlp("This is a sentence. This is another sentence.") + # print('\n'.join([str(s) for s in doc.sents])) + # print('\nTotal sentences: {}'.format(len([s for s in doc.sents]))) + # print('\ndoc is an instance of {}'.format(type(doc))) + + def test_doc4(self): + input_str='''Ms. [**Known patient lastname 2004**] was admitted on [**2573-5-30**]. Ultrasound +at the time of admission demonstrated pancreatic duct dilitation and +edematous gallbladder. She was admitted to the ICU. +Discharge Medications: +1. Miconazole Nitrate 2 % Powder Sig: One (1) Appl Topical BID +(2 times a day) as needed. +2. Heparin Sodium (Porcine) 5,000 unit/mL Solution Sig: One (1) +Injection TID (3 times a day). +3. Acetaminophen 160 mg/5 mL Elixir Sig: One (1) PO Q4-6H +(every 4 to 6 hours) as needed.''' + nlp = English() + nlp.add_pipe("medspacy_pyrush", config={"rules_path": os.path.join(self.pwd, 'rush_rules.tsv')}) + nlp.initialize() + doc = nlp(input_str) + sents = [s for s in doc.sents] + for sent in sents: + print('>' + str(sent) + '<\n\n') + assert(sents[-1].text=='''Sig: One (1) PO Q4-6H +(every 4 to 6 hours) as needed.''') + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_Rush.py b/tests/test_Rush.py index bedbef6..18bd374 100644 --- a/tests/test_Rush.py +++ b/tests/test_Rush.py @@ -95,7 +95,7 @@ def test7(self): sentences = rush.segToSentenceSpans(input_str) self.printDetails(sentences, input_str) - def test_doc2(self): + def test8(self): input_str = ''' 9. Advair b.i.d. 10. Xopenex q.i.d. and p.r.n. @@ -109,12 +109,44 @@ def test_doc2(self): sent = sentences[1] assert (input_str[sent.begin:sent.end] == '10. Xopenex q.i.d. and p.r.n.') - - def test_doc11(self): + def test9(self): input_str=' This is a sentence. This is another sentence.' - sentences=self.rush.segToSentenceSpans(input_str) - for sent in sentences: - print('>' + input_str[sent.begin:sent.end] + '<\n') + self.rush = RuSH(str(os.path.join(self.pwd, 'rush_rules.tsv')), min_sent_chars=2, enable_logger=True) + sentences = self.rush.segToSentenceSpans(input_str) + self.printDetails(sentences, input_str) + + def test10(self): + input_str='''Ms. [**Known patient lastname 2004**] was admitted on [**2573-5-30**]. Ultrasound +at the time of admission demonstrated pancreatic duct dilitation and +edematous gallbladder. She was admitted to the ICU. +Discharge Medications: +1. Miconazole Nitrate 2 % Powder Sig: One (1) Appl Topical BID +(2 times a day) as needed. +2. Heparin Sodium (Porcine) 5,000 unit/mL Solution Sig: One (1) +Injection TID (3 times a day). +3. Acetaminophen 160 mg/5 mL Elixir Sig: One (1) PO Q4-6H +(every 4 to 6 hours) as needed.''' + self.rush = RuSH(str(os.path.join(self.pwd, 'rush_rules.tsv')), min_sent_chars=2, enable_logger=True) + sentences = self.rush.segToSentenceSpans(input_str) + self.printDetails(sentences, input_str) + assert (sentences[0].begin == 0 and sentences[0].end == 173) + assert (sentences[1].begin == 174 and sentences[1].end == 202) + assert (sentences[2].begin == 203 and sentences[2].end == 225) + assert (sentences[3].begin == 226 and sentences[3].end == 258) + assert (sentences[4].begin == 259 and sentences[4].end == 316) + assert (sentences[5].begin == 317 and sentences[5].end == 367) + assert (sentences[6].begin == 368 and sentences[6].end == 411) + assert (sentences[7].begin == 412 and sentences[7].end == 447) + assert (sentences[8].begin == 448 and sentences[8].end == 502) + + def test11(self): + input_str = '''Patient doesn't have heart disease or high blood pressure, but their dad did have +diabetes. Pt is a 63M w/ h/o metastatic carcinoid tumor, HTN and hyperlipidemia.''' + self.rush = RuSH(str(os.path.join(self.pwd, 'rush_rules.tsv')), min_sent_chars=2, enable_logger=True) + sentences = self.rush.segToSentenceSpans(input_str) + self.printDetails(sentences, input_str) + assert (sentences[0].begin == 0 and sentences[0].end == 91) + assert (sentences[1].begin == 92 and sentences[1].end == 162) if __name__ == '__main__': unittest.main() From 0c771c433df72df05df834887aae74dc8d373cbc Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 18:17:27 +0000 Subject: [PATCH 116/126] Update GitHub Actions workflow to use latest OS versions and remove outdated Python versions --- .github/workflows/run_pytests.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 517afb6..693eb81 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -22,13 +22,12 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ ubuntu-20.04, ubuntu-20.04 ] - - [ macos-12, macosx_12 ] - - [ windows-2019, windows-2019 ] - - [ windows-2022, windows-2022 ] + - [ ubuntu-latest, ubuntu-latest ] + - [ macos-latest, macosx_latest ] + - [ windows-latest, windows-latest ] # spacy doesn't compile win32 # python: ["cp36"] # Note: Wheels not needed for PyPy - python-version: [ "3.6", "3.7", "3.8", "3.9","3.10.x","3.11.x", "3.12"] # Note: Wheels not needed for PyPy + python-version: [ "3.9","3.10.x","3.11.x", "3.12"] # Note: Wheels not needed for PyPy timeout-minutes: 45 steps: - name: Checkout From 22241a25f58fcece794db7b239dd9e29719eb9ff Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 20:03:52 +0000 Subject: [PATCH 117/126] Remove license_files entry from setup.cfg --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 162c62a..ffcad36 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,6 @@ readme = README.md license = MIT -license_files = LICENSE [bdist_wheel] python_tag=py3 From 1d59bbc07f8e334cc95a25db7729b9fa7ab24429 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 20:09:45 +0000 Subject: [PATCH 118/126] Update workflow to ensure setuptools is upgraded during dependency installation --- .github/workflows/run_pytests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_pytests.yml b/.github/workflows/run_pytests.yml index 693eb81..4322040 100644 --- a/.github/workflows/run_pytests.yml +++ b/.github/workflows/run_pytests.yml @@ -47,6 +47,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r dev-requirements.txt + pip install -U setuptools - name: pytests run: | From a1b2a862f2bde27cdb9f97cf422078094d94d60d Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 20:19:36 +0000 Subject: [PATCH 119/126] Bump version to 1.0.12dev0 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 7696b31..5905afc 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.11' +__version__ = '1.0.12dev0' From 83b807ac564aca9e0374bf79fc95d960c0c82cbd Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 21:53:33 +0000 Subject: [PATCH 120/126] Update pip_install_unitest workflow to use dev-requirements.txt for dependency installation --- .github/workflows/pip_install_unitest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index 8066859..6dd9fed 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -34,7 +34,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' cache-dependency-path: | - 'requirements/requirements.txt' + 'requirements/dev-requirements.txt' - run: | python --version pip install --upgrade pip @@ -44,7 +44,7 @@ jobs: if: ${{ github.event.inputs.install_prerelease == 'true' }} run: | # use this to avoid install prereleases of dependencies packages - pip install -r requirements.txt + pip install -r dev-requirements.txt pip install --pre PyRuSH - name: Install formal released medspacy From 700310ede3fadfe113e4e10cbb5dc3324f88f1b2 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 21:59:49 +0000 Subject: [PATCH 121/126] Refactor test rule path to simplify directory structure and improve readability --- .github/workflows/pip_install_unitest.yml | 2 +- tests/test_PyRuSHSentencizer_param.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index 6dd9fed..67a59eb 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -58,7 +58,7 @@ jobs: # ls /opt/hostedtoolcache/Python/3*/x64/lib/python*/site-packages/conf pip install pytest python -c "import shutil;shutil.rmtree('PyRuSH')" - python -c "import shutil;shutil.rmtree('conf')" + # python -c "import shutil;shutil.rmtree('conf')" ls python --version pytest diff --git a/tests/test_PyRuSHSentencizer_param.py b/tests/test_PyRuSHSentencizer_param.py index 52db908..36415cb 100644 --- a/tests/test_PyRuSHSentencizer_param.py +++ b/tests/test_PyRuSHSentencizer_param.py @@ -6,7 +6,7 @@ text_short = "Sentence one. Sentence two!" text_long = "This is a very long sentence that should be split at whitespace before the max length is reached. " * 5 text_whitespace = "First sentence. Second sentence after spaces.\nThird sentence after newline." -rule_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf", "rush_rules.tsv") +rule_path = os.path.join(os.path.dirname(__file__), "rush_rules.tsv") def make_nlp(merge_gaps, max_sentence_length): nlp = English() From aa688c82597333ac9ac9e070616ee56a1b3d1c93 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 22:10:07 +0000 Subject: [PATCH 122/126] use test rule path --- tests/test_PyRushSentencizer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_PyRushSentencizer.py b/tests/test_PyRushSentencizer.py index ff136d9..26df271 100644 --- a/tests/test_PyRushSentencizer.py +++ b/tests/test_PyRushSentencizer.py @@ -10,7 +10,7 @@ class TestRuSH(unittest.TestCase): def setUp(self): - pwd = os.path.dirname(os.path.abspath(__file__)) + self.pwd = os.path.dirname(os.path.abspath(__file__)) def test_doc(self): nlp = English() @@ -51,7 +51,7 @@ def test_doc2(self): End Ezoic - MTSam Sample Bottom Matched Content - native_bottom ''' nlp = English() - nlp.add_pipe("medspacy_pyrush") + nlp.add_pipe("medspacy_pyrush", config={"rules_path": os.path.join(self.pwd, 'rush_rules.tsv')}) doc = nlp(input_str) sents = [s for s in doc.sents] for sent in sents: @@ -74,7 +74,7 @@ def test_doc3(self): from loguru import logger logger.add(sys.stdout, level="DEBUG") nlp = English() - nlp.add_pipe("medspacy_pyrush") + nlp.add_pipe("medspacy_pyrush", config={"rules_path": os.path.join(self.pwd, 'rush_rules.tsv')}) doc = nlp(input_str) sents = [s for s in doc.sents] for sent in sents: @@ -116,4 +116,5 @@ def test_customized_rules(self): # SpaCy has no control of sentence end. Thus, it ends up with sloppy ends. assert (sents[1].text == 'Ms. ABCD is a 69-year-old lady, who was admitted to the hospital with' - ' chest pain and respiratory insufficiency.') \ No newline at end of file + ' chest pain and respiratory insufficiency.') + \ No newline at end of file From 5cf4ba46906369ba1cafa06bc438a841a592b48f Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 22:11:47 +0000 Subject: [PATCH 123/126] Bump version to 1.0.12dev1 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 5905afc..8da1f5b 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.12dev0' +__version__ = '1.0.12dev1' From b30cc9b6879c2f99bc34466a9665ffcc474fa027 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 22:58:14 +0000 Subject: [PATCH 124/126] force reinstall --- .github/workflows/pip_install_unitest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index 67a59eb..87562c3 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -45,12 +45,12 @@ jobs: run: | # use this to avoid install prereleases of dependencies packages pip install -r dev-requirements.txt - pip install --pre PyRuSH + pip install --pre -I PyRuSH - name: Install formal released medspacy if: ${{ github.event.inputs.install_prerelease == 'false' }} run: | - pip install PyRuSH + pip install -I PyRuSH - name: tests From ac5818d9d056fc5f6d22fad4c0784afbdfebca84 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 23:02:16 +0000 Subject: [PATCH 125/126] previous configuration results in numpy incapatable version issue. retry --- .github/workflows/pip_install_unitest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip_install_unitest.yml b/.github/workflows/pip_install_unitest.yml index 87562c3..d7a1c42 100644 --- a/.github/workflows/pip_install_unitest.yml +++ b/.github/workflows/pip_install_unitest.yml @@ -43,9 +43,9 @@ jobs: - name: Install prereleased PyRuSH if: ${{ github.event.inputs.install_prerelease == 'true' }} run: | - # use this to avoid install prereleases of dependencies packages + # use this to avoid install prereleases of dependencies packages + pip install --pre PyRuSH pip install -r dev-requirements.txt - pip install --pre -I PyRuSH - name: Install formal released medspacy if: ${{ github.event.inputs.install_prerelease == 'false' }} From 816367ee0bbec0e2d5416a8504e89034b6ba9450 Mon Sep 17 00:00:00 2001 From: Jianlin Shi Date: Wed, 10 Sep 2025 23:57:08 +0000 Subject: [PATCH 126/126] Update version to 1.0.12 --- PyRuSH/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyRuSH/__init__.py b/PyRuSH/__init__.py index 8da1f5b..bc28b14 100644 --- a/PyRuSH/__init__.py +++ b/PyRuSH/__init__.py @@ -30,7 +30,7 @@ from .PyRuSHSentencizer import PyRuSHSentencizer from .RuSH import RuSH, BEGIN, END -__version__ = '1.0.12dev1' +__version__ = '1.0.12'