From 8a49fcf4235b09d084e2b3dcf4294f9c00fd7d5d Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 6 Sep 2025 11:02:13 +0000 Subject: [PATCH] Refactor and expand social media analyzer This commit refactors the existing Facebook and scam analyzers into a single, generic social media analyzer. The new `social_media_analyzer` supports the following platforms: - Facebook - Instagram - WhatsApp - TikTok - Tinder - Snapchat - WeChat The fake profile detector and scam message analyzer have been generalized to be platform-aware. The user is now prompted to select a platform before performing an analysis. The old `facebook_analyzer` and `scam_detector` directories have been removed. --- facebook_analyzer/__init__.py | 8 - .../__pycache__/__init__.cpython-312.pyc | Bin 131 -> 0 bytes .../fake_profile_detector.cpython-312.pyc | Bin 9354 -> 0 bytes .../phishing_detector.cpython-312.pyc | Bin 7515 -> 0 bytes facebook_analyzer/fake_profile_detector.py | 199 ----------------- facebook_analyzer/phishing_detector.py | 206 ----------------- scam_detector/__init__.py | 18 -- .../__pycache__/__init__.cpython-312.pyc | Bin 562 -> 0 bytes .../__pycache__/analyzer.cpython-312.pyc | Bin 8681 -> 0 bytes .../__pycache__/heuristics.cpython-312.pyc | Bin 5706 -> 0 bytes scam_detector/analyzer.py | 211 ------------------ scam_detector/heuristics.py | 163 -------------- scam_main.py | 101 --------- social_media_analyzer/__init__.py | 0 .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 135 bytes .../fake_profile_detector.cpython-312.pyc | Bin 0 -> 9463 bytes .../__pycache__/heuristics.cpython-312.pyc | Bin 0 -> 4182 bytes .../__pycache__/main.cpython-312.pyc | Bin 0 -> 4263 bytes .../__pycache__/scam_detector.cpython-312.pyc | Bin 0 -> 7178 bytes .../fake_profile_detector.py | 194 ++++++++++++++++ social_media_analyzer/heuristics.py | 136 +++++++++++ social_media_analyzer/main.py | 61 +++++ social_media_analyzer/scam_detector.py | 151 +++++++++++++ 23 files changed, 542 insertions(+), 906 deletions(-) delete mode 100644 facebook_analyzer/__init__.py delete mode 100644 facebook_analyzer/__pycache__/__init__.cpython-312.pyc delete mode 100644 facebook_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc delete mode 100644 facebook_analyzer/__pycache__/phishing_detector.cpython-312.pyc delete mode 100644 facebook_analyzer/fake_profile_detector.py delete mode 100644 facebook_analyzer/phishing_detector.py delete mode 100644 scam_detector/__init__.py delete mode 100644 scam_detector/__pycache__/__init__.cpython-312.pyc delete mode 100644 scam_detector/__pycache__/analyzer.cpython-312.pyc delete mode 100644 scam_detector/__pycache__/heuristics.cpython-312.pyc delete mode 100644 scam_detector/analyzer.py delete mode 100644 scam_detector/heuristics.py delete mode 100644 scam_main.py create mode 100644 social_media_analyzer/__init__.py create mode 100644 social_media_analyzer/__pycache__/__init__.cpython-312.pyc create mode 100644 social_media_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc create mode 100644 social_media_analyzer/__pycache__/heuristics.cpython-312.pyc create mode 100644 social_media_analyzer/__pycache__/main.cpython-312.pyc create mode 100644 social_media_analyzer/__pycache__/scam_detector.cpython-312.pyc create mode 100644 social_media_analyzer/fake_profile_detector.py create mode 100644 social_media_analyzer/heuristics.py create mode 100644 social_media_analyzer/main.py create mode 100644 social_media_analyzer/scam_detector.py diff --git a/facebook_analyzer/__init__.py b/facebook_analyzer/__init__.py deleted file mode 100644 index c4c716a..0000000 --- a/facebook_analyzer/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# This file makes the 'facebook_analyzer' directory a Python package. -# You can leave it empty or add package-level imports here if needed later. - -# For example, you might want to make functions from modules directly available: -# from .phishing_detector import analyze_message_for_phishing -# from .fake_profile_detector import analyze_profile_for_fakeness - -# For now, keeping it simple. User will import specific modules. diff --git a/facebook_analyzer/__pycache__/__init__.cpython-312.pyc b/facebook_analyzer/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 5e755b324043417982db9aa582e050e30050eb70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 131 zcmX@j%ge<81gqMTGnO$jFgylv(7|UGpvZKFN(N0vzm*I{OhDdekeXjg`iTVv`e}*D zsY&_y+3|^ai8+;3sYUwn@tJvJ z8D?f^g|l=vMfc)GTh!=FTiVlE}#jwmDI zZB)D+RmQ~1W8&?3g^9Tr#M^P@1l|(Li}Y4V+=wU{<>b#JH=@d^pG!B|cuX1JoRB2* z-KfOdm5D!qcKSpl@t)rI{?fkp(=G2E%B26k;Y-6kPJI8td%h9pajR3DKjp8FNRnZQxaBs-~4K+mvn9&?lK;G9!;wIj-?FQ?)B>Le22ZB-0I>hBYpm?q+vg zSyw+Q^GRlMSz{$`T85sUZFE$)mDP>MQBdpr;G#<$osG*iuJ0$$VzS9u&L|cQo#nAC zFK==t=W<3_x2;UNy%ANFMhBQGmh48%u9SGAdyA`ubvvu(v$(9$qi|bRHOqfLwqHUf zzX|Gi)&c{$b>|-!cKa~8a1 zA)`uG=UdJ%&Ktbdlq}ARjOH2U!GgWcG}X2tmUR^;V%!tfhN0?Ak!{Yvs>yR8W0K{u z$Iuj3GA!Fl&$5YC4nj@bQIz#EY$vzQVV12DO+O|Fbl!M>9kwnkP~mx1S8eqsOb#0> zt(GFp2-Jc591POtN~SS{o3rB9tYK#59QMMhosFmXTPK4M{1L5U$E0iN&^Ux=K-DIU z(mj9~SPJc05JH|?C0YM2p3=_vl1!}S$qUOWv>6;iT3f?Av($o~X$*UAN|fe|6715Y z+_C21AHvi~d~iv)s9s4o-C8$vi|dw}lQr_i61P1efpDQX_c)e^3wfMaM%m19>$}PR z{y9U3LD{%iwxLUX+=d*~f~rGZIM?-dJVVV^*+QsMjWP^sC=ZX)6>tFW%@$RwDBJM3 ze?3^@vMcj-I5X4B#WhG3l5&r**jm|k=RH&7dI4ryB&sGfb(34uWYc)Hd~Z}i7zs(} z4B{>8WefHNlhELXPv1b{M#h|8n6hkHS5&>=N1pE%%^TOz{$ zutc~EDMA=U4kiFcge<@-T|s-jTWFPk5t~@!6+>4T!YJ7b{cn7?OQxOq6^bj9Y+bf& zwdq=}>v>_;!YteeNkYwWc@5qgu%TL3nS<>YWg;B$SI`ZxE&Hd(d!`2$B}L zH+U^*a?vnh*3*=do?#G+*k3GZ98r&C9k@fFuP~i(7|J&a%WHBW(>PKr+c43rXY|?T zZl{b?LP<(U*InlO2J(vm+b}H7ya{liyV8Y2Qnc210bu~b5kygrg)LQ>tXaq`0f|$Z zVQi8(eO=BvuU-fzU@x(F$Gf55jfSmxx^c9GfNoJe0<}_2p(CZ32=p8d>HNilOc@2t zdl!L7_%X3p#5PK;m`14aPZ3k(pCQ>n`N)SAp;P3?oQ7l<;sw!I-vsyThDmN9!*VI@ zxpL79v(DLTM1W8L2!z+;aL+X_4)NI_s`h=V2h($? z8vTEr5~5GP6rv_5Pg}U{Y;yZ->XbxiWmAS-q0wh@rzArwoBO*%qlbKfC_T;5N^iQ) zd1s}Bgvr)SW6Oe#*z%evxC&*|P2?sNelffb=SOBbs5s0xClgasQ|uzPko)*`nWz%k zRZ(X$EO6F7fQeHgz>%)eWd|TMA2x9ByezrJq|6QKk?4#joaJz9KnLRk($! zglC+IaG;P;dZO^M+4a>WQBM;;qWYX+&Y5-FE?G0v(_+BN6hv*CLHL?>i{7=0n3)yd z*|Mp<y_5~pf&YsQ&t5#$W>>3y0)VKxan4?$D9q+jyx~?D5>lV4mr-VlXo%A&W0I;MS1J#wA*qXLeCaCEo1C|rl;jn35heu*C33n zjJmPna8snxtd|3_=-XhahM6f<8Y9B{kTH;aQ5(27x%8ROCw7Nly|3Q?acyYs zc0#N>bARUkWUXhm9{<6kc>mw@{<`;Wq8>jX=1$*x_uiYep4aN}*MqqS@e^O;{GR1{ ze1#VEyjPE}db73EEA{xRV)S%9J{gQo)#DiuxF<#uV%3;(NZ>8`(78cU`IFCWueN*Vo(!EEq@`z_d#l6l_9?Cg z97ZI4s5$P_-;(sgsjSC8;k@*rB}umqbxKV-s)v-WBUtboNl7*n|FdX%rVQiNxYB)7 zGGD32s}Y>tLubEYU8&}%?u1q!X^vOhAZg;QvpHW4^bM+e zTc{h^U;b31O1rSbWOKhV+B{!>8!L`(jsO`9uNVuhn8J!*3xC$vw z++?g#$s`&&cc*hklwEToolqJJF1q5PqHN-_iz%HW($b;^J97odCF%uHnoueZHu=p6 zx@D0My}XB_hb{E(AJ>ML=lOT6(U$15F!}acvdtVwqa=K z*T5FB48)D36p-LnYv@;~b=)ToQ(&kcoHs-jv?Ze+pe!ltIh3ge)drN!Q6wHzqVB`f zd^i|=EHKk)-AK={MjIZT8Si}2XC7ES24W%%G<<+_!V_%?_<&nFd)24N-&BJ_me-e! ztw}VbHXsaHx3;{3$d@4#;DD%+4&+#7;VD9>-WCsvBkp9!yG3 zPIQVnQ8r=H?-h+m+BvYg?*j1MZge_7U-k! z5m)YJl9aO=-n|5njyfYvT{jH)R35%Zj0kEEAPq$`Aj17(cucByl<>iqxuTW-as|%!O*xXG+ zcm-MqLNXE30EtouVZjVd$hiby1^UQ1$;5fp%4xES7HY+(ft~&uh8l4<*ux`&vlv=6{2oSq6EP2~6TfnJmOvQ^=n_sb zy=Ij>fx7HwgAPiCp%M)NkTiLK{OETfFdJk5A7=pf0I-dG9vBo1CbupFy3+6ySgg_I z(IMKccHw)CQJ-yqs6N0<9Toi&*65-1iDt?w(T@XGy3-tnSC}dKqT0?xJtH;RVJT{< z(T;$@P0?jFhA+%rn$KQYUAeHhG@o5uKEHTw?%K-g)kX*UIUxJ#go&SfCG#cP-(?#% zuuGSofdl9P;ckw~nyS_4R*MDVAiJp>TY94hf+pzG8%1~N^MT7mnrK8zszq^|su1^M z9;1s-c>QG1NeYY08hTdnEh3D07AsZ!Ssx;eeky&NklGWqd#izHFr)yCNPg)niMKVm-fF`SX=KcxL}GH2mrEZvViimv%>v)?&loc1Q!MTFTn# zw;%R9+x^Z?fAx=&G;l8Zjrc}$H!vT?>O=i^_?L%9AEm}>$8PMTe*7>cZ>Qv)RPJF) z-A<`Hsm)*y6*nmF=;Wow4_BFF#72`u!&llHdP2d8(eA zc$7R`>sRZ^jX!os&rRL#-0kUqG&FKoy%*m;_PyHJnfs?}qi^pF%|0BO-yWLZ8M^pz z=<@c^<(;9G+ZT3whwhGT_nz84^V*%ByOn!`fA_QPzUke8BX_U=?d$U5A`%=F4ZlP9KuT87T0Nc< z4}bO`{youH^Ljl0@BJrlzXLAs9RKS@95iqWg6{08^-bN|y!TA?tH%`a@S}R{1MzUB9(zwbd{U47Y_CH~o!C8mba#AacVyz5&h})-p48sn zVKOqxcs5H3Ih%E+E>L#A;#H*QJ(Pn@v2blF%I0gR4V||ZFJD<%y*9UeZHE0HD#m7^ zC~9VB7GY9=Cl>o)2zI;|?3kj79|aRv-HEyA#mH#1CJjGXjSfWSBmX_Q7>RW3{V*!UlDC~N+m3ui zu-xnQ*o=5stjFFJ4-{9$!$Li__)StujlmC(y%R;eOr47&f~FQJg5sM-E=6mx!LPbT hzaqG8;z2z9ONdEIl*i+_zof)rf;@Rym%^BolRpZLSMYV4PH|3p*NR}@Eabe_6TGAJ;O^!8aQATmxcm74S2NW~QS!e4WDp>0TofPVYhD0W zd|ja)lqhQ4MN#)d_row3;X-_f3u`dOV3Uhq;E2%r(N?u8+*^hPkGX%r!f_(bThPk0nLR<{0Ru74%Ze z*MctEKo4~wfse0qAc$^a_v?vVd-yug%3eMO_cp#BG~sjL06X$fWx7l!_I;04JCbPB z;*WV*$d*xAD#?h=WTcX)YVlH$V^!W767_vN#qj2AMZ}8YEik^-`m@HvHx>6sU1n48Badw9%m9=Fp_06{DdS; z>DdWg5EYfp$!tNNo@7;pEfy2wOhzhHyk|y3)p?6vlIa?^f7ZSvB0q*5_2bRc z>ZYfsnQbG{!uD@pc7i|#38eKNsTu>Bis7m4Lt)2AHmPhI3BtzY@;sT1;^kYugKUA- zByn?Y^E+cZTkI7m5B%Kldv_FW;MnOITCL7M1V{Y-rPDg~!X8gi373Wjo~SaMQU%=^ zyP=@0EEN!oa>8R?L@NJ8WwiD!8XZ^8UD*GiTL*T*=)bT(6E4#w!{N#bBFEwWWiW@xwkkeD3gMiOM?&NGpZyr#j+U&ESR}8 zAX4Odz;FzHN)y~>ssE@yIeY!PP~^-0r~UI&i^o?&orb6L9n4YNbcFRobmX&wA86n) zB@gM;!>Xq68tdX{#|3jP90GQPsfIDm^HCUY6%T=#p>_Hp=Hk8Uh5enPKEluc3$27t zJ5k+tb&^M%RA3=8bn;A&=|PA2Cv2f;ndaeyV-sK>jOGSE`90Q~N%zWlOHKMB?6+4b z<^|RC=A~&~hHp1)nCY{y+B+}DFmyb}t7&UIjcpC@th@(iPQy>Zi2`h_KRoaLbA3J5 z@Ky6Kn-@;3#M&3zE3t!Xv93z2Yw6fZtoIwb5<9vUJ6(yLei>hhoqx$zVxP?UHhfgL z{>yVu&;9iU!_&Osrs^9BietZOTZ9$+*J7QOSmzR~NG`#OBXhp>P^=0s$ASDd)A7Lo zF`dW6bavh%XA9xzRW}$8nQdG%F5RW#V@fV;cpMG6!Jkmnj9c|P;|~MQ+vZbu>$HkF zAlE~Z-8o;@^My@Mp`Rsm*Tcy6S^ULSb1RvCc9wH<9^H*IuB5IF~- zEiC^8&VNQwKae^Rkg8ZVg`^9=(%F1Ec9%mu}x47#m3?ymAX(eL$4t0-J~2 zk;CpJot0!K>ZFM*u+MTUhVjPvEDl z!EKh>2vXh5KPSFUyy!MOhrf%U#rR63WA-YH2coO70~P;)cVTx^aBlF2099B2W%+4& zVYjh=;P>($W_~yGy7mtEs;0KDe*Vj!FaFu<-6z*u_ZbIABkVP|HGgIdl-nH8dHmgRg3h!Ox7(%0{zh#I@zHwo!utH^Nj{q(Ah4 z_*l5%_wE%qdz@h8plXCF$HlpS*D0@|>3a0dG zaK`o6jBm!T`@jb^z?uLF=sR)b)%{$}Z>s+EJ-&k5_&VdkT$8}BgdMDkp9{fzm~cg> zK#Ma02TDGv2OjP~m5Zp@9g)j{bs`J+$|zU6lh@w?vhI0RX2Lm-8Egj&%!MH^6(N{l04}U(fb?C~o(T5(OOkK+o17=fyZUdg==J z7rb}js+Bas7nucW&ClIi_eKZJCt9=z#P46SvQO;W3s6V0IUa%YKu#`-AfLsRtR#OB zU|<|qpsZX**he9p`huIW;jGMvN_ZhL%1DqJN2mBQ)H0j`&I!s+vvJe0klfZDj)Koo z(st6zbEaP@6$(H@WB2BlgL6!Ft+SwZ|0Ud{*#k1}kDlVxNy22T*B4l>kb@1su3t49CcQ zn6B?CMvJtjtwF;Uyi-Z^BhA%=Oq$W+TLHkM!z8155K1B^QB;&%yq!T8M zv}QC4+(Ln|xj{s`fOs$JoD_1CJg}vp0%&{-g#?rElOSZqZVe5mhK5H5)7J;?-5njf zk}`czjXHE8+h&m_^-RFoWi#T8lZ4mw+fteyyn)u1`0cdA zk|1L5Bm^c|Rq)1=0HffHgTx(jvx3hayC_OKL`Xoh>m&IxtSG=w`ESTDAr6J9`uKXw zp7r(v_#SV36Y|yhW``hfG`6ob9I7-NT50H++Z|zRKkbW z!pTZFxs+QCpMQyy}e_0$cXH%c-sD0 z8=rH3*V4Uod8OsZ)7m-z{4O(8Pq6j?zP0f2O8EH8)T^tN6RFkk?GI4bP<0I(Q3|+d zIJg|@Sei6WjVv>x8!l?!m}@RD-&_f_ZnVMt?C3@-)!aH4Sr5f6*!6JpS{PNrXz}Df z9>5C+E6mVJ*RYYeW?Y*vGP$)(zLLosh0+E^P1A#}Hxwn(LoTQbIeORygBki7eq3{1 z$B+FtTt84$d_ZBO!i=tT-7*rN8lQ4TJHHyvuD2dA4xU|YJvZ-PkF~AE zIx4Y_CC}2`Z|;2kh0$~F<+a~Vzj{)EKzsF`aqqry==0aHaVSUjoQE*HZ~xy;{>|j# z%u3q{TT~!DKUfLvS$MP@LhF&nd0{1jj6)ZH8+f(5^2w*h#W7<{G7c6iks?gsA_o*h zVzn7aLy9Aa>2$)cJ-;(HI52X%AKk+Z!mD=U%IOrMNmfDcH}V*CD-S1)M?16@As3uR z6J-RQw9FLvHqu#v^3O?88K(!Bw7NdHSi73`ieY1Otns0XCPxoF;-=k8R*v z^}^tS$SRGi$C8;;3wf=BwDVx&@p;?2q}Qnoi*VqqJ}N+2t%OlfERNgT^_4 zkXi~LNYP1*z(Jb^^X8#pPMY=2Hk*BCjxv2GPcwZ?-w~_rXE&?2Hb5INcbiSmUXEws z{~8Vo`zXthlt=|dEoHNI-xs)1B^6SHauO=pq{9c{H@<_KG%u(X_P7ouFI2T#f-Td6 zJ;=#H5^++c%1}j6@*#ZNk8ijgN$OgXH;}lD-7&7n(`hpTEd$kw;EGwpi=_f2FDh@+ zg6S?OIr8rgQ8jBxv{xWofd68Y@~T-wR#p=U`4dbBr$Jv+#OGT{Lf~9M;!1h`f;<9K zxY?wP!hOR{)AS#ynzufGqsyRTZ+F$V!gKfA&gd?9*1tt=I`KB>qg&qAbj^aaW^cY}pyGQLZvI<+*HhmdJ$LNeK*Xp!wD@Q#_IJu6zjWM) z9$5|a!RUO=!hwZGBf4)jfWB=x`HiyDa(1p3Pxu#Vm;9BcBgXDNBYJc-fN{(R7cLsz zXDhAejF$68^ulW3BEj6g*lQdeuI#^Nv|l%(H&z2TKMD*4dzTWGmg7eA2_t%PHE;?* z=cLzxwv9Ly?|#!nd4oo"]+|www\.[^\s<>"]+' - return re.findall(url_pattern, text) - -def get_domain_from_url(url): - """Extracts the domain (e.g., 'example.com') from a URL.""" - if "://" in url: - domain = url.split("://")[1].split("/")[0].split("?")[0] - else: # Handles www.example.com cases without http(s) - domain = url.split("/")[0].split("?")[0] - return domain.lower() - -def is_url_suspicious(url): - """ - Checks if a URL is suspicious. - Returns a tuple: (bool_is_suspicious, reason_string) - """ - normalized_url_for_pattern_matching = url.lower() - domain = get_domain_from_url(url) - - # 1. Check against explicit legitimate domains - # This is a strong signal that it *might* be okay, but phishing can still occur on legit sites (e.g., compromised page). - # However, for this tool, if the *domain itself* is legit, we'll primarily rely on other indicators for now. - if domain in LEGITIMATE_DOMAINS: - # We could add checks here for suspicious paths on legitimate domains, - # but that's more complex. For now, if the core domain is legit, - # we won't flag it based on domain alone. - # Let's still check if it matches any *very specific* impersonation patterns - # that might accidentally include a legit domain name within them. - for pattern in [ - r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp)\.com\.[a-z0-9\-]+\.[a-z]+", #e.g. facebook.com.hacker.com - r"https?://(?:[a-z0-9\-]+\.)*facebook-[a-z0-9\-]+\.[a-z]+" #e.g. my-facebook-login.hacker.com - ]: - if re.search(pattern, normalized_url_for_pattern_matching, re.IGNORECASE): - # Check if the *actual domain* is the legit one, not just contained. - # e.g. "facebook.com.hacker.com" contains "facebook.com" but domain is "hacker.com" - if not domain.endswith("facebook.com"): # Simplified check for this example - return True, f"URL impersonates a legitimate domain: {pattern}" - return False, "URL domain is on the legitimate list." - - # 2. Check against known suspicious patterns (these should be more specific) - for pattern in SUSPICIOUS_URL_PATTERNS: - if re.search(pattern, normalized_url_for_pattern_matching, re.IGNORECASE): - return True, f"URL matches suspicious pattern: {pattern}" - - # 3. Heuristic: Check if a known legitimate domain name is *part* of the domain, - # but the domain itself is NOT on the legitimate list. - # E.g., "facebook-login.some-other-site.com" - for legit_substring in ["facebook", "fb", "instagram", "whatsapp"]: - if legit_substring in domain: - # We already checked if `domain` is in `LEGITIMATE_DOMAINS`. - # So if we're here, it means `legit_substring` is in `domain`, but `domain` itself is not legit. - return True, f"URL contains name of a legitimate service ('{legit_substring}') but is not an official domain." - - return False, "URL does not match common suspicious patterns and is not on the explicit legitimate list." - - -def analyze_message_for_phishing(message_text): - """ - Analyzes a message for phishing indicators. - Returns a dictionary with findings. - """ - findings = { - "score": 0, # Overall phishing likelihood score (higher is more suspicious) - "keywords_found": [], - "suspicious_urls_found": [], - "urls_extracted": [], - "summary": "" - } - - # 1. Analyze text for keywords - message_lower = message_text.lower() - for keyword in PHISHING_KEYWORDS: - if keyword in message_lower: - findings["keywords_found"].append(keyword) - findings["score"] += 1 - - # 2. Extract and analyze URLs - urls = extract_urls(message_text) - findings["urls_extracted"] = urls - for url in urls: - is_susp, reason = is_url_suspicious(url) - if is_susp: - findings["suspicious_urls_found"].append({"url": url, "reason": reason}) - findings["score"] += 2 # Higher weight for suspicious URLs - - # 3. Generate summary - if not findings["keywords_found"] and not findings["suspicious_urls_found"]: - findings["summary"] = "No immediate phishing indicators found. However, always exercise caution with links and requests for information." - else: - summary_parts = [] - if findings["keywords_found"]: - summary_parts.append(f"Found {len(findings['keywords_found'])} suspicious keyword(s): {', '.join(findings['keywords_found'])}.") - if findings["suspicious_urls_found"]: - summary_parts.append(f"Found {len(findings['suspicious_urls_found'])} suspicious URL(s).") - for sus_url in findings["suspicious_urls_found"]: - summary_parts.append(f" - {sus_url['url']} (Reason: {sus_url['reason']})") - - findings["summary"] = " ".join(summary_parts) - if findings["score"] > 0: - findings["summary"] += f" Overall phishing score: {findings['score']} (higher is more suspicious)." - - - return findings - -if __name__ == '__main__': - # Example Usage - original_test_messages = [ - ("URGENT: Your Facebook account has unusual activity. Please verify your account now by clicking http://facebook.security-update.com/login to avoid suspension.", "Original 1"), - ("Hey, check out this cool site: www.google.com", "Original 2"), - ("Your package is waiting for delivery. Update your shipping details here: http://bit.ly/fakepackage", "Original 3"), - ("Hi, this is your bank. We need you to confirm your identity due to a login required. Please visit https://mybank.secure-access-point.net/confirm", "Original 4"), - ("A login to your account from a new device was detected. If this wasn't you, please secure your account at http://123.45.67.89/facebook_login", "Original 5"), - ("Click here to claim your prize! http://winner.com/prize-claim-form-xyz", "Original 6"), - ("Official communication from Facebook: Please review our new terms at https://facebook.com/terms. This is important for your account security.", "Original 7") - ] - - additional_test_messages = [ - ("Security Alert! Update your info at http://facebook.com.hacker.com and also check this http://bit.ly/anotherlink", "Additional 1: Multiple suspicious URLs"), - ("URGENT: verify your account at https://facebook.com/security/alerts - this is a real link, but also check http://mysecurity-fb-check.com", "Additional 2: Mix of legit FB URL and suspicious one with keywords"), - ("Hello there, how are you doing today?", "Additional 3: No keywords, no URLs"), - ("Important security update from Facebook. Please login at https://www.facebook.com to review your settings. Your account safety is our priority.", "Additional 4: Keywords but legit URL"), - ("Check this out: http://bit.ly/legitGoogleDoc - this could be a legit shortened link (hard to tell without unshortening)", "Additional 5: URL shortener, potentially legit content") - ] - - all_test_messages = original_test_messages + additional_test_messages - - for i, (msg, label) in enumerate(all_test_messages): - print(f"--- Analyzing Message ({label}) ---") - print(f"Message: {msg}") - analysis_result = analyze_message_for_phishing(msg) - print(f"Score: {analysis_result['score']}") - print(f"Keywords: {analysis_result['keywords_found']}") - print(f"Suspicious URLs: {analysis_result['suspicious_urls_found']}") - print(f"All URLs: {analysis_result['urls_extracted']}") - print(f"Summary: {analysis_result['summary']}") - print("-" * 30 + "\n") - - # Test URL suspicion logic directly - print("\n--- Testing URL Suspicion Logic ---") - test_urls = [ - "http://facebook.com.malicious.com/login.html", - "https://www.facebook.com/officialpage", - "http://fb.com-security-alert.com", - "https://legit-service.com/facebook_integration", # Might be ok - "http://192.168.1.10/phish", - "https.google.com", - "www.amazon.com/deals", - "http://bit.ly/randomstuff", - "https://totally-not-facebook.com", - "http://facebook.com" # Should not be suspicious by default - ] - for url in test_urls: - is_susp, reason = is_url_suspicious(url) - print(f"URL: {url} -> Suspicious: {is_susp}, Reason: {reason}") diff --git a/scam_detector/__init__.py b/scam_detector/__init__.py deleted file mode 100644 index f97bd31..0000000 --- a/scam_detector/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# This file makes 'scam_detector' a Python package. - -# Expose constants and potentially functions if needed by other modules directly -from .heuristics import ( - URGENCY_KEYWORDS, - SENSITIVE_INFO_KEYWORDS, - TOO_GOOD_TO_BE_TRUE_KEYWORDS, - GENERIC_GREETINGS, - TECH_SUPPORT_SCAM_KEYWORDS, - PAYMENT_KEYWORDS, - URL_PATTERN, - SUSPICIOUS_TLDS, - CRYPTO_ADDRESS_PATTERNS, - PHONE_NUMBER_PATTERN, - HEURISTIC_WEIGHTS -) - -from .analyzer import analyze_text_for_scams diff --git a/scam_detector/__pycache__/__init__.cpython-312.pyc b/scam_detector/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index f9c7c317b590719f88d9eeef72698904aa8112b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 562 zcmYL`&1%~~5XW~Vf5?`Vj}lraA$K2Izd$KC(nc1IwPJVG;JpmSR*AtIo0S4}UnI|v zXXzX4we-+iOQ3i;OT0!3 zORPNUDPQ`kC0nX3+o~fwsw=xHkb&ySp6biK3T24h8*e>Ok&H0f`NP&PI|B_vb5gRD zKB+O+Zk%Bp_Hx4%JiJhOjJ#lo2AbJmu!C zAn^ovSrn8B;PH5Rke?Le*_7nskO@n64aHap!g6!WOI~DgMoSLj+HDoHSz+gnlY|k@ z_d;&YiV4jL7QcOr@pCZ1-ZR?ckMsVXBm7Rk GvXcM8?vup; diff --git a/scam_detector/__pycache__/analyzer.cpython-312.pyc b/scam_detector/__pycache__/analyzer.cpython-312.pyc deleted file mode 100644 index 9c2bc3f09423b05058a2d1ea487a9adf029f1ab9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8681 zcmcgxTWlLwdOkx^Bt=qp>OzT zZ5+|%Kc~WVwCx8d>}2X``)V&j+Z4~y4!VM_q^mwK($#bgU8|PrXlJohPd5}xjdT<3 zqMKn{3+<*`)lwVXPGeGvTfz-Y!x&J!TZ>S*5q8)_cQB1~rwds438K3`H0oTd`)`5~ zF6i$j{dy05YIO`D-a((%duWP2)lBC1d}vHrOYP9o`w~T`K1Zh(ZdSSQFwJzI#>IyQ zy-(wTK2xg0sD3hP4BZ8-hg;OSUZ&+I=;*8mRVD-@ePks6iaKWC7!9=tXn-Y!-D>|J z(fI}K36QCXWT27fy_$}Jsd~6-e7HWiTZ;xCPZIg8rKl-)%OH@zCh@$zJ-ub zH;L$rdJjnGlHLn+OuFs7N2y%BQtG2ej!KfgOjZ!62mh>7f{~ZiHJVRYua@=)btipIuZP>1HcfBzbz-Z6VG_MZL7S>2UahGV{Q8WdPG2k4 zH>eHDGx`OQ@QjC2tQkI$j`9NY1@Rw`U8!D}9}moo-ih1{+_^nFKNeIPgMpdgL}=oT zKxAU(_1RK~J2X2R8K0dUi-cw)uLUBZ`Gr8Kw+6@r<|jrYYpwRub%VYnGVc^N-g$<`Kid9KNJeg&nQ*Fh2Y%8=)~+oFcO*~T#e4( znS*uxV`KAyU{D_zRGf1+W@iGCnT6@s0`q#aQgb7)Fh3Ct0YA3`6XQ2R!L0EuU#~*t zV#UBgB!W!v>w@1`SK)ONQ4rvA=x#kin^cZkIXV*h$wMDH>R(dtQxtm8XhIv*UmMpc zk3pe)M8xKVAo6S)>IUDSViXg!Kw=Qa-?j}z)9C>r7EMNIMr2|l#}7nP(ZoiU;eF{1 zrTj{qNip}*{8hdYW;o#|5UcxF^!xirIv4rvyHAWCkN#%zSCfx~-^~8@-G3Jd|L?W5 zQdxti+hbMA5?q{3DdshXXO}hB;w2rMufA(0p+AQNGfl-21X9BX#vs&Bl929Xn-ooL8sccv2tSip z48sVCglF{;foid2JuImQ5vhp|y^S`0VBRdtm913iEv2u%8iboggx-QxHqD!soCWIN zfwDDc%~7f=TWM3yyi!w|sh7xsqX(OnI=!{@)mMOEV#!(IZT+xpvpi>90V{d=$(5HJ zFJ};m64hs=NiUVYIWt|ZubYK%yJ>h3HlYW!35hNGs1>(fDt&W@7Px84+0^6aPhhq^ zXQyqT8T$vuO$Y7RtSCo0`$~Iho?fCWbRI#Ql{q^(IyWm*hMaw~QtZ%2DK#7-xLM2s&mymC!W?@S0L=a{N$>2YJ8Zk`QMIRIrB0d@V{D}5@@3CL*r&m z&OuiZCpl3cHTq}(DNYz!e+5SVPnv<_{w2+b{W>RLSASZxEBha^e%0n0XxnBjNbkbQ z(&PI;UyVPfHtTYAKo?}c={$Ml%7DIp>6>%vdxL|jopx^4QYb~4P_CAz9t7#8qOJbl zNXYfFgo}Fn9(Za4tNSDPKY@)e`Tqp)KCE-P z0tf!fPs#WBe!An2!bKGh zi)V0g?mQ!A_|%cJwlA*nI7D`l;ENwEaXpN z4$k9fBBAk`U{{$0yUcMk)M2Rakj)~2(q4`xYZMhlKly)aG7Xq83Pi-vT`JWtgG$A56jLfyI*ObR+*$|(L)nTso)MT- zj3H9$QK*PQ;jE(vuQD41-i(m8o+9T5*IozKjliK&!3q%}Bc$0FF)GEvGf{y{2{Rt! zx0Yp5OpgoxZQ+MN0z3(1*_Kf*lb~^m6S0b}RpDt$@@2h5k{N*^2A$@J z1+!oc8@RL}28fG$8DHGjk9oi_NTEC-evlL@j{{+kIctk^Ts*<}Vq7xoSc=A&MUGol zYqn@In&nbzxg4NoaZp8)jq#koEs5HwbaW#fO{nEg!W!`mFlO+jLkS#}SHlqw2t+R? z3fYEWY51Y7jNq&t_s(nLgNorY&f4bmP2+4Ej`165kpno&gPJyQl%_#r#O%O)eM;qV zn6KN|^=#YRGM8d-DwAAfc(8i{pBZBBsu%l|rekTq!c1^>3etyAU@Q(&gh#se1re;r zO+fNG!}DcO;|T)gE#yTIC|^!q1c54LtjHu)Lr5n;#7ddSiU|PDAXO$s^8|@1#yBJL zc2cq|f#HKr6`LSNc~Mwr#bur#XQe!FFUF)v*ifv?oFJy6Nk(xVktk?BG9MTZe20Ma zC?8u^%HlkiNh|IngNWE6GrFP6KB!oM3w0UAw8F6|p4`&#28&saW700S8bVq@V{5#in`(O~xnfPVb4jqsh{$OiC}zze zm3r{g)fBg$iX@{eFhd)sRA_BlO;8N0v|`unETRhBl8%ZZ13{f2ay=lv^yM3fKpKhx zGNOu$Fhvd+fqxr1A@@hZyP($l=tVPfG!Nha6VR^;q{-hB3X`{n0u@AIai=PhSmR9G6!_h$}jQM)(aHYB$V zeb#nfwl;5Bw|jrr*uFJhsA|~hds@|9aC;v1=KDtEzL7`g9vk=i#`1lWa^K`$-<0H; ze)5(Sj!JhHrNx-kOmABX)i__>BUkr4wCq<87OLy_U440%Pj>kp-g{KH=XzDHe)UIX zFUnC9J}{#8;jbG|E%4qg*L3f`wO2EAfAV=<>z1X^e)>`UlOefrX3Oz;y=%*MU??xY zNqt_|oUc12*PW71kMGyrD0qfGzAbyM$knGm_Y6war=Pnzq|Qrwu2;4uUsR%@D?5Gp zX7AHx@8ia;s%Q4*{q}SD_7S;#53I|9b?M&5 zULY%ty(7Jolj=9OUN1PBc3iTfEAQx&9eock>^p`E{exRmJMFT=Q*hMm*PqVU_sR8r z(wWPTf_wFT+2P*`d}BjR=QT0*{M7qn?@tDvR(TGr$m5f$Pd%$^-R;YF4nOT2R=RWB z)4yvwvtuZo0?f%C-k6lkX zq}HkJ*9%U(>yn*k^3EaIIrQ;`edh?w*`C^Mmz`$|PS<{`FW)*Ow+>0?ZaxX_wce7Q zx3&Y%oUVgYz~4bHn9@$wQ+sd0;m$j{V3*SAE01nJnt1%CbZa4hD=gm%OK--d=wGxK-2DZY?}f$WFdraOnfdo$Z&0Y(^(8V88#|_L$$KO3n0)G({Eve&()+DI zvgyBbbx)ZLpP0(0`pZ74a8C7Sf>FDgt_`E-~ zFy#+T%+3Vyo!JF^dv>OyqXXaJGCgYyUguJPN|tzr!K`}1cHq%Oluc?oBMfIRSRW>Em_G@@Dy z5F^5hrZx!L^l2JNf~hSLuJLjd?BRPEMj-e|7{R?@KpuR4_=5N1rB?=dl8AbY{33k# z7`%?!d-Pt>-W$f@%9LkgcpPk4gsFJLT>6(W)40$nwbzUHhfXU_gsFdfXz@RV=5v zAy}r~p~sfbuVt%kUauF2KpUVZ0W_#eK5N3ztjhljnry-ng3vR9JF_OA&)2C?omoqe zQ9&k9z+TLg>%(j%NI=jNe0n5XPP(8CCX*|!Y-K+ z_I`YrP^jq^uQFm*m&I8(UXKbmL6Qh@8Jt)TiWuw~XPtgQfT#fmHUhp0eB!IG;;aSF z`Gdj07|)OeGzwtr5c7{yh~7Ji!WE#_846D>(@RN$mcLlYIxNK03FuU86XP?p^MO%+Fu;$I(c|RxHtB*Rm7F?}2+1M@ zxJm)@5EK*nC}j}J;U5NLf`PviAd|}^L_sNs@St7DsDC&`AV72cHA1RFl~bfR@E7PS zXha(Hwq8kcbSA-E8Z&nfz0-cP_GbCL(u6q{X=08JF1AJi8{dYkQ6B5^{cNMp&RE%ffadkze!e%e^*JOu!y+;-qV^&O;X&2Y4-mo(*$HT99ExnoU3 zq-pdREiY-JkBu6o3eI;l;x^mCweDK;{mHMVC{)w3WqM|-=u7pAwZJI$yT|si4k(&A+2IR^iY2mK)-9;(J z%HLg)?ygF!Nhz4xw{p*FdmlOFAj#H39G!K7nuid`()ZPVsQf=qHqpRTV+MV5Z zpWn7VtFGTE-|c-k{Idb6W9VbwGQ2?O9cqCP%~EfW&lq)^EC`dNIzM6F2@;WfugRWK zsdw!0xKul}@0c#w+jq}D{eN5WGsiUK1vM=%dQq88vi0qo&OCGWJsjF|`tAcSO*@UU zy<;~B`GREXl^|=Va!aj{*bGXhLCHS&%v3Jfy7o=o|40hwJ~f>`7&RbMGk*^p>OTPp Ck2$9R diff --git a/scam_detector/__pycache__/heuristics.cpython-312.pyc b/scam_detector/__pycache__/heuristics.cpython-312.pyc deleted file mode 100644 index e30fd5058c6e73355b7073f4d3bfde5cbb736117..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5706 zcma)A+fy6Y8DELzrUeNsamVKPf{m~xaS@Io7lJ_n92*D{@mRrCSGz~j8fjPA3s@(1 z$L&jUUOJ2@=zW!Fz7M^L^j> zF28f|U)$R~4E%7P2)Wy*8RnmK;QVma&WnGs4D$&iGNLQbB$+HDy8rA-x}a@<)(tHS zZ3DE8(6Z1rLE8vzGqg=^1~d1(j?j85w!nNxeeCqUbXR)GR*=vN5;KG?Y{yW|rnVC>eJ5bpBXasrz%(Ag44i;zI)WMe3Fb6z zy?4%uPrdz=`Mh?H>gmp4kLVJI`k9xkAxDRc!Q8#!x*VtXWwo=w+=X1+ggi+4lbyC_ z&tgA#;T-Nf!H$|AT+Hu>*h)0cyw_a`3fz`YJiyhQU(bKhWo;}S=bePS@#FODV$FehH_ z1F`p}>$065?sIlm_5t?FiP-+1U{97i;9ynT9^3wqGv{K&aPqVmNuCkMl0)Kn5jD)$;F%-4o8vYW_~v$0b+E6^wpQ-MJF%u$l}ti)EekNreIy zC0@sf7j#LHk&53pBo&LK877dd?2xv+RFHHmB3&wA(#Z>gV#qqu46TS|5s=-^)Q*(T zqclcBUeRE_JtN6{9_{d|EXi5o)v#cwl3qf59;=|+kPXcM0M)!r=S8ez0W^_rsZdl@ zod-3tqDvV`;Hhzh6BYTA4!AsDB5nA75iHe^rod8%lF48dfkhh96%p3CJg@0U$nmm( z$@!9Es9f z`AaqmX{=fenkJLBG%s&~%j_T!ujBd(gm_iO(81ZLIzC0^O5g=v6{~YrgRs3#JVnr6 z1qBLSwERb1=U5yls>(KK zsrbl|5h!9+Q)spv5fJo~Ht;0M@!J^fK#V;wx>W7=fGt^-*NwcbOr!EB$ZFb=WEs5G zSX3o~Noz*M7-gl5PCA7=FI7XPVn|ETYR)UVj@6RIX*(DfHJ#tWn&rtFyb8&OAtzc@ zN!tohA;gCvlTksDaS3H0w}>BdVGCtcrGRou0VB+7CCFWQ8~kPGky4~-Y{-z@gv&|U z9F&)Cs50?pv5r(p$RVgUVawtq86qG-#zU@lAVaoIg__G4kkG^@OIcXvWdw%;NU)i- zi(m~B3{3~kDrvQcG`&c`?r!sW1NPSdetXtQmzdR4a<_&j(W zqYMy;Nc!w55tKsFfDj-_qp`2le}}*Wlhn8=U$Q*F3^`eXx{yh`iXmeuV+mM;a*>LN z7e!zK4N++j4hiTd*lsmf>)C3Cm?)BfEjA4lMf^}C5NcVZm6GQ3w%5Hngj}N>Qt^5F z4v-6SCP^zzA-j$&ub`HSx&maTVp%9z&1p#&6bTI5fxtt7L;QfPtX4Z!AV~|PtHQU) zAs87_3zEyPVOIWW%VQ@1|dO-NteL!U9LT}k?QF|fiwi>P{=B(}S>3rub9nJ<`)dASp`tp2gaXl7GEUzqDZHd_YQYyZ&wzj&SuzYKCcW%X25~=mrI~%ch!s?E1 z#MhSRmsdC9sg3oUskdWyZm+H{fVTR4;wFse*YB)>qPc~I^;kS^^{y?guEbI+8@H~< z)>9iR@zt9kB@tV&T2d*vPUKXIoDPLTXbHpJ57&r*7BIXEBzmD}2v$h*O$aJb6*-0M zDiKO(3zz6MtA$A)+=4RXg#@%u)9J|48Mx<_P*zdIkgkN%IHaovKCUT`!L@8n>65-*E1h+?smJck2jjc$L*K@OQZ>y!(#}N=6Jh?Q*#%r)a2b!YA z9I>x>O#0~&wF)hRm%&BwaMJO89nRE1&omW>3j&QJC;)Ie8lM{{YvXur>YYW1<;23R zwL~Uyb1Htbu>Hz{w4GVu>BI3XxpoWQdQ~p~y^q}=pO}nJU47-+^cbHOM4UNkO!^Wz zdLL+rN`m)99u(_z>rdVKTG|ufxi7h-$qYjiQM$^vZPfDjJ)fx)vt>c}3Rgi@0ubnD|x{ zXA}4zhskIqu2n%Vvj>f&c})_6VKP&XH9Wi&(SUn^(i>C)4VVps=Ry|WhojgBgNH4> zU&L_mtmRg*)lsDcr&!}w&v9<3FATnfBc|tggrA=i)Joqdc-Umq^6cS1jLg9syI26Nv+iTY+PY;_UBSifybf#}hzl zw`0qTONlro82V66rR=AktXmCvEYmyRc9zu$)lkr^rpje)wZNYcIIF-@uW84wEXz;5 znWAZ)+L82J%244U-VEF%Lc&-c`;k(aY)R{a4YXP?0O=OXD=Vpr=*qZBgZG$_v)YQZ zv@2rAkBugdjV7&jkWdHzg?Rxv&HM*N;1Ly}yp*c^Ii#qOoWq@3xLC4UUaMrw8|n?% zK>yE3TZAe-Y;d_;e`i{sdzu1nlj(SVs$~G$p6A_72-?Btr`RrNJD+>mGtl-!YvT@k z8DHPy^hftTym#n!H@@jQY+@Rle|ziKw_db1`rMDW=cgL|?ngerwRJz9_-N+C8M9m8 zEMY?Rq|YYV}>z^LiJY+ioD^vsso zH){(Mdx1&QA2p+Ersth9yIx!PvGRX_RjW_m-XDzY4o1v@G1D_%W+wn{whcd1zDj@f zzIk!l9G)>fua?=@0Bw3Mmf1@O0~7lL*LDZ4eKRogq3?0iljd(*+Ml?eTzxX~^xV_i ze@gu^We$c+Z}{t$$YBFB`05W34UgG&!5p66AD-JCo-;38H$C%ZcEOg3%Iw*L!O;HT zGh}EPjjaC($_5`An)|m!xm=Xq03}@4mt<-J1^{Z zg5+y^ozstQ*tTE#!t?Bb>4}!vsc$*&WAeM%r~KczA(I_4xgp4mjz!m_r32qGe7eRS zt$fFwdp5Pp1^2nJU2g2F-ZD31vNI+(b8zB<@8UCK*B9FNMR$GCuYOVXEtu?r$t_Uz zp0P4J4q5`!k8U3Lqx3TZpTO{=TiKDMr9 100 else f"Text: \"{case['text']}\"") - results = analyze_text_for_scams(case['text']) - print(f"Score: {results['score']}") - print("Indicators:") - for indicator in results['indicators_found']: - print(f" - {indicator}") - if results['urls_analyzed']: - print("URLs Analyzed:") - for url_info in results['urls_analyzed']: - print(f" - URL: {url_info['url']}, Suspicious: {url_info['is_suspicious']}, Reasons: {url_info.get('reasons', [])}") - - if "expected_min_score" in case: - assert results['score'] >= case['expected_min_score'], f"Score {results['score']} was less than expected min {case['expected_min_score']}" - print(f"Assertion: Score >= {case['expected_min_score']} PASSED") - if "expected_max_score" in case: - assert results['score'] <= case['expected_max_score'], f"Score {results['score']} was more than expected max {case['expected_max_score']}" - print(f"Assertion: Score <= {case['expected_max_score']} PASSED") - - print("\n--- Test with empty text ---") - empty_results = analyze_text_for_scams("") - assert empty_results['score'] == 0.0 - assert not empty_results['indicators_found'] - print("Empty text test passed.") - - print("\nCore analysis engine tests completed.") diff --git a/scam_detector/heuristics.py b/scam_detector/heuristics.py deleted file mode 100644 index d43c48c..0000000 --- a/scam_detector/heuristics.py +++ /dev/null @@ -1,163 +0,0 @@ -import re - -# --- Keyword Lists (case-insensitive matching will be applied) --- - -# Keywords/phrases indicating urgency or pressure -URGENCY_KEYWORDS = [ - "urgent", "immediate action required", "act now", "limited time", - "account suspended", "account will be closed", "final warning", - "security alert", "unusual activity detected", "important notification", - "don't delay", "expires soon", "offer ends today", "last chance", - "your subscription will be cancelled", "payment declined" # Removed "action needed" -] - -# Keywords/phrases related to requests for sensitive information -SENSITIVE_INFO_KEYWORDS = [ - "verify your password", "confirm your password", "update your password", - "password", "username", "login details", "credentials", - "social security number", "ssn", - "bank account", "account number", "routing number", "credit card number", - "cvv", "pin number", "mother's maiden name", "security question", - "confirm your details", "update your information", "verify your account", - "provide your details", "personal information" -] - -# Keywords/phrases indicating too-good-to-be-true offers, prizes, etc. -TOO_GOOD_TO_BE_TRUE_KEYWORDS = [ - "you have won", "you've won", "congratulations you won", "winner", "prize", - "free gift", "claim your reward", "lottery", "sweepstakes", - "guaranteed", "risk-free", "earn money fast", "work from home easy", - "investment opportunity", "high return", "get rich quick", - "inheritance", " unclaimed funds", "nigerian prince" # Classic ones -] - -# Generic greetings/salutations that can be suspicious in unsolicited contexts -GENERIC_GREETINGS = [ - "dear customer", "dear user", "dear valued customer", "dear account holder", - "dear friend", "hello sir/madam", "greetings" - # Note: "Hello" or "Hi" by themselves are too common to be reliably suspicious -] - -# Keywords often found in tech support scams -TECH_SUPPORT_SCAM_KEYWORDS = [ - "microsoft support", "windows support", "apple support", - "virus detected", "malware found", "your computer is infected", - "call immediately", "technician", "remote access", "ip address compromised" -] - -# Keywords related to payment requests or financial transactions -PAYMENT_KEYWORDS = [ - "payment", "invoice", "bill", "outstanding balance", "transfer funds", - "wire transfer", "gift card", "cryptocurrency", "bitcoin", "western union", "moneygram", - "urgent payment needed", "settle your account" -] - - -# --- Regular Expression Patterns --- - -# Basic URL detection - this is simple and can be expanded -# It aims to find things that look like URLs. More sophisticated parsing will be needed -# if we want to break them down further or check TLDs more accurately here. -URL_PATTERN = re.compile( - r'(?:(?:https?|ftp):\/\/|www\.)' # http://, https://, ftp://, www. - r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#\/%=~_|$?!:,.])*' # Non-space chars in URL - r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[A-Z0-9+&@#\/%=~_|$])', # Last char - re.IGNORECASE -) - -# Suspicious Top-Level Domains (TLDs) - This list is not exhaustive! -# Scammers often use newer, cheaper, or less common TLDs. -SUSPICIOUS_TLDS = [ - '.xyz', '.top', '.loan', '.club', '.work', '.online', '.biz', '.info', - '.icu', '.gq', '.cf', '.tk', '.ml', # Often free TLDs abused - '.link', '.click', '.site', '.live', '.buzz', '.stream', '.download', - # Sometimes, very long TLDs can be suspicious if combined with other factors -] -# Regex to check if a URL ends with one of these TLDs -# (Needs to be used after extracting the domain from a URL) -# Example: r"\.(xyz|top|loan)$" - will be built dynamically in analyzer - -# Pattern for detecting strings that look like cryptocurrency addresses -CRYPTO_ADDRESS_PATTERNS = { - "BTC": re.compile(r'\b(1[a-km-zA-HJ-NP-Z1-9]{25,34}|3[a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-zA-HJ-NP-Z0-9]{25,90})\b'), - "ETH": re.compile(r'\b(0x[a-fA-F0-9]{40})\b'), - # Add more for other common cryptos like LTC, XMR if needed -} - -# Pattern for phone numbers (very generic, adjust for specific country needs if possible) -# This is a basic example and might catch non-phone numbers or miss some valid ones. -# It aims for sequences of 7-15 digits, possibly with spaces, hyphens, or parentheses. -PHONE_NUMBER_PATTERN = re.compile( - r'(\+?\d{1,3}[-.\s]?)?(\(?\d{2,4}\)?[-.\s]?)?(\d{3,4}[-.\s]?\d{3,4})' # Simplified - # r'(?:(?:\+|00)[1-9]\d{0,2}[-.\s]?)?(?:(?:\(\d{1,4}\)|\d{1,4})[-.\s]?)?(?:\d{1,4}[-.\s]?){1,4}\d{1,4}' -) - - -# --- Scoring Weights (Example - can be tuned) --- -# These weights can be used by the analyzer to calculate a scam score. -HEURISTIC_WEIGHTS = { - "URGENCY": 1.5, - "SENSITIVE_INFO": 2.5, - "TOO_GOOD_TO_BE_TRUE": 2.0, - "GENERIC_GREETING": 0.5, # Lower weight as it's a weaker indicator alone - "TECH_SUPPORT": 2.0, - "PAYMENT_REQUEST": 1.5, - "SUSPICIOUS_URL_KEYWORD": 1.0, # e.g., "login," "verify" in URL path with non-primary domain - "SUSPICIOUS_TLD": 2.0, - "CRYPTO_ADDRESS": 2.5, # Requesting crypto is often a scam indicator - "PHONE_NUMBER_UNSOLICITED": 1.0, # Presence of phone number in unsolicited mail could be for callback scam - # "GRAMMAR_SPELLING": 0.5 (If implemented) -} - - -if __name__ == '__main__': - print("--- Heuristic Definitions ---") - print(f"Loaded {len(URGENCY_KEYWORDS)} urgency keywords.") - print(f"Loaded {len(SENSITIVE_INFO_KEYWORDS)} sensitive info keywords.") - print(f"Loaded {len(TOO_GOOD_TO_BE_TRUE_KEYWORDS)} too-good-to-be-true keywords.") - print(f"Loaded {len(GENERIC_GREETINGS)} generic greetings.") - print(f"Loaded {len(TECH_SUPPORT_SCAM_KEYWORDS)} tech support scam keywords.") - print(f"Loaded {len(PAYMENT_KEYWORDS)} payment keywords.") - - print(f"\nURL Pattern: {URL_PATTERN.pattern}") - print(f"Suspicious TLDs example: {SUSPICIOUS_TLDS[:5]}") - - print("\nCrypto Address Patterns:") - for crypto, pattern in CRYPTO_ADDRESS_PATTERNS.items(): - print(f" {crypto}: {pattern.pattern}") - - print(f"\nPhone Number Pattern: {PHONE_NUMBER_PATTERN.pattern}") - - print("\nHeuristic Weights:") - for category, weight in HEURISTIC_WEIGHTS.items(): - print(f" {category}: {weight}") - - # Test URL pattern - test_text_with_urls = "Visit www.example.com or http://another-site.co.uk/path?query=1 and also https://test.xyz/secure" - found_urls = URL_PATTERN.findall(test_text_with_urls) - print(f"\nURLs found in test text: {found_urls}") - assert len(found_urls) == 3 - - # Test Crypto patterns - btc_text = "Send 1 BTC to 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa now!" - eth_text = "My address is 0x1234567890abcdef1234567890abcdef12345678" - no_crypto_text = "This is a normal message." - - assert CRYPTO_ADDRESS_PATTERNS["BTC"].search(btc_text) - assert CRYPTO_ADDRESS_PATTERNS["ETH"].search(eth_text) - assert not CRYPTO_ADDRESS_PATTERNS["BTC"].search(no_crypto_text) - print("Crypto address pattern tests passed.") - - # Test phone number pattern (basic) - phone_text_1 = "Call us at (123) 456-7890 for help." - phone_text_2 = "Our number is +44 20 7946 0958." - phone_text_3 = "Contact 1234567890." - no_phone_text = "No number here." - - assert PHONE_NUMBER_PATTERN.search(phone_text_1) - assert PHONE_NUMBER_PATTERN.search(phone_text_2) - assert PHONE_NUMBER_PATTERN.search(phone_text_3) - assert not PHONE_NUMBER_PATTERN.search(no_phone_text) - print("Phone number pattern tests passed (basic).") - - print("\nHeuristics module loaded and basic regex patterns tested.") diff --git a/scam_main.py b/scam_main.py deleted file mode 100644 index 2648967..0000000 --- a/scam_main.py +++ /dev/null @@ -1,101 +0,0 @@ -import argparse -import sys -from scam_detector.analyzer import analyze_text_for_scams - -def main(): - parser = argparse.ArgumentParser( - description="Text-based Scam Detection Tool. Analyzes input text for common scam indicators.", - epilog="Example: python scam_main.py --text \"Dear Customer, click http://suspicious.link/login to verify your account now!\"" - ) - - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument( - "-t", "--text", - help="Text content to analyze for scams." - ) - group.add_argument( - "-f", "--file", - help="Path to a plain text file to read content from." - ) - group.add_argument( - "--stdin", - action="store_true", - help="Read text content from standard input (e.g., via pipe)." - ) - - parser.add_argument( - "-v", "--verbose", - action="store_true", - help="Enable verbose output (shows detailed URL analysis if URLs are found)." - ) - - # Add a threshold argument for a simple alert - parser.add_argument( - "--threshold", - type=float, - default=5.0, # Default threshold, can be adjusted - help="Score threshold above which a 'High Risk' warning is displayed (default: 5.0)." - ) - - args = parser.parse_args() - - input_text = "" - if args.text: - input_text = args.text - elif args.file: - try: - with open(args.file, 'r', encoding='utf-8') as f: - input_text = f.read() - except FileNotFoundError: - print(f"Error: File not found at {args.file}") - sys.exit(1) - except Exception as e: - print(f"Error reading file {args.file}: {e}") - sys.exit(1) - elif args.stdin: - print("Reading from stdin. Press Ctrl+D (Linux/macOS) or Ctrl+Z then Enter (Windows) to end input.") - input_text = sys.stdin.read() - - if not input_text.strip(): - print("Error: No input text provided to analyze.") - sys.exit(1) - - print("\nAnalyzing text...") - results = analyze_text_for_scams(input_text) - - print("\n--- Scam Analysis Results ---") - print(f"Overall Scam Likelihood Score: {results['score']}") - - if results['score'] == 0.0 and not results['indicators_found']: - print("No specific scam indicators found in the text.") - elif results['score'] < args.threshold / 2 : # Example: low risk - print("Assessment: Low risk of being a scam based on heuristics.") - elif results['score'] < args.threshold: # Example: medium risk - print("Assessment: Medium risk. Some indicators suggest caution.") - else: # High risk - print(f"WARNING: High risk! Score exceeds threshold of {args.threshold}.") - print("This content has multiple indicators commonly found in scams.") - - if results['indicators_found']: - print("\nIndicators Found:") - for indicator in results['indicators_found']: - print(f" - {indicator}") - - if args.verbose and results['urls_analyzed']: - print("\nDetailed URL Analysis:") - for url_info in results['urls_analyzed']: - print(f" - URL: {url_info['url']}") - print(f" Suspicious: {url_info['is_suspicious']}") - if url_info['reasons']: - print(f" Reasons: {'; '.join(url_info['reasons'])}") - else: - print(f" Reasons: None") - elif results['urls_analyzed'] and not args.verbose: - print("\n(Run with --verbose to see detailed URL analysis if URLs were found)") - - - print("\nDisclaimer: This tool uses heuristic-based detection and is not foolproof.") - print("Always exercise caution and use your best judgment. Do not rely solely on this tool for security decisions.") - -if __name__ == "__main__": - main() diff --git a/social_media_analyzer/__init__.py b/social_media_analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/social_media_analyzer/__pycache__/__init__.cpython-312.pyc b/social_media_analyzer/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4155aeb3607a0d22e5a2f6982298879fd12564bd GIT binary patch literal 135 zcmX@j%ge<81TxolXMpI(AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdrKX=)P@rF&pPZSP z6Q7%!l9?Evn3tGSS(RF(A0MBYmst`YuUAlci^C>2KczG$)vkyYsErYbi$RQ!%#4hT IMa)1J0CdA1i2wiq literal 0 HcmV?d00001 diff --git a/social_media_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc b/social_media_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b14c4b1dad2eedd5499f0c77746084691bbd8d1 GIT binary patch literal 9463 zcmbVR-ESLLb{|rtL{XonBukbh=i0I@aV1fS?PO!E6IqsITk(e++1_mIU9cn0kQ|Ae zVP=L>IH0#Kus~HHiyHe9Ej9syeR7c(KlC9$|A7E~p(47XxqjH928z5XA3%#mANo6Y zX2_u}yB|z6zH{!m=bn4+`JR97=m-n=4gJeMrT=S05dMu`++T+?_~CaxLHJaV1j&~X z?g%MC^8e9?XA^%0{^*l}Qgg~LwMZd6TcvQykJ&cB+dpf$|6{L&d|;y1oKP159VggXH#hG$T2mZsLm1VNYlg4`m#{v}RCKc9g2ko1Q8zS?8M zz0WV5m(@Nm)E>KYciK3eQ*zttp_gO*L6A;=-t@oO>rRKtwWlc7w5?YpCjVRMpmeUN=h*@)2HO(QJ;6w~&rbUyU zMN`ELc2iBFON>Y$a{GeJHbm^JC7IYjCM~YZU^XM_DVeGH> zcWfk3(DF>u)KQbIsnB%XzCn7KDNHcg$jJ$X#>^HLq-od+?xJ}`9<#39Oo%v}Zm_Ia zV8q%@Rm>&G25`-WtV&E$3^A9JMID#I++*5&WyOqhN-mR_^-IsC*HDwN9h{SxRUIoZ z_GOg}OH@=`i6dZ__pGMMg_!letp;?bjz%#^+05%OGB9eGqG=n7yBQ~#Fb&&js0A}k zXQ*FJ%t~@m*G=qIlHQl+1{Uc;_m}he7v!82*T8noU1&_nI!jHNC3OX&=Sr3ovr48w zRw)~u*FuAvz4O$h}~f^=Ji!-V7TSXs<^n`=5oIwkj%aGo_2!YO|@RlC0CeYo@kV4H=GtRzn-)ZJ?Xg<_(!twi-%`33&xBz1r_6 zNZe77cuH1fT}fEWi{w6ER3K-5g1nEZL2TS-;#Flz2C=}@xs#e8u|i-XOUM~{MOVzi z%(oP^tn-)b9SFfO@8^^h-U^yZz64H`ZmdBCV$nd=uSnHq@+vvA+OZ)c#+h*?88662 zwNsK|8yUlWKl1zmb$EJs)VKjlO{A?~UnX})QZ{i%5*T(6pl5XHHX4P(eWWqN<`s!4 zCNt7-Foa+)QViI=L;(VxLZXwd$QeydL8NdNT2_XgGC5<&Gv79mOfE0mOf~MAfJ{Hi zD~UDoQW7Gmk}S9)14&0fo?9?t)n3O^;@~|_PCTy48`j-Q%aZOYUjTgml5!AtbLYJ{ridFlt zIL@skuIX_RM!62nu$EuouQ3Sm6%$T)wp9zlMq1$zV2HmkY#3HXNy4{_>Wnpd)iG*l zY+ix9Yse(m6}%&Srqo!q-%)AeD52$`1U84(&u5{A{MIQ6Vq=IDIAoO7OhAPnk>N@b zVrBwrlaoya2ZD(B?GPo~(Cr++(DHghHfF51=A6Wcr%;MP5vms4u(a+O4UZd%ReS2W zT0E;5$OfRlifyOl+L;T)>C6ooag&($q*J!DoT{f)6b%TcPWa_X;Ec6&Tg@BffVhbp zNV#k|!tzxKLVy=^^P4Q!1}E)A5Tle+%w!=)20zAAi72!or$OfAy|SVnM@Y^fqNS`4 z9M_W()l{Br5UR(o!A|Q&kq#;v$(XAJZ7f% zj)OoTNFpS-&el{CvbX|!sB>W$aNe=%agkyQ{09#yV4D&&QXWrWF|8%%5N-qlBcm4- ztsaW(brWE=egIpwq08Vgl@YU=j*NDS+~17#vqd0gk?|tz(W&a0le>i{dNFt)@?=oJ zjF^g5hjKddE+hcNC;4alRswQ?gt5l;#87Lqp2IkcDJLLDX$R3-K01N)7ikCQ z9quiGRA}+a7KO)cChH9+4>rj_Ts-)+WEO(asNG=i~j!; z70-U^6%RKp9zN>l40)ih>V{+y-b$7F4XeSTY6q+{> zVC0x}Ds7rMV|r?eM|LBY;)G)`#VI>ASSj$C8UBpt^~^ig5qAwzGPxjokZ1V(v02lU zk-O00z>o%kH8MQSti$`@*k{1xyNgL~i9AY@Yvh|V1F4KO!DqNsav4qJ=?OLg7S&UV zDI3$)Y=d~Fnal5sXJlixJ<}$rfMi`{3j9^n8hrU0>#)nHi{p`l)w3V4P^&5-cI-t8 z;+~4Tj2Dv~1eJPDC8GLDCUIeKTXy*5?#QmkaQ`aGHsmLHRhLnT!!5c_KW6>(2DwJI zfL;Z5g?b=3D{3C`6ybmeWAb#4?}Ojv>KBW}t|Jc(=1}aJ)$oSA;^=!dK(|>9Drzoo zMw^hJlhTq}RKvFF*kfBFG&aZ-8DoDrJq32B42|m2IBzk;MU<-rOV)V^h&w#Tk>Mit zXnHJHs1987O zUmq^@-QEmW`rmw{Ji1%zo81gQ4Rw^dPd%D`G*RlDDTmHK4MqOG>+ia@!sXBipBsPp z-otaH&bP~Jg=f|vZ9zq!YO`5(Zy&=3TI0Fb*gp(vR6*Z!>w`gGlCOTMD8O4!f++#Vge zZ~l7s4S}PER}`peH!u9ETM+K~JQSOr4-ub40zx5B2tg=tHOUu{dH*YM= zEnZxli_gygd~t3;KMY>0;e?n|O!OEmIqI*5Tqdf`_T~oFU5x3+2p9DbIE}kFly&jV z<2J574y1Mb8H;$_7yjJRRqh$zZW-s7e)`en-7}ZA&s^F$vsgO)-q*)VolE7=ji(1= z4?iv+e0%fa)Aj=oE<9L&kbW@vxSf61B=n!%3ksnL{Wz#4h!6J{!hql8f8zP6H*ouk zzEzre?)TE0s|P42#Mc9oC!_{E(3)PXHb1usUW6aO_kzWs)WSQ{cRh4#VgLV4x4GEt&^_UyTT2pNMfYBd zdBEkk=t)b|a2@XQ4=akexUe4s_7iFq>N^%gQkZNDw%1lqwO+ORE2Y(1Y?az!^=FE$ zML%eF5bbmL1KgJ4$zl`WodmaVC-<6)f&KdSQkRFbZj28&Tt+?*x_9amK2bc{)Z>AF z6!2ce_eoa+9|Ogv##FS} zBpq5C#8(gRzJqo8>JTsvzko61!Dy@REDhJ;9_W#Mp|91UMWN`!tpp_YS>UD+J!;(v zBlhuB&*@M!q=#r9QMQ&ri;Y~UXc!c$dG2NhN)##R#k=90$_uKNVw|Ye#r^Co526&n zt;6hwT|=@l^ed>8<*mXf&n92Lk@NIV@QXYIsWDko9)poin6v~bUe)JVhTDF56* z+l#joj5XvHXeyw6MqR}udN=6x;X?qDVbDbz$f2XH0B4=I$&8pzP7zX=*LdExAtHmJ zm@cYz)Ic^v`3DH^mny%sFExf<*p#Y8r&+ZLLu=Zh&of5KmeAKR+3n@4yqaq`EhQ{7 ztWLlhaRie?^^8ac$vDe-wn0RunNB{>YV$0M`t?b`s(v)?+cD5P>2Z?K{A^4oKUnoe zt?@;59gz!ZDrHPD=IkY^27Ry^1@EI&?vAS3Xv{jvFZ3dHgx!new~P88Rf3VvAOurk z(BaN<-mK*d4f3~Xqbq?{gp*hD4rBekoo)uYct|PHa-(t;CArgtLtiIm&EA3>>`n@) zR;>d<^=IkNS3Cpq3{y2HhhF&qi!{?G9#hs@+bsKZr6&=a*e0& zantN7dW=-uWR$d~L7kFNJKCFT{I?X!tk=Ex*I*j?tY=A#EqpBN2?a`Se=bu|-*{JC zrmuzdEn9cdSIIIx}ImMbPf35*7loAR~N2+SW|lB-(+tM zx^I0ro7oTxyy$XF41!~;c`3y|NK$)>H%HLdrv93yBW?D6mvBv#SD0Lhot%oA#BI)f zQj)DlN!@~6xYZu?jd-ID9UJ$iqisBu((zeX(1++TOpkGT9Ki!?(I3uLgZx7(Ppzx{ z7iX_5#BVGwU0l4n5MR7Lzc@F0Yiao=@-BP_QYFxQgVyuCJISvhPd9SZG&HJhriP3y z&NUxTWhXkJ2m$nQCn`u+~**o>P>)t^(0Hqaq=>iq1C zZ^Wr0YV`xZpSY&RIZNE`^z&GG4}Zo0{Ly{k$B+;Vm)eK6ez80J`u6baJHw|+L#Iop zub0A0<-iTf(A&>_b@!`lrSN;@z!jdc+Y_CCyYx3p53ZC0>{;O07jKsX6MydOzkj_F z>A8QUGB{KU^goOAmj?1Xk@ek3aXV7niTwIM1fgfa{~doK=kK}X2X0H`f&5L&;im(` zr6YHC2JY?-h}#3=&Ol;!K-nHpb_UiqJ1T7hTMOT`9ep}{tTb|YXLxaUcxiihX=nKT z&FfFwPk!<1$L()DYd=|TAN%*diOtqZXXI($;Fj_*w0-1_((tKAix-FI!LZ)x*lrK@jic)ROl<<#2`I=2cBdw>7e+uc)@oYRGb8V{a>Fg-IlknBunq5N~v^dN-6iQ zR=WG1b@!DH&Xv38_j-h`p1mgF;N*`3B<}F(a^MU^xp{4Cd~3KA9xVsP{eIpOr)BDs2b1j(*d27+9sq zJCEDnt#k}*&3@A{%mMR{+ZUeE%*As1C0-ZaDhF=!Ta?Rzp#^CK3Ek$@k z$>#Bj>bn-ToevSUFI|XfR>&qLGV8eO19&jom%=oQ`mRSiZL=PQlIw5LV}u^uz|Uhu z$5PSYh{tV5Kt`{hp~ssv+kw7Gy=vlaqS|f4?2Lkp^&bQ-A=~Ui=dzlV&&coU7eRoE z6yvve?lt**zCQ^Ez7GW=&EJOuUG`(IU1&9ZmCy%!!KMSvdtsq9g}1x4w~z4l9^QZ+ z3LI$u?(ESae@PhFyY3UZ#ww9y<8ig1)2QcfRXu-aGFX0_~gDH%*70Pxe`xFDhaJ0P=eNhs?~Gk2jn0+kQl;VaPQS9-uWk2hJN+M(0=-XK i`hS1zNoc$@aqg?^Zx6h(@pBwF%UoOPTwTHB=T*geEZE6WliskJDTq#9VJLM(TO)I|G1 z?}r$U*F}1Yb4m*oNDn>)xfS^n`X}@vG-#o2F9mw)O_47}PyNl3OG!yw^iqP{nR)Z( zy*I!2o43P1PE3po_+0+WA9nNS1mPci5Ps5w&dYj45Iz@FL5*m_w$K*T=$8>SrpDV* zbwnM-GtrKyV`>t;ar9E{2z1iWnRqd_&HI@zBim85S+p^CG z6^QU*E)3$NRr8Qfos7fYSJ8fNdmI+d!9wbl7G}P}PWb1Q8ndB>dj3UpJN-(%3!&cj z1iW5^*Nl2ey{x{bUcv8@`Z}JM@f6joFH+lCl2hNCJ!g zpvqF5rCYK>nx?r|R7{;^^x~QEXsa2>Cv;-l#Ap+%Kct53$Zbp3gW>pKSd?{^?(WKt zEq6M>%t%)*>V%Cubk7;MK;KXYcG7M0^vE{kj)LWVqg@jAr5GD;n`T=h!A#2OQpX8R zXw=vvDmD0&H0F!gtt5je=PLrkM{lfi8e8aC2La7A9XDZ9Cm@~ zI$X#O5oN`prXgD7iAyb_vIGR8VRl(sqdIknDmoM!o|F~EbPY$eUAsfzNo6yks7p0X zY!Xq?OdIlv7Byr|?8=rwjW)~JL~$+Z^h8-B7JR#gYr9b3p7*(2B@R*GiOo>GV_J?3 zHwJKQQAOs#v6N~WZ#mG@h4^nqm2UV$oJl4AysK+wPi{%FZdK(V~|A zEpguMs2s~P%Klim%pY@YVi_`EoY2fRHTaHYs@Z-@0p##ZWzA+YwyB^a4)HQvy@`y* zY};Vzrflp1vLFd86GGotVHwMGk^iWq+TIci?@Xl-EsXmsoGv zqApWJAqEjn4&6U-i5}>Y*rm4{~Exn(u>C@1K5$8LWxeqUWj6xgv4zX;L zv*nZlpq~bk;;y_;#4d6?j?r6#{w!j_F?C!maAtFRTnTKa259b&bS%mUOSUXR#5Qd? zY*Nu=I!KvCKugt6XeK{zJwIi43F+YI?-AP{Z@aPu@)2+%X;FKxz>Tmh0h2}DG)PZu zfm>`ET-Xy^mZ^)orcOj6+dXjC*auz#kIW9Iv1@?bEVWD9yEtBsYZ)xlCXQ%PWmm*$ zQ}+B6HIM-c#)GR_*9aoTk!!i&Gs_wj^@+-ch&^$QOt7hzscB{xz%dSL$O#b%Kum}? z6W~u_rYWm!A|=>_iYES{B7w zW7%CC9aFTaHLuI6toz9}TyqNA(m0#csbZP7+2Z9Qz=CCw9@XsP)D8p_sHhql4m11I za_y5+FrmvDQbn*WV8JdR&DepdcU1S%9i;5(MLYCyb3l5*3QLJ9aF;M|6VkkX7(WH)IQUuloTTm55&;;~N2K?eL zAa<@_q{Xl*8Q>!rIc*ci0b&6X;j#Md+`>J4cO9o=FL*7dBi*ad*XO-%w_7htn5oY_ zE-V(dZx!CT@x%Mq>hnMN@h=aejDMqG%et)Iz068Rq!;$ABs>ixt~X@K9QKID2^L8n=PBA{&Jb zoEr>nyqW^(K5v>XW3XvE76E|CBA8~tsmdPLn{&4x%Y{9?z!nRuKP{9u3){B~@9aE% z`$sqL+X z*@L?(b3#Y{D(>st21km6kNlG%epW%D+ z^GFJh3GV8Jof}9pPg8hB@LEl8cuDZWJA&80BB0?n1TVZGc>ViSd?!uL7M4ISc~)Qeh=ok(zDsu$ZD%M5j(p_uH(a6EvyUVLSTeKJ&j{K5PV$1Zpy zd%;_ec!P80_1|q?|4rr%YL+LfIyIzVVZDf@pf2EUnVz%f>6+J;J*DNfpLpQD$I0%2 z{vId4$4T$?DerOWO6D`;pm^hs8`zsW$*9X6Ta@zdDD+vEu7rIer8s;rdCOOO0awN$RI)z*GdYOIyt+wiAr8yk(4jg93-ZKLs^)TmXq zN`4NyrOMh;W2I6m)z-=@e!5m#T5VLfHa9maH9xz#_-MUUt~Dy9pKq0_HGihMRoz@$ zTHDyFHnu7c8$T;OdViy`3~wj$+Cz+&DvvhdXmNSDQmR({^P8(1+~X>66N z8xLWnR$Bg!#m&W9tyC%dV~qxGE=HrlUMmy|;wr&46E_w`Tqd}JQhs-_Ma)Q3e$*oV z)WgyWcDueark))&{xRX;btB8;-?&z=S4Wa9GopmQ=i`E8i0{>9TcHL6RC>nG~D z5VZk1RX@RBZd4=w_}WT&qf%O0td{%~2Qb`j_AG!1^(=y+V%5)|fihe_dushDyH$U3 zwX{`PtJaX@_e*OltF&jo?0IIKLNOa#&%;~( zIGyd!&I!5c!5kT`cX`XU$hp1)qeofjbkAI literal 0 HcmV?d00001 diff --git a/social_media_analyzer/__pycache__/main.cpython-312.pyc b/social_media_analyzer/__pycache__/main.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..838e2ea55a542ba4f5f102d2f4b1d0cf3c43fee3 GIT binary patch literal 4263 zcmai1U2GHC6~1Fn?6Kp-`Edww5_3bKV2B+82twGO8iJFs{4KECLbvh`Gn04{XU4rV zCV1pjJhW0OMQT>61Qm-mTlIm8m#x|dq`uHr)kfNv33f=BW~Eij3vXHAp-NwR?!*&2 zENrjjd+(fk&bjBDIdi__uj=aj2u8=_zofsaL+Ic1!5Oad=EZ5)+(IG}nGCv$M3?B! zFq5vUE{5(|*t@T~VPEqLbd?o7FxN;P(L1phA;P(kRCB+qh(UQK&qTRr^!eN~2IeSZ zwT$u;QYuT-Q8^=}L`jzfT_tc(rwRPzCP64Gu@^7QZuq@Of|6TEr>g8BGWW}SYTPsu z-P26fDO*6}RPL4G7sK6bd+@#If4WgemAT;f+x~Z<=aih5<1;|hZ-zE#Pn$x1|jU$RE3p^yWhLaEgalmSj zH%X~cN?Lct@cN!}Dh4A3c~llKO?+Ho!&4HdwRz)4yKP@3{z+MpwKTjcFz#HGwQ90N zR&>iR@L5^sGqNEOn!naI-u7Ec>Cz1yE+#2@iv|*0HpA;cn6v`)b*IWSD{VVWe6ZAZXl7(J&^UKuEf zjkc5@4hVMn(IFsE85y0j*kjzOnhpWh>xk4=iilHepc=Y#TFC4nR+oYbm92cDV^Exa z$4nz1s*>_l&UK%otAIqWQ<9)ubR+Z;ST)T+dD_%$mpLd#~5Cc z4JffO^q{j7eR`v_hhkVRc3TUaL!EYu-hx|j-Ed8Wp{&)vPSQRJD1a}DuYd|5VE{_e zB${TGx{42CRiX5u+1V|TsVlJG@0(a(?Om1DsS6K_50_)_$D;XDZ?4)$Cw8cj3UsqYW z&Efc7E4&3SN$c&-o#P$OuCf-q6)UE>s;IU)H!#&%y}iJRZDo)QS7Nb+r@bNx+#O|* zEbq!7>8rY_v*7zKNN)K8BNdHX=sVs(-;#ZE%{b5 zxUZlo<`t>>H%rYPa%7^|=sgQ%7LVo>08!P(Iw-eTPXqqgRTp zI>4leHx4#?5uM~QKSrU<*zK^$N;ao&a)5`Rl*dYL@|r~AMu-1LU@aEaE!Myz?Jq3m zT_e^P-{>g}3B=0Lvrjd73_34$6kn5QV?sOPxN+!UHG6fll7%;vri`H2?iF}lmnO5i z23^`~T|G!W^=}-}6fxR1um9lUnd+cnqZK+4$2NLsyD5qBXB63tJr{UQm+Uv;SVZ&V z)!1V~(TLDL7la}V3DAjR2dSqM#SQ{}EHawmI!BXpRWxY1wY+ArJQ=%gcMpUDjOCLc z))Ij(kI)aza??(y#(pc7J0mHU|3f~LlTHz$66nu#B4-J0S*-4IPHg_oxCZT}m{OHg zPLoK=)?l|uq#4+we#_@%mS!=s#Xy#Bqz4I6bwOJlbcU%7s#RMKSSm*{mbdH<*#^9X zR%eaIf%Z)sgU~u{v9wPjhhYmqh_nQR_a^$r8~n|oTm7H(FM8WoxrSN)3RiD8BGD4N z&+cYYCHCWgHKcBieB-ZQ<)Et>DsjWBq0Za#y~f9({l6NVNzD9=wl?q2otz`LyFTmr zZO@&!*%mdM_ALeWueni6&zgtx9bawP{%MQZb>z?5gJXaCxp^e{P%w9#F1Ec_Y&kQN zTnRSJB}>7cPlfsD9c3xlv(mn6{=<9x=T{fo56p2Z4J}U_TFh<7mKp}uT2XCqEdmp@ zry+JPy&O+0#uLT(Ni#P1Fk#jwOWcrc>)R#n9eZ=K#0{>o?O;Wu>-W3Oy(f!12aAzY zGv`-=k@>Di!S2;a_x$)5+n4(W7yAZ_eM$4ckU8|Rxr;AGt}RE>i;;9OGCo)Pq&c!0 ziY|xFE{4u7Oqf3rN+EI0gBrKJ1V6)O_)sZy7;If>#Pf+qjk{OF(EW`R!>5WF^(HU#rOz}ygiju;41pwVnPwj3B(bHmerUk)-`Pac;~ z*0uK_mHv7(fre|)-}-A|{`)|pV;G}MAFL%&~FZbK?Q93WwgoZoa3mpud zacKCcd*KKJv&E%SG)+^fsF&=bS&QrsYU5k6F_$JVWmEV`DV6#IA~c&fGMsyBQWbL< z=^b(mXtXWVVlaUd4D(Oq{~C3FjoP2HP0YUM`v#cfYd09g24;*$?zSf^_sb;2jSbuC F{s)9&Be?(o literal 0 HcmV?d00001 diff --git a/social_media_analyzer/__pycache__/scam_detector.cpython-312.pyc b/social_media_analyzer/__pycache__/scam_detector.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfb3a2adfd036115702c52d28c06dcf59b7c23b6 GIT binary patch literal 7178 zcmcIpTTC2jcCPAMHGRL)&|FMmu%R(DHoh|+YzEpGUx1~X@zU!_rRXZ4aNkr_jZLS} zlGQ}QT1&Mj}> zCcBX~ztmOr-_L)}`M-1Nf3({z7?h^3{$V~*k756UDq1mCKHmKmj$uzQ24nC97DI1c z0#E8<8e)c|F=kAfVx}Y!Ba-HrIaw2{Nm^o-q%~$u+G4h(J!VfjVveLU z=1jU`E|^=#7(c~hZpQQ}7V|)jfSMQHW_bJ9TBhc1Cx-EV&RAl89me`tKV$us?Mq#m zs#qPg)id@lfi_fdOszXu|1Yp-x&y6FwvKTHVALUf?&??=|a4`b+V1W7fp@7FHKAM5+1Do1~FqSh%4qS zpUBX>z*w8d}BRqY3d~!IV)J4K$kZ33fotz3+d)=^1cye@z8kr1-qoZRZ5v3s-9=b+FrY0uFC!T5YjTP2Qv?2BXpN%iwVvRON!nM{27S!eDEhLmlR?PA#fuzXhX%36FQtmTEDFa(bk?7anZdWxR*El+3O z)436N{!zhmzTmj9GV+FSu3amVeR*GB$?SaOc;G131vfhLb*GEwGuu|@8u8N)WkdZt zSO(iCek5e{duIZ{1K@!OL1@<`Y++Xs(42B%b_K%dkkAetqt}GS7`}ImasL=l9DD&$ zaiDxzsYkHyq38c)ub?S+Hhf>?>9{D6;=HCDWG5S%3w4tn?0q_!NwA@KI@u8Kvxx4c2u8q?LckgpN z@9x`jU&y;JY#$&`^u_m$AsBDk1jp9^wR;^5ZKXUo?3N@)1Mt#hZ#q=+?{!M z=LXE^*?<`*S4}0Wdv`qF1lz}VZ#GoWtyWb>g4QSBBO*4<|et#nm@s zWK0XS)w=e@5T9bAU#kscntZeqA|+$Y==nSNSkZ)KlVn=3YU@?s((cvdb&NfNi3khS zvVc_mK+&iGDFbcGghYTRbRKjbe^?_Cl710=Ie-fmiTDM^SfAa2Q7DIyg#_akpNGY&`;!&o`^lXCPN-ACrG2oDKwTE4yONn%xP6z{`-BBL`WdbsA|AzhN3MQYA=OZ&Ohc@-UvHZQ| z#p&NTq559}I>%2MoAI13*v=zMm^+F15l^Q?)zBlBrRXG!`~dI>s+<;Bevyl_WM@aN z6;OI6DjF%v2sUN;_xw#OyC>#7>iOde!-IgcX{YAVY`02Y?0eWUv|6<$gL5%G4je6*&(p{ z-W@aM8u-?P`A)17+gAIdiw`cY-F}+-)y%JxUnE~!FO|G?a^nY^BbyiHKv?#MU)jPt zMi>p=RMU}P_W!JZeRf0m_5CmI=g-GqM`jA=?uF&`= z;6as$#x3G#o;bRGu1Iv1oCC6X;63bpe9POF_jY~d?Or8HR?iw&u(qs^Y%IQNKdbJ4 z$2*&NzAF9oj>)vXk)C*Zs1?0?eOz5cF6|3 zaYzn$I-l)&)^K{`q5N@H`Sb7-I4Qy=wJbQauj*Tq0i5w>Mar&ax8zMRVFqe0{$%z$yci%giv-WOHl2DU^EVl`If#SVs8&_YipXi|tuPDF9+PlYJnLoy(W z>tNzTvF*)!C{8WYF&X~BR5%jN*(Z2bU{hdv;XEA*PANENO_vLD)#ve_O^{0fw}3}D%p`4) zCeW&~pI}j&)OeJ?gk~UTOsU_lD&^s3IF$SXue%={?Y&xV@Sq=>)?inS~j3b}JU`2LuH!Wz}{*!L*A zA^u7-g;Gxf`5g*f5rLn9IoHtS7GU}c_7)6meJRlN{OAil@4POXum9HFyak?zyRG0p zwsO5>bFPF-bphGnD>WT~u=u?h^R;g`oZ4>rV7sYjyS{I`@x)s@;Ww>}?Rc=3o~{f1I=`-KSsf`k{ZG4IIonH3kaOAU>d$xeZ=QakFLVuWbzRSQT`zRq zl!Lck-jQRpJUt`N#N`HNjVO5=(*O%R|6!qF;DK|M zSo4=!j&Ihzyqd3ztlHjKJtdpFSbKb{wku!TC7&325h>J;=53>^;gYRs%hr~+waLdX zZQkA-eG!uKJo({QBcWH$P{~DZxjOQ$jt!#dI$3hn z76V;dflxjWl6x<|@D>8$yes_JxMRgyui)=pn7OIqZCxr7Crh3_)wz-zy7JbpU9#6l zini|krwhLj3bi-#wi~NqaLzn+YuTstXDy|s?ouH1mN3{%JD9;}QvEK)Oi?IlNKv_8 zs>m4aC#Ta{o*YdHB0a|g?$Yr%TqTj5rv);b$_iPC93VNwEpp;gh@40O(6Hnp%X70! zq zIqT@IS?O1CAZH-S9y0fnYwS`tsor@q4dIhG4*^tyOWp0)&T)?pCE4CneW%ZyJ$L@X z#gid&Fd?M7A>*oY1grop|+{7t%;_MGb*jcV` z_|`;pHhS}Hw)O+X?r11SR$NwZ?lmD;@%DDBWn{ehXgJzrp|?v|m@d?}e` zvI%y8&j2kbwilY=xueH%{A;Y{pRv|&bq-wjEv7yG1AV4_V&n@P|?tf8heTmGS#-r$IcgPFRa$QG27P+Ytw5tWC!`$+zM#s zzX-u#Gpz&YebaJQK6gX@_>YS%v}|Zn_c~cL^r?Hv{u6R-w`_q0L+Wm_uY2Qm-gj2^ jo|7%-i-rr;?Rw