From fb32a4213e166b5a52f9f3030489601c69bdf4a7 Mon Sep 17 00:00:00 2001 From: rugantio Date: Wed, 30 Jan 2019 20:34:25 +0100 Subject: [PATCH] added experimental support for languages en, es, fr, it, pt --- fbcrawl/__pycache__/items.cpython-37.pyc | Bin 3158 -> 3219 bytes fbcrawl/__pycache__/settings.cpython-37.pyc | Bin 596 -> 596 bytes fbcrawl/items.py | 188 +++++++++--------- .../__pycache__/fbcrawl.cpython-37.pyc | Bin 5502 -> 6077 bytes fbcrawl/spiders/fbcrawl.py | 63 +++--- 5 files changed, 134 insertions(+), 117 deletions(-) diff --git a/fbcrawl/__pycache__/items.cpython-37.pyc b/fbcrawl/__pycache__/items.cpython-37.pyc index c3639e6839578e8368cfc68f331734ab61ed88ba..ccfb6f523c50bf6e5726f2609780b55645994159 100644 GIT binary patch delta 1386 zcmah}&1(}u6rb7e%~u=yVWUlHMGLX&r8F(r53GU+Do8*u6w)-at!tZ2ogKA@SpvqB zP)uF$)I)OhXz}8~i&t-ghdoKbKf;@DHZk26S{LT+o5%b8-kaaP*`FhRCbgVQ#t=O7 ze;%zo7}`sHo|42Co&>BUtlO|$CL_c#!5QR9Tq4q@)RgJU-mhVUq)6X>!jr+TgWnIn zLefMg3Q>thbYhTm`?{xc4@rlc`*5V3OXH=tpwCfVz7b_J>T1;#plMas!V3T(Y2 z9)RUDz@l&~3Z|{o3WxE|C)AxUfDzLZHcN#Tz1GDfz`h=SyW{1LQS2O1gp$hiF+@O- zo}>`z4rC05G{%KuzzQnFd7Q^K5sZDFn>@zjBWMnJrZX1C6^Iva@i;MOMQSub;i!O| z&@zeZ+~8(I7I#6+d;#SyJ`9wtva=CLHaobfyvEun+&DVwe^Zz7oUds$ob_L76Z46p zX*)J6kutM_)K;xbELy769A@n>sspB|R4vyon!=XKYiqQqZP+#H*fi&V)v`CPhrB&A z4qg!3E>qVEImLx{n_v(&a7^;E`ei?-uNs=9-~^`A{yV)e)#(j#=s1`lRjF00mc!hV z%c#B0Am)ZY5KQw2Nuv*A)6bfh9!?6xRe@0Hy6q5pO$4%Fax$HPS5aT5w&f5vz!p7G zq>ozV3bSiYx5_Jj*Sw-G0IIT=9`e7Nzwwg)DfVP|T9D?1f!`Ghlu((xKM^0r3;tq! zDRV>|dgCH>5zJfvLwv>%UM@lJ`aj~c_d~9{NDzermmYf9{td;87q2273WA5+y{dnK_$FD~tOvzFUS{5V-}k+Fc}e7cDHZ0rx^e`s z!uxwG(6d{3J4odgFX`^W`zGGOib@DjPA{HWh!XO{8onP0D&RNUJT~C#7Y94a$wQ zRLO?Yx9?3;GCQ~G=-9g5toEUlln8VdnvY#U=6_w9pHryFzo=76L4F2 zCyQk}9oUR;;!j_p?!QBfoSu48EtAbSu3b(-8kTQcWaK+2w%QcG`=_#b^k7GhJBpt6 zK<02rEup3e?1C+9VQrA6dY26gQ&_?-k?SNh{h=hFHC6104d#?g&BirBaB3By3qzO< zRel9>+Ly?vyLk9Odrpmh{K8m&^i`ju*P;=lMhBujPoM4(@~3Xh<|`zBDxIP&N^LGMC0v zc?_@wQ&_82tDet;)qu0g=BC6c5=YtO;T@|;sTnQX7Z!&lVgyaBv-OG(Y)m=^(2T0= zGCr2gbyo3w2x987KSYYmE3ffN&2Os&(MS6tosNFkKj}>LJ$JuvLbAydGyL`7vAMU! z*<|!Ee-6{0=4ag##$+XHXE)HiiazJBT5>h&u=%LlnHo#DCS}4jnpnHdy=s!1DJvz3 Mo;ZU#SgO_Y3y8lqng9R* diff --git a/fbcrawl/__pycache__/settings.cpython-37.pyc b/fbcrawl/__pycache__/settings.cpython-37.pyc index 7b51b9d7371fcbfc0dd2f68af5f4851ed21d919e..32cc9a47b77f45119999f5ce391d9b0d597ee2db 100644 GIT binary patch delta 19 Zcmcb@a)pJ1fKu^ delta 19 Zcmcb@a)pJ= 0: - pass - - #22h (yesterday) - elif date[1] == 'h' and int(str(datetime.now().time()).split(sep=':')[0]) - int(date[0]) < 0: - day = int(str(datetime.now().date()-timedelta(1)).split(sep='-')[2]) - - #yesterday - elif date[0].isdigit() == False and date[1].isdigit() == False: - day = int(str(datetime.now().date()-timedelta(1)).split(sep='-')[2]) + giorni = { + 'domenica':0, + 'lunedì':1, + 'martedì':2, + 'mercoledì':3, + 'giovedì':4, + 'venerdì':5, + 'sabato':6 + } + date = init_date + date = date[0].split() + year, month, day = [int(i) for i in str(datetime.now().date()).split(sep='-')] #default is today - #day with 3 month length of this year - elif len(date[1]) == 3 and not(date[2].isdigit()): - day = int(date[0]) - month = mesi_abbr[date[1]] - - elif len(date[1]) > 3 and not(date[2].isdigit()): - day = int(date[0]) - month = mesi[date[1]] - - elif len(date[1]) == 3 and date[2].isdigit(): - day = int(date[0]) - month = mesi_abbr[date[1]] - year = int(date[2]) - - #usual dates, with regular length month - elif date[0].isdigit() and date[2].isdigit(): - day = int(date[0]) - month = mesi[date[1]] - year = int(date[2]) - - #dates with weekdays (this function assumes that the month is the same) - elif date[0].isdigit() == False and date[1].isdigit() == False: - today = datetime.now().weekday() #today as a weekday - weekday = giorni[date[0]] #day to be match as number weekday - #weekday is chronologically always lower than day - if weekday < today: - day -= today - weekday - elif weekday > today: - weekday += 7 - day -= today - weekday + #sanity check + if len(date) == 0: + return 'Error: no data' + + #yesterday + elif len(date) == 1: + day = int(str(datetime.now().date()-timedelta(1)).split(sep='-')[2]) + + #4h + elif len(date) == 2 and int(str(datetime.now().time()).split(sep=':')[0]) - int(date[0]) >= 0: + pass + + #22h (yesterday) + elif date[1] == 'h' and int(str(datetime.now().time()).split(sep=':')[0]) - int(date[0]) < 0: + day = int(str(datetime.now().date()-timedelta(1)).split(sep='-')[2]) + + #yesterday + elif date[0].isdigit() == False and date[1].isdigit() == False: + day = int(str(datetime.now().date()-timedelta(1)).split(sep='-')[2]) + + #day with 3 month length of this year + elif len(date[1]) == 3 and not(date[2].isdigit()): + day = int(date[0]) + month = mesi_abbr[date[1]] + + elif len(date[1]) > 3 and not(date[2].isdigit()): + day = int(date[0]) + month = mesi[date[1]] + + elif len(date[1]) == 3 and date[2].isdigit(): + day = int(date[0]) + month = mesi_abbr[date[1]] + year = int(date[2]) + + #usual dates, with regular length month + elif date[0].isdigit() and date[2].isdigit(): + day = int(date[0]) + month = mesi[date[1]] + year = int(date[2]) + + #dates with weekdays (this function assumes that the month is the same) + elif date[0].isdigit() == False and date[1].isdigit() == False: + today = datetime.now().weekday() #today as a weekday + weekday = giorni[date[0]] #day to be match as number weekday + #weekday is chronologically always lower than day + if weekday < today: + day -= today - weekday + elif weekday > today: + weekday += 7 + day -= today - weekday + else: + #date item parser fail. datetime format unknown, check xpath selector or change the language of the interface' + return init_date else: - #date item parser fail. datetime format unknown, check xpath selector or change the language of the interface' return init_date date = datetime(year,month,day) return date.date() diff --git a/fbcrawl/spiders/__pycache__/fbcrawl.cpython-37.pyc b/fbcrawl/spiders/__pycache__/fbcrawl.cpython-37.pyc index c6fbfaba4cfe0f304ec0dd9b1ff8497612259c7d..9f0911a6079e7bbf31ea41ab57031195661d9ea0 100644 GIT binary patch delta 2790 zcmb_eO>7%Q6yDiCZ~Pa>`A^$)oBqXBNZOF3KWb@9TM$)6DHRn$rDp3{JL@>RW_Hu2 z=|(IpG;(){f1cK8Fsho;(>wyah0q>0+CnUEDTl414ytnV0 z_ukB#`Fr1&6X_RIskj8cu0Jna{%z;I^e05S_wfE+9p~T|gQh`CK=XeA5=a3FrCV}Y zE=r-!Z;}(WQ1e+Rl{@Njsx%0ZTD*!p)j}zsaIb_K)#haXBk1*3A8YkStM8bT*{5Ms zf4pTQUQdQvgKV_gk|qQ~mvm90$r7w8pQ6C$Qv6$TwKjgva!U(V$u_MZV9wQrz&2ek zFssbF=1`lCnU?P_%sXz$9I@SzrQ@d-7_;4=a>Mi&s#T8#HZ?0P8@_2V`-o5+1L}_e zEi$A|fV3KnM6&Hd^CP*)BH6hTL~=D)+K#I@a&uWg7d9g-&77JA4coloEilt@1Dh4C zf-Qi}BJ<`=9A5RjIn#2f8L%6sRk9qH?~{i~n!h5SIipv+QpskK>bONOQgO+VUi8?! z6-04Auvm~^U==?Xi&WpP6j>Is$hwjAAju%<=by`OPNjjx=8poANLG%^28qi$(Pb5T zph@@}WRw_kR`4p{r-UTSA1Eh}wGBt{s>OUeKkLogIhhTCvB+SyU-ewyX4_!cB*Q4s zgzgyMtxm^rr=5c44|A&CuYD01<&KchBuzPx-g2DA?i!Q!XT(cVv)A zz zy2$TnKP7hHYDa)9?cwjI22#Ft&3=yB*PMdAH^)EJNBEDrIZB!;gR?h~LV<-0RM8ms~U>YK|=(seBL8i){tYlDMK0ndl@A*@N848Ipmlr3rtW?&lYb zq3kY2k`^l>pm4}eOK)G{%f|Kzg~Bn#u!J&%T5piL+K`rI(9=Sh#s(z_pc?4qSg1nC z4E~+5rG^?>&=G342!*44S7zUb1X7k5gwqK}`!5VUP0bO(CF?0nAs}EfJ6i~F_{%8C z)YDKX1WrpwTOBX!0#^skPMV-y4x!mQ$|b2rq`S(yW*f2_mOvQRB>~#v(Rk1Q0GnKe zZKKwjp7sjZCjz!_jsPt32;1L=9e8GMyVqddCv8^;G1@iumH-_BP+9N@x}^=e^_f9? z*MYJ&D5h9proh|=n6>5rL5JI*+n*V9a2@FDV&Ap)!?I4{7?A;!-KbhzzCVwMQCS&}o!~{9YpC-=BZop3_r*^1tYMjS#+b#7??msYPGTSQ5;w4e6 zSS9nAIn8gU4)d>*Th3#Rf}(~cfgK030!OI%)%2#XrdDJSI=`;ko!O-Eys*1J)!UD_ z2RF$lg?u@c5%NQux4k9^(;#PgGdVUZo z726e#G;e}-;b8-)ei%qDCQ3Nl&CjMM3jZzNLg6wf`Imr5P+`(|FQ{qgw+cESDo!7c z;g8j4BR{lmUhw>f^f#%ah^#Q|*zQN4b>1emcFc3}HD8-+e+kt@5QMA3={0{loA(hW zHE`e(%Bd}n@T;>(UPH3MwU=S;186>8krL_S54&CjY5N#fAv^JlH^4Bc2dPBM-!J7#mF_~9|>+J}@3fMU${Xk-oR&nNR zpFhp+dQ{I&5-YYbAgC6_3Ld;D76XRii8?BtMe|Nz&x6(WmD2QogX7fK6yBL#uh&1j{!8qbBw#2~Fo?K7gGyW4gchMf1)`~-f>7gl)`@@Ob!RsS zf>T#QoM;eeu2qS9pz5U;da9^;t9t3NQsql?=q2i**Y-mD-q=pus2o^p-oBal=FRus zoA=h==bjz3PMKy&MhJ#`1&F73q^aDY6(xckK4zQ?Vno>2)j2@5l`46amy%wmx>ZB}WL zb%jOSg#X&r=*ePhD#(;dXg$p0ZNd^&mH&lvHpGm(G!3$R2=^mmI_6C-f^mvMQ16u`42psW8$81fTYE@#`K|Gs5*)S6W7s7+YQOq2qikvh#`54 zMoE-}i3KZEd=)qv|D7tz&ALnoX_I%9YuCjefjvhw=4+CQ55wA|kSx7T*7dfsNem_BPg z+Sh6W1~?I~N2GV6ub1Q~X!*M5_H_LH`Km5)=q*rCnqKx+bp}OT30HOS|EF%|x>4)H9N0NB zC};j{iR_A_=T{EPlD=Am(kzIWIY;)3x|t)##TVuUTC~NV<|`@ewB_IYP>O+4t+0?e znYq2XGLsXrcuIU^y*93aykD^=fZv3gP|6#)VM}X|$V7(e!1Qh;Ixua$I+4AWn_lF0 zDI4clf*g%-G~Edl7|Y*m6uiYQD;p2rkMap5lSqyrISQoTeH)JJb<~<&=ZJ~pr-J8@ zyo^eE$>B={j~|0c*3bJr;c_&0c}Gt`jY}}xvp^JR02T>Ti(2p=(J+2{~rHn{C`7xP=FCW9eel+Jej6sw8Ufo>l|ozqCp zA=v?b2G;J1r-^q-O&pA!eVwkWP~hQSUs9RLtRAnm!AqjZdl5M=5pDrR!)Km+K6IU2 z^!*>kPN3!#SibDpOR#4LtR0Bn17iFl;;>+ZT7JhvBI1<_2 zF%uj;;%KB}=JUl`!F7vvt(MPkd!8jcflBy!<@k@mfFX}FRE=oVN*IQ5*vJ_nDa{7t f-RYQakrx`bCeam}qnqMNYAU)7Uvk6ZQEK