From 5f60d4edfcf3fc4698bce91161caa9b8bbe905c6 Mon Sep 17 00:00:00 2001 From: san089 Date: Thu, 20 Feb 2020 03:07:20 -0500 Subject: [PATCH] Completing project --- Utility/bootstrap_script.txt | 2 ++ docs/images/goodreads_dag.PNG | Bin 0 -> 30255 bytes goodreadsfaker/generate_fake_data.py | 35 +++++++++++++++------------ src/goodreads_transform.py | 4 +-- 4 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 docs/images/goodreads_dag.PNG diff --git a/Utility/bootstrap_script.txt b/Utility/bootstrap_script.txt index b17f525..6c77b0a 100644 --- a/Utility/bootstrap_script.txt +++ b/Utility/bootstrap_script.txt @@ -46,6 +46,8 @@ sudo pip install psycopg2 or try sudo pip-3.6 install psycopg2 +ssh hadoop@ec2-3-235-6-13.compute-1.amazonaws.com -i EMR_KEY_PAIR.pem "cd /home/hadoop/goodreads_etl_pipeline/src;–export PYSPARK_DRIVER_PYTHON=python3;export PYSPARK_PYTHON=python3;spark-submit --master yarn goodreads_driver.py;" + diff --git a/docs/images/goodreads_dag.PNG b/docs/images/goodreads_dag.PNG new file mode 100644 index 0000000000000000000000000000000000000000..50353e4ab1ff67b61d23caf1e9ae05631382faa8 GIT binary patch literal 30255 zcmeFYcTiJX+c%8IBNn76B1%2zQH~T5l_G&)Z7^)CRfRF?dlDx5;`@YZf%sbzF-@Jc3^Ua$H!|d#9?X}j`)>VEx z>5}z1=`BiIBqSuHFPuMpMM7dDR6=51A3a zosf{IN|Rc1-ypu;9B|$#NJ3)gzVCnQE?oI@K|&&=<-+L`w&8B`0_jRmPj|g=b%Ue} zkZ!)c)o2OYn~}@4F7#gN`2?^#VrLc$ zE?4Tey_9@>U;TIUtn=T7?x5~{*!jECBgq{bvU{hxAaZi+79$8XoU2jDw43mpAPz8U z9u@;4AbB}#?2sGX$;V7oGvv1U!Rx>N`}g9_&dzx%Nm0Co@1Ji885!u@ZH|XTe2XnO zHG84QOo5r6+sVoL47w7bfh<`(zy2p>vJUYvgr11@?2EY7E7~S#+paaF=dd|=JoLPu z`cD$a8y%^^&IXOTu{~gz(YtNs4d`0n>>K>bS*Q%W zHKSnh(lHa`7V(m_Dahkd_6YFjl$9T!sLr5-@|ctbj*;8tmHE6(T}+_an27fE45}sHd&=FW^-8g$tr%LR}gzray^> zi|#r=yGiBs46WEe+o*9FVX!|&{cDnGo~v6X*EW+c-f@;bC7bM4d*^{CGNB$i$~ys> zyiFe9{9NgsGtXvM{3*fLR}wP?7cn9Sp&b<_;(+6$VkW_f0GU<)=59S6v)NpvF0#Q) z^ob0jCniPujjM0>HY3+MptR{ig>QL6g8O6_Auc0G1RRCelmg?G!3HkU0X84z{dFHr zRdey=r6ls$VCPAfpId!qSM4zc77{yXC15_H87rP}dL6@<51$0b`?#%{C+rpVnr{+~ z#o;#fMv)qx^s8!1Vnop%j&)N*l}j{GK_YQ(Ne<210onV$d;RCV&yMf?IB%YkT^<&# zcGN?{Etk(~2dBHJO)1FVfv7 zXPqN87{}B>vS?!KLN!Ng@^Fjp$=(}()W3ZYQWB48)y{~={E}1tjrIpCrx_KQYrQk? zL*tn0Oa9I3s)E^t#+IY$!ZDcpWTzQc0=-O_E=nAo<7q$^wI)|eXop+;5?<dQ zGJ+y#4f`S8S>X1cl9(G_5UsIn@a=HAd0Yd05@#)mI^~JS&49qlF*Ls-%_35!`Q0cx zv-g5FN@XDD+!wrms9q8^k+ccQM_#FjLloZ$zHCd#S08IDg8pm0bvN4MR=g*1=5vpN z9RbZ|H~}sJog1*Wpcx1n-Dxkp9p<=tcHjGyP^yZ3&)TKPobH1OnO$mQXKUq*Y!Way zjh63TN&MEK|EJzPDJ_kt?M zN)&D57{T30$M^SdfhJ3!mBtEBem-MUVKE=|kF{+RP9m^-qgNDgSzCQ=)Yb+?2iGQ6 z91=7^#)sD?owY#|c}wEm&M06B`-@JJk#36i6gHuJHGTKav9?N^U*eWX)+It8DmJn8 z@S5M*2TNSb4j|z8MjZ>NB|KcV*0CtQQ%xH`nr2>W#89@ru$827 z(naIoPqpaMCPIG`jkO$UE)^~@n|&y3F0I)Cx1TIv3aK-4+yc?5a7H(>^A!0TQ9eem zanWC7Vk%NLD#DY#q-x_k0X^Q`+{_A$=ES6CSRyWh&~R1YK;aZr(mTpT>0xo@cE94Dm{h&o;^crIC)?DZwW>01EdChiE3t<*BPySQbcscEr4B?!i`zO z@_4aXU<)`yt{Fl%#ELxjY`l-dJ~GO zMU1tnRkpCs)!kukb)czAgP{jwJ@$PsCj(r-E)<)g<8Z5E?Cd{Ux)UN6lkuGzoAoTD z)je7F1>orL*I-&@88|3XucOUE8c>|F37Q7?U>=w&90aDQ7S&8?VrmwzbjkTyNt+js z6)#6$)m(m76@3!RV{ImikUcTxuS^*Gjmic0*rcra1}I4#G2dG(w4)OJdrPFi4%^%ra z10$940-v@rkl$7LckU@UVIIz9p~Ngdmu|yPml0=NDkf06Q`?Hn%BAwC^zbDQF!+&E5^13#M5)N8xc*yt_N`!VZAe z=+HG+4Mux&MSD$%dZr$eqRm1$^x(9}0qtn~xnBQ=<_biiD8c>iB~!z8yRR($VBAV13hW!a z59_x0=BTgw1Az$^NUU~!2cU9{qG@}RgwoTlFXp?@32Awm-;8?dQ)64ny$%PP3bASD zqV!~u&n+G`yd+fxuT4jWtacMQK59X9qES2V^vXBNTr?C~0(O5z|9+C)Nib?Wn6r}N zojo*2Yw?a6Ql@00Gne`89AOOq+}34b6J7txk)*_LNjF>LmFa*e&{x=&6rX z`bG>_=r6JhpOQ}Gt|zNw*>4vBBy%I-0kS`3r+v@LaSuvSBDjG;GgT%B4eqABVVY+P zmMw>|;{$C0-Gl>*I4VvvC(K_(Q@htxO)b2hJO0vCrN<&3<4l#05lKl?g1FY&uFEhAp4iQ3A( z)E5{<;|@Pi@bFl9l@VBWe0vH%;_XvA9fWpUGRqujuL6zqJhM}~uEq3v{@RPoo8wFw z&1a+I;8#1k9HgM7p!<{N?UI5yZeUjT7F4}z=i~+ZEU5C03^Kvy?xoMnDc{uu`>(8Y zieA$rhh?MaRe3CR9QB~f4U*O=?4tq&;<@SB0=LnPT8ML?HJxM{tM^sTS&zoIhG}Cp zr5wFh!5@GQ*DRjLo!8W1sYel7LOI$Of4A)1KWbq6YI6n!i#bU9d({}T8o!#_YF*a6 ztC^P;2~yjBf;z^Sj=1kUv|d3o=18gN`XlF2=9uuw2&CW@|3-eQQsmv5_auF8&%%SQ zYSw!L0{h@5^M1$P!!2e-)wx-u>jQ?V{D_bhq3|jsJN$08bbp!l>odq7A+NXfX3ZEu zs&KtU6-m1((it;86 zQJ~vcme1~*n5uFe_`+m;!1Zc49>ZQG4-(%&lugNj7krbkCOMAQ+Ws}26XU5b!s>Lr zyMW=CGOE2J>XZSxTYPRnFyqe_KFccjxlFe_-0M7K@qpIl$rQO6UA|A6&5K;vlP%&$ zvDAFgZ;eENCSjegYbn0>Qng2C2dSCW+pxd!t2#nTZzZvLUvE5uxY~d9QPjseLIOY} zfM+LSWO2*5UNAysJ(+JU%num2DFmvH7`mbw%tZ1B85>^q&dV}WaLlzBW=C&BXd<{o z8t`XJ%4G4m<{mSuOao)ZZ6(+ckt^GBjYABeqy0A{Mp*2H5&Qd5-o|qXW-oYDCC_A6 z=xB~KH+N&R(6kq}pBy>3VKu-Rrwzx8;ZgC9;3ox z^S4K13GhRL3IE#=4T!C+WQIhoFjH^w_Yo2EPW%V^1;}{ zfU!Yh_?cU~;7!X?;BM_yz>=nWa@3$IrI0q^tLxVJCG&5n(IeGOHh; zRm_2Q_LA${aa9hRGy4T!Cb9ERBY&G)k)6^^6dvW#EUI87f-4{JDh1qRN=FU&cDrAO zF8^Fj?>=x^_>&$Kh>m=;B<~V>y@!*i+_YSE{py*oK`aNpW({RCePnJT6IKx+0k+Ox zap{tG&T01?jM&QWdh2VFfxA-wPU-8858W@p-fJ=|eHFfd7Qe$2CySvjl!cVhSlBW1 zifVb`87SglLK1(ZG`VPBkdr9r!R?AEL%7@YV{_$h=try9ep{iHDu{hyV;oHh-`R9nIsKP>ep+Y^XUwTZY+++)K1=7&YsD(l7FIVLt)h}G}g!_WE_dv zWp52Hn;td)RBxu1$EB&qPlJ~&!#^m-D1AR7;?3c7qDG84!v@JuSx7e*VFQR!psk5> zYeft9{K+{oe%Zp={P|UIbd;I|FoZxzhBP!+sC%l$1ct3-hh-oQQxoDbX^HdddWkc{ z$f%t6Y0VWDU62$x?q~x(57Bah6JJh}952ravKp2LPp=Jd@fQ|Ddcbv>vXTCRHZYrq zSm<312buAGQPJE4?oNORIiCdDZmt#z(JQR6uh{|x3nv47P|*!S)R*~K{cx;ijv~D0 z6a_|W?391!?0+9L6BBW0?Gkv_tb>H`-x?l83y2zSb~#1fb6of2!?ligo#3=(p^|4n zAEC(RXbD%ew4W}@DBELcCt0VV&^ja^`O5wB<%yJ)h?jQDdTyh~ZoOmL-4PIarc%}j z%ZW%y^dN#nvkM_6-<~49+N7qP;oan_MS4}T6LK!%P|!)UM+e@;Ca!j(L0f0|pw=_` z4@=fIT&XITR9HQL)vVSgX^OPpto%N+NJJ&`kBd%I0mQ+)gMv?tvA6-#SGFKus2Z_) zkkcCen%S+3n${LW!ZX7k;v6~ zbF7;%!~HryXF2rY5^jsm?DT-X6T_xkV1Sfw^BaFH1SMb=xtEbPZp)G1)%n_Q5hTmc zs3-7vN3(Y>$3#9EsHcUmRjY5IXKHido5l+a5C4 z>qs?VZq{b5T%5Y*cq}3gt^P4~bQ-qSP7^^F^J$%XWu(&@*@LtdQ1sC?GQ+T=#ei$sjVWT_jg^QL2gs`dkC$&4EZ{I996ojF&tO{4(s)Z$&v z@TYvlJ(EK><;1RX?zLatkc)VDB?J5_d|$BnpO}a+%)6Z*eZPXs%1$=HO1egkHrZVz zX^DK^wsDH{Oj&+*aqnAg(3xH&6!%@2-lMqNlfAa2aV-+4T9tH&ekMBJApZny( z26!!>#y{=4x^wIN zN5h7W1x7|RtUmg~sIH>UxZIXceAIg&rVtNgraYS)<}xY+zf9vhJIvp1MCCUMj;|28 z^D;Q0xaNhUu-UtG%Sge0I2B+W7PdowidA% z%xGG=Fc$LM>3&oUk~-PS)Dv81c96NUzY*8?{^~fOudZ8cP9nzW2Eqxh)EY*!zqoq| zf-U~yK~25*-Z_n^qUQlUTr6~vT3=$WI4(VvUecFf7o*U9tq@sy_fw_dwc>d;qRI2a9)CrKO=;jg>ai6g=~Kmms<|PG(MH6X&*hPK?Q-3&k93Ted#~fNmN$55zzZza_C8+L z2O6Jk8(6mrUQ!^vUNbF)Kks{YpQI1hJ}F4kUcwcMOH^)`rueqw%}5*~h18ZLK%&x8 zWuf`4r9T4Fw!hZ*%%DLj z&8-}FteY96-3(oG52xjF6A^VVxMQ7=Q@NCao%xEIU9T9Ikxm(PObCky^Ssv%4_%&o zQ5n@<+`4dN6I2h`R8beuz1|5Y2f?Dad?bv1oZ}ol%NYouV2Nq6OljOQ%A&cay&A#9 z9xPky=dv6QGV4n8b8Da5-RD<#HGc*8f)!AV4oQ_O5V1~O7%_c5%5C)(rre&4WR;i~ zXRlr%AIDySm7qN<#MMDT(Q5i-&olPrhh{gzU?#0LPSBO5j7>?GZe2Tn=5wEKOD|mg z>Z+Hy(aG58-2{cPolF$czHhSskwmRj@~XV09| z&OYMeQpM_12t`soD8(2V>b&wcbYx;J|8VSk|w1`#9 z996M`oFlpW^%u*=XW#_hj&=JU!Fj$8;H(ruFPY}r*Q}<1L%==*39$WsY zPp}85TF*t0NV-gLlVn{rELC)k`MbSv^0MhGxL~QwIpVi8&DW!}d@!bbX&{94=cTyQ ztI_f-$JPt^@DqLPuU^B7UlsY6^mb{ZTF*>g0P>?LV6&bnIkB?6L&!=)sS8b%R_7+_ zGvsOEwa_Y?WxXb316n;pkgWM8f9>^A;o!A0?HX7OZ?tl;0ig}bm971YRJ9Tt%h?b( z+aZ{146r7Ld5FAWW)NjXKGMD7Dj^=#2*V)Rr;?6GF78Q*sPkccQ1;D79JnM>p?$JIoztTw`2XKl_eO5CMk zRII1P^JC;<82q$2MX#8H{M8MJ#y_*|Sy2H}>O6bl#jyoEGoDb$^KWS4!rswn3&paz zc_G=??>k%b#R>ONI%T^(WjVESsZAc=36421my|XL+ zCmGoN$COb^Ba`168>)MpW;T3CVYZmP*%a=q0c|}SkC~)<^B-)f24Ap~8LozNqI`eX zLsUONw$58=+W+O*XK$@%e;vBw4zH_1$)R(4uhm6noD-dXlqlK173xu-1ZHL1t+b*# z`K=*-f(>1A({PQP;mPBLhrDrb{i!_EtRJG3Pb%W_<~zX`C*dFD(N4Rsv#p?v0knJ3UbL6xNUY|31bCKVV9NoM)aOBG_8 zL^R?7{}PHhMS6W1oJMS0-QZWtNNGO-SDzo;8^7WtyMOgIpOmG0 zGsNj!Ps4M9XA0+ijA++AuTlOhzZzZBL9bA&{88w@&C56TXRMwp=kzpfBU&y#x*!Om z>rxl|%+`0Le!hU%q|MyL+QB%Gw)`8JlsdNo^SSoi1c)}J0hwBD5b888^hD85KxTRp zo23q_4E`m|Y?Vi9kR$rzlZ0lPFcC5OxxlEs$hL6Tn3uTJ@_5bQBdz0Hb44j~GopTx zQ7L*cEUt;uLl85vij$zxsmWIXZbWSONuy@~f0ZVg(&`F1bodjL-R#}@2>l}1*pvpS zgYz5lszkQ=W71#Ui`vM`JzZuEl7cISds@1+-_E`C`R1^_KxpG`mg)AT^dZmvG!DLP zcrwG{SajLKgU{Q1^PIr02f8z)luv2dDB2;#gKEKMPDptr&EL+FbcLq?1)|#6DhX(x1`F@1-2y1 z1e9QCB=Fq32F>!Y7zO!qa8UiJ!a<`*SORMPOhmRFj3-XlhIiJYYC^O9f?&?1`p*eY z%0|WdqFcAJov7RJw0=^F}G`|HKkh3ECaTI43b zY@Dnwb}w^V=)6vY0v;Ib;faxrr{ZR#XhWJ^cMO%s#Y(`@8(dE&+hLg<(i@vpW6G)6 z=U&FUP_m=j>~D*^62FOC4Vi=CH~E8a^WFNtyJ886_alD-n&v-M1};gn-=huydH-~fGcgm>f-w8S#062 zHnzcq&}b6BxwC_Mdn4LTzXkivzqzvh#q%V+~)_HO%XuR0R+;W05<3+d?@_>V^5;+r6GS|j@oPcnTkIKdGyd@*6N38Fb= zd?&>Lv}?>hN0e0=*m*m4=Fo193b~&MiaV1TbKzgki3g?)d>@!1f*(ZcDO7j`kEYNDn`zp0)-8U%;sqp zicR`uE}`Kjm8z5Xs8ZTRIazu8{r<~(dlNF!)&1fy1TO)FpoW^A=*M@SOO@6ZEqn>R z1h^Rb3*&gyPrKaXDeQ)DQNK`gHQ3chPDULOb zbLO};B!MQI6PA!sLs7983hmq{S*4qT^dbv11J8*&!BZ#x=A*>UtJUKdG7~exqP6)E z8KAE+@`z8c=?hqb4tzqkxw9RvSLU(;aG&f?Ow8yNcsUYdZZV^v9(R=SlyB*xV`Vv^ z7b7FHUA0?ao{q=LqLt8Ffti1N)FtF^efxJ$-B+?B)t`UK@z@Mfezb@t-p2%P4tsc2 z&ss*C87=PF30f)x+YpHvN5nmK6jr}12j=3qTKcK@B>7FI?>#wmy;{Wm>~iq#u{QS3 zzqecV9a@s9G9&}-a?oZwlhyW!%#y?1RS#ij5A@oI=v5h5gVFiEEF` z8`sQ#!V@GUOvc8N^<;kDkqE|jD&kgJe_b6nDszFc2Y;H1#2tv^DuvthAIK&3wXYlh zZv#Zu(`M z5+jHweyz0uxQt0_HTuzZ6Sp+WEzkcha=oq-aBa~u0`yctM!6M%B%6Ze}GzNuxwkB?6s@6x<0 zySd%9QBU8pQ{J8H)Y@3H*n{Z|oA~L(C0GV*Iqr)hh^(R-_Q7J_uxig=c`wZ-=FB3{ z>jCJU&A0j!w{cQFC=<8-1rKD<2By*xuvI@8dZ!`$HbLRRfG9ipBJF5nVNaQxoaVv zrLs^&#N2}q+PPARYg3*>y z?)=(*!9llQk7Z?9-RF-=;V+8;t3wQXk!FQy{&R+wD$E@q2MmZXu9}0-aVl{{_-`KI zju%C|FjfvwAR+}&wWhJ zuaob2-|{JPQ3*9Mdqs_u7#dgZxv)Cag}>{K(~y}XU$ykNFcwPyFP|a=xrCfGe)A2! z;xsQDFEeZ@uVXa_xQLByJgjWy!nL0pzVl_^6q#gh5Wa;g)>l6^5?Bz`mH~Rl(;RF% zWwdRpd5!s;anl-QQZuP>vB2m{zbX^$usA~26NHWGpfIiws8IVfY0M|B@Y`W zzdN^qLww{WQmRTE*4ZuimN8>l3ZGiaNr1_!Zrb=7z)cth6V-sBJSeSC*pGz{=QkHC zGNm_`d|>KMC+sZ|=+*C;!`3|S!J-pK75$Zef3Ih9aLy6WUHqj2wpP2wtQn8?f}{Zq zOOQV^QC2SjaDz=X<6E*DLS<#ADNmL5vj&8fe0<8C(5JhW^RRp8{IIXGP{S)iLK*KRTNwnt#m2GZhgtZH^+?~`K z)!dW>mXiL-(L*C2|BhAer0n}2AN0@{hCR~aN&YyqIbw~bOW0_LcpI=qpaw;+g`Y4W zXgTyF9J^!iKw7`*@>E_FV{@A6(s|>6uVT96;6~7WuGC_ayTPyY;#$fWc%WVKg}QJd zkv3C=km)t!s{99`2F3ghL;K3l7|v&hC`wrS6qA3p#F2!qbMvz=RV2JPYETv* zanJO+P?efQxT!|P-|q>e@Awe}_nq*(HgbsYhr|8SUpwz?`*p{R0HMdDZZZ$lXRMor zy7RG<0Vyxh=Dxkc1G<*(0`B^q|d0 zgNJqg+dk@xwm*S17=DI-Os~iOVMqrjyvOs5z=Ju7_!J5Bfb5Na)G{ID=zHkZk#RA!&iq?>+&lKnCaA(M zRUtnADe(HDRi%k04!3`3$17jH!KZjJ@lsTHDc(|UY=05pziUW+vBHF56BDH{Vc$G> z6=&?WyKg6ctF&|W35a*rzm>Qn|NNzqC*m-FzaL`xOZorjka6aS@_8^0 z@wL<0zfESJ>lo}*BmDfR!<(Ne+R!%+$zPVwWy+chWyZ^Sor;$nD9l$^BdRgpK zCgE11ud%WTh_wmLGx8~aEB-}f3qS6^o2*Qg#(v=$nD5ghDpY)EUwXjZycpwdunzL) z|0xgpM1!FkM*fw~+u5BDZ+qt6C3NnXgYiox8#J z>}=EI6l;T{X%)lTv*y4_@-gVW4fWP$8)|67*Ix`H?w)zw^PAz`5NaT6^*5b7$FQPm z;tmZ)j`4@zT#NGV5d(ayEa!UP@40+)UH0R8|GL%abvr?24K_sI2XbN7IoIx1*nmFx z8lxQdb8cRH{I=#!_xVvYIl4(AS#xN#*w^sPeSoHQ<#9rB(KSOAXB~O|k;?M}PVc?m zFq|Pk@SRq9I>Xj5drwQQyr$zz`hd+L2A zZLB9rUZ*c)_P}%L_MKfV!mT>nLv~i~F6KYW?p3_D*isM3=q}s@oo?B@^P35K$EO63 zWY6nx$(>0iQo|1iU|rqQQkCJ?M{_&9Y~nCMW4}uU=l&mIm$;D=@EHNw?2OtWTz(zk z5Qv%iCzl)MD|#E<1JY_HrD?AL{rC@FuX?fu25FySMjs!e&$b+T|N0WwiqwYQn>O}u zVP%`P_~is=dxdWg{fEpNiq$BFn}djr15V>dn@RiK59<6GLVfoMsT#54-`pa$Xh6oN z-oRh`VR%@HU9^T>!K=vrIC6^Hu+|MV7zD`jW<`@+)N3WK0}kvFu5)_EaL-M zw==%pUulYzYWm>c{Pq1JXSA4Ua|+Zn7}I}bl)YO>sKzVZEJ-oWR|>cSZ$*CCXqZUM zk;-BZdP4#_p+ylkwnMAwpqes8>cWeLCj$4Jlw+vFN=`%xJa?K#C}Q0rE9vG*oi)v^ z=YMK3*;+=AzLc=v#>9<8VBk-=fF`PUryy&*oXk^-eX=yX2S4@BB$-4x-gl;N)VWvd zg7k}yx}J$tou;OmcKR~(+;jJ1dJl+Jg2&3-JGs_}KtDA<`$&t`s7i{-YR}8N9`&T~ zfq3f@_u{J54&};$&|PwF@0YTLb=MrRhi9anoCsDd$qJXkj-mrCp4h?|-xTjpI>JlMN_Z)!&TW~j|H@k3;Rl1*;cux}f|sbMx}C&(_x4EF%+qm)B0%(uSAO;F_646`hUy$`$i&HVdWA70 zb)-sR;6u-L`k4%e8pE_Ao{w`WK{D23=2PJF8+&DwAuB@D3Et5vJ(&bm6w;P6{WhW> z_?n_VKvcAwy4=&CKN^kU0V!W_sTDdN+G}r-K?0S>#i=AyBTkW>bw2jtDbW>dmFoW2 z>*B1u*dg|V7?b?VndPX8+d~^+DV`Qv9fpp03i{yH`%3we?B<1#Qbe!jkwu#$@5$x$ z3_3XND8r9WI8k#8q+&0d8FH&mCyDl8a+Xk@5gE9G{ZwhvP#&UdbhnD|)aED~rTjeM z6~SP=fSz#y3C`+w>BY)_a{A$|It#Z#osIsgO?b`uKvDA&eCM(2w|CljFSMss|ugwN0Z5-?gdS%n%xsM+~B| zEQPXy9;IC{Q<>t@r5P(nf-ODvDz2@1UxIQ>s{eVE&qL=XDEnw%h)*%UGxANOGAt-`!G~NUiv?!C?KxQJ^tr^wCuHW)?R|6-@p%kM+VVpmJ?pNQ z*%y2eo`Y9p`X%6rhen&znLI6Eex$jDKs(WVkqn-IUk#{`{(uNdvTmf zmK^7x&sa^aZODGVV&b#p@w%~MUbQC4r#Wl$Y*vim=@9Z<7TdDWNs)uBQx5S9$+;Uy zRN}E+>VaY9Qpp~=Az&xu)>L*|WrxpeUdZrE{-0k$tiYw#B|1T@$Wk!kSqS50UUpee zg%8@}29IuBA(M-BdN=yMeqV({)-2(QybI2fMcF*1^W#Wtr9ZX!&d#P2FBpbyt!EtQ zd$bK0Zz1akFK%}P)|e>1nf=-b+#Pk-Na&>-5tNZU{$>_6)lFK77@!Y^jt-;m8u{rw zD^_zZXGOXS_7@I=$Mx-1{QZsTpC}@t&_&r0YWj>HGaqjCR$X~;JjL^9G+fQi+#acs zPMEBtG!553^V{XhENb%~;!iT=DRZ)BQ%ZD_dFTk~RjBl+$up&{%RNZ{7X1ve2@~J9Ywf=ICve0O&ON6avXv4PMVW~Yadi1svFq<;!BM+ccCTe8s ztar%nzT0A5yh^xK?(=d0lV30WhMpGe+OG!GPir&d|H+>CX3|h$;On=Na{HW`r@fz1 z<-w%!%Hjy@VGge#r2JiKl;-oAAK}PTtKDNLnU&M{n}YpbEh$4vR$*{py6tUdQk~LK;jB0?)DMMd%P8p{A zWK9U|8HiX`1anu!DPvX{M%^Jbc`2$q%jjIj%#afBsu4(GHNl*40FmZ3XPlZnq3t`=l&XyX z*_dUa0#8XuaVIG}8=MHSCsjt#rxqQNnVt2X_v*bIVNZ%xOOqnK)~XfggtF|59`uCG zvx+ZkhOTg>OC7F^5S#|l|z4e0Vk$RUw&+wM^H;rs($6dy* z`n{g{^l^>^xhB26-Mzmu((4=Bf<1C;RD1w}|8)TRjox{v|GL_#bfyBt|7|}3IsAap zot#?O(%Su}V&MD0EMDD9;=<^xA?5;|pw}VHh%EjFrj$uH=+k!(2n5*;F|l^Xx&zg^ zCD9zyNL;3X6tZvYks6<@k#38ED7Hx(n=8Vc;f-2^ z*WHm#IHp1E>5cEA!@I zWzzdiu9YPwJ~u=kB0{TqH}8!DpbXW4yBUkTklS@SlhxACJbm}x>bj;Rzx&F@b))ZQ z>yb{qwiUD|S|MFuufB0bq(VTJIu@Qy<-9Vmp3Llc2h(%pDk0b)Hp248EmwfDjeo=4 z+vimht-4Lz=hzm0S#(_3y@&@t0#4!^4am_MtBd^C9!iA`zf8Ka3g$~->hQDj}j zUh2RwZliSI@|S${E7=wA1?i zYo%;wO9M^P2Z;yr(Sz?R+1a^2B9({r6@R66Pq90P>6yF52Sekph-FO9NGZr&AD^8* zQ_;I1m2_gCf%sr*wX=T`D_N0ei%)+bD~<#Q8s>Q=PDMxQxnd+exzp3BK;yG2VJ`y(qyC$G8MwV|7uf2TnP~c4;8iiDk?69PV{wVWjur^pGr9dZ z6SM2+Z-%|wYCVoX%Ld#1EBH62pAVV(oHe-A9HBLO z>olzI$a9Ej#b2juFePYt_-x;+vwn%6Fecl-y{R35#qvA?bgz{5Z|U~|>12A|^JyQc z1sg_R)$X6oz<_czWx70 za$Ob*r-;4@7e4p^cg1k~amq2PNcgd}u^F8B%_`RpQ5vqDQ3Y2yXdsLIW}^;Z3Z^0KC2^+>#<;Tx=YzqGy1;V8$M>p}k5}+rsC}ca6eD0@{a- z>YtRqjfoby)nJu_GKj1m@8?Akg@rXr%4WUodC^)faM4}U%3NC&U}|O-<;_%;UvBK( z?zde8pD=J!uXIH*F>Sn{(5N^fC)TUAPWpdS`8eDmjP5a*jsqJD6nQDIMA%Yg zK+oWG9Gy{jP$hiqIM8ysNw42q5= zMfj5bvfewWdhLbP6>j%<3_M_pNQ+MtSUX0{bQ!ti_~*dA>$3i{_%zO$Hi9i$2_&0V zX0gKynMxH0WE2zi+Ee;3NM|mMtBZ`uVN_V%V3VYt3^-M}C|zlPt7-d@-p^C|y~-T* zxsBC}b{+a}t)vM%5|i>4Oc@Ogl5 zrFB8agRkHVKJitKYWMj4UGVZ01AunD6q0d#D!#K<6z%BLVLhq*b+7vB`4yTheAo5W z4|mtohL51Fc1)Q3N0II9eD9#-hgzVOa=Wt?{+h3C!*pKM65(qB52Tf+SR(&ldDaa7x1fr4*@R#|FNV5@sipadns$gi9_iR#xwhs2$)@n zR7d}Dmjl+C)_Q};PE&_oQ@UYf-rxeeA~qkq_n5T`P8?1g3RfecI*#ocepnxjwU-8) z^XE*aU!;JoeOz{tir5IMSM~)a# z zHpMu^pNY=6pQ(cwte>>L!K<<+W8>G`rEk~TbZyq|t*+I!Ep8?f))pnhT~~MpgdLy# zuq~6jxbfT?@%}B_|6~swp$Z`3jJ{K+!YM3CO^oN0at9Y2Ca;*CSQ{Yy{_utQ-Y{U` zO%Y(QExsaMOL_Nr@c6O~zJ+-pJ2duc4&^P={77%@k>0$NylD>*a8M*et_9vD*(NkY z>!G}2cd=R9{E(aWPrI&J`7Z?ZfihVcGYC@KyG8#?FNb6owCvOOl{u*WIY}QOsUgWB zVSyN@5;07)j!`gl^O1j=^p6|bdE9-u zI@%-$WyV1ast1efhK0N1>V6(k{hOif^K*lpg0|c<+(A`|@n^U3r^pVtgD!$@pX6bb zTO8*7;pcDiI@gn@lKeKevMtNa%i?_`o?Y;U1E~skuyPZ9v-|NZxBy9a(?R|MPo9W;8%(PQoY_0B5YC5f!qNNf`&|MeH zw3OPbYL{pzv8Nriw3_M^tu?x#l@etdIE+iWF zQTx?k(Yw4i54rA&am4>fsO$G5MmmNS_Xb9uylkyCsYJA-N96aa_P$7={UD0cRwj+~ zO;<&3k$4R)PP@wGUwmF=#zWmgv2N~gK~h61W4PN2?5&L33;KC0dxZjY`gjqeqG7CY z3_{A{EM{;T+xasEG1)Rjo%PAfuX_W@Lvt;@O$)Z-HV=k;S997NsfEQ!(MpA=4G^zwp>@Jy!4z^ zbBal-A2{O_K|$3H$S{2JVT+(`LtbY+o%E<$@eaeWkFbztQ}(d|&)xO~-P~ATZ~L@I z!r^)@IX7x4jqIL3e;U4uvjJ3fvpiG!uB36JH&fSAc}OblbRQ|Pms6fe7#nQdJY@B5 zT^!NoSE8q0#w*NtLUz`1KZ(1RQ8++sdZIJMd>P!rHI@&uPh9~UlKaG9z(8V<>nDm9 z3*(wp-Pp)Td8GnS0eb?D;d*00#?;x$Ci?-CESg#4N_)9=L6c#|7V6Y@0qJ5LQ}CWX z@b3d2I<*vi?Zai-&ba7ldH{w_smEk>Ld$_0fJP)C$AG2M#+Z=y+@W&PlpA{D#Q>hx zZWasv;%@!6=o-a=_9pftXoI5sd=AyJZ7cai) zZ^n^R>@~;i;cSOZ2oFkEXc}LK1d#&R>K_#vlC#VGcb>N4whw_dIL)f9$*rkcY;@WT zmT@o_Ub?hEb;$c|K)a~r=Dd;JDW>ByCkx1ORLhKCu1!8dt?Z4J>ET!B$KUvUpwH&J zs40K6B}MPY-l3FL&YOtM>^=~3AYWCR4qXgpUWV-kb>JR3qOK<0!~HmWtuo-ZsK9yW zfH0^qcu*=mLf(7JC)T6rMA@DWS5&Acvz+a;QdyC!b30|iHJAOQ9H(kTwq16t_XSe` z!viOKbV1UqDzm4!4r9*vSQOR4CL5j|Bj`cRgoKvEk0*8EYU8QUf+i@V-s)m7W<)c& zK;S~O^gQPreT5J04NkpBedjS1*))(q17-J%1_ja-Xj@6`f*j4?z<(eMJk{33A{D({=wG2i){4IXy<>d1p-2$Bzax%YIh)qhwW!U^pCuu%IKQcWAP_lVMnLx(KRq@+ zJsZ{WHYsT;g)m^1-%|{&v*nQQPZRy{Fw#B#pdXd0{VMvDffBy1lHb%7nqDQM=|JhFwSMudX{@ zDnmleI~SgXj1-?G>fb@M9y4u_Q0iKq@;0bJcZEg=bx3N&WmeWvngJ$dJ4e%I#+bAR zsLRMxCz&k#B(K3U?-Tn8TH40uw^Rnr!j>>7u*6qiW3&u4xeg1FyyGxI1=()V5g9Ln zbP{-1HpY&niO=Ft!ToJ)^D#L_W@y#ng;332Y^7Y1V9LJsCGDtN!gc02RWb(7w^=@{ z!6>V?@PvF#2S&-#m$V>hk(x5JPbsAz@_y=QMUj+=)ssZF=xy!WJ&%Iwr9gz1Dh$JE^cs{u+nSTfcp^hD;3QM{^Wpg}{ z(Da12!)|#s_~G)Xyr67rI!uLY*pfL_4-(x4L=&P%ZDLk0`P#6kKicz?5eH(6&~eVN zD<%?P#k`J4{wNvdA#Dr&9l^~~U{i3e19;UuL6cXHKl*ecW69&#)f+Y>S87NoK8AB4azg#z<4G5HmeEFcxDuqM0@v)dF`hmT7Cb!NJc&Oje-z z=5$sDR>%zauO#btDdM{OR1la-sz({0{cAthMe>#>Z_i54LGt;L z(L~=iN>Lxh&#U_(t1n+EgU-oy#hl@U*rYKdPr%uzSw#3g-_;j34UxC{DG@~QC3q#s;v{cPV32^cr?8H+)Z=X=jV+^@*zV!gdo<=BLs-L2% z_X}g;FtR_|?l4`u?gkCT4E5eT!rak*an6{>QmiApwZ=@v5eM%9>c&1^ zix`mo%aTkxRlN6|WOlcm6T}xrm!~#sRulJ$@Tl_ZTx-3M;M8Qjuf5sQ>5V1`r7L4> zr}*yK?m%t-VSSqatHmIN>TO-c#r`OjG1MrQjOk({U0lv-T8+_wmyz>KfhyvUgZw)-ubF`CwcwdjYB;Nl2(5Un5M|fpAr>w;Y2}59LBHRXVkL zD8$RCwic&Z9Xj=bcUM7mCSedAD6OAPK*gLOj>(`0L7Dup`N4$X9{2nrLj``e3O_)> zSgS3(;kJIEd{4wlENqTn_V#i#*5!-J73`tdr~8|5JHO9CixrD{MRVB0>iqu z)tr(^F-)EyEV+fG52|cZU_G^5_Adb9xU+5HwL*++7XssY1p@zx}8w zCbI)+TScTs@E*@a@xc1Zk=;Q9$9_&x4m?zDAZ*fnc<+!(q-#Ocuv}3}6sq8AeY^_6 zg1Z4hn2IjjKBU4F(_gS@bwwwcG0T5Iu4lYE;l?_C<*8aFl)MIa%lB(gQ8-Ka>TA?IcHV!fk9 zP(X-o!=BB>@+bRryNk5z=M@RKPl^3ekJI6PQ&c4r`lX%6Hk$5)_uD}C&lHIgXu0B? z{+m}Y4f<5yzPJ4BujiR{jt{AM;adoKF=jMML9u*FHT7`!6evYhOpa`F7u9mJ*ar6^ zd!qV#;GX+}{9?u;7r*H4ILiud!r$&KQ^3>x2xr_h8z#rx0F8-5P*;^JH+*_ipq#mM zB#o>dlyG%9g&23`E)mmUDRN4CA;tBSI}kQ)nal`x!5HLPo~cr0aY(U_F@dA3XqCV zdhWbT*ed>Tj5DC%CAOxm9F)R839BZQubSpDHigLC;YF;n#2=T85!gl554$xOK)3>K zkbHp+UL&e!KShaPJRanlfeQOL~4q*d15}xOHtCip#>@;f3fA%79 zsyh~x?+)>gjlHq5HGElGybKBK34irOy;OGFOOCz1mrK0FS)Rf68s<1mx`Dt)2i^%U zqPBO006Rj@qu{qYTb}#~vr23oJ=Upze1}^BGuzEg<55w&2a}u1I7>~|_MM`ppqd;* zi)_IgO~X9rr{AABa!dU4paQizUq7H3Ih4sc7SJ1RCCwrF8=F4m0Pl2P_;55DF(Pt~ zG3%4x!gu!<-9tp6#hn&4Naaupv33Ei_XShKeze=Oi*(e4(i2ZyFbN7WiaQND7m`)v z<(8+Z9T%D(ll6pIWh=Ydzq`?f0V9&99LSZ8ui5b z(OhiX+2v7QxW|o=4UZdnf=3lQ%Iw61+!|Or&Is*w&dCZ#H!$*%Z7X2&*jd889hPym zx2^=EqyyH=aNzg}0Hxj41-7|!q={5oLRY)wrom)7B>v*rB~SmALkSQcPTo_;tOtNy ztQ-V}SVWkP@J0MvzR5xUeMXs#C#*oP51+REV`d zmo@RF{$%DWjK-Sw(iYx_qL9FN;`^JPV;wfcOW%$uwd-E%AE<>yX~*Z^1CF)<0dbhh zf(!lVw4TO~B3J0*d8TtYqI~gHz)JwIa#&l{RuG;fZSvw{e~^;u#Wd6T05Ele?M`j;!O*`2dW8d|6(H>%6{h5W zH^DNco743vP}8;55H6UiMc#~2sSx9-13iQJc`UdxjWA=_5bW}@EyD4R9na`GQ7*84 z3Nlz;Ce^B}4yDDMmIlD2q#_7VH6{bLw3-fKwtV~glh zTjXd|xHBi89Lao(a5yv&9@= z0NlZKfa>5ba-KOja3v#QXhlTq$-FzRdjLY}EFpm%c4~x~DS-}Fc@;%)_(9Rb;T5G4 zh*KhCzGhgM(Hvxv>bw8rL|9_3*b&FI!_~4a;hDMb2S}m4#d_S> zw=Z(Ce>zP(xmoTd*%y3@eu5w^DLN~5XIww&f%yiX9O4btTVaCSh|?rw&ODx{&|ZP3Go5#4dm+|0G~J6h7r}+ z^;M;oeFLoDzoPvh;K93IpM&HHImW70dP{X2pJN|?|LJQg6 z`t~nmUr)Sa4ed%aWNQ5ga&YC$R}eqe@-GY*kVswRDyg0p_!l3#&WOnHO5Air2Z(5> zdm?;{r=3qCOl*LmKDhg~r)~gB8#MFcUl^GWJ3-wlQPb~kP#Rg+~n1?OTHd>Q-E2RLPnmLDl`2f>t9KdU6p2( zv82Gv*GB_W{f4h+yZWUUu1H@70SS%+a-WN`**6x606;M2Yb7#>9LZ`L*wzx0s#wUDIROnU!JX#U;=_7C#6()7J? zG7R+I_f_?p!{z74Yr~y!?+biec^}Zr@{!tE-o0AuDrGR*qZyKaK6fbR|DaTmPU^4ex%W#HwRYN+c=9)qBR zT$YG9xOb7j37Ee;%=4reFn^`Z4F5Yz9%Ep9?=PMm;6aR5h}x$(+EmnmEKV>TYCog% zSIPW;8!3?c)_*oCace9VFUb8PC%)NF#E~RVDX7+r#x7t*f9D^hLw;xkYEl|Gp_*_J zO;MTA51UcY9=6$9dPf`R2)9Y@oLxP%Etm`r!X%pshL@WA0iGDhIw)Db3;(N`5yb9! zZs{fNn6hlZ*`%9f{PI`;hU&$S($3<1!~d>d1N}3_A>d0@==OhQ!ISF#i)9V})fqgx z$G$6Nu0>p?2VBto`%lyVUXBTeasGrjEhoIK@(w)P_$y_3Xt?f)2O&YygYddWrPBdU zJ!Jx75?P0DT&!%EJenpLP{pf~_6Lx_J%N|WzH_EeVeZyJ>6oS9%n>1o5}x;{+m^@BOWMZlM!MSLji)AH`w}$QOkggAIdV;n)~c*6q=Q?fs;Waz27|0o zjNVZ?YqTmVvQbK}(O?7;7eoC~U`CyA+}lBD9q^Qh>7n{CDks$uT=AJ`5_%l6;d}Da z#Wy=%S#G}|tgnK0fF5*l+3Vr0Y>@S1e14itI$7EV*pxzNrIH`YWDg|ZnlMR8?n-Jw z7o917MEX^BFHcTf9xiwi{Ft^$6u)b21)2?F;P!JV^^F75g?0`?#SMQ)1AcKv@$|D%>yt^H1Uv=Z(_y;m9^gEPy4B0(! z$?uOy(_rY9!u+}d9+<^Yd;&QJx8{rnEFP9&AZV@BR&~7h>lksjA(G2nqtii-+ZJ?pQGU-2vD$RpI}p}GsuvY7)BkE>69A4^+yZ`N)6vv|FNd3;E- z?=iO%$_e!Cc<-(^ra7d^AwWWU;pbZ(1 zI=&4;tzTn%TJX7EiB4JYmSd-w8+ysw~wcAM4vF|TOu!x zGZzg6j{<=o2l~oh@93>Gw4Gw#Z@-Zt=ht(H<; z99iHmLl{{)AO~VF4i#m)Sx@?QM}@0j#eJMtgcMT!^>fb$lPt>Iz6{%sLVPLbzuUn) zYqi@PTBiKHcRO~S!ptj+w`#Qh?GoeiDeC8nACO(&U-m!SAtpYam~6V}LtK`M>rF&V zr77a@LreD&eH3%U8_%pt(p0r3J-T;Dx=mD0q~r-tttKG$1w2-R%XUwZc(pfynB@p= z%bHrW_M77&Dx;T1OUbQUR<7OCoyc{oP-3Cz%({|6IsFlKkEE=Qdn%$nJv1Z!VRZJcYp( z^F>?AhPHpJlKuX5#M1Q$n(goDvgQik@5wqQ(%U9Vr1}q z!v^)c))QJWE~gwA!s3sRWv1C5Ar6t>$JC#I%fnb)guhEX)Ij>Be>8KWt-Zgg8r)8$ ze;dBdJyJAEl6IJG|GxZM=r>H7{K(k!8)xpuSEJaEw$G{^gEt|{b#v8ov3wV?O$YF1 ze|2@C8eE6ic8a-lh-rA2$cR0{iQoR|Yk(I{80upK0 z0@>Bx<{lkUglL9`k)*!`PPUc^?HBWZ>gma}PBbo{*;^m2Nd9LF71)lJ$^t!9Il5pp zB1kQc0=$nmua*N=`cLTNoP_XI4Lk6RTYx6~|8e-m-$;xE+yl0j_Add4JdcYzPBMw? zJfZn&&HF3>L9ax_R+U$QUOny42gJE(ExtTr_}`{X^jxQTeY9u*vLdr5m3m^-;7scO zWoBJ}wmtX30~`_kBiri{Fgw&L1Ij~bF39~Psw0y!6m#IuRX35_nE#z_46^=fQT*Y5 zY^#3w5B<7ybsHr8!j-H=&34K!%z+7tB);FE9BDm2zknXmsF~Fi{TiQ30M+@@%W^_J z3fX#tQ@ALjD(!lLf0fj@p7vPsvKzcxAG$gOI<^5mJhsPIm=yQVz;YRSuz%A<(vB}y zibHt3`{ufQ{3P1AzGcC<-q&IIOEn95Cd~7?@m$@IbwV=R{<|s8*K9Slsglp7Eb@+_ zsgI;I>!{u)U+Ct3h5eBO*b^Qqy1ke=H2;E$i4m~1uX}+G_g=^M$GHVd$$wUNJ3nO2 zOQh(M${p!XBoPet${LBgjr-&y>RxSo=VYPqem|qKP?;bSe_gTFSow^4oJ(|C`>g0u zN%4+b@|dZKxTl_<9V}k%D%?(?)wk;2EV`RFbFb@0NgpWmRw5%CSrsr+_D~*yHG_yN zn+4Y1+n{u(EZd3d2tuo$`ev}>+sqtxNU z=i5#zX2_)W{7|<0aHArA*Fifm?WZOKT+U!cK*R`isG{o~=aC{Lmo;5d0p?waCthjp zsHAAWIZ|PQD+KHj%H4vi92sV)o}h>-?p^$9*&SB^Qu+Qbdnex^$Fbu48u1>^NkEvgX{PBjqvv%Ie=BQobdW-EtPbA#ox$Xk{ZBGd%1J9oaKi}0Y z&wH9~rFYQ!E^*;lVXK3X+623`X7w4PaDZsTN!M}~f#jTdgd?IaDig6}V6}ugS7NBW zRZCD0{9=&lv8*Q@=GoXoL!;)t*rg^Mqileo`@C~uEZaA=CkzV3L6_LpB) z-@!3O!opqeGx`kyKWFwlJEo6zeB>xwF1bHF!^aMew@=UDq9RV8Gd`c_q>vQszY%dU zcF@x PZ*=~Q)#>7s*M9$Bj>^H> literal 0 HcmV?d00001 diff --git a/goodreadsfaker/generate_fake_data.py b/goodreadsfaker/generate_fake_data.py index c4101cb..d2fcc8a 100644 --- a/goodreadsfaker/generate_fake_data.py +++ b/goodreadsfaker/generate_fake_data.py @@ -24,17 +24,18 @@ def __init__(self): self._book_data_list = list() self._base_directory = "D:\GoodReadsData\\fake" - def generate(self): - review_obj = self._generate_fake_review_obj() - self._review_data_list.append(self._parse_review_data(review_obj)) - self._user_data_list.append(self._parse_user_data(review_obj)) - self._author_data_list.append(self._parse_author_data(review_obj)) - self._book_data_list.append(self._parse_book_data(review_obj)) - + def generate(self, num_records): + for i in range(num_records): + review_obj = self._generate_fake_review_obj() + self._review_data_list.append(self._parse_review_data(review_obj)) + self._user_data_list.append(self._parse_user_data(review_obj)) + self._author_data_list.append(self._parse_author_data(review_obj)) + self._book_data_list.append(self._parse_book_data(review_obj)) for module_name, module_data in zip(["reviews", "user", "author", "book"], [self._review_data_list, self._user_data_list, self._author_data_list, self._book_data_list]): - self._write_to_disk(module_name, module_data) + self._write_to_disk(module_name, module_data) + self._clear_modules() def _write_to_disk(self, module_name, module_data): file = os.path.join(self._base_directory, f"{module_name}.csv") @@ -44,10 +45,13 @@ def _write_to_disk(self, module_name, module_data): pd \ .DataFrame(module_data) \ .to_csv(path_or_buf=file, sep=',',index=False, mode=write_mode, header=header, quoting=csv.QUOTE_MINIMAL, encoding='utf-8') - self._user_data_list = list() - self._review_data_list = list() - self._author_data_list = list() - self._book_data_list = list() + + + def _clear_modules(self): + self._user_data_list = list() + self._review_data_list = list() + self._author_data_list = list() + self._book_data_list = list() def _clean_text(cls, text): return ' '.join((text.replace('\n','')).split()) @@ -56,7 +60,7 @@ def _generate_fake_review_obj(self): return { #Fake review - "review_id" : self._faker.random_int(0, 100000), + "review_id" : self._faker.random_int(0, 10000000), "user_id" : self._faker.random_int(0, 100000), "book_id" : self._faker.random_int(0, 100000), "author_id" : self._faker.random_int(0, 100000), @@ -199,5 +203,6 @@ def _parse_author_data(self, review_obj): required.add_argument("-n", "--num_records", type=int, metavar='', required=True, help="Number of records to genertae.") args = parser.parse_args() fk = GoodreadsFake() - for i in range(args.num_records): - fk.generate() \ No newline at end of file + for i in range(100): + print(f"Running iteration : {i}") + fk.generate(args.num_records) \ No newline at end of file diff --git a/src/goodreads_transform.py b/src/goodreads_transform.py index d3c13d7..6d6a946 100644 --- a/src/goodreads_transform.py +++ b/src/goodreads_transform.py @@ -14,8 +14,8 @@ class GoodreadsTransform: def __init__(self, spark): self._spark = spark - self._load_path = 's3://' + config.get('BUCKET', 'WORKING_ZONE') - self._save_path = 's3://' + config.get('BUCKET', 'PROCESSED_ZONE') + self._load_path = 's3a://' + config.get('BUCKET', 'WORKING_ZONE') + self._save_path = 's3a://' + config.get('BUCKET', 'PROCESSED_ZONE') def transform_author_dataset(self):