From 6bb902733002a5a61a3c7a327a3b098f3dec5112 Mon Sep 17 00:00:00 2001 From: Nikhil Collooru Date: Thu, 2 Jan 2025 15:41:30 -0800 Subject: [PATCH] Avoid creating expensive Path objects in split creation code --- .idea/icon.png | Bin 13583 -> 0 bytes .../facebook/presto/hive/HiveFileInfo.java | 24 +++--- .../facebook/presto/hive/HiveBucketing.java | 5 +- .../presto/hive/HiveSplitPartitionInfo.java | 31 +------ .../com/facebook/presto/hive/HiveUtil.java | 10 +-- .../presto/hive/InternalHiveSplit.java | 20 ++--- .../presto/hive/ManifestPartitionLoader.java | 7 +- .../presto/hive/StoragePartitionLoader.java | 78 +++++++++--------- .../hive/s3select/S3SelectPushdown.java | 4 +- .../statistics/ParquetQuickStatsBuilder.java | 2 +- .../presto/hive/util/HiveFileIterator.java | 4 +- .../hive/util/InternalHiveSplitFactory.java | 19 ++--- .../presto/hive/TestHiveSplitManager.java | 2 +- .../presto/hive/TestHiveSplitSource.java | 5 +- .../presto/hive/TestHudiDirectoryLister.java | 4 +- .../hive/s3select/TestS3SelectPushdown.java | 25 +++--- 16 files changed, 106 insertions(+), 134 deletions(-) delete mode 100644 .idea/icon.png diff --git a/.idea/icon.png b/.idea/icon.png deleted file mode 100644 index 6e1312485efa831d866221714733c324d8cb7b7d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13583 zcmbWebx<5Z_cl7aEG+I42(q{acXxMBf-LT^NbtZe5*!lTA$X7k3&B|k5&{8&1cHRc zA!rDe0O9if>Q>$N`|7**k6TkSJ=6W1r+a#)`kXW8OoE}l1`$3TJ^%njTAHfH006o_ z1p&A?_l1>Tjq`m0aZ%D!0)VD8f*U*R`)inkrm-FXgmM4?G6n$t-d7=a0pOV+0Q`gl zfOIYZ&>#vsjb!f|aO`z7RDrwyY^6Qb8TU1K2+ha-01!z3pB+>^5d8DL5*nbTrv}|5 zpeN#BRg2Vt0)TLxma39z@Zw=nNG5~X+=mkDXJMMsSX#j7Ea@l32c|ewg}&5k2<3GB zLd4CP8JQ48DOVMRueqWTnVvLA6#_Liq$Hyi#l?vUaM9O9zI-imaTr#2aXE5C%p8z@ z)cee*=x3ozL%qeSQ%SAKtS)0WW_ra81b%_gEe3SX0w_u90R$I<<%9(;DhP0a4X|{OP;7J1;cu5@ zgX=aqIW!b`Yq(kl%;5?E!$2GsXezN8Y?)K%97f;&x%_&fyl}QXvmq2W?$a=^q8 z0X+%f&)vJ?R?t35hH{&GSwPvYKY$6hSusf6>yaVU6hzk8vQbj?{P$K_ubNQ}#hMs8 z3~2!uh!ndRPgLYR5k&BQ_Sp~owfXjmrIh>i+tUtq?jl7`s69{8uyO3aMu?pVU`WvRrE-V*Y-+c{?B20((h=|sG6)<^a^iA` zevY3qxQrrgpw0OLo0@8eh!Vd#xkYcG<84GuAl}sNT-1-BJNCv73l!xA&dXkx*1eJ@ z&BYJLe?YR?d9}*bNNfp&lUNo6YG_wRA8Vq@vzE=zVtlY6p2aq z$ek(tAI3>wiX;~=yn90-=<;|K68>$z{LJ-{x6#FS5ugw|Jmz`i`Q^w%)ZP48pI(*`FD0d_6h*KGSdWn) zT(`7VUZ(|E3xMqn{Ut*~73*Z8l1GH3vCyOkV+u7ffpm>6>$+rvii!*f#=yl_5TB6? zgF+W=2Lh=mV9N_VC2X~R4s)13s;>697vg5P)3y_BqyI#W&*gy3;aK>o8Eb)i5=H&N z1%yvI#f;u_>KsI9Gd(R9ZgK%6r+h7fBx8?`nc4?cJ*hJ(r7$rU?iJEp9Sy?erJ)ZH zj(PlsSoAtV_Matks#hLQ+CUiY(ihQnBjOp&!(^5NCh3HeROCJ_vbb?sukXVAEDZ|? zxT$B3-na;WJwLQ@yuNS`{`b|MdUyOpC!Hq~Mj(T+&x_c)S*hiARm@IRCng7%0-ky` zJ>OCtq919s;TVo0%1Xk`J-Ubi(SJ_Ot@G7u6v>(5| zms!gkhwP6e1({;;U{p|tCGY!Nv7b39RA2j5cTT!Vf@=rc7ExWJi-d~6nh;zss=*uj zk1Dot_?$-x+h_$M`=pB36Pr;zW?z77UcS8gLjVqP1gVg<@GTP6SF!TCm_-OewvSTa zSU6xs-@08k$ix^a3eosLk7AnZr;xnmFvm%%pDY+*xC+VACoBta zpKg~(kGf(^ob=zOy1{SWRBOYEFc~UT)JqT0TlVi?(2>zL3XY9}s~%XAqs1UaR4m$M z@1HK5LXf5o#&lMcXGslp;$VYNl zzXLJUm<*%wbVc~y=;J?s9}3OI?~1rny^iE<>iB}TuwT3Mt>??#I|r6e`(+#a3^@7U_-^L{FeYBMz>7##4~>{BjX}&zuZaC z{AB=(vzc{tLR$TRsk1qJujcuSzfSISLm{bPd7v<9YyDWq(1*tSWh+qm#!F`39U5pV z;r#N2)T0_fMf=+a+hz+7NB?r7j=4jrraIAG6vq@68l!NGCtMgIjKH02(DwK^ZFom~ z)4g%GJ=42qOuJI%staTGA-f_+@T}zQ^Mfw|TWpDmQ`0r9-&^-M~kX@XlQ+vLF^qKjc!&Gm6D~Gnw3_LM~Brw0V16Pe&{v0DgP$C}lC1>;1 zNDSgf`;P!L={Ls~&~X0rHU|f5oxS{`i#} zn({ljttA3tqY-k@F#>w03SYR@C%MT$`jyNvr1)tsGCU)q0iy*yab8l+p?UW@a-Dc; zw#2(LPj9Hj&|~X?QJt^dF3}X zbFQfyuo>l>**LD}x7F56AXz{@6@4dBl)?&U2!`(&<65H8H`@5O-|FTJmv4p4gh=u zKhZgtr^m-&=a)iy$a4y8s(58apjThGAg2KJeIyz43cV5(DUnD@Aq%;|C0A&QN0!u_ z2=tv_hFjqSaNCnK`_JM=5_irtjimS8xh^v75_zPO4a8d(Q;Bo4<9dR6YlS5E!obZi z+-CT&BzXNnJ5D>waX-+_t;Wf`1RfS@5^Zwjl&3}pY(|vCdE?vxHp1#=#=Yz*6I_3 z1@Wh{XhZ+V{JE-s=J~LlDj}K-HwQ2gQc5dsrf4U-GKO_ZGsP}T))j71e`nVzXN$v* zWuaZ%+$TCut9irBb1NiuB5_*J7hG*WL0z1M-KP>8r{?x_+ZwiscN!1sJAxZ?BoFzp~MTYe&UOPdrn#PZDD_M?8ti8$(zyqQAH;sT_A zlxu>)z|!==1SjUY%%Km=Wxu%*rjHo+F%6`q`+S_ZnM*=4=JtxY%RWziVQ2PFj*$KJ4vcAxB!6=$s`~l3hQMqBf8;_QSj?P33#mmVZMCPF@Zq!oBwO zpZ6=~^UK}qKLwaP5PUKEHH}y1ugWu-_vkfy{vp=JjZZ8C>X}sNd_&h1D(gAuDJeil zmjfFlr(k5?{*EA~25=mVFTyfL2tAImyp`K~NIh4rC>RF1X*H@>ik?qPL^PbvkeD1F z%0_yRG=FKEfo7zuKQasCp2ho!lmJ}prnll8MtqpFQ46VNe*B<+gv;avX#|wkm#oW7 zL@AGeXhlv3{-KetBDdp3XlwB5OT}Yj_I!mEy`fa@4N|K9ek;HR}ZTnE%!yR zZO;UIOI-H~d~FR!-cxUo>7>jA-;IsPldFHzea}1N^(Dx!or9@ArWN#y+ zis}{N32o@kH_ER@`OkOB85a=s*GGc@)6F;gQ?FZGHpV0=)4n{%A88EeAkzx6fRU&@ zZ*0(fEog=V_<>_ZVg<~$+~qKN>RpM$83hR>*$(y3Ui_`phS9SOSTrSe|9J8~Hh1p3 zL2(}11Lpwwig)?l9M84~u9^n0I%u4hk@(~p8oTW88>Hm?R6E<@TX`rGP^^Z$v@fIN z{Q4UtNSKQ=Ue&G6lwi_ls`SQ)!tu|8(FgAfal^wjBK2_{9{gqsibih^JKN8>s|i4U z*u2m=gZ7rv719owbw?iEO&^_lhXcXc^LB}{l?$0>SWNx(6C!K6rhU3zG6um=^N^g) z=Oqv;FkvE2)}m>M>(!H%$E^w7pexu`7$-3nKYpbkOCpKrn|gYA-~lUAr-?iAjj5f? zh)dPV8*e+82dt<`lj5zh$6Py_kaBA)d(eEX@~(IGf~TmCH@rvFVtuz z9#Rn$BB{;+J3Pt!q&qti)%@7y5=)*kj*yCzM3+j{0L#VOQzh))>#Epe&;eJ>2m%FTcTx_LE%m3+u5e5DUho{H~^1^sOzs2Xr zgHS+{O6N>7U)mGi0Y`Tl%$mNivDzLZgxEZ8Ay9u5#EB27oqewn2M=M)vL_uF0~6(n zR-01YK#G+XB`^S|!{dl83>(j-)83;Ilf3dViban%1YLFKR(O=AQiM9bZ`mkV*B2FM zB5$Yrq66Lx;LCg-;^wdlvL1R|75?sB5969&fst$3)5me7=r- zJ6zQ_;kz)sq7YGeKkuo_1)kClVOFjL>d$P|)C!g~(dI2$aBz9Qg9=*n3u2^xxy{FVwJ_x}>JBUESMkIleo5wIvAG$cn0S@0W<&@ieu!{|aq<{B&BUUq2hh6rUmJ5K@lZC7>MX}O7mY{K+6+AU>A<7KsHNfHZ{+<16a3fAT z)fM-Ng4OqxJ?!&G+NGQ8$QGo49mfMX2G$1jdwh$U#@;)AS9#`#AM5QV$dMln|L{~1 zXU2#wW}x|~Zm|umFuAh9;t_cPq!>e8k~>kj)kq{s)3bj(y3{v}a4CN0j`OImW0}fS zrXNIzuehp`ml=F#8y0-mNz^(R7T&vCUA(YRaS$nuLx$5`DU1`mmf&iu@2*h1xh74Z zoVAL^jhORFP#ig!Jqe*~bEW_1$O+w$) z2zz_#XtOsI1W`|s4O~ko*Jn6=SJ(FL8-8tO3R>1{js2v3)LB7rr4!ux@OBd=t)|B|s(^0#LT zqymVWq09SkpbA(g!OdvO3^Ec^N1#gj77Qo7AU+0)jU6*smXPZMv@0_1XCQP2-p?E! zmKeGjrtgQ-^jINAfnXzQ1BQ!{>uvt6OHPK5GSu%0aco#lLNV-U%VIO(MSqh}sy60p ziv!hdH^oaA?Zpqpxzz?nef1yTSkgv&p^K97f*DhbK5aI!6>J62kGi9UGcWiaq#u#U zq&fm8=6sxMTe;!ZLDFp$!H_GdU4h@^KD*V-zdLau-y^i4(C9{`NN^zIs$27W>@jQG zWQL)vmi%{R^xw+@326{KClTZI{%qHm{9M33atT$!Dhp7WyYP|fx_LSyV4ex*l5pu0 z{m^$$il9f}BE2w#gj7m_vz!F3X$rD_YMx6pd*j1lLt_?9o} zkj>hhR8)j%Ue4W#@%+O3pE9qb!tI!86_iTH9yanQx@KYA(}@ zYCY~?L)^Wg4>xs~=@o!Kj$aYmAIG|M$chho*)%>_p3OT1hA@u=o~`L>n@prg7k{K- zb@J6m&qc8zM{^`v+#Y{w{Pp!QI;g39nC&~ERpO^KMFPEJ#@_IuSd(_yy# zL})6sndDoIsZk>xsyc0F7KGU7liC%6aAVAxrd>ny!&sBPZ@QFRQQi1;+}sxA{py$~ zNt$tzGRuOhw97dpu@Q6}!e1q7^7Uh5^N%D!2mPG{kWkz1;KQ)H1$o=j z_dPMfP6(meA~FZ-l5G(pg}xj?KqihEG*~8M3wJ$ICNUCmKd`>Ehqh6iuoN|p1}VkqR|p;>+oYxe>a8GzTg@z#$nlTrK6w{Jb$ zHBO0O@VlYYMc)3`xGt*h^Fc8gNKryCv06wE39u(jxm3w7QN59?7g7&%)``%L6?m@A zH9N%Jcef+f7Es*Z7yrZMZt-jN@2#2OTqz55nIA>XV@_~l5z_LHC+E*m~ zDCf;t%rAMMVhq94qBPH>^C&PS!c<<6h>AdVuPkcVSmjs@nt_QoHhv^mIa7j_h2d{r>T?JXh9I?+^J-O9H`ywv!1HDe*x#ct>B&8klS z{V+ma6w|&u%MKrTZ{@(5^}%43nPb10Ckn=cU0*NrNc=DGlHWt?TcU@ptk#~lLPA+K z))Y6djh1;W!6F_?S!lvxSlF#;`JU*zrkhR7iW8YR@lRsV?E4m?;PwHF2uY?Uu|qy& zmD|Q^Z6GA`^EkL`otX)kcA}|*n@l`|KQyOVEe>?Kbo?STpr0Y}M`oMH6fY16nv2<` zvuFjG(%6Z#YPV2bb=}XezYNE|89yWBNVhA)nne+fwLn-&E*qoLc8EVXfIn{YcKXI( zjq#q`g$O~kybo^}4u2E|?pzH0EXSrYTr>ZVYrI^7Dp<$;CRv7mHb(0pGpTqC`Dfa% za|d0kYBby#LovLjYA3vU8s+r^#XVlpx6#9th>Z(R8t}yY61H}xMRcHt@52#4l8QR$ z?SCY$4hP(heG$iWk*n634ncu9*Zu&|QEBUD?gJC(Kf*@gLw$z$d&ilQ!+UKtm+)dS zM&K8lpa}F5RPXQf`(3oHZGb3jv+Tr1m+*ZdNCq5HKcGh!$CoQLkrXBU$rVl~7+ai! z7xcEe4^Q@0phl?jtuebf8|ehjy%W9|%!}G1Kh?2cHzN*%=z4pCDof%g1~-?}^(|6S zu`n9oVZQUi#%~kn}jT)R@26S$bX0EvhZ^k#RtMcpLD0rf6%~W2+==> zY_YpZc^QuADW5iqZG2pS+v=)2qAy2koGYbcl*eDQ6hgxvNTwcPM+IVp;Ju0_*Zkk+ zXYT`KEh6L9mZUXvx&ElfnKt~lY>2EHE4)s(f4awnnP%=yiMTaU64KW?D`kl@Q0sVOaN4V*z<8I*@Iz z8TJ;pj$vtc{}GXY_JXJ47Msz;a4SXH@EPI7g;sJS(I6+Xk9C>nB<-Tm`<(|xM4<0H zMMCed3fW@$@d~Lgz-yR_+A}@`UlofS=}gvg>X)b|q|fQIh!pK2+b~5}0Or?0+~1)@ zVt~b6@VrdOo>MFiZo6ik%dmtGCjvEa8JSxpwD{&Q((I-^_r(dk*+~cVs)5 zkZbUpK~&eU-LFZCPJe2^t3~GmI7F7@$BFsYcD*x~RIe=f?RTmo8asZh%#W|g{BBo# zbRWT#9)A-|zb%mSF+D5L?%PaKQ$~CwuwT>t*}$H@Q=hhxD3LiDtHt^5!8ZF8#opT# z%b&kU9M%{!2cBqLvM+b@8Z4SR7_aYO`3we@l4Oc4zNkE%fKuC|U?#I4xSADkbSoQTN1xtN+d%{bc_oPJZ!BfXOzBD$ENw1MK$-7s z{$#P&lRld0UkjIYLlA?r$v=O33fAuhf2rAPV+AJ*P<4&vZEQxE54a^b-`ZQ1b|;D? z2WN+!73=IbW6|GptCxiq0t|!2|7=Fwhe?9e>{<_Beo<-f{Hpfn)>{bY;f>bcSuY%C zu)^@yJamnj_7pLsFwu_z5uV3ii54)us#mPzRYIq9A!2^_{?N6g6rEbLr?q54PdvT6 zENlZuDsS!|&OJ-=i?!}VJfSo9VHbj2fdo4Uy%ey+!R_pa`hjY<#@I4Hu4zv$gl+lL zum_KUCIUR2Ui$Xv7w_ctOQRC?Nw-EU-F|Wr)3ZTA$7;_ao<&@LH;awZ&08r93<`~c z|G7BkHn|o6_?h#5_Nq!XtI;~U$Cw}`EHLR}=xWoI`!A)S(hNaG7CB=+>AsX=nG0Q@ zmnOFVX_;~=w?(GT5{s`sL_YQKCMPMQks?;R9ro-Y)q-fekR?#h^S;O1uN9ZcnB8JE z{h!+#5 z@Vv)529KW)Eh3~nHrnBzlcz&)Z#v`fpSwt5OWF>=JacE#122gOaX7H8%|L2m2GDWGFl!mbyMY=wHLU13CX9#tk8{UxWJf?Ow$!k7ipp@ zXRD6KuMddkRCxA(kp_p*n-uWw4AecOKLY{SM(PfqWwdbVAek5G_!ZEtIZa$n*WqswL+(+Z?l%JXJ zNUne>tw*B@NfEHuA3MxJTXKJw=EcHTPxH9IG1NITR;cP)#@MYWyQrMn0F zcb*Jk>sMiL@NDr{kk`(pd}}ytfQp z^W_=sxw%ANjq}04)T&Cp4OO@MjJay)X?$zm;i!5F=Sl{Dq?jdXy{H<7sp%z&i46}@1;Y3bL5Fz%nj z*vvkpu5s0K6O{0{Jp|mJ%a}+qftd_ejDqwa5@A- z5$AELP0wgvBbp`Ah6Og)5-Z_DAF&hnT}d%IotwqX#wFKOW-d zL)&!F#ELt5W6Ly=wzF9N$RLx@{GU7+d(Xq;R*$I~D(%BS*W(e|1wXF6m8Az)Spz$e zZyl%qU9O z#=8&6?&j;b13WJh-l+A^N)kP#J|EG40WNO%x%6Vo|5g(dx-@Q0!9B&aVKy zgri56NZ9w(Az8xI z!OQ^nMuB4vPuk`OmeaphuV+0g3h&jsZx*Rcstj}~UY1zL@~69qF!)K6>4pNRZ-D2v zP(I=;Sk_vEyq$7}m&^X4$I2}Y7B6xPp@^K__!YnevcbNIE$+JtyW_Rzq;SEwbTX#> z#y!#Zq_h2;)OXpJmHQ}9AbcO}!n(0JOpSC`(tvyPlPwHS62mL;A8}0rL)hQnbnlZ9 zrRX$%K1T>TH;55Gz~>sk$)q;5c>PP9G~bz_;Nnrvz+-e)rpsb41_4Ko$pVDNoQAlk-|IhznI1^Mh4|wi1OJYx5eQQ0n4bns`8qyW7w?zuP3tJ8!CD0}S8KtU5US$aX~8b+G8l}ehc-9X3B&NIvs2{m-zjYM zj_FC({GXOotuEdbC)^Ve%mKdDzIgqn)H5M{3j_t&^_8v~=D?2UG^KHvPYh}<_l zw zhlnKAGkI_jnQVB);mo`;gEl1yS5Oa4TeAz-UKhoqyFgXZc?+D0JDmHM&- zwry!zd5^EFeowC9VpL2g9@WrY!2-v3`%%D@U^TX?-vY;%r#h}Y&~VetpfWREw{X70 zx97Ncu>%)vc=kK1m6E%Rcves9zmaulCs>^qi_~<>T_&I(P9L@NRxkpF*mEYxOHPCW z7RrDimNirGu=rWQsDFNfit>TS)c_Lhoyp5wv9afjKK4}4?RV1sLGtNqt+U77iK(7)JZ7#P{Uaj49WF6Vr;w0=%^ z_HiB$Acg?X#YZ`{K7Vz7{7FsQ>Jf9L*Ud9E_$2eTm%qv7#zA5~y`^)#mFY99yHd_` z;GRAGpJ`6{m($YHCLDUn@6{HQO4w}t6)R(JaW#)6$UjZ-`_gL30r2dvT4__}Uk8`? z2$D59>G<#CmY85U7N0j%W?`P=Ym?f*K^%n$(sQ9`pr*|tj z=GVoW#kc(MJV2qNUgGBurycIk9vW>5r|20s1%O6V0l=Mvi_dAjMq|C+eXF@*i)|Iz zfAlveGQ$&QMbaKl+?jql9@_cQTvv3nlyeSf>}gAcO^ahbSjtC|*Mq#%Oxxu}%&qb1 z6u`?F@bXA?$E=T>%m#MD6>g1?f)K{D3Oc~M2TthARdd2wpWk3g*>1FE#7WY7p+<%~ zz<@SDeZgzJf}Hr9E)`VCJcN8mANTq3@Mz$om^jqf77Q^W{LC?);jNeO<_N&RDr=Snjs#HuaO)dR#O8yn=A*Gd@E`RKWDw?Vg1X4#!|id_ zc-^FdWj|k$Urv3ZUviPKkeGK^5>s6;Lm&b_G)tTQOHd@<8E)H69a|riQM+s7?aNT$ z-dGSKp3|hFhMS?}9zRq^+oIAU0#_Eo{K&>9Yq&ILl7E%T4z-BwFNL1$6rg-yR)o{} zWO^}y7`k!y9`br1LNm0QWMXZW-gkQbDc5ZL?7=LBqC&#nAW`*xMM~hot|zEgA>-nP zV)31V-i$DFRFmy}S|QdMO)!)r*O$k9uU#VXu;PxQoo-G6%S-P&J~B#*%vLGHBs$3M zMak;O!?=UtH^9_rrqrX-*&0F6oP`5>Giww1#9=Ps0t?1FC3-GionX1-=pz0_#}`df zQkrtRsBTJ029X_6Bg2gMq4e!gC$f&i2Gv`){8w6bQ_|!5Hzt{!a*_kibThl(>uV=B z9D&`I4+NpAk26)N+sj*DM;wPQI*N+FRq6Dyq*RROuv2UAx6-0R}s)-oK?(SivK&AIq zmLkJIK@(r?Oyhcn9Dyhc-;keBBd}Hlb9P{c&PNJe3n+a&aIUBsG!r(5^PKQwT=`VI zCU@pix3X8?AR1WiAov6>XDMYBqvF)=s=-v-HP=tZ_3J{@?j9#I^vMw~b0KbTf>OfD z*0!BQzwA<+<2`2tNnU#Byuo9&&kE(cb|<&aDLK+(Y{$Af9S}%;de-a1@3cnKat+ok zZlR>YidTba=fCU4hXy9PgPmS=Tu^xITH5!@e-Hm7Ntnl}Y@f_{Cdr3}&b?Wv?idoh;)O>1e!QH^Et! zqh_Z6=R)UoNyVv4KtQK-)GjK*>Ltqeh4G9^N&?=C*Wranhj;Yx^H78{40P-fE_!$r za-oFWn_zzrXGru5B-N613KVw+)so`;BK5?{%(UC&HIm~#>^+aNfd7rwGE6$GeJor2^DmojGlwT9qD5HNBI1>6PI6dz|zFIG3oyYHpBqGIb)7KcvRx~7{8tV}L zwWAAS>8FuyiuG~&$dZIsy}1R3v|~|vX3j>VI(FJ+mJ}=}pELUE1~*H%HBU?UJ+5&2 z-HPxPDKQ5at}@`Yt@UlYliYKVJS5%AWFv4C{je0jwl(AC*gRES4v!qrD*0MZ;q)A5 z__kOyquWlkB}NHlkHZ*)Z@~AkseD6S>l^QN1VHc zvgM-JzLPh58?O|N+VO&6C=LmpQ&zJTYS$O!aMc8}5}XLgfoADYAL<{!;Tp~JkDc`f zgGT$K@_@9Aj&E6 z(WSI!8|Y^Nk@%K?H*~`1p~UG#QLFFmc!JyI8;kK0sZ`18rp;WEaPU)tpiAugC4l~S z$!eyP(o2dbOw8)vq9drU7#k}KBI!URT;Z)Xe&yRNUOyeO51+r<(T)-m#}Wq44lT8I z-=LXgKWM5qv9{cS2(UYPAE@6i2Y$~7syhGUY4ZBQ9Ml4JqEh;GIl3|-SP`|L%S2<> z{Nk0I$m1R0OeI+E#I9Z+?h_(8q)(I$>NCwXn( zI>Jy{4#;B?LSRwQXzDKUZQ*Q~XzA(*H3B_y4Oh;rAV= zUpIpHJ!tIN{huuC0cw^3j&=b~(hh!3_XQv87oLEU3${)^}T##p#H2DttQLLZK`d=H`hFNmI-ms3Cp!U_1FQ3)WtT@;$l z_5U{>J=1#>AS5m*E+!!;AtZK8INNj&q4+Nz6SvU&2^1C<5mi>kQA_&IXtn;U+00lA zaCY-_y1#vXfoBL$A3H~X0Wom_Wdl_eZ7E*?VR0!b0fe8Ee}Ippy{)H@i;u0FgO9hZ ekhqkjh$R30u#nNE|M|lS&{ES^eX9(Q`o91ZC(jxH diff --git a/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java b/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java index 1ec09d5996edb..cf719ded38ead 100644 --- a/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java +++ b/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java @@ -18,7 +18,6 @@ import com.facebook.drift.annotations.ThriftStruct; import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; import org.openjdk.jol.info.ClassLayout; import java.io.IOException; @@ -69,7 +68,7 @@ public static HiveFileInfo createHiveFileInfo(LocatedFileStatus locatedFileStatu @ThriftConstructor public HiveFileInfo( - String pathString, + String path, boolean directory, List blockLocations, long length, @@ -77,7 +76,7 @@ public HiveFileInfo( Optional extraFileInfo, Map customSplitInfo) { - this.path = requireNonNull(pathString, "pathString is null"); + this.path = requireNonNull(path, "path is null"); this.isDirectory = directory; this.blockLocations = requireNonNull(blockLocations, "blockLocations is null"); this.length = length; @@ -87,9 +86,9 @@ public HiveFileInfo( } @ThriftField(1) - public String getPathString() + public String getPath() { - return path.toString(); + return path; } @ThriftField(2) @@ -128,11 +127,6 @@ public Map getCustomSplitInfo() return customSplitInfo; } - public Path getPath() - { - return new Path(path); - } - public long getRetainedSizeInBytes() { long blockLocationsSizeInBytes = blockLocations.stream().map(BlockLocation::getRetainedSizeInBytes).reduce(0L, Long::sum); @@ -141,6 +135,16 @@ public long getRetainedSizeInBytes() return INSTANCE_SIZE + path.length() + blockLocationsSizeInBytes + extraFileInfoSizeInBytes + customSplitInfoSizeInBytes; } + public String getParent() + { + return path.substring(0, path.lastIndexOf('/')); + } + + public String getFileName() + { + return path.substring(path.lastIndexOf('/') + 1); + } + @Override public boolean equals(Object o) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java index b4ec1d8dcb578..9e5eebe0d8e5e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java @@ -32,7 +32,6 @@ import com.google.common.primitives.Shorts; import com.google.common.primitives.SignedBytes; import io.airlift.slice.Slice; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -76,10 +75,10 @@ public final class HiveBucketing private HiveBucketing() {} - public static int getVirtualBucketNumber(int bucketCount, Path path) + public static int getVirtualBucketNumber(int bucketCount, String path) { // this is equivalent to bucketing the table on a VARCHAR column containing $path - return (hashBytes(0, utf8Slice(path.toString())) & Integer.MAX_VALUE) % bucketCount; + return (hashBytes(0, utf8Slice(path)) & Integer.MAX_VALUE) % bucketCount; } public static int getBucket(int bucketCount, List types, Page page, int position) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java index 5b9894839ec29..b2d00458aaa82 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java @@ -16,17 +16,13 @@ import com.facebook.presto.hive.metastore.Storage; import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.PrestoException; import org.openjdk.jol.info.ClassLayout; -import java.net.URI; -import java.net.URISyntaxException; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.airlift.slice.SizeOf.sizeOfObjectArray; import static java.util.Objects.requireNonNull; @@ -39,7 +35,7 @@ public class HiveSplitPartitionInfo private static final int INSTANCE_SIZE = ClassLayout.parseClass(HiveSplitPartitionInfo.class).instanceSize(); private final Storage storage; - private final URI path; + private final String path; private final List partitionKeys; private final String partitionName; private final int partitionDataColumnCount; @@ -53,7 +49,7 @@ public class HiveSplitPartitionInfo HiveSplitPartitionInfo( Storage storage, - URI path, + String path, List partitionKeys, String partitionName, int partitionDataColumnCount, @@ -72,7 +68,7 @@ public class HiveSplitPartitionInfo requireNonNull(rowIdPartitionComponent, "rowIdPartitionComponent is null"); this.storage = storage; - this.path = ensurePathHasTrailingSlash(path); + this.path = path; this.partitionKeys = partitionKeys; this.partitionName = partitionName; this.partitionDataColumnCount = partitionDataColumnCount; @@ -82,25 +78,6 @@ public class HiveSplitPartitionInfo this.rowIdPartitionComponent = rowIdPartitionComponent; } - // Hadoop path strips trailing slashes from the path string, - // and Java URI has a bug where a.resolve(a.relativize(b)) - // doesn't equal 'b' if 'a' had any components after the last slash - // https://bugs.openjdk.java.net/browse/JDK-6523089 - private static URI ensurePathHasTrailingSlash(URI path) - { - // since this is the partition path, it's always a directory. - // it's safe to add a trailing slash - if (!path.getPath().endsWith("/")) { - try { - path = new URI(path.toString() + "/"); - } - catch (URISyntaxException e) { - throw new PrestoException(GENERIC_INTERNAL_ERROR, e); - } - } - return path; - } - public Storage getStorage() { return storage; @@ -164,7 +141,7 @@ public int decrementAndGetReferences() return references.decrementAndGet(); } - public URI getPath() + public String getPath() { return path; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java index 34e879fb4f4af..55ac7edb32081 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java @@ -442,7 +442,7 @@ public static long parseHiveTimestamp(String value, DateTimeZone timeZone) return HIVE_TIMESTAMP_PARSER.withZone(timeZone).parseMillis(value); } - public static boolean isSplittable(InputFormat inputFormat, FileSystem fileSystem, Path path) + public static boolean isSplittable(InputFormat inputFormat, FileSystem fileSystem, String path) { if (inputFormat instanceof OrcInputFormat || inputFormat instanceof RCFileInputFormat) { return true; @@ -464,14 +464,14 @@ public static boolean isSplittable(InputFormat inputFormat, FileSystem fil } try { method.setAccessible(true); - return (boolean) method.invoke(inputFormat, fileSystem, path); + return (boolean) method.invoke(inputFormat, fileSystem, new Path(path)); } catch (InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } } - public static boolean isSelectSplittable(InputFormat inputFormat, Path path, boolean s3SelectPushdownEnabled) + public static boolean isSelectSplittable(InputFormat inputFormat, String path, boolean s3SelectPushdownEnabled) { // S3 Select supports splitting for uncompressed CSV & JSON files // Previous checks for supported input formats, SerDes, column types and S3 path @@ -479,10 +479,10 @@ public static boolean isSelectSplittable(InputFormat inputFormat, Path pat return !s3SelectPushdownEnabled || isUncompressed(inputFormat, path); } - private static boolean isUncompressed(InputFormat inputFormat, Path path) + private static boolean isUncompressed(InputFormat inputFormat, String path) { if (inputFormat instanceof TextInputFormat) { - return !getCompressionCodec((TextInputFormat) inputFormat, path).isPresent(); + return !getCompressionCodec((TextInputFormat) inputFormat, new Path(path)).isPresent(); } return false; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java b/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java index e37a5d967ec95..9e55d561258a9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java @@ -18,7 +18,6 @@ import com.facebook.presto.spi.schedule.NodeSelectionStrategy; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.hadoop.fs.Path; import org.openjdk.jol.info.ClassLayout; import javax.annotation.concurrent.NotThreadSafe; @@ -32,8 +31,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static io.airlift.slice.SizeOf.sizeOf; +import static io.airlift.slice.SizeOf.sizeOfCharArray; import static io.airlift.slice.SizeOf.sizeOfObjectArray; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; @NotThreadSafe @@ -45,7 +44,7 @@ public class InternalHiveSplit private static final int HOST_ADDRESS_INSTANCE_SIZE = ClassLayout.parseClass(HostAddress.class).instanceSize() + ClassLayout.parseClass(String.class).instanceSize(); - private final byte[] relativeUri; + private final String path; private final long end; private final long fileSize; private final long fileModifiedTime; @@ -72,7 +71,7 @@ public class InternalHiveSplit private int currentBlockIndex; public InternalHiveSplit( - String relativeUri, + String path, long start, long end, long fileSize, @@ -92,7 +91,7 @@ public InternalHiveSplit( checkArgument(end >= 0, "end must be non-negative"); checkArgument(fileSize >= 0, "fileSize must be non-negative"); checkArgument(fileModifiedTime >= 0, "fileModifiedTime must be non-negative"); - requireNonNull(relativeUri, "relativeUri is null"); + requireNonNull(path, "path is null"); requireNonNull(readBucketNumber, "readBucketNumber is null"); requireNonNull(tableBucketNumber, "tableBucketNumber is null"); requireNonNull(nodeSelectionStrategy, "nodeSelectionStrategy is null"); @@ -100,7 +99,7 @@ public InternalHiveSplit( requireNonNull(extraFileInfo, "extraFileInfo is null"); requireNonNull(encryptionInformation, "encryptionInformation is null"); - this.relativeUri = relativeUri.getBytes(UTF_8); + this.path = path; this.start = start; this.end = end; this.fileSize = fileSize; @@ -113,7 +112,7 @@ public InternalHiveSplit( this.partitionInfo = partitionInfo; this.extraFileInfo = extraFileInfo; this.customSplitInfo = ImmutableMap - .copyOf(requireNonNull(customSplitInfo, "customSplitInfo is null")); + .copyOf(requireNonNull(customSplitInfo, "customSplitInfo is null")); ImmutableList.Builder> addressesBuilder = ImmutableList.builder(); blockEndOffsets = new long[blocks.size()]; @@ -131,8 +130,7 @@ public InternalHiveSplit( public String getPath() { - String relativePathString = new String(relativeUri, UTF_8); - return new Path(partitionInfo.getPath().resolve(relativePathString)).toString(); + return path; } public long getStart() @@ -254,7 +252,7 @@ public void reset() public int getEstimatedSizeInBytes() { int result = INSTANCE_SIZE; - result += sizeOf(relativeUri); + result += sizeOfCharArray(path.length()); result += sizeOf(blockEndOffsets); if (!blockAddresses.isEmpty()) { result += sizeOfObjectArray(blockAddresses.size()); @@ -275,7 +273,7 @@ public int getEstimatedSizeInBytes() public String toString() { return toStringHelper(this) - .add("relativeUri", new String(relativeUri, UTF_8)) + .add("path", path) .add("start", start) .add("end", end) .add("fileSize", fileSize) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java index 0417c741b8697..b904e21a83abe 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java @@ -159,7 +159,8 @@ private InternalHiveSplitFactory createInternalHiveSplitFactory( .map(p -> p.getColumns().size()) .orElseGet(table.getDataColumns()::size); List partitionKeys = getPartitionKeys(table, partition.getPartition(), partitionName); - Path path = new Path(getPartitionLocation(table, partition.getPartition())); + String location = getPartitionLocation(table, partition.getPartition()); + Path path = new Path(location); Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path); InputFormat inputFormat = getInputFormat(configuration, inputFormatName, false); ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, path); @@ -173,7 +174,7 @@ private InternalHiveSplitFactory createInternalHiveSplitFactory( false, new HiveSplitPartitionInfo( storage, - path.toUri(), + location, partitionKeys, partitionName, partitionDataColumnCount, @@ -201,7 +202,7 @@ private void validateManifest(ConnectorSession session, HivePartitionMetadata pa int fileCount = 0; while (fileInfoIterator.hasNext()) { HiveFileInfo fileInfo = fileInfoIterator.next(); - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); if (!manifestFileNames.contains(fileName)) { throw new PrestoException( MALFORMED_HIVE_FILE_STATISTICS, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java index 6a4052656ff46..110b7cb415505 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java @@ -160,16 +160,16 @@ public StoragePartitionLoader( } private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, - Path path, - InputFormat inputFormat, - boolean s3SelectPushdownEnabled, - Storage storage, - List partitionKeys, - String partitionName, - int partitionDataColumnCount, - boolean stopped, - HivePartitionMetadata partition, - HiveSplitSource hiveSplitSource) + Path path, + InputFormat inputFormat, + boolean s3SelectPushdownEnabled, + Storage storage, + List partitionKeys, + String partitionName, + int partitionDataColumnCount, + boolean stopped, + HivePartitionMetadata partition, + HiveSplitSource hiveSplitSource) throws IOException { if (tableBucketInfo.isPresent()) { @@ -191,7 +191,7 @@ private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, FileInputFormat.setInputPaths(targetJob, targetPath); InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0); - InternalHiveSplitFactory splitFactory = getHiveSplitFactory(fs, inputFormat, s3SelectPushdownEnabled, storage, path, partitionName, + InternalHiveSplitFactory splitFactory = getHiveSplitFactory(fs, inputFormat, s3SelectPushdownEnabled, storage, path.toUri().toString(), partitionName, partitionKeys, partitionDataColumnCount, partition, Optional.empty()); lastResult = addSplitsToSource(targetSplits, splitFactory, hiveSplitSource, stopped); if (stopped) { @@ -202,12 +202,12 @@ private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, } private ListenableFuture handleGetSplitsFromInputFormat(Configuration configuration, - Path path, - Properties schema, - InputFormat inputFormat, - boolean stopped, - HiveSplitSource hiveSplitSource, - InternalHiveSplitFactory splitFactory) + Path path, + Properties schema, + InputFormat inputFormat, + boolean stopped, + HiveSplitSource hiveSplitSource, + InternalHiveSplitFactory splitFactory) throws IOException { if (tableBucketInfo.isPresent()) { @@ -224,15 +224,15 @@ private ListenableFuture handleGetSplitsFromInputFormat(Configuration configu } private InternalHiveSplitFactory getHiveSplitFactory(ExtendedFileSystem fs, - InputFormat inputFormat, - boolean s3SelectPushdownEnabled, - Storage storage, - Path path, - String partitionName, - List partitionKeys, - int partitionDataColumnCount, - HivePartitionMetadata partition, - Optional bucketConversion) + InputFormat inputFormat, + boolean s3SelectPushdownEnabled, + Storage storage, + String path, + String partitionName, + List partitionKeys, + int partitionDataColumnCount, + HivePartitionMetadata partition, + Optional bucketConversion) { return new InternalHiveSplitFactory( fs, @@ -243,7 +243,7 @@ private InternalHiveSplitFactory getHiveSplitFactory(ExtendedFileSystem fs, s3SelectPushdownEnabled, new HiveSplitPartitionInfo( storage, - path.toUri(), + path, partitionKeys, partitionName, partitionDataColumnCount, @@ -315,7 +315,7 @@ public ListenableFuture loadPartition(HivePartitionMetadata partition, HiveSp inputFormat, s3SelectPushdownEnabled, storage, - path, + location, partitionName, partitionKeys, partitionDataColumnCount, @@ -438,15 +438,15 @@ private List getBucketedSplits( } private ListMultimap computeBucketToFileInfoMapping(List fileInfos, - int partitionBucketCount, - String partitionName) + int partitionBucketCount, + String partitionName) { ListMultimap bucketToFileInfo = ArrayListMultimap.create(); if (!shouldCreateFilesForMissingBuckets(table, session)) { fileInfos.stream() .forEach(fileInfo -> { - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); OptionalInt bucket = getBucketNumber(fileName); if (bucket.isPresent()) { bucketToFileInfo.put(bucket.getAsInt(), fileInfo); @@ -459,7 +459,7 @@ private ListMultimap computeBucketToFileInfoMapping(List< else { // build mapping of file name to bucket for (HiveFileInfo file : fileInfos) { - String fileName = file.getPath().getName(); + String fileName = file.getFileName(); OptionalInt bucket = getBucketNumber(fileName); if (bucket.isPresent()) { bucketToFileInfo.put(bucket.getAsInt(), file); @@ -478,10 +478,10 @@ private ListMultimap computeBucketToFileInfoMapping(List< partitionBucketCount, partitionName)); } - if (fileInfos.get(0).getPath().getName().matches("\\d+")) { + if (fileInfos.get(0).getFileName().matches("\\d+")) { try { // File names are integer if they are created when file_renaming_enabled is set to true - fileInfos.sort(Comparator.comparingInt(fileInfo -> Integer.parseInt(fileInfo.getPath().getName()))); + fileInfos.sort(Comparator.comparingInt(fileInfo -> Integer.parseInt(fileInfo.getFileName()))); } catch (NumberFormatException e) { throw new PrestoException( @@ -509,10 +509,10 @@ private ListMultimap computeBucketToFileInfoMapping(List< } private List convertFilesToInternalSplits(BucketSplitInfo bucketSplitInfo, - Optional bucketConversion, - ListMultimap bucketToFileInfo, - InternalHiveSplitFactory splitFactory, - boolean splittable) + Optional bucketConversion, + ListMultimap bucketToFileInfo, + InternalHiveSplitFactory splitFactory, + boolean splittable) { int readBucketCount = bucketSplitInfo.getReadBucketCount(); int tableBucketCount = bucketSplitInfo.getTableBucketCount(); @@ -595,7 +595,7 @@ private List getTargetPathsFromSymlink(ExtendedFileSystem fileSystem, Path List manifestFileInfos = ImmutableList.copyOf(directoryLister.list(fileSystem, table, symlinkDir, partition, namenodeStats, hiveDirectoryContext)); for (HiveFileInfo symlink : manifestFileInfos) { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(symlink.getPath())), StandardCharsets.UTF_8))) { CharStreams.readLines(reader).stream() .map(Path::new) .forEach(targets::add); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java b/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java index a0adf59b7aeaa..2130f52ee933d 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java @@ -87,10 +87,10 @@ private static boolean isInputFormatSupported(Properties schema) return SUPPORTED_INPUT_FORMATS.contains(inputFormat); } - public static boolean isCompressionCodecSupported(InputFormat inputFormat, Path path) + public static boolean isCompressionCodecSupported(InputFormat inputFormat, String path) { if (inputFormat instanceof TextInputFormat) { - return getCompressionCodec((TextInputFormat) inputFormat, path) + return getCompressionCodec((TextInputFormat) inputFormat, new Path(path)) .map(codec -> (codec instanceof GzipCodec) || (codec instanceof BZip2Codec)) .orElse(true); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java index a101f8b8919d7..b61e703bb0035 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java @@ -323,7 +323,7 @@ public PartitionQuickStats buildQuickStats(ConnectorSession session, ExtendedHiv while (files.hasNext()) { HiveFileInfo file = files.next(); filesCount++; - Path path = file.getPath(); + Path path = new Path(file.getPath()); long fileSize = file.getLength(); HiveFileContext hiveFileContext = new HiveFileContext( diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java index c9d0d5ab936d4..b294e87dda1a8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java @@ -69,7 +69,7 @@ protected HiveFileInfo computeNext() HiveFileInfo fileInfo = getLocatedFileStatus(remoteIterator); // Ignore hidden files and directories. Hive ignores files starting with _ and . as well. - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); if (fileName.startsWith("_") || fileName.startsWith(".") || (fileInfo.getLength() == 0 && skipEmptyFiles)) { continue; } @@ -79,7 +79,7 @@ protected HiveFileInfo computeNext() case IGNORED: continue; case RECURSE: - paths.add(fileInfo.getPath()); + paths.add(new Path(fileInfo.getPath())); continue; case FAIL: throw new NestedDirectoryNotAllowedException(); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java index 41a7fc01fc988..59b29074a3b4d 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java @@ -27,12 +27,10 @@ import io.airlift.units.DataSize; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import java.io.IOException; -import java.net.URI; import java.util.List; import java.util.Map; import java.util.Optional; @@ -123,7 +121,7 @@ public Optional createInternalHiveSplit(FileSplit split) FileStatus file = fileSystem.getFileStatus(split.getPath()); Map customSplitInfo = extractCustomSplitInfo(split); return createInternalHiveSplit( - split.getPath(), + split.getPath().toUri().toString(), fromHiveBlockLocations(fileSystem.getFileBlockLocations(file, split.getStart(), split.getLength())).toArray(new BlockLocation[0]), split.getStart(), split.getLength(), @@ -137,7 +135,7 @@ public Optional createInternalHiveSplit(FileSplit split) } private Optional createInternalHiveSplit( - Path path, + String path, BlockLocation[] blockLocations, long start, long length, @@ -149,9 +147,7 @@ private Optional createInternalHiveSplit( Optional extraFileInfo, Map customSplitInfo) { - String pathString = path.toString(); - - if (!infoColumnsMatchPredicates(infoColumnConstraints, pathString, fileSize, fileModificationTime)) { + if (!infoColumnsMatchPredicates(infoColumnConstraints, path, fileSize, fileModificationTime)) { return Optional.empty(); } @@ -197,9 +193,8 @@ private Optional createInternalHiveSplit( blocks = ImmutableList.of(new InternalHiveBlock(start + length, addresses)); } - URI relativePath = partitionInfo.getPath().relativize(path.toUri()); return Optional.of(new InternalHiveSplit( - relativePath.toString(), + path, start, start + length, fileSize, @@ -249,9 +244,9 @@ private static List getHostAddresses(BlockLocation blockLocation) } private static boolean infoColumnsMatchPredicates(Map constraints, - String path, - long fileSize, - long fileModificationTime) + String path, + long fileSize, + long fileModificationTime) { if (constraints.isEmpty()) { return true; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java index b7f2d0e58408e..9be421b8d3632 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java @@ -815,7 +815,7 @@ public Iterator list(ExtendedFileSystem fileSystem, Table table, P return ImmutableList.of( createHiveFileInfo( new LocatedFileStatus( - new FileStatus(0, false, 1, 0, 0, path), + new FileStatus(0, false, 1, 0, 0, new Path(path.toString() + "/" + "test_file_name")), new BlockLocation[] {}), Optional.empty())) .iterator(); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java index 7411ba49428f6..d3e3627f3732f 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java @@ -25,7 +25,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.SettableFuture; import io.airlift.units.DataSize; -import org.apache.hadoop.fs.Path; import org.testng.annotations.Test; import java.time.Instant; @@ -150,7 +149,7 @@ public void testAffinitySchedulingKey() // larger than the section size DataSize fileSize = new DataSize(sectionSize.toBytes() * 3, BYTE); - hiveSplitSource.addToQueue(new TestSplit("test-relative-path", 1, OptionalInt.empty(), fileSize, SOFT_AFFINITY)); + hiveSplitSource.addToQueue(new TestSplit("path/test-relative-path", 1, OptionalInt.empty(), fileSize, SOFT_AFFINITY)); hiveSplitSource.noMoreSplits(); List splits = new ArrayList<>(); @@ -609,7 +608,7 @@ private TestSplit(String path, int id, OptionalInt bucketNumber, DataSize fileSi false, ImmutableMap.of(), ImmutableMap.of()), - new Path("path").toUri(), + "path", ImmutableList.of(), "partition-name", id, diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java index 5dd7276277112..4459d338f40b7 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java @@ -128,7 +128,7 @@ public void testDirectoryListerForHudiTable() new RuntimeStats())); assertTrue(fileInfoIterator.hasNext()); HiveFileInfo fileInfo = fileInfoIterator.next(); - assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); + assertEquals(fileInfo.getFileName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); } finally { hadoopConf = null; @@ -156,7 +156,7 @@ public void testDirectoryListerForHudiTableWithCopyOnFirstWriteEnabled() new RuntimeStats())); assertTrue(fileInfoIterator.hasNext()); HiveFileInfo fileInfo = fileInfoIterator.next(); - assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); + assertEquals(fileInfo.getFileName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); } finally { hadoopConf = null; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java b/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java index 326f12af98b61..7d2570f1a7817 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java @@ -23,7 +23,6 @@ import com.facebook.presto.testing.TestingConnectorSession; import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; @@ -95,11 +94,11 @@ public void setUp() @Test public void testIsCompressionCodecSupported() { - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"))); - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject"))); - assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.lz4"))); - assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.snappy"))); - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"))); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.gz")); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject")); + assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.lz4")); + assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.snappy")); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.bz2")); } @Test @@ -196,20 +195,20 @@ public void testShouldNotEnableSelectPushdownWhenColumnTypesAreNotSupported() public void testShouldEnableSplits() { // Uncompressed CSV - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.csv"), true)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.csv", true)); // Pushdown disabled - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.csv"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.json"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"), false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.csv", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.json", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.gz", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.bz2", false)); } @Test public void testShouldNotEnableSplits() { // Compressed files - assertFalse(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"), true)); - assertFalse(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"), true)); + assertFalse(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.gz", true)); + assertFalse(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.bz2", true)); } @AfterClass(alwaysRun = true)