From 17923404866003e27a510be793ab65c290d8802a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Oct 2024 21:51:45 +0100 Subject: [PATCH] feat: Add support for multi-version metadata As an example, for comparing against the most recent I've added the 5 most recent --- .../metadata_v2.5.4-v2.9.0.parquet | Bin 0 -> 11354 bytes tools/vendor_datasets.py | 11 +++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tools/_vega_datasets_data/metadata_v2.5.4-v2.9.0.parquet diff --git a/tools/_vega_datasets_data/metadata_v2.5.4-v2.9.0.parquet b/tools/_vega_datasets_data/metadata_v2.5.4-v2.9.0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5626093db560b805b33261bdc5f6b7754ab3451d GIT binary patch literal 11354 zcmeHtc|25Y*#8;p*w+~|_H`0s>`S(>FJl@)!h^xs85ts>$et}(s;3enl|2N!-mSkyBTJWA3R8PppmljUo(yIm2w_ zV1x892FjnH8XygUARqvENMBZ15tV0y0FP3$Bcz#Ltq<^af_m3 z;{_(ioFzo7>L7@|Kf4OCajFkb|7k);BV};VUc_FQ-#vBZuwh4~iH8la3%~=&D863x z(p#7KeNDAS`@#rnfw`Y)@|J22?A&%SlyJy|zWVcu_S;Y8`N+AC54DCQ{X<1(RP5z< z?`6rI<8HaSowMP>viddqH0@a#oU$l?dZL72&GkGchwWnIq-}s&mBkmkT5Q=k&7-$G zhDk#c+S4LdLPHJ}sA(iBN9&Ovd07&j*rRL-6+U5j>xDsIu)3o>Ez;8PZIWAw%M1$9{Urs*Rvqx^Wy#=Mlob)*F^35m#o*otx$2xCrREkC(j`zheUYhor zfVB5jN+kwIbce6STU!l!;IbiYA}zN=q{ZvZdZUh26}3Lc0Jlsc^B<6M+lE9?ViGCv-1S#4iASE5^#X^t4TpML4_l4mvSrDgBzrsMP!7J%{fm zwMW0|tw{c?9_A9`(ZjIsZOPZy>C;~j9~hfCKs4mh!#>ke@xZx>2i8d6U=NZfku2{) z-iv^$gGu?|An(noYYCiU1kSQ=V1)6Hg9qL8r4j7O@$St&8p2u0o(AU~yS0|RD^xW_olJIm>Nl{S* zCvzgLc%cKDKZ8+H{5W1Fj|Ejc6s&ccLt2vmh(#%nOo?NSVd!vED+2Q8p1g@{ z5cL=)`H4s+VDE;lqb7-kOBgH$-bnZbsbJa;E52_XRg2cos*ofF(?+6@!jvR)2ea- z_U^7?=sm+3Pg09yYXeWc>arSuCbo)txII}Db#y3$E%eNm(AxrzfDLe%6?R+qk1;pa zG=Kh{kAzF+o$$r>kMC@gWVaL7)Z$)#opr_DWwLT;{?b2r^_EBRiHV~tm0A+82fCB7 z_nyD_vl`j&e~n?ayZl`q5$QH0RwKuv-)$iEQNc#Een2JYw1fR!#?_PQxKrb4UEW=* z@u|~l8W52g#d5Av%{4b!2sC)luu;!!;UWHqlkSh??%qBVc~6-`RGW*1=B<%%il@z@ znQ+$40Z++WiUUQ=&$=5HO`Zn86$Ehthn7QPJZcYUirvU5=o-7(nb|$tZkm1(IkAH; z>XPbVx$`XWi%sctbgd5vy*l7OgOs_&Ev#e7++rs(bqst-? z?i9&545y%7oxt`3^|AOhj_BQa3>`OeAlB0QCc%&E*+frvoAtB$r*i9?A9<@{OF!zf z;uR&gqx;xQpw+Qgoaqdc-R#?g6d@PQyU5x`gXyrF(A|GJsnnFi{3G>Z5}>0MvNpGk zN2ctzG{23P(2NOs&+C)G&@bW9AikvCWDG5<%om~+u6ne+`6l`Lse&VEfWSy_VD-4K z|InTLx$Ha(H)?L}he7+(Zi_DM-rtRjFOQXWF5BH6d&uY}4bQrB`0$f+fNnYm1RcdO ze0K`zj6nRaoWgITngUCczdMEBMl~;cim}Asox*oiGxBn$n@J5Y<=Rv@Y?R`lln5Q(?{)O2BjGU@Dx3ZVgBY47^#@_Bd>$82o7&& zLyhoI!1y?SNVD6kCwCs@+2!;;Ibfr|pm3JNT7f)rThfv{v)hvnf7SSlfY zgeAjIu&kxPl0{IPa~48{rNN?^AO)7K7R9DMw0*R7WdN#WPN`3Q+ns>%lP*; zF|Xp*P}l^I?cB$A$OZ|;z!## z^RFC9M&hwhSm6ckhGqH!6W}UyS~7%e2E@igIvIdt z<_TF|opl@6JDw6nK1vMkd^rx068}IMl281}OY|Mut5PCog`f?c((rjfGBt1kGI7j)X-fh*&56dB{-Y~aX%<> z*|FwcA(VSw_mbJ&B)Ot#esS8Sboc1cJE!{(KjJZI5xA)kI*c1{F&Q9Ps+5dG-d=cJ z2V*?^c56d)N1D@2^}DF4>Cc?eO}09O`%KKhxO*Bs_-gE(U+6c-Oti9QFz6Jo;pw+b55Fi@@$%fEmL1?oebl+5^4JCN5>$wbpzlWeI zbt!uOO(3Z0`)F9a;9tB%q;Q)$z<)=d9~rDJCDd1D>>{}+C%d>7AwD2!?#p2NkVU!* zk_Y|?vsmkqc_spMuQZ1oyvBV8;$2RmmN= zs{^;YG&(u^L{j5sY4#WRAkK{(t5DX!|FP%tREcLt0y?q-(|2C;kW>8Yf;avuT(upa zcdAEAx#I8R(jIH1wfBByX^_ZU38uQvWYADyZ!EV(WAHR_s?(s&rqUSd(S=O3(y}iGI6Q=DEgGMy6&6XmfKjgoX|p&^5cTBosA})mD-FQPV=QzTJDv=qo3hk1+V3 z2|JnHS64`IIARD=e1FXaF3{p8^XXasOO;YL*~VR^9_WVomDFl|j^u&NKS-EeB3W9r zH*as--Sh6r zrZZ$yS z$~-0I6juVF7vB>!@z{d&;?U(e^kS8p0-17ZrrpFQ8P&U6$X-1BRww|c_uv#o*FZE7x=G=1GX|;1cJ(qE=u2be_#RXZ2P;veje)T%m zHm`1n$W7~gE`CPJA|AOwO5yCqX5%y}@-uCf92|zU8qg02I|9rPsq(U!oAlHNW?T#3 zIR(=q4tUl2@sA`u=9lCt5H&&-J@&cJ^?+Sfm0MeL%IbW+qCvi%$LmBLI%|9(@{)5( zD%@iDjVY%GtG)g4%q?cCIxx@LCA_Nb`S~|rkvlCoEN>E(ur&wpC7dOxk)msISI$pZ z*sVC&>dLn6xxT#bO8!{T3w-l|;YX|m9XqixxGNBI-`q37 zePB0%4&6yElvD~k4*Rmgbj|^8)9>%SFCs&qlv9vgIUlrTq^vsWI*NwM%&cwd) z?az%yAIw|1>U4?8ZDme6BzyeDyF@tU@a7ig|r? z`;zDMl;VnTMP%fSNH5s&=XF9O5eJ;GXYqqI>ZS(L*N@kunuzt{wdV!^) z@RVI;Ke4I`nm?uNde=}#O(!?=UT%%KS4-H;C;UmiukTk4OHFo?c9vb-BR07s_ja#g z>pX(#X>#I&q%AUHl3q_AaDTk(wkNgLqT}N!f3#h*UHS9?3pU-|lHCMZ>5uMPt2{a4 zmHN+K;)(CerBC!q4%J3wm z*?`l>8|oA0F7H7KR0tsklgJ8tiQWY2X`3e@ zlt3nik`=%@3hn_Ucd~+)mpVb6sHU#zp`zxc*HHEF&{R@YQgqkUM0+U`)CsB{ z>IB5Ucm^7+j@DGwbXNzZp*2<1Row{$RWvAFQ$tAwO(1G0qLtK$>Pj07;{R!4yBYIj z4hsozT_4JIqVy^e0)HWaio;Nt(!Ust|5*bL&gYyGTG(qCm0ALu5pl!K3GLw-1d?%2 zVu0`75cwcts6tS1;9uvm{yGa)pv-NB%~%;VkgoTCCIqCg>q35xzgzeaWr5P!D7k}H zTFPizW%MY|TAum&&YUY|miv#^5g!)$qP86bwDu`y^+MygeHF6-;7BMC1_S`ulBLqw z`)>$w72_k`tL5p}wp#4c@MpQcz-8}JxJAZmWqiWFvy42cQYI00i2Z4A@xCVy3PR+R zEV0$9i#G}sF6w_haaG)3sgt3w5X-U%tM$%w&%*J{ym^r;P%t<{tHU2N?3oRW9*{g@ z#tYDyNOGI&;{z5L=9BgpJ%W*YeKQm8k(|~_aWGon)Gafz9Vho@@f3SU^c%>8P1yBq zFFg_x9Z}LD$~txI-7|I`2A+U)qeZtuql0zZ{n5vHnXv5A%UpHyw|gB#2G3>Mc+b;3 ze-!`zWzj*x8;MA7X2=`dEbLJd>pYLlg!bOE&c)u$MGsb#69n!JH+GcKHp(cp4mUSD zN9B!V_IxI*zjS!wn*ZkdO2qMNO(D7074AQ!C&fFslC8aC3bj`7IUd>ShR@EIlxa@v z4Q3}N$e!O{vCy`+Y{D38DoskzvYr_F;9YS`nm)bR(ZAyI;J1^K^4|`|6SN8Eo;V$W zENPGyNLo7A^St?EZBOhb>u7%X;;{4c>q6uB2i18@F|?40mW!eRB&^QG9N&+UFP8&X z#&x;e9rX;o__YynQDsFHvB_!4W|(ZrZV0!EfjB2mx&XIZFo90&3@1&pTf9C7${oiE zo@f#P!m`kZ+;;gpI&nKg4rL#<%@EF3X&#++5jvR|;ofNaRZlWr+3*pjms>R_u(;#I z?UdjP1bJ7%a2X+jmbq5cr!#L}DG5h>l;3^ij+)dXiDa`C3wJSDAC;^b7u5T=uAbN3 z1l}39SU)^I_F61%FDh2B+OLMoszcU-10KY48ex3QO$T>)ckZao>+Y(ff832>TEg%3 zdo8jt-+3>fb!uA?%3AzoqYa>0|E=?id9uZrkhA?1oMG$dV*u9_C77qmQ=mxrhMqv7CB~ zHvC9I4vw$Va-?Y|-N?$C%Q=;2R{i^sK7y+GqnCLc^lpkT6}ca-Qx|o8;JJq4>lP-U5@_>Z!i=|`j`#txKJ~=K$ zDl*ge?29*bH@mWc0)`m9dE5kJ|D$NV zp~47(msK5^7WMuyb6;f$tH(v|;4*oXjwEC6-bzX|XjPB2=|01xR@QhuYUQ!v4DY+n zv*f$p`~{JPjQ7_pC#Shc7L^?iS*!Uk%7;45lG@2>`CeiQPmOh1Tvgs--d+A=rSnm& zXZ|?5l>L_0$06E)3JNl~o$Lr3GOU#Tt&pZ>v>ymEJ(D>jMF%$USd3Ng+UR`|`J?CieEiE!={-L}J&z7Wh45b@;Nca@or%za)#5nT|Hk#!&YpwCo zwDeDB=2A|z$k5MJA3G>fLd?qTX)R#qezR@(Mt9szy;mQDm}I|>?(WOkR`^tsOEKgC ziRbSAiVt~qcHNKqM#7yck%^`PZHBb)*jgQe_UB@^p-)H+t87X2w@kG*c==lTeuM4w zi9JGeX@>4+(;o8C?BKI$h85!wvk*NAn%SdsPUn$GYftZiygB3W`)*uG{HW@s;}3@< zgvuI^BJtLA7hf(uXJ(V=&Na~!pJ)q9xh1}rIS$9q#x^85jop$_dAR-4LW=bv-m64g z@uB z%un`Om-wH2F908;wl9yTs&iKCyE>c3U2po!-A(vBRY(sgL=8aoDEutsw{~OTiRqM{ z#3xXN)M*O-m*VU2(^M&qpcGvIbx-HJD!&_F+ z?Z~9(cL6`ltaYpY-}ycTpQIXK3^YI~Er41)2twq4&1wo?r7EA83V{{T0Ptg(^dQnv zQ267$;0_v65W$MEvDvXpi|pY;3?v}#Fe*?E4HZKS24f8VFdBQobc8kK7EDvKEHM}| z2CM*HtueBAusl4RQf`jHcyH7rfmZ_$O45{)jl&pVFoxD(b&MTVhBcTrq6+h+w%C-9 z!KhQ@?W4-03NxjYV~p*NpWi=^s9J(*G-jWMQ}>lgO31vS&8)&y&T zhc{)kKs~)F_6GZp3nm6qhTy+xd`wk++;`b~d6@bC`~FkLdqW-}$m*ZuQOxj*{(ql8 zMii@U$lI-?Np?34h$QUN3MgX#_1i*mJIpAtsYLJUTM ziVyJ77>ora%P}y3>}anUwtJUi044no^ZobrrHq%Uy^5!aW~8TsLAblJ1=?|!H3mR1 Z;7I|55HhkHBptu_2RfhtfCs-S{tx(skB9&O literal 0 HcmV?d00001 diff --git a/tools/vendor_datasets.py b/tools/vendor_datasets.py index 259999fa0..61c701e1e 100644 --- a/tools/vendor_datasets.py +++ b/tools/vendor_datasets.py @@ -135,6 +135,17 @@ def request_trees_to_df(tag: str, /) -> pl.DataFrame: return df.select(*sorted(df.columns)) +def request_trees_to_df_batched(*tags: str, delay: int = 5) -> pl.DataFrame: + import random + import time + + dfs: list[pl.DataFrame] = [] + for tag in tags: + time.sleep(delay + random.triangular()) + dfs.append(request_trees_to_df(tag)) + return pl.concat(dfs) + + def collect_metadata(tag: str, /, fp: Path, *, write_schema: bool = True) -> None: metadata = request_trees_to_df(tag) if not fp.exists():