From 4b5db35945273ea1c8ad81b7d21c0830e71fee4a Mon Sep 17 00:00:00 2001 From: gaotue Date: Sun, 22 Mar 2026 20:32:23 +0100 Subject: [PATCH 01/14] Add filename extension for music files with no extension during import --- beets/importer/tasks.py | 104 +++++++++++++++++++++++++++++++++++++ docs/changelog.rst | 3 ++ test/rsrc/no_ext | Bin 0 -> 37206 bytes test/rsrc/no_ext_not_music | 0 test/test_importer.py | 28 ++++++++++ 5 files changed, 135 insertions(+) create mode 100644 test/rsrc/no_ext create mode 100644 test/rsrc/no_ext_not_music diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index e56157ed0..586ef455a 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -18,10 +18,12 @@ import logging import os import re import shutil +import subprocess import time from collections import defaultdict from collections.abc import Callable from enum import Enum +from pathlib import Path from tempfile import mkdtemp from typing import TYPE_CHECKING, Any @@ -1077,6 +1079,12 @@ class ImportTaskFactory: If an item cannot be read, return `None` instead and log an error. """ + + # Check if the file has an extention, + # Add an extention if there isn't one. + if os.path.isfile(path): + path = self.check_extension(path) + try: return library.Item.from_path(path) except library.ReadError as exc: @@ -1090,6 +1098,102 @@ class ImportTaskFactory: "error reading {}: {}", util.displayable_path(path), exc ) + def check_extension(self, path: util.PathBytes): + path = Path(path.decode("utf-8")) + # if there is an extension, ignore + if path.suffix != "": + return path + + # no extension detexted + # use ffprobe to find the format + formats = [] + output = subprocess.run( + [ + "ffprobe", + "-hide_banner", + "-loglevel", + "fatal", + "-show_format", + path, + ], + capture_output=True, + ) + out = output.stdout.decode("utf-8") + err = output.stderr.decode("utf-8") + if err != "": + log.error("ffprobe error\n", err) + for line in out.split("\n"): + if line.startswith("format_name="): + formats = line.split("=")[1].split(",") + # a list of audio formats I got from wikipedia https://en.wikipedia.org/wiki/Audio_file_format + wiki_formats = [ + "3gp", + "aa", + "aac", + "aax", + "act", + "aiff", + "alac", + "amr", + "ape", + "au", + "awb", + "dss", + "dvf", + "flac", + "gsm", + "iklax", + "ivs", + "m4a", + "m4b", + "m4p", + "mmf", + "movpkg", + "mp1", + "mp2", + "mp3", + "mpc", + "msv", + "nmf", + "ogg", + "oga", + "mogg", + "opus", + "ra", + "rm", + "raw", + "rf64", + "sln", + "tta", + "voc", + "vox", + "wav", + "wma", + "wv", + "webm", + "8svx", + "cda", + ] + format = "" + # The first format from ffprobe that is on this list is taken + for f in formats: + if f in wiki_formats: + format = f + break + + # if ffprobe can't find a format, the file is prob not music + if format == "": + return path + + # cp and add ext. If already exist, use that file + # assume, for example, the only diff between 'asdf.mp3' and 'asdf' is format + new_path = path.with_suffix("." + format) + if not new_path.exists(): + util.copy(path, new_path) + else: + log.info("Import file with matching format to original target") + return new_path + MULTIDISC_MARKERS = (rb"dis[ck]", rb"cd") MULTIDISC_PAT_FMT = rb"^(.*%s[\W_]*)\d" diff --git a/docs/changelog.rst b/docs/changelog.rst index 2354e6539..8168b33c7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,6 +26,9 @@ New features :bug:`2661` - :doc:`plugins/play`: Added ``-R``/``--randomize`` flag to shuffle the playlist order before passing it to the player. 
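The ``check_extension`` hunk above shells out to ffprobe and renames an
extension-less file before beets reads it. As a rough standalone sketch of the
same idea, separate from the patch itself (the function name and the short
format list below are illustrative stand-ins for the patch's longer list),
probing a file's container format could look like this:

.. code-block:: python

    import subprocess
    from pathlib import Path

    # A few container names as ffprobe reports them in its format_name field.
    KNOWN_AUDIO_FORMATS = {"mp3", "flac", "ogg", "wav", "m4a", "opus", "aiff"}

    def probe_format(path):
        """Return the first recognized audio format ffprobe reports, or None."""
        result = subprocess.run(
            ["ffprobe", "-hide_banner", "-loglevel", "fatal",
             "-show_format", str(path)],
            capture_output=True,
            text=True,
        )
        for line in result.stdout.splitlines():
            if line.startswith("format_name="):
                # ffprobe can report several comma-separated names, e.g. "mov,mp4,m4a".
                for name in line.split("=", 1)[1].split(","):
                    if name in KNOWN_AUDIO_FORMATS:
                        return name
        return None

    if __name__ == "__main__":
        src = Path("no_ext")  # hypothetical extension-less input file
        fmt = probe_format(src)
        if fmt:
            print(f"would rename to {src.with_suffix('.' + fmt)}")

The patch itself performs this probe inside ``ImportTaskFactory.check_extension``
and leaves the file untouched when ffprobe reports no known audio format.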
+- Use ffprobe to recognize format of any import music file that has no + extension. If the file cannot be recognized as a music file, leave it alone. + :bug:`4881` Bug fixes ~~~~~~~~~ diff --git a/test/rsrc/no_ext b/test/rsrc/no_ext new file mode 100644 index 0000000000000000000000000000000000000000..fed8dc6015f31a68e94a812141850e284803a300 GIT binary patch literal 37206 zcmeFZWmJ~k+BFPf3z(oFp<DE$ciU2T-_@%Z6wX~e zkDpWH?|I|frbheD+34IcJ9ka;0>zrGYw*)MCb#uQhirJh>(#L9>~0Gk#7Z zdHLdX3W~LY90^UKOs6cigp$Y08JFBQ}9 z!wuGw(&8IBH*Zv?HtE-2^vI~>hEY`!prS^Yo*P6#k)j zD^o!J^Pud1|M~9}{5J~zyAl421^w#;< zfRb>7V+SHM7z}8ferM{pIZsy9NnDijih(J`|sKZWk|3{cb-KB@@Ef(9qyPML}`a^Y!O*-$HN~Zqv4mOHH|dyFD$w?sqS0Nj0Ja zL%pjP=k<8Y%X`<~zkmOjb!kAtVkjn>zgf?dGwrcPu4C~FS<#qVcogr7fWfQJ=Qb%Q zDV4~&PQ2J7;%I$t)K_Te{VQ(mM_;h@t2x%tdL+n4%UBfB2o$}Si`?9yR~dYoTdU~t z&0NRSHK55)U#lvxXKgu=bJ9n2e&A~B)L`8a(ZxT7e|uP0-rvPjboAD55Vx85cBHg; zw`SW1lco>KJJ~LIJr?M6mfw$Od@VB4IvTh%+bMQfCiut5XtwoWZNw;kF_7`SE1cc7 z(`7beEN>ygY4$hg@8K(&^Vf4G%8q;BJCeAxvSs9#5C{W8upRdm>tzSqQhJr47og#Ir)EJt%lb}ac||uK6uu3gIz974+fy{I`?Y@P zlPZ_rR!Igm5$lI(PygB`z5xQ_1m%Wx!!?l-!47?|#YZRKxU+b$`3j$+KcT^dNig~7 z`t|Alq;}iZqoTOs=}m&{KLqmUDt;h9JsLZCs)Tp;lz4mc88u$KSoG`Li^#mZpQ@g# z58ua%I}g@H%X%@NNi$gDRL|lq@nPO;V`HrG zDh#dlN<*D~-`R)j44&V8(6;+>v{4G)_WaO;UgNJJ-CWjvf!Kyn}iRCil*3njjvXFmW@NJ z$kJ3@l3_zEjlapKZ-#6tLx`k^E{O&%u zMAvgK^tjT8$8iZ+vC6Gv_#JL5OEmsAV;#BdZ?@pFQD0m}b_M6}o~R&Gk7Cc=0!|C> zt_1Mk?Bl1y1&nN6;c^!qQQxVVF=|Ti)?A(l;2hF!{-jIepDDc#L0+;^&>&nkOq7#$ z2aDsto133>Bt7R1IJ!|?#icbrsKnR(=eL|L^55P{zVNVQ3Np)cc%U2l86kOA8(XpZIRjIXA{ z?eE`Gv5obtl7N}Jc3cuK}vp0 zrn%FN7A<$8dyRV-{xh@6`sTvezv9hf4*em!)w69Ynxk%3&Edo0F671h8JPI^_{O8f zmSTz*+7PkY_3z~zS~*oxzEY6cxr+gI;G^#o6HmM!MTXGEj^b?56c-o28Z4L;p0+aI zluu1|6Aj|y*7Q(AlKIas_gmhNJkFM?42vGiJo=f)|3p;e=1qbcZcEej zqh#B7)X9p8)banHxQTtLs^EV@vm?Sq&7t>Y4Y2-d>#P4nm0?GYgF|b|!7jny89m5| z+h3>eT%Gb)l~QN5H^2UzWZe8oZ(78>t01H6{+3f_>B#Z+W4W``nD*+}W5AqfHWR1`CNJ^bd67pGE*+bRXQPug|DsB!YCMK1tmq{udz&k~`C@Lrv zAxB3@__Dk%)&G2dokjPPBmTeC%5~Mse{MBAI`i_Kx>SE)tu*EV87 z%36*_2k-C|o_KuzLn*b-0p%Qfi^z22`X+%;R$b-LRNp9_(d?`&A;;O@?K*B-=s6w( z6m~m%VSzoFZGXIJmK+>uPVPZ^H*1_rFBW`zvZPn-g+l46VNp+u(4$@!nf0yxd|pXa zaY-X*J9m2}B^ht+d?Pe5yFf)NxUa-}Z_}@NnGoUY@&$4Sn$+#o+bqjDM~8y*W^>1i zkqeuD%>z#b5;D3y?$5`@#(H!v&!kt-3N(Ub5cJVTEOGH8PTO1WM2A==$S%%(NIqe){xDy!4b+ z-&HD5CF?zNp-ua{4Bj4=I?I)*XQ6Q8Mt}P<@}sA(%ZN6o&?Ch-rD8CtTY&Gow1w0@ zBMScnTZ*_xVt%u1M_Yc)|15Mb704U+re`hu{+dU7v?X=@zk{vl`t!8r(ORZoe#g58 z>SN?zB=hh$>%7S53pDbWc#}<1FtD!r&99)(HMOM_DH3aFEz4%ZYev&M^J=!Xcnfsw zQ8mrf^_&$0biERF$-0NN7uuEnUMKp%FJ7~boSS*h4ljoHZV{|J-5uDQDg;LBO^)AP9I7s~D#>R3Af3(gE}DU0-FyzaDS05^UTm zxPB`O(+zbS#hMwXm8Dq;X=!gD1%*(sGo`z`7k|C-rmshC6cEZHuB)qSJ=P&&n!hwN z(44GaGjrxn?ZIN8s?b3m!B6trC(7u|!5I&MNcF_VC3t*(uwl1JnA?0^ST|CuoHJJc z!VYutB7Ang+k(?Q)LiF!SJSmkm_UbpSXOY*c7MbIXKg5 z=8Lf&heurfW)m6Z=2}J`LBhwx7MiPe?ZtfjS9e(suzGfs?nR zFb~WR@3Nhtd(*YG2}DQj)3PRrLw zP12g94lcQkIdytwTJ;~}MQUwiom}Jdh)-?}V;%Tl#&ANmR&oxKg_^oS4tjA7U4J9z*6mNA}mpdmbJhB%le@9vXDR&L)Yf z7$HWcGknt;vR`P_EWaJH7G2e zT-reYxOK~R6(d5wvS}8`$;rLB%(}m8pODSaF0rLSS?#HTnnSInAk#Yn)GVK9VK}?O z^YTQ;#sv8fb`1>dKHFbuF`C-sKb7X!ALb@n;>#K~O9gRKke@03jv?RdWq1$4rM+f3 z+_XXi;m=34kv0fiUotU?zmMDLzDsHq4$PX4yji}*q&3s(ci1o{Z7q}dDdQg>p|Wg( z+%|4j*8m@ml<@T-n5x4U11;jt>MBSpw)jUcOuKQCqn9<(lDXXSgXGbw^pTjHZSW8pp9Cg&JNU?r(=F7TSzdV(@k3h8DpO8 zu%-qL^aNEqL9ke*Xg8t=*!dogr+uD#5l5pd+7E$-DtO(7@hJs%5cj}MAMDl>q1NGEjK zb-EDzJP5H~I-}nh5yW96|w>dQVv090Z zf-SF4xQ=^TfX4?|vnJ-v8!|y$Bw6;B*)Pxa2N|bNH>#F~pL;+}E;=pRIdz#ndD<(aeAh!X8hA!aoLVvh)4TrSrrNYp7N;WE1wg*0c-7s}p^k=&b5(AppS)WM zUCm$ 
z|4=gyEuqoT5=iE~MaIiDD_=n~Ejdlfi5ZhBs2&M$2U+RKn_VvR)8}Dj6stF4BoH!4 zh$^3m)f=UTbaw`*eK?AnD0}hPJkYdg7A9`QYNx&{$d9w2&8wBYzyuD*F2$%FNuuEG z)ii*D1+LZvlv70CXD9f@vQz5}^@^j9XaFR)fr>Jty;0z?G2G+uk@n z*rY`3K!cgQGCK!^5_vXw=p9#1dxjt>eS?EM$i*oKL#xzGkn(&i+gAOm-V3z} zP_Z4`_2oA2c-;4Me5)H~Pi#5$)G4aO4PA$$vp@6go$EK|PsNA$B+?w9J5v`8f&d*;u)L)ouB#9-`8k+}- zZziYZ)mZ~=7m3j5Y(ah~%yrtw02YWRK`u+F^d^A%o`l$A+aR7 zd0My$q+#eIbP#)FFAnGia%Bg|il6qL(1Whm6g#OA`dEi`5(m}PY@rEDJNY;P(91;C zqJNx%+cAVh130L1tUvIk$XgT1JRpPm1XU*DLoN3WQ0h5JB(KC5lHy;kiT{rftBbBm z<|@}3gdTN(8+t{a6Td{bJpGkX5Sp8S3ObHtYr+oe7fcCu4XKL6E~DLFD+pmW1_x!e z?03Qf_^;uMOK5R!3b z(?RFI1GM;%s)MSC0PMi@&f%xEJUyS-D*uP3CMvKTsM3fB3?#jkeJX<+X+&($1|VRh zLjt=R!gPW27uCtoHRxIMCC!?yoi%ZgwaGk3`UvgVj}!GgU65x0fW`w-@g>Wc`MQpwrK-8 zo`)c}@<#$uiK@qWPe#I|m<~@~KgJrNk9&SJsOcttz^YM@4ipLFP;6R45eRAGVR9A%D3(v3kV2EM}ZiU1kni@lEBS|7wr>= zr2?MM(k|hQ@$VQuUfH&9zZ}Xpfj@Q*cjeY6~-r2$Ksjf=1%fcPCXS+2f3Z8`cyzE^_32MM(LydP`bQ$@d8cd=5X?W zA&L_>{4M!ROiXpe=aicP>>$|}6_FvJ$m0G{AOS?J<3C>o-PDDF3G@pFSQTM)z>FUO zbnW?N2^U4L;#7$2+bn@pd=I%*5>`W^wG$8!2uT8ErH1g;h&Qv$MAA_NL|Y>DEcoty z(a|vgw=16X#IIi9^!YC`9|DQtA8WbFWyk8$;UU>vctLBR7%?SL42S%=kQm%iIJ%$E zud~cW;`5}3g|R`iZ5TH3twseTzBaT<5TTmcgkJ!EiU@fc*}LmLF{|G@D4Iyw@sb2b zFnzkFZg_ITuXjz2$j)2yLhOzbulu9z2h?-W6g1Ih$h%7AevM9`wArIZ0gW@e(wJ51ayI72kF>;yAJoi=-7=GoD|#UJ3Krn&#aRci_UQi}Kk#cK{Ff08rS!%ip4;T#SI*K4c>a997KS{nUq{%B zjNha*t^XCpV!Oy|CYLed_bgE|{=rX)jJTo7@8tj>zzh2>yhOs>Z->MW*#{6m#&KD zar_m{!_sZK$5ruYLJA%QwRFFO*#-U+j#e&iW*02^H8fPndw&1=y#l{i;Q!ARpnuu) zlCpZK**t}!cDOC#JB7PrA7vwbjE_#I&p)Iqty}dfPQAUo|7n2o@$C-R_Lsg2V5joN z%?{J2wCE*xQRBzSBkAiV$aU7QthC}^Nx7vVR-=8~?_Jz>HgSb0li+IOw26|RA~#)6 zYuc(AKenWs=EV(<&eJ@E4iw0ExS=*g;R2(dw(5!i#FoFK1u3 zo?J&^RpDv025N_8j;~#|v25kbGl8BDLT{@6&Dpv@r<2!O_(UA34PmWF8%a>TU9f9> zcI6WDv%=14%tdh@iVfeJ_Ut9s(OOwYzL%y3-a!{Loi)7^bmk9P;SeRt*FU+(U-<3Y z;mkKl_m+EPx=3CzS(B@3t@1aQVAR!R+DE1Pcgu8J-67YpTFG>J3)aV$nfv^oAM6?G z<%TB)hsj&!r(Di!R9#izSoi+zt55GQJYcd*7BJK(d3*W@L&+3YZ(ty6QtvaVXADfnR{LAQG2 zxvyy-c`w0#fNlFSJ|laP+8|DiCQBKiTmE`{6LfkqxlZ~jTPvZpl*n@<^O0OX8jbXW zjOg^i$jIgv$0FZc=aKT8*>?gZ=VG||Qt2`W4sfNqA3ebM&RqO~P+bP&%(q=7F>!Qq zozm4?&H2~j^O(-b9?r%c@}2tmX#2+M+IRArE1w*ah}?8DIn18v%Ch)`9hCm%5@r+{ z_lBkPy>ZRGMgx~-csAfOlk3#3Hg>@0IbqS4LiL!{8r+lb`o6{{T+lJ(q1h!jb(Wit zwfl9?DL7OHy(#q%p7d__g}Q2I{v5k{LlYL|2KGu(yIUf literal 0 HcmV?d00001 diff --git a/test/rsrc/no_ext_not_music b/test/rsrc/no_ext_not_music new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_importer.py b/test/test_importer.py index 1a8983c11..7de432266 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -351,6 +351,34 @@ class ImportTest(PathsMixin, AutotagImportTestCase): self.prepare_album_for_import(1) self.setup_importer() + def test_recognize_format(self): + resource_path = os.path.join(_common.RSRC, b"no_ext") + self.setup_importer() + self.importer.paths = [resource_path] + self.importer.run() + assert self.lib.items().get().path.endswith(b".mp3") + util.remove(os.path.join(_common.RSRC, b"no_ext.mp3")) + + def test_recognize_format_already_exist(self): + resource_path = os.path.join(_common.RSRC, b"no_ext") + new_path = os.path.join(_common.RSRC, b"no_ext.mp3") + util.copy(resource_path, new_path) + self.setup_importer() + self.importer.paths = [resource_path] + self.importer.run() + assert self.lib.items().get().path.endswith(b".mp3") + with capture_log() as logs: + self.importer.run() + assert "Import file with matching format to original target" in logs + util.remove(new_path) + + def test_recognize_format_not_music(self): + resource_path = os.path.join(_common.RSRC, 
b"no_ext_not_music") + self.setup_importer() + self.importer.paths = [resource_path] + self.importer.run() + assert len(self.lib.items()) == 0 + def test_asis_moves_album_and_track(self): self.importer.add_choice(importer.Action.ASIS) self.importer.run() From e89648b12b04e9388e165add1c3570993098c722 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:37:09 +0100 Subject: [PATCH 02/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 73dff68fc..fd42f63c6 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1184,7 +1184,7 @@ class ImportTaskFactory: # assume, for example, the only diff between 'asdf.mp3' and 'asdf' is format new_path = path.with_suffix("." + format) if not new_path.exists(): - util.copy(path, new_path) + util.move(path, new_path) else: log.info("Import file with matching format to original target") return new_path From 5e9ea54429dbef5f824fea3813f8c998626883b9 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:37:30 +0100 Subject: [PATCH 03/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index fd42f63c6..af1e2a4f4 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1099,7 +1099,7 @@ class ImportTaskFactory: if path.suffix != "": return path - # no extension detexted + # no extension detected # use ffprobe to find the format formats = [] output = subprocess.run( From 8f2b4e50b3bc4f00d1370931938a20891a785d25 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:38:49 +0100 Subject: [PATCH 04/14] Update test/test_importer.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/test_importer.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/test_importer.py b/test/test_importer.py index c963e526f..337d44b25 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -353,16 +353,22 @@ class ImportTest(PathsMixin, AutotagImportTestCase): self.setup_importer() def test_recognize_format(self): - resource_path = os.path.join(_common.RSRC, b"no_ext") + resource_src = os.path.join(_common.RSRC, b"no_ext") + resource_path = os.path.join(self.import_dir, b"no_ext") + util.copy(resource_src, resource_path) self.setup_importer() self.importer.paths = [resource_path] self.importer.run() assert self.lib.items().get().path.endswith(b".mp3") - util.remove(os.path.join(_common.RSRC, b"no_ext.mp3")) + generated_path = os.path.join(self.import_dir, b"no_ext.mp3") + if os.path.exists(generated_path): + util.remove(generated_path) def test_recognize_format_already_exist(self): - resource_path = os.path.join(_common.RSRC, b"no_ext") - new_path = os.path.join(_common.RSRC, b"no_ext.mp3") + resource_src = os.path.join(_common.RSRC, b"no_ext") + resource_path = os.path.join(self.import_dir, b"no_ext") + util.copy(resource_src, resource_path) + new_path = os.path.join(self.import_dir, b"no_ext.mp3") util.copy(resource_path, new_path) self.setup_importer() self.importer.paths = [resource_path] From cccc1fd513d82302d3508aae3c7882816cd83db4 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:41:36 +0100 Subject: [PATCH 05/14] Update beets/importer/tasks.py 
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index af1e2a4f4..356e6b5fc 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1109,7 +1109,8 @@ class ImportTaskFactory: "-loglevel", "fatal", "-show_format", - path, + "--", + str(path), ], capture_output=True, ) From d5f64fdba88c640ff1b92492fc5ed7ee6d551337 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:43:28 +0100 Subject: [PATCH 06/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 356e6b5fc..43312e943 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1094,7 +1094,7 @@ class ImportTaskFactory: ) def check_extension(self, path: util.PathBytes): - path = Path(path.decode("utf-8")) + path = Path(os.fsdecode(path)) # if there is an extension, ignore if path.suffix != "": return path From b963a14d411cb67d0872aa544f306b5bb7ed53a1 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:43:53 +0100 Subject: [PATCH 07/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 43312e943..19cf2bcd4 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1075,8 +1075,8 @@ class ImportTaskFactory: error. """ - # Check if the file has an extention, - # Add an extention if there isn't one. + # Check if the file has an extension, + # Add an extension if there isn't one. 
if os.path.isfile(path): path = self.check_extension(path) From d5a5a0225a2f814e24f22067369b30da2cba2b31 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:44:24 +0100 Subject: [PATCH 08/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 19cf2bcd4..506785e80 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1117,7 +1117,7 @@ class ImportTaskFactory: out = output.stdout.decode("utf-8") err = output.stderr.decode("utf-8") if err != "": - log.error("ffprobe error\n", err) + log.error("ffprobe error: %s", err) for line in out.split("\n"): if line.startswith("format_name="): formats = line.split("=")[1].split(",") From 54f438f17614b188abe14ca37af479c69f21852b Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:44:57 +0100 Subject: [PATCH 09/14] Update test/test_importer.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- test/test_importer.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/test/test_importer.py b/test/test_importer.py index 337d44b25..93f179873 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -360,24 +360,26 @@ class ImportTest(PathsMixin, AutotagImportTestCase): self.importer.paths = [resource_path] self.importer.run() assert self.lib.items().get().path.endswith(b".mp3") - generated_path = os.path.join(self.import_dir, b"no_ext.mp3") - if os.path.exists(generated_path): - util.remove(generated_path) + temp_resource_path = os.path.join(self.temp_dir, b"no_ext") + util.copy(resource_path, temp_resource_path) + self.setup_importer() + self.importer.paths = [temp_resource_path] + self.importer.run() + assert self.lib.items().get().path.endswith(b".mp3") def test_recognize_format_already_exist(self): - resource_src = os.path.join(_common.RSRC, b"no_ext") - resource_path = os.path.join(self.import_dir, b"no_ext") - util.copy(resource_src, resource_path) - new_path = os.path.join(self.import_dir, b"no_ext.mp3") - util.copy(resource_path, new_path) + resource_path = os.path.join(_common.RSRC, b"no_ext") + temp_resource_path = os.path.join(self.temp_dir, b"no_ext") + util.copy(resource_path, temp_resource_path) + new_path = os.path.join(self.temp_dir, b"no_ext.mp3") + util.copy(temp_resource_path, new_path) self.setup_importer() - self.importer.paths = [resource_path] + self.importer.paths = [temp_resource_path] self.importer.run() assert self.lib.items().get().path.endswith(b".mp3") with capture_log() as logs: self.importer.run() assert "Import file with matching format to original target" in logs - util.remove(new_path) def test_recognize_format_not_music(self): resource_path = os.path.join(_common.RSRC, b"no_ext_not_music") From 93b4eccc39d21fe4e55447559f1d02b525e26056 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 14:46:08 +0100 Subject: [PATCH 10/14] Update beets/importer/tasks.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beets/importer/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 506785e80..7164448bd 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1170,20 +1170,20 @@ class ImportTaskFactory: "8svx", "cda", ] - format = "" + detected_format = "" # The first format from ffprobe that is on this 
list is taken for f in formats: if f in wiki_formats: - format = f + detected_format = f break # if ffprobe can't find a format, the file is prob not music - if format == "": + if detected_format == "": return path # cp and add ext. If already exist, use that file # assume, for example, the only diff between 'asdf.mp3' and 'asdf' is format - new_path = path.with_suffix("." + format) + new_path = path.with_suffix("." + detected_format) if not new_path.exists(): util.move(path, new_path) else: From 414c1da6866af35eccc2798fdd918a3bb0436bd6 Mon Sep 17 00:00:00 2001 From: gaotue Date: Mon, 23 Mar 2026 15:41:16 +0100 Subject: [PATCH 11/14] Recognize format during import. Fix issues raised by copilot --- CONTRIBUTING.rst | 862 +- beets/autotag/__init__.py | 98 +- beets/autotag/distance.py | 1112 +-- beets/autotag/hooks.py | 1228 +-- beets/autotag/match.py | 772 +- beets/importer/tasks.py | 2618 +++--- beets/ui/commands/import_/display.py | 794 +- beets/ui/commands/import_/session.py | 1090 +-- beets/util/color.py | 430 +- beetsplug/bpsync.py | 374 +- beetsplug/lyrics.py | 2284 ++--- beetsplug/mbsync.py | 374 +- docs/changelog.rst | 11360 ++++++++++++------------- docs/guides/installation.rst | 360 +- docs/plugins/lyrics.rst | 456 +- test/autotag/test_hooks.py | 714 +- test/plugins/lyrics_pages.py | 1172 +-- test/plugins/test_art.py | 2100 ++--- test/plugins/test_edit.py | 988 +-- test/plugins/test_lyrics.py | 1680 ++-- test/test_importer.py | 3462 ++++---- 21 files changed, 17167 insertions(+), 17161 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index f7372d81e..3a2adb724 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,431 +1,431 @@ -Contributing -============ - -.. contents:: - :depth: 3 - -Thank you! ----------- - -First off, thank you for considering contributing to beets! It’s people like you -that make beets continue to succeed. - -These guidelines describe how you can help most effectively. By following these -guidelines, you can make life easier for the development team as it indicates -you respect the maintainers’ time; in return, the maintainers will reciprocate -by helping to address your issue, review changes, and finalize pull requests. - -Types of Contributions ----------------------- - -We love to get contributions from our community—you! There are many ways to -contribute, whether you’re a programmer or not. - -The first thing to do, regardless of how you'd like to contribute to the -project, is to check out our :doc:`Code of Conduct ` and to -keep that in mind while interacting with other contributors and users. - -Non-Programming -~~~~~~~~~~~~~~~ - -- Promote beets! Help get the word out by telling your friends, writing a blog - post, or discussing it on a forum you frequent. -- Improve the documentation_. It’s incredibly easy to contribute here: just find - a page you want to modify and hit the “Edit on GitHub” button in the - upper-right. You can automatically send us a pull request for your changes. -- GUI design. For the time being, beets is a command-line-only affair. But - that’s mostly because we don’t have any great ideas for what a good GUI should - look like. If you have those great ideas, please get in touch. -- Benchmarks. We’d like to have a consistent way of measuring speed improvements - in beets’ tagger and other functionality as well as a way of comparing beets’ - performance to other tools. You can help by compiling a library of - freely-licensed music files (preferably with incorrect metadata) for testing - and measurement. 
-- Think you have a nice config or cool use-case for beets? We’d love to hear - about it! Submit a post to our `discussion board - `__ - under the “Show and Tell” category for a chance to get featured in `the docs - `__. -- Consider helping out fellow users by `responding to support requests - `__ . - -Programming -~~~~~~~~~~~ - -- As a programmer (even if you’re just a beginner!), you have a ton of - opportunities to get your feet wet with beets. -- For developing plugins, or hacking away at beets, there’s some good - information in the `“For Developers” section of the docs - `__. - -.. _development-tools: - -Development Tools -+++++++++++++++++ - -In order to develop beets, you will need a few tools installed: - -- poetry_ for packaging, virtual environment and dependency management -- poethepoet_ to run tasks, such as linting, formatting, testing - -Python community recommends using pipx_ to install stand-alone command-line -applications such as above. pipx_ installs each application in an isolated -virtual environment, where its dependencies will not interfere with your system -and other CLI tools. - -If you do not have pipx_ installed in your system, follow `pipx installation -instructions `__ or - -.. code-block:: sh - - $ python3 -m pip install --user pipx - -Install poetry_ and poethepoet_ using pipx_: - -:: - - $ pipx install poetry poethepoet - -.. admonition:: Check ``tool.pipx-install`` section in ``pyproject.toml`` to see supported versions - - .. code-block:: toml - - [tool.pipx-install] - poethepoet = ">=0.26" - poetry = "<2" - -.. _getting-the-source: - -Getting the Source -++++++++++++++++++ - -The easiest way to get started with the latest beets source is to clone the -repository and install ``beets`` in a local virtual environment using poetry_. -This can be done with: - -.. code-block:: bash - - $ git clone https://github.com/beetbox/beets.git - $ cd beets - $ poetry install - -This will install ``beets`` and all development dependencies into its own -virtual environment in your ``$POETRY_CACHE_DIR``. See ``poetry install --help`` -for installation options, including installing ``extra`` dependencies for -plugins. - -In order to run something within this virtual environment, start the command -with ``poetry run`` to them, for example ``poetry run pytest``. - -On the other hand, it may get tedious to type ``poetry run`` before every -command. Instead, you can activate the virtual environment in your shell with: - -:: - - $ poetry shell - -You should see ``(beets-py3.10)`` prefix in your shell prompt. Now you can run -commands directly, for example: - -:: - - $ (beets-py3.10) pytest - -Additionally, poethepoet_ task runner assists us with the most common -operations. Formatting, linting, testing are defined as ``poe`` tasks in -pyproject.toml_. Run: - -:: - - $ poe - -to see all available tasks. They can be used like this, for example - -.. code-block:: sh - - $ poe lint # check code style - $ poe format # fix formatting issues - $ poe test # run tests - # ... fix failing tests - $ poe test --lf # re-run failing tests (note the additional pytest option) - $ poe check-types --pretty # check types with an extra option for mypy - -Code Contribution Ideas -+++++++++++++++++++++++ - -- We maintain a set of `issues marked as “good first issue” - `__. These are - issues that would serve as a good introduction to the codebase. Claim one and - start exploring! -- Like testing? Our `test coverage - `__ is somewhat low. 
You can help - out by finding low-coverage modules or checking out other `testing-related - issues `__. -- There are several ways to improve the tests in general (see :ref:`testing` and - some places to think about performance optimization (see `Optimization - `__). -- Not all of our code is up to our coding conventions. In particular, the - `library API documentation - `__ are currently - quite sparse. You can help by adding to the docstrings in the code and to the - documentation pages themselves. beets follows `PEP-257 - `__ for docstrings and in some places, we - also sometimes use `ReST autodoc syntax for Sphinx - `__ to, - for example, refer to a class name. - -Your First Contribution ------------------------ - -If this is your first time contributing to an open source project, welcome! If -you are confused at all about how to contribute or what to contribute, take a -look at `this great tutorial `__, or stop by our -`discussion board`_ if you have any questions. - -We maintain a list of issues we reserved for those new to open source labeled -`first timers only`_. Since the goal of these issues is to get users comfortable -with contributing to an open source project, please do not hesitate to ask any -questions. - -.. _first timers only: https://github.com/beetbox/beets/issues?q=is%3Aopen+is%3Aissue+label%3A%22first+timers+only%22 - -How to Submit Your Work ------------------------ - -Do you have a great bug fix, new feature, or documentation expansion you’d like -to contribute? Follow these steps to create a GitHub pull request and your code -will ship in no time. - -1. Fork the beets repository and clone it (see above) to create a workspace. -2. Install pre-commit, following the instructions `here - `_. -3. Make your changes. -4. Add tests. If you’ve fixed a bug, write a test to ensure that you’ve actually - fixed it. If there’s a new feature or plugin, please contribute tests that - show that your code does what it says. -5. Add documentation. If you’ve added a new command flag, for example, find the - appropriate page under ``docs/`` where it needs to be listed. -6. Add a changelog entry to ``docs/changelog.rst`` near the top of the document. -7. Run the tests and style checker, see :ref:`testing`. -8. Push to your fork and open a pull request! We’ll be in touch shortly. -9. If you add commits to a pull request, please add a comment or re-request a - review after you push them since GitHub doesn’t automatically notify us when - commits are added. - -Remember, code contributions have four parts: the code, the tests, the -documentation, and the changelog entry. Thank you for contributing! - -.. admonition:: Ownership - - If you are the owner of a plugin, please consider reviewing pull requests - that affect your plugin. If you are not the owner of a plugin, please - consider becoming one! You can do so by adding an entry to - ``.github/CODEOWNERS``. This way, you will automatically receive a review - request for pull requests that adjust the code that you own. If you have any - questions, please ask on our `discussion board`_. - -The Code --------- - -The documentation has a section on the `library API -`__ that serves as an -introduction to beets’ design. - -Coding Conventions ------------------- - -General -~~~~~~~ - -There are a few coding conventions we use in beets: - -- Whenever you access the library database, do so through the provided Library - methods or via a Transaction object. Never call ``lib.conn.*`` directly. For - example, do this: - - .. 
code-block:: python - - with g.lib.transaction() as tx: - rows = tx.query("SELECT DISTINCT {field} FROM {model._table} ORDER BY {sort_field}") - - To fetch Item objects from the database, use lib.items(…) and supply a query - as an argument. Resist the urge to write raw SQL for your query. If you must - use lower-level queries into the database, do this, for example: - - .. code-block:: python - - with lib.transaction() as tx: - rows = tx.query("SELECT path FROM items WHERE album_id = ?", (album_id,)) - - Transaction objects help control concurrent access to the database and assist - in debugging conflicting accesses. - -- f-strings should be used instead of the ``%`` operator and ``str.format()`` - calls. -- Never ``print`` informational messages; use the `logging - `__ module instead. In - particular, we have our own logging shim, so you’ll see ``from beets import - logging`` in most files. - - - The loggers use `str.format - `__-style logging instead - of ``%``-style, so you can type ``log.debug("{}", obj)`` to do your - formatting. - -- Exception handlers must use ``except A as B:`` instead of ``except A, B:``. - -Style -~~~~~ - -We use `ruff `__ to format and lint the codebase. - -Run ``poe check-format`` and ``poe lint`` to check your code for style and -linting errors. Running ``poe format`` will automatically format your code -according to the specifications required by the project. - -Similarly, run ``poe format-docs`` and ``poe lint-docs`` to ensure consistent -documentation formatting and check for any issues. - -Editor Settings -~~~~~~~~~~~~~~~ - -Personally, I work on beets with vim_. Here are some ``.vimrc`` lines that might -help with PEP 8-compliant Python coding: - -:: - - filetype indent on - autocmd FileType python setlocal shiftwidth=4 tabstop=4 softtabstop=4 expandtab shiftround autoindent - -Consider installing `this alternative Python indentation plugin -`__. I also like `neomake -`__ with its flake8 checker. - -.. _testing: - -Testing -------- - -Running the Tests -~~~~~~~~~~~~~~~~~ - -Use ``poe`` to run tests: - -:: - - $ poe test [pytest options] - -You can disable a hand-selected set of "slow" tests by setting the environment -variable ``SKIP_SLOW_TESTS``, for example: - -:: - - $ SKIP_SLOW_TESTS=1 poe test - -Coverage -++++++++ - -The ``test`` command does not include coverage as it slows down testing. In -order to measure it, use the ``test-with-coverage`` task - - $ poe test-with-coverage [pytest options] - -You are welcome to explore coverage by opening the HTML report in -``.reports/html/index.html``. - -Note that for each covered line the report shows **which tests cover it** -(expand the list on the right-hand side of the affected line). - -You can find project coverage status on Codecov_. - -Red Flags -+++++++++ - -The pytest-random_ plugin makes it easy to randomize the order of tests. ``poe -test --random`` will occasionally turn up failing tests that reveal ordering -dependencies—which are bad news! - -Test Dependencies -+++++++++++++++++ - -The tests have a few more dependencies than beets itself. (The additional -dependencies consist of testing utilities and dependencies of non-default -plugins exercised by the test suite.) The dependencies are listed under the -``tool.poetry.group.test.dependencies`` section in pyproject.toml_. - -Writing Tests -~~~~~~~~~~~~~ - -Writing tests is done by adding or modifying files in folder test_. Take a look -at test-query_ to get a basic view on how tests are written. 
Since we are -currently migrating the tests from unittest_ to pytest_, new tests should be -written using pytest_. Contributions migrating existing tests are welcome! - -External API requests under test should be mocked with requests-mock_, However, -we still want to know whether external APIs are up and that they return expected -responses, therefore we test them weekly with our `integration test`_ suite. - -In order to add such a test, mark your test with the ``integration_test`` marker - -.. code-block:: python - - @pytest.mark.integration_test - def test_external_api_call(): ... - -This way, the test will be run only in the integration test suite. - -beets also defines custom pytest markers in ``test/conftest.py``: - -- ``integration_test``: runs only when ``INTEGRATION_TEST=true`` is set. -- ``on_lyrics_update``: runs only when ``LYRICS_UPDATED=true`` is set. -- ``requires_import("module", force_ci=True)``: runs the test only when the - module is importable. With the default ``force_ci=True``, this import check is - bypassed on GitHub Actions for ``beetbox/beets`` so CI still runs the test. - Set ``force_ci=False`` to allow CI to skip when the module is missing. - -.. code-block:: python - - @pytest.mark.integration_test - def test_external_api_call(): ... - - - @pytest.mark.on_lyrics_update - def test_real_lyrics_backend(): ... - - - @pytest.mark.requires_import("langdetect") - def test_language_detection(): ... - - - @pytest.mark.requires_import("librosa", force_ci=False) - def test_autobpm_command(): ... - -.. _codecov: https://app.codecov.io/github/beetbox/beets - -.. _discussion board: https://github.com/beetbox/beets/discussions - -.. _documentation: https://beets.readthedocs.io/en/stable/ - -.. _integration test: https://github.com/beetbox/beets/actions?query=workflow%3A%22integration+tests%22 - -.. _pipx: https://pipx.pypa.io/stable - -.. _poethepoet: https://poethepoet.natn.io/index.html - -.. _poetry: https://python-poetry.org/docs/ - -.. _pyproject.toml: https://github.com/beetbox/beets/blob/master/pyproject.toml - -.. _pytest: https://docs.pytest.org/en/stable/ - -.. _pytest-random: https://github.com/klrmn/pytest-random - -.. _requests-mock: https://requests-mock.readthedocs.io/en/latest/response.html - -.. _test: https://github.com/beetbox/beets/tree/master/test - -.. _test-query: https://github.com/beetbox/beets/blob/master/test/test_query.py - -.. _unittest: https://docs.python.org/3/library/unittest.html - -.. _vim: https://www.vim.org/ +Contributing +============ + +.. contents:: + :depth: 3 + +Thank you! +---------- + +First off, thank you for considering contributing to beets! It’s people like you +that make beets continue to succeed. + +These guidelines describe how you can help most effectively. By following these +guidelines, you can make life easier for the development team as it indicates +you respect the maintainers’ time; in return, the maintainers will reciprocate +by helping to address your issue, review changes, and finalize pull requests. + +Types of Contributions +---------------------- + +We love to get contributions from our community—you! There are many ways to +contribute, whether you’re a programmer or not. + +The first thing to do, regardless of how you'd like to contribute to the +project, is to check out our :doc:`Code of Conduct ` and to +keep that in mind while interacting with other contributors and users. + +Non-Programming +~~~~~~~~~~~~~~~ + +- Promote beets! 
Help get the word out by telling your friends, writing a blog + post, or discussing it on a forum you frequent. +- Improve the documentation_. It’s incredibly easy to contribute here: just find + a page you want to modify and hit the “Edit on GitHub” button in the + upper-right. You can automatically send us a pull request for your changes. +- GUI design. For the time being, beets is a command-line-only affair. But + that’s mostly because we don’t have any great ideas for what a good GUI should + look like. If you have those great ideas, please get in touch. +- Benchmarks. We’d like to have a consistent way of measuring speed improvements + in beets’ tagger and other functionality as well as a way of comparing beets’ + performance to other tools. You can help by compiling a library of + freely-licensed music files (preferably with incorrect metadata) for testing + and measurement. +- Think you have a nice config or cool use-case for beets? We’d love to hear + about it! Submit a post to our `discussion board + `__ + under the “Show and Tell” category for a chance to get featured in `the docs + `__. +- Consider helping out fellow users by `responding to support requests + `__ . + +Programming +~~~~~~~~~~~ + +- As a programmer (even if you’re just a beginner!), you have a ton of + opportunities to get your feet wet with beets. +- For developing plugins, or hacking away at beets, there’s some good + information in the `“For Developers” section of the docs + `__. + +.. _development-tools: + +Development Tools ++++++++++++++++++ + +In order to develop beets, you will need a few tools installed: + +- poetry_ for packaging, virtual environment and dependency management +- poethepoet_ to run tasks, such as linting, formatting, testing + +Python community recommends using pipx_ to install stand-alone command-line +applications such as above. pipx_ installs each application in an isolated +virtual environment, where its dependencies will not interfere with your system +and other CLI tools. + +If you do not have pipx_ installed in your system, follow `pipx installation +instructions `__ or + +.. code-block:: sh + + $ python3 -m pip install --user pipx + +Install poetry_ and poethepoet_ using pipx_: + +:: + + $ pipx install poetry poethepoet + +.. admonition:: Check ``tool.pipx-install`` section in ``pyproject.toml`` to see supported versions + + .. code-block:: toml + + [tool.pipx-install] + poethepoet = ">=0.26" + poetry = "<2" + +.. _getting-the-source: + +Getting the Source +++++++++++++++++++ + +The easiest way to get started with the latest beets source is to clone the +repository and install ``beets`` in a local virtual environment using poetry_. +This can be done with: + +.. code-block:: bash + + $ git clone https://github.com/beetbox/beets.git + $ cd beets + $ poetry install + +This will install ``beets`` and all development dependencies into its own +virtual environment in your ``$POETRY_CACHE_DIR``. See ``poetry install --help`` +for installation options, including installing ``extra`` dependencies for +plugins. + +In order to run something within this virtual environment, start the command +with ``poetry run`` to them, for example ``poetry run pytest``. + +On the other hand, it may get tedious to type ``poetry run`` before every +command. Instead, you can activate the virtual environment in your shell with: + +:: + + $ poetry shell + +You should see ``(beets-py3.10)`` prefix in your shell prompt. 
Now you can run +commands directly, for example: + +:: + + $ (beets-py3.10) pytest + +Additionally, poethepoet_ task runner assists us with the most common +operations. Formatting, linting, testing are defined as ``poe`` tasks in +pyproject.toml_. Run: + +:: + + $ poe + +to see all available tasks. They can be used like this, for example + +.. code-block:: sh + + $ poe lint # check code style + $ poe format # fix formatting issues + $ poe test # run tests + # ... fix failing tests + $ poe test --lf # re-run failing tests (note the additional pytest option) + $ poe check-types --pretty # check types with an extra option for mypy + +Code Contribution Ideas ++++++++++++++++++++++++ + +- We maintain a set of `issues marked as “good first issue” + `__. These are + issues that would serve as a good introduction to the codebase. Claim one and + start exploring! +- Like testing? Our `test coverage + `__ is somewhat low. You can help + out by finding low-coverage modules or checking out other `testing-related + issues `__. +- There are several ways to improve the tests in general (see :ref:`testing` and + some places to think about performance optimization (see `Optimization + `__). +- Not all of our code is up to our coding conventions. In particular, the + `library API documentation + `__ are currently + quite sparse. You can help by adding to the docstrings in the code and to the + documentation pages themselves. beets follows `PEP-257 + `__ for docstrings and in some places, we + also sometimes use `ReST autodoc syntax for Sphinx + `__ to, + for example, refer to a class name. + +Your First Contribution +----------------------- + +If this is your first time contributing to an open source project, welcome! If +you are confused at all about how to contribute or what to contribute, take a +look at `this great tutorial `__, or stop by our +`discussion board`_ if you have any questions. + +We maintain a list of issues we reserved for those new to open source labeled +`first timers only`_. Since the goal of these issues is to get users comfortable +with contributing to an open source project, please do not hesitate to ask any +questions. + +.. _first timers only: https://github.com/beetbox/beets/issues?q=is%3Aopen+is%3Aissue+label%3A%22first+timers+only%22 + +How to Submit Your Work +----------------------- + +Do you have a great bug fix, new feature, or documentation expansion you’d like +to contribute? Follow these steps to create a GitHub pull request and your code +will ship in no time. + +1. Fork the beets repository and clone it (see above) to create a workspace. +2. Install pre-commit, following the instructions `here + `_. +3. Make your changes. +4. Add tests. If you’ve fixed a bug, write a test to ensure that you’ve actually + fixed it. If there’s a new feature or plugin, please contribute tests that + show that your code does what it says. +5. Add documentation. If you’ve added a new command flag, for example, find the + appropriate page under ``docs/`` where it needs to be listed. +6. Add a changelog entry to ``docs/changelog.rst`` near the top of the document. +7. Run the tests and style checker, see :ref:`testing`. +8. Push to your fork and open a pull request! We’ll be in touch shortly. +9. If you add commits to a pull request, please add a comment or re-request a + review after you push them since GitHub doesn’t automatically notify us when + commits are added. + +Remember, code contributions have four parts: the code, the tests, the +documentation, and the changelog entry. 
+Thank you for contributing!
+
+.. admonition:: Ownership
+
+   If you are the owner of a plugin, please consider reviewing pull requests
+   that affect your plugin. If you are not the owner of a plugin, please
+   consider becoming one! You can do so by adding an entry to
+   ``.github/CODEOWNERS``. This way, you will automatically receive a review
+   request for pull requests that adjust the code that you own. If you have any
+   questions, please ask on our `discussion board`_.
+
+The Code
+--------
+
+The documentation has a section on the `library API
+`__ that serves as an
+introduction to beets’ design.
+
+Coding Conventions
+------------------
+
+General
+~~~~~~~
+
+There are a few coding conventions we use in beets:
+
+- Whenever you access the library database, do so through the provided Library
+  methods or via a Transaction object. Never call ``lib.conn.*`` directly. For
+  example, do this:
+
+  .. code-block:: python
+
+      with g.lib.transaction() as tx:
+          rows = tx.query(f"SELECT DISTINCT {field} FROM {model._table} ORDER BY {sort_field}")
+
+  To fetch Item objects from the database, use ``lib.items(…)`` and supply a
+  query as an argument. Resist the urge to write raw SQL for your query. If you
+  must use lower-level queries into the database, do this, for example:
+
+  .. code-block:: python
+
+      with lib.transaction() as tx:
+          rows = tx.query("SELECT path FROM items WHERE album_id = ?", (album_id,))
+
+  Transaction objects help control concurrent access to the database and assist
+  in debugging conflicting accesses.
+
+- f-strings should be used instead of the ``%`` operator and ``str.format()``
+  calls.
+- Never ``print`` informational messages; use the `logging
+  `__ module instead. In
+  particular, we have our own logging shim, so you’ll see ``from beets import
+  logging`` in most files.
+
+  - The loggers use `str.format
+    `__-style logging instead
+    of ``%``-style, so you can type ``log.debug("{}", obj)`` to do your
+    formatting.
+
+- Exception handlers must use ``except A as B:`` instead of ``except A, B:``.
+
+Style
+~~~~~
+
+We use `ruff `__ to format and lint the codebase.
+
+Run ``poe check-format`` and ``poe lint`` to check your code for style and
+linting errors. Running ``poe format`` will automatically format your code
+according to the specifications required by the project.
+
+Similarly, run ``poe format-docs`` and ``poe lint-docs`` to ensure consistent
+documentation formatting and check for any issues.
+
+Editor Settings
+~~~~~~~~~~~~~~~
+
+Personally, I work on beets with vim_. Here are some ``.vimrc`` lines that might
+help with PEP 8-compliant Python coding:
+
+::
+
+    filetype indent on
+    autocmd FileType python setlocal shiftwidth=4 tabstop=4 softtabstop=4 expandtab shiftround autoindent
+
+Consider installing `this alternative Python indentation plugin
+`__. I also like `neomake
+`__ with its flake8 checker.
+
+.. _testing:
+
+Testing
+-------
+
+Running the Tests
+~~~~~~~~~~~~~~~~~
+
+Use ``poe`` to run tests:
+
+::
+
+   $ poe test [pytest options]
+
+You can disable a hand-selected set of "slow" tests by setting the environment
+variable ``SKIP_SLOW_TESTS``, for example:
+
+::
+
+   $ SKIP_SLOW_TESTS=1 poe test
+
+Coverage
+++++++++
+
+The ``test`` command does not include coverage, as it slows down testing. To
+measure it, use the ``test-with-coverage`` task:
+
+::
+
+   $ poe test-with-coverage [pytest options]
+
+You are welcome to explore coverage by opening the HTML report in
+``.reports/html/index.html``.
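+
+For example, to measure coverage for a single test module and open the report
+afterwards (the opener command depends on your platform; ``xdg-open`` is just
+one option):
+
+.. code-block:: sh
+
+   $ poe test-with-coverage test/test_query.py
+   $ xdg-open .reports/html/index.html   # or `open` on macOS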
+
+Note that for each covered line the report shows **which tests cover it**
+(expand the list on the right-hand side of the affected line).
+
+You can find the project coverage status on Codecov_.
+
+Red Flags
++++++++++
+
+The pytest-random_ plugin makes it easy to randomize the order of tests. ``poe
+test --random`` will occasionally turn up failing tests that reveal ordering
+dependencies, which are bad news!
+
+Test Dependencies
++++++++++++++++++
+
+The tests have a few more dependencies than beets itself. (The additional
+dependencies consist of testing utilities and dependencies of non-default
+plugins exercised by the test suite.) The dependencies are listed under the
+``tool.poetry.group.test.dependencies`` section in pyproject.toml_.
+
+Writing Tests
+~~~~~~~~~~~~~
+
+Tests are written by adding or modifying files in the test_ folder. Take a look
+at test-query_ to get a basic view of how tests are written. Since we are
+currently migrating the tests from unittest_ to pytest_, new tests should be
+written using pytest_. Contributions migrating existing tests are welcome!
+
+External API requests under test should be mocked with requests-mock_. However,
+we still want to know whether external APIs are up and return the expected
+responses, so we test them weekly with our `integration test`_ suite.
+
+To add such a test, mark your test with the ``integration_test`` marker:
+
+.. code-block:: python
+
+    @pytest.mark.integration_test
+    def test_external_api_call(): ...
+
+This way, the test will be run only in the integration test suite.
+
+beets also defines custom pytest markers in ``test/conftest.py``:
+
+- ``integration_test``: runs only when ``INTEGRATION_TEST=true`` is set.
+- ``on_lyrics_update``: runs only when ``LYRICS_UPDATED=true`` is set.
+- ``requires_import("module", force_ci=True)``: runs the test only when the
+  module is importable. With the default ``force_ci=True``, this import check is
+  bypassed on GitHub Actions for ``beetbox/beets`` so CI still runs the test.
+  Set ``force_ci=False`` to allow CI to skip when the module is missing.
+
+.. code-block:: python
+
+    @pytest.mark.integration_test
+    def test_external_api_call(): ...
+
+
+    @pytest.mark.on_lyrics_update
+    def test_real_lyrics_backend(): ...
+
+
+    @pytest.mark.requires_import("langdetect")
+    def test_language_detection(): ...
+
+
+    @pytest.mark.requires_import("librosa", force_ci=False)
+    def test_autobpm_command(): ...
+
+.. _codecov: https://app.codecov.io/github/beetbox/beets
+
+.. _discussion board: https://github.com/beetbox/beets/discussions
+
+.. _documentation: https://beets.readthedocs.io/en/stable/
+
+.. _integration test: https://github.com/beetbox/beets/actions?query=workflow%3A%22integration+tests%22
+
+.. _pipx: https://pipx.pypa.io/stable
+
+.. _poethepoet: https://poethepoet.natn.io/index.html
+
+.. _poetry: https://python-poetry.org/docs/
+
+.. _pyproject.toml: https://github.com/beetbox/beets/blob/master/pyproject.toml
+
+.. _pytest: https://docs.pytest.org/en/stable/
+
+.. _pytest-random: https://github.com/klrmn/pytest-random
+
+.. _requests-mock: https://requests-mock.readthedocs.io/en/latest/response.html
+
+.. _test: https://github.com/beetbox/beets/tree/master/test
+
+.. _test-query: https://github.com/beetbox/beets/blob/master/test/test_query.py
+
+.. _unittest: https://docs.python.org/3/library/unittest.html
+
+.. 
_vim: https://www.vim.org/ diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 094ed9e9b..8c21b8449 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -1,49 +1,49 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Facilities for automatically determining files' correct metadata.""" - -from __future__ import annotations - -from importlib import import_module - -# Parts of external interface. -from beets.util.deprecation import deprecate_for_maintainers, deprecate_imports - -from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch -from .match import Proposal, Recommendation, tag_album, tag_item - - -def __getattr__(name: str): - if name == "current_metadata": - deprecate_for_maintainers( - f"'beets.autotag.{name}'", "'beets.util.get_most_common_tags'" - ) - return import_module("beets.util").get_most_common_tags - - return deprecate_imports( - __name__, {"Distance": "beets.autotag.distance"}, name - ) - - -__all__ = [ - "AlbumInfo", - "AlbumMatch", - "Proposal", - "Recommendation", - "TrackInfo", - "TrackMatch", - "tag_album", - "tag_item", -] +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Facilities for automatically determining files' correct metadata.""" + +from __future__ import annotations + +from importlib import import_module + +# Parts of external interface. 
+from beets.util.deprecation import deprecate_for_maintainers, deprecate_imports + +from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch +from .match import Proposal, Recommendation, tag_album, tag_item + + +def __getattr__(name: str): + if name == "current_metadata": + deprecate_for_maintainers( + f"'beets.autotag.{name}'", "'beets.util.get_most_common_tags'" + ) + return import_module("beets.util").get_most_common_tags + + return deprecate_imports( + __name__, {"Distance": "beets.autotag.distance"}, name + ) + + +__all__ = [ + "AlbumInfo", + "AlbumMatch", + "Proposal", + "Recommendation", + "TrackInfo", + "TrackMatch", + "tag_album", + "tag_item", +] diff --git a/beets/autotag/distance.py b/beets/autotag/distance.py index b2c02ebd1..6cd0e4b10 100644 --- a/beets/autotag/distance.py +++ b/beets/autotag/distance.py @@ -1,556 +1,556 @@ -from __future__ import annotations - -import datetime -import re -from functools import cache, total_ordering -from typing import TYPE_CHECKING, Any - -from jellyfish import levenshtein_distance -from unidecode import unidecode - -from beets import config, metadata_plugins -from beets.util import as_string, cached_classproperty, get_most_common_tags -from beets.util.color import colorize - -if TYPE_CHECKING: - from collections.abc import Iterator, Sequence - - from beets.library import Item - from beets.util.color import ColorName - - from .hooks import AlbumInfo, TrackInfo - -# Candidate distance scoring. - -# Artist signals that indicate "various artists". These are used at the -# album level to determine whether a given release is likely a VA -# release and also on the track level to to remove the penalty for -# differing artists. -VA_ARTISTS = ("", "various artists", "various", "va", "unknown") - -# Parameters for string distance function. -# Words that can be moved to the end of a string using a comma. -SD_END_WORDS = ["the", "a", "an"] -# Reduced weights for certain portions of the string. -SD_PATTERNS = [ - (r"^the ", 0.1), - (r"[\[\(]?(ep|single)[\]\)]?", 0.0), - (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1), - (r"\(.*?\)", 0.3), - (r"\[.*?\]", 0.3), - (r"(, )?(pt\.|part) .+", 0.2), -] -# Replacements to use before testing distance. -SD_REPLACE = [ - (r"&", "and"), -] - - -def _string_dist_basic(str1: str, str2: str) -> float: - """Basic edit distance between two strings, ignoring - non-alphanumeric characters and case. Comparisons are based on a - transliteration/lowering to ASCII characters. Normalized by string - length. - """ - assert isinstance(str1, str) - assert isinstance(str2, str) - str1 = as_string(unidecode(str1)) - str2 = as_string(unidecode(str2)) - str1 = re.sub(r"[^a-z0-9]", "", str1.lower()) - str2 = re.sub(r"[^a-z0-9]", "", str2.lower()) - if not str1 and not str2: - return 0.0 - return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2))) - - -def string_dist(str1: str | None, str2: str | None) -> float: - """Gives an "intuitive" edit distance between two strings. This is - an edit distance, normalized by the string length, with a number of - tweaks that reflect intuition about text. - """ - if str1 is None and str2 is None: - return 0.0 - if str1 is None or str2 is None: - return 1.0 - - str1 = str1.lower() - str2 = str2.lower() - - # Don't penalize strings that move certain words to the end. For - # example, "the something" should be considered equal to - # "something, the". 
- for word in SD_END_WORDS: - if str1.endswith(f", {word}"): - str1 = f"{word} {str1[: -len(word) - 2]}" - if str2.endswith(f", {word}"): - str2 = f"{word} {str2[: -len(word) - 2]}" - - # Perform a couple of basic normalizing substitutions. - for pat, repl in SD_REPLACE: - str1 = re.sub(pat, repl, str1) - str2 = re.sub(pat, repl, str2) - - # Change the weight for certain string portions matched by a set - # of regular expressions. We gradually change the strings and build - # up penalties associated with parts of the string that were - # deleted. - base_dist = _string_dist_basic(str1, str2) - penalty = 0.0 - for pat, weight in SD_PATTERNS: - # Get strings that drop the pattern. - case_str1 = re.sub(pat, "", str1) - case_str2 = re.sub(pat, "", str2) - - if case_str1 != str1 or case_str2 != str2: - # If the pattern was present (i.e., it is deleted in the - # the current case), recalculate the distances for the - # modified strings. - case_dist = _string_dist_basic(case_str1, case_str2) - case_delta = max(0.0, base_dist - case_dist) - if case_delta == 0.0: - continue - - # Shift our baseline strings down (to avoid rematching the - # same part of the string) and add a scaled distance - # amount to the penalties. - str1 = case_str1 - str2 = case_str2 - base_dist = case_dist - penalty += weight * case_delta - - return base_dist + penalty - - -@total_ordering -class Distance: - """Keeps track of multiple distance penalties. Provides a single - weighted distance for all penalties as well as a weighted distance - for each individual penalty. - """ - - def __init__(self) -> None: - self._penalties: dict[str, list[float]] = {} - self.tracks: dict[TrackInfo, Distance] = {} - - @cached_classproperty - def _weights(cls) -> dict[str, float]: - """A dictionary from keys to floating-point weights.""" - weights_view = config["match"]["distance_weights"] - weights = {} - for key in weights_view.keys(): - weights[key] = weights_view[key].as_number() - return weights - - @property - def generic_penalty_keys(self) -> list[str]: - return [ - k.replace("album_", "").replace("track_", "").replace("_", " ") - for k in self._penalties - ] - - # Access the components and their aggregates. - - @property - def distance(self) -> float: - """Return a weighted and normalized distance across all - penalties. - """ - dist_max = self.max_distance - if dist_max: - return self.raw_distance / self.max_distance - return 0.0 - - @property - def max_distance(self) -> float: - """Return the maximum distance penalty (normalization factor).""" - dist_max = 0.0 - for key, penalty in self._penalties.items(): - dist_max += len(penalty) * self._weights[key] - return dist_max - - @property - def raw_distance(self) -> float: - """Return the raw (denormalized) distance.""" - dist_raw = 0.0 - for key, penalty in self._penalties.items(): - dist_raw += sum(penalty) * self._weights[key] - return dist_raw - - @property - def color(self) -> ColorName: - if self.distance <= config["match"]["strong_rec_thresh"].as_number(): - return "text_success" - if self.distance <= config["match"]["medium_rec_thresh"].as_number(): - return "text_warning" - return "text_error" - - @property - def string(self) -> str: - return colorize(self.color, f"{(1 - self.distance) * 100:.1f}%") - - def items(self) -> list[tuple[str, float]]: - """Return a list of (key, dist) pairs, with `dist` being the - weighted distance, sorted from highest to lowest. Does not - include penalties with a zero value. 
- """ - list_ = [] - for key in self._penalties: - dist = self[key] - if dist: - list_.append((key, dist)) - # Convert distance into a negative float we can sort items in - # ascending order (for keys, when the penalty is equal) and - # still get the items with the biggest distance first. - return sorted( - list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) - ) - - def __hash__(self) -> int: - return id(self) - - def __eq__(self, other) -> bool: - return self.distance == other - - # Behave like a float. - - def __lt__(self, other) -> bool: - return self.distance < other - - def __float__(self) -> float: - return self.distance - - def __sub__(self, other) -> float: - return self.distance - other - - def __rsub__(self, other) -> float: - return other - self.distance - - def __str__(self) -> str: - return f"{self.distance:.2f}" - - # Behave like a dict. - - def __getitem__(self, key) -> float: - """Returns the weighted distance for a named penalty.""" - dist = sum(self._penalties[key]) * self._weights[key] - dist_max = self.max_distance - if dist_max: - return dist / dist_max - return 0.0 - - def __iter__(self) -> Iterator[tuple[str, float]]: - return iter(self.items()) - - def __len__(self) -> int: - return len(self.items()) - - def keys(self) -> list[str]: - return [key for key, _ in self.items()] - - def update(self, dist: Distance): - """Adds all the distance penalties from `dist`.""" - if not isinstance(dist, Distance): - raise ValueError( - f"`dist` must be a Distance object, not {type(dist)}" - ) - for key, penalties in dist._penalties.items(): - self._penalties.setdefault(key, []).extend(penalties) - - # Adding components. - - def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool: - """Returns True if `value1` is equal to `value2`. `value1` may - be a compiled regular expression, in which case it will be - matched against `value2`. - """ - if isinstance(value1, re.Pattern): - return bool(value1.match(value2)) - return value1 == value2 - - def add(self, key: str, dist: float): - """Adds a distance penalty. `key` must correspond with a - configured weight setting. `dist` must be a float between 0.0 - and 1.0, and will be added to any existing distance penalties - for the same key. - """ - if not 0.0 <= dist <= 1.0: - raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}") - self._penalties.setdefault(key, []).append(dist) - - def add_equality( - self, - key: str, - value: Any, - options: list[Any] | tuple[Any, ...] | Any, - ): - """Adds a distance penalty of 1.0 if `value` doesn't match any - of the values in `options`. If an option is a compiled regular - expression, it will be considered equal if it matches against - `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - for opt in options: - if self._eq(opt, value): - dist = 0.0 - break - else: - dist = 1.0 - self.add(key, dist) - - def add_expr(self, key: str, expr: bool): - """Adds a distance penalty of 1.0 if `expr` evaluates to True, - or 0.0. - """ - if expr: - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_number(self, key: str, number1: int, number2: int): - """Adds a distance penalty of 1.0 for each number of difference - between `number1` and `number2`, or 0.0 when there is no - difference. Use this when there is no upper limit on the - difference between the two numbers. 
- """ - diff = abs(number1 - number2) - if diff: - for i in range(diff): - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_priority( - self, - key: str, - value: Any, - options: list[Any] | tuple[Any, ...] | Any, - ): - """Adds a distance penalty that corresponds to the position at - which `value` appears in `options`. A distance penalty of 0.0 - for the first option, or 1.0 if there is no matching option. If - an option is a compiled regular expression, it will be - considered equal if it matches against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - unit = 1.0 / (len(options) or 1) - for i, opt in enumerate(options): - if self._eq(opt, value): - dist = i * unit - break - else: - dist = 1.0 - self.add(key, dist) - - def add_ratio( - self, - key: str, - number1: int | float, - number2: int | float, - ): - """Adds a distance penalty for `number1` as a ratio of `number2`. - `number1` is bound at 0 and `number2`. - """ - number = float(max(min(number1, number2), 0)) - if number2: - dist = number / number2 - else: - dist = 0.0 - self.add(key, dist) - - def add_string(self, key: str, str1: str | None, str2: str | None): - """Adds a distance penalty based on the edit distance between - `str1` and `str2`. - """ - dist = string_dist(str1, str2) - self.add(key, dist) - - def add_data_source(self, before: str | None, after: str | None) -> None: - if before != after and ( - before or len(metadata_plugins.find_metadata_source_plugins()) > 1 - ): - self.add("data_source", metadata_plugins.get_penalty(after)) - - -@cache -def get_track_length_grace() -> float: - """Get cached grace period for track length matching.""" - return config["match"]["track_length_grace"].as_number() - - -@cache -def get_track_length_max() -> float: - """Get cached maximum track length for track length matching.""" - return config["match"]["track_length_max"].as_number() - - -def track_index_changed(item: Item, track_info: TrackInfo) -> bool: - """Returns True if the item and track info index is different. Tolerates - per disc and per release numbering. - """ - return item.track not in (track_info.medium_index, track_info.index) - - -def track_distance( - item: Item, - track_info: TrackInfo, - incl_artist: bool = False, -) -> Distance: - """Determines the significance of a track metadata change. Returns a - Distance object. `incl_artist` indicates that a distance component should - be included for the track artist (i.e., for various-artist releases). - - ``track_length_grace`` and ``track_length_max`` configuration options are - cached because this function is called many times during the matching - process and their access comes with a performance overhead. - """ - dist = Distance() - - # Length. - if info_length := track_info.length: - diff = abs(item.length - info_length) - get_track_length_grace() - dist.add_ratio("track_length", diff, get_track_length_max()) - - # Title. - dist.add_string("track_title", item.title, track_info.title) - - # Artist. Only check if there is actually an artist in the track data. - if ( - incl_artist - and track_info.artist - and item.artist.lower() not in VA_ARTISTS - ): - dist.add_string("track_artist", item.artist, track_info.artist) - - # Track index. - if track_info.index and item.track: - dist.add_expr("track_index", track_index_changed(item, track_info)) - - # Track ID. - if item.mb_trackid: - dist.add_expr("track_id", item.mb_trackid != track_info.track_id) - - # Penalize mismatching disc numbers. 
- if track_info.medium and item.disc: - dist.add_expr("medium", item.disc != track_info.medium) - - dist.add_data_source(item.get("data_source"), track_info.data_source) - - return dist - - -def distance( - items: Sequence[Item], - album_info: AlbumInfo, - item_info_pairs: list[tuple[Item, TrackInfo]], -) -> Distance: - """Determines how "significant" an album metadata change would be. - Returns a Distance object. `album_info` is an AlbumInfo object - reflecting the album to be compared. `items` is a sequence of all - Item objects that will be matched (order is not important). - `mapping` is a dictionary mapping Items to TrackInfo objects; the - keys are a subset of `items` and the values are a subset of - `album_info.tracks`. - """ - likelies, _ = get_most_common_tags(items) - - dist = Distance() - - # Artist, if not various. - if not album_info.va: - dist.add_string("artist", likelies["artist"], album_info.artist) - - # Album. - dist.add_string("album", likelies["album"], album_info.album) - - preferred_config = config["match"]["preferred"] - # Current or preferred media. - if album_info.media: - # Preferred media options. - media_patterns: Sequence[str] = preferred_config["media"].as_str_seq() - options = [ - re.compile(rf"(\d+x)?({pat})", re.I) for pat in media_patterns - ] - if options: - dist.add_priority("media", album_info.media, options) - # Current media. - elif likelies["media"]: - dist.add_equality("media", album_info.media, likelies["media"]) - - # Mediums. - if likelies["disctotal"] and album_info.mediums: - dist.add_number("mediums", likelies["disctotal"], album_info.mediums) - - # Prefer earliest release. - if album_info.year and preferred_config["original_year"]: - # Assume 1889 (earliest first gramophone discs) if we don't know the - # original year. - original = album_info.original_year or 1889 - diff = abs(album_info.year - original) - diff_max = abs(datetime.date.today().year - original) - dist.add_ratio("year", diff, diff_max) - # Year. - elif likelies["year"] and album_info.year: - if likelies["year"] in (album_info.year, album_info.original_year): - # No penalty for matching release or original year. - dist.add("year", 0.0) - elif album_info.original_year: - # Prefer matchest closest to the release year. - diff = abs(likelies["year"] - album_info.year) - diff_max = abs( - datetime.date.today().year - album_info.original_year - ) - dist.add_ratio("year", diff, diff_max) - else: - # Full penalty when there is no original year. - dist.add("year", 1.0) - - # Preferred countries. - country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq() - options = [re.compile(pat, re.I) for pat in country_patterns] - if album_info.country and options: - dist.add_priority("country", album_info.country, options) - # Country. - elif likelies["country"] and album_info.country: - dist.add_string("country", likelies["country"], album_info.country) - - # Label. - if likelies["label"] and album_info.label: - dist.add_string("label", likelies["label"], album_info.label) - - # Catalog number. - if likelies["catalognum"] and album_info.catalognum: - dist.add_string( - "catalognum", likelies["catalognum"], album_info.catalognum - ) - - # Disambiguation. - if likelies["albumdisambig"] and album_info.albumdisambig: - dist.add_string( - "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig - ) - - # Album ID. - if likelies["mb_albumid"]: - dist.add_equality( - "album_id", likelies["mb_albumid"], album_info.album_id - ) - - # Tracks. 
- dist.tracks = {} - for item, track in item_info_pairs: - dist.tracks[track] = track_distance(item, track, album_info.va) - dist.add("tracks", dist.tracks[track].distance) - - # Missing tracks. - for _ in range(len(album_info.tracks) - len(item_info_pairs)): - dist.add("missing_tracks", 1.0) - - # Unmatched tracks. - for _ in range(len(items) - len(item_info_pairs)): - dist.add("unmatched_tracks", 1.0) - - dist.add_data_source(likelies["data_source"], album_info.data_source) - - return dist +from __future__ import annotations + +import datetime +import re +from functools import cache, total_ordering +from typing import TYPE_CHECKING, Any + +from jellyfish import levenshtein_distance +from unidecode import unidecode + +from beets import config, metadata_plugins +from beets.util import as_string, cached_classproperty, get_most_common_tags +from beets.util.color import colorize + +if TYPE_CHECKING: + from collections.abc import Iterator, Sequence + + from beets.library import Item + from beets.util.color import ColorName + + from .hooks import AlbumInfo, TrackInfo + +# Candidate distance scoring. + +# Artist signals that indicate "various artists". These are used at the +# album level to determine whether a given release is likely a VA +# release and also on the track level to to remove the penalty for +# differing artists. +VA_ARTISTS = ("", "various artists", "various", "va", "unknown") + +# Parameters for string distance function. +# Words that can be moved to the end of a string using a comma. +SD_END_WORDS = ["the", "a", "an"] +# Reduced weights for certain portions of the string. +SD_PATTERNS = [ + (r"^the ", 0.1), + (r"[\[\(]?(ep|single)[\]\)]?", 0.0), + (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1), + (r"\(.*?\)", 0.3), + (r"\[.*?\]", 0.3), + (r"(, )?(pt\.|part) .+", 0.2), +] +# Replacements to use before testing distance. +SD_REPLACE = [ + (r"&", "and"), +] + + +def _string_dist_basic(str1: str, str2: str) -> float: + """Basic edit distance between two strings, ignoring + non-alphanumeric characters and case. Comparisons are based on a + transliteration/lowering to ASCII characters. Normalized by string + length. + """ + assert isinstance(str1, str) + assert isinstance(str2, str) + str1 = as_string(unidecode(str1)) + str2 = as_string(unidecode(str2)) + str1 = re.sub(r"[^a-z0-9]", "", str1.lower()) + str2 = re.sub(r"[^a-z0-9]", "", str2.lower()) + if not str1 and not str2: + return 0.0 + return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2))) + + +def string_dist(str1: str | None, str2: str | None) -> float: + """Gives an "intuitive" edit distance between two strings. This is + an edit distance, normalized by the string length, with a number of + tweaks that reflect intuition about text. + """ + if str1 is None and str2 is None: + return 0.0 + if str1 is None or str2 is None: + return 1.0 + + str1 = str1.lower() + str2 = str2.lower() + + # Don't penalize strings that move certain words to the end. For + # example, "the something" should be considered equal to + # "something, the". + for word in SD_END_WORDS: + if str1.endswith(f", {word}"): + str1 = f"{word} {str1[: -len(word) - 2]}" + if str2.endswith(f", {word}"): + str2 = f"{word} {str2[: -len(word) - 2]}" + + # Perform a couple of basic normalizing substitutions. + for pat, repl in SD_REPLACE: + str1 = re.sub(pat, repl, str1) + str2 = re.sub(pat, repl, str2) + + # Change the weight for certain string portions matched by a set + # of regular expressions. 
We gradually change the strings and build + # up penalties associated with parts of the string that were + # deleted. + base_dist = _string_dist_basic(str1, str2) + penalty = 0.0 + for pat, weight in SD_PATTERNS: + # Get strings that drop the pattern. + case_str1 = re.sub(pat, "", str1) + case_str2 = re.sub(pat, "", str2) + + if case_str1 != str1 or case_str2 != str2: + # If the pattern was present (i.e., it is deleted in the + # the current case), recalculate the distances for the + # modified strings. + case_dist = _string_dist_basic(case_str1, case_str2) + case_delta = max(0.0, base_dist - case_dist) + if case_delta == 0.0: + continue + + # Shift our baseline strings down (to avoid rematching the + # same part of the string) and add a scaled distance + # amount to the penalties. + str1 = case_str1 + str2 = case_str2 + base_dist = case_dist + penalty += weight * case_delta + + return base_dist + penalty + + +@total_ordering +class Distance: + """Keeps track of multiple distance penalties. Provides a single + weighted distance for all penalties as well as a weighted distance + for each individual penalty. + """ + + def __init__(self) -> None: + self._penalties: dict[str, list[float]] = {} + self.tracks: dict[TrackInfo, Distance] = {} + + @cached_classproperty + def _weights(cls) -> dict[str, float]: + """A dictionary from keys to floating-point weights.""" + weights_view = config["match"]["distance_weights"] + weights = {} + for key in weights_view.keys(): + weights[key] = weights_view[key].as_number() + return weights + + @property + def generic_penalty_keys(self) -> list[str]: + return [ + k.replace("album_", "").replace("track_", "").replace("_", " ") + for k in self._penalties + ] + + # Access the components and their aggregates. + + @property + def distance(self) -> float: + """Return a weighted and normalized distance across all + penalties. + """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self) -> float: + """Return the maximum distance penalty (normalization factor).""" + dist_max = 0.0 + for key, penalty in self._penalties.items(): + dist_max += len(penalty) * self._weights[key] + return dist_max + + @property + def raw_distance(self) -> float: + """Return the raw (denormalized) distance.""" + dist_raw = 0.0 + for key, penalty in self._penalties.items(): + dist_raw += sum(penalty) * self._weights[key] + return dist_raw + + @property + def color(self) -> ColorName: + if self.distance <= config["match"]["strong_rec_thresh"].as_number(): + return "text_success" + if self.distance <= config["match"]["medium_rec_thresh"].as_number(): + return "text_warning" + return "text_error" + + @property + def string(self) -> str: + return colorize(self.color, f"{(1 - self.distance) * 100:.1f}%") + + def items(self) -> list[tuple[str, float]]: + """Return a list of (key, dist) pairs, with `dist` being the + weighted distance, sorted from highest to lowest. Does not + include penalties with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((key, dist)) + # Convert distance into a negative float we can sort items in + # ascending order (for keys, when the penalty is equal) and + # still get the items with the biggest distance first. 
+ return sorted( + list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) + ) + + def __hash__(self) -> int: + return id(self) + + def __eq__(self, other) -> bool: + return self.distance == other + + # Behave like a float. + + def __lt__(self, other) -> bool: + return self.distance < other + + def __float__(self) -> float: + return self.distance + + def __sub__(self, other) -> float: + return self.distance - other + + def __rsub__(self, other) -> float: + return other - self.distance + + def __str__(self) -> str: + return f"{self.distance:.2f}" + + # Behave like a dict. + + def __getitem__(self, key) -> float: + """Returns the weighted distance for a named penalty.""" + dist = sum(self._penalties[key]) * self._weights[key] + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __iter__(self) -> Iterator[tuple[str, float]]: + return iter(self.items()) + + def __len__(self) -> int: + return len(self.items()) + + def keys(self) -> list[str]: + return [key for key, _ in self.items()] + + def update(self, dist: Distance): + """Adds all the distance penalties from `dist`.""" + if not isinstance(dist, Distance): + raise ValueError( + f"`dist` must be a Distance object, not {type(dist)}" + ) + for key, penalties in dist._penalties.items(): + self._penalties.setdefault(key, []).extend(penalties) + + # Adding components. + + def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool: + """Returns True if `value1` is equal to `value2`. `value1` may + be a compiled regular expression, in which case it will be + matched against `value2`. + """ + if isinstance(value1, re.Pattern): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key: str, dist: float): + """Adds a distance penalty. `key` must correspond with a + configured weight setting. `dist` must be a float between 0.0 + and 1.0, and will be added to any existing distance penalties + for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}") + self._penalties.setdefault(key, []).append(dist) + + def add_equality( + self, + key: str, + value: Any, + options: list[Any] | tuple[Any, ...] | Any, + ): + """Adds a distance penalty of 1.0 if `value` doesn't match any + of the values in `options`. If an option is a compiled regular + expression, it will be considered equal if it matches against + `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key: str, expr: bool): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, + or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key: str, number1: int, number2: int): + """Adds a distance penalty of 1.0 for each number of difference + between `number1` and `number2`, or 0.0 when there is no + difference. Use this when there is no upper limit on the + difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority( + self, + key: str, + value: Any, + options: list[Any] | tuple[Any, ...] | Any, + ): + """Adds a distance penalty that corresponds to the position at + which `value` appears in `options`. A distance penalty of 0.0 + for the first option, or 1.0 if there is no matching option. 
If + an option is a compiled regular expression, it will be + considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio( + self, + key: str, + number1: int | float, + number2: int | float, + ): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key: str, str1: str | None, str2: str | None): + """Adds a distance penalty based on the edit distance between + `str1` and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + def add_data_source(self, before: str | None, after: str | None) -> None: + if before != after and ( + before or len(metadata_plugins.find_metadata_source_plugins()) > 1 + ): + self.add("data_source", metadata_plugins.get_penalty(after)) + + +@cache +def get_track_length_grace() -> float: + """Get cached grace period for track length matching.""" + return config["match"]["track_length_grace"].as_number() + + +@cache +def get_track_length_max() -> float: + """Get cached maximum track length for track length matching.""" + return config["match"]["track_length_max"].as_number() + + +def track_index_changed(item: Item, track_info: TrackInfo) -> bool: + """Returns True if the item and track info index is different. Tolerates + per disc and per release numbering. + """ + return item.track not in (track_info.medium_index, track_info.index) + + +def track_distance( + item: Item, + track_info: TrackInfo, + incl_artist: bool = False, +) -> Distance: + """Determines the significance of a track metadata change. Returns a + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). + + ``track_length_grace`` and ``track_length_max`` configuration options are + cached because this function is called many times during the matching + process and their access comes with a performance overhead. + """ + dist = Distance() + + # Length. + if info_length := track_info.length: + diff = abs(item.length - info_length) - get_track_length_grace() + dist.add_ratio("track_length", diff, get_track_length_max()) + + # Title. + dist.add_string("track_title", item.title, track_info.title) + + # Artist. Only check if there is actually an artist in the track data. + if ( + incl_artist + and track_info.artist + and item.artist.lower() not in VA_ARTISTS + ): + dist.add_string("track_artist", item.artist, track_info.artist) + + # Track index. + if track_info.index and item.track: + dist.add_expr("track_index", track_index_changed(item, track_info)) + + # Track ID. + if item.mb_trackid: + dist.add_expr("track_id", item.mb_trackid != track_info.track_id) + + # Penalize mismatching disc numbers. + if track_info.medium and item.disc: + dist.add_expr("medium", item.disc != track_info.medium) + + dist.add_data_source(item.get("data_source"), track_info.data_source) + + return dist + + +def distance( + items: Sequence[Item], + album_info: AlbumInfo, + item_info_pairs: list[tuple[Item, TrackInfo]], +) -> Distance: + """Determines how "significant" an album metadata change would be. + Returns a Distance object. 
`album_info` is an AlbumInfo object + reflecting the album to be compared. `items` is a sequence of all + Item objects that will be matched (order is not important). + `mapping` is a dictionary mapping Items to TrackInfo objects; the + keys are a subset of `items` and the values are a subset of + `album_info.tracks`. + """ + likelies, _ = get_most_common_tags(items) + + dist = Distance() + + # Artist, if not various. + if not album_info.va: + dist.add_string("artist", likelies["artist"], album_info.artist) + + # Album. + dist.add_string("album", likelies["album"], album_info.album) + + preferred_config = config["match"]["preferred"] + # Current or preferred media. + if album_info.media: + # Preferred media options. + media_patterns: Sequence[str] = preferred_config["media"].as_str_seq() + options = [ + re.compile(rf"(\d+x)?({pat})", re.I) for pat in media_patterns + ] + if options: + dist.add_priority("media", album_info.media, options) + # Current media. + elif likelies["media"]: + dist.add_equality("media", album_info.media, likelies["media"]) + + # Mediums. + if likelies["disctotal"] and album_info.mediums: + dist.add_number("mediums", likelies["disctotal"], album_info.mediums) + + # Prefer earliest release. + if album_info.year and preferred_config["original_year"]: + # Assume 1889 (earliest first gramophone discs) if we don't know the + # original year. + original = album_info.original_year or 1889 + diff = abs(album_info.year - original) + diff_max = abs(datetime.date.today().year - original) + dist.add_ratio("year", diff, diff_max) + # Year. + elif likelies["year"] and album_info.year: + if likelies["year"] in (album_info.year, album_info.original_year): + # No penalty for matching release or original year. + dist.add("year", 0.0) + elif album_info.original_year: + # Prefer matchest closest to the release year. + diff = abs(likelies["year"] - album_info.year) + diff_max = abs( + datetime.date.today().year - album_info.original_year + ) + dist.add_ratio("year", diff, diff_max) + else: + # Full penalty when there is no original year. + dist.add("year", 1.0) + + # Preferred countries. + country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq() + options = [re.compile(pat, re.I) for pat in country_patterns] + if album_info.country and options: + dist.add_priority("country", album_info.country, options) + # Country. + elif likelies["country"] and album_info.country: + dist.add_string("country", likelies["country"], album_info.country) + + # Label. + if likelies["label"] and album_info.label: + dist.add_string("label", likelies["label"], album_info.label) + + # Catalog number. + if likelies["catalognum"] and album_info.catalognum: + dist.add_string( + "catalognum", likelies["catalognum"], album_info.catalognum + ) + + # Disambiguation. + if likelies["albumdisambig"] and album_info.albumdisambig: + dist.add_string( + "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig + ) + + # Album ID. + if likelies["mb_albumid"]: + dist.add_equality( + "album_id", likelies["mb_albumid"], album_info.album_id + ) + + # Tracks. + dist.tracks = {} + for item, track in item_info_pairs: + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add("tracks", dist.tracks[track].distance) + + # Missing tracks. + for _ in range(len(album_info.tracks) - len(item_info_pairs)): + dist.add("missing_tracks", 1.0) + + # Unmatched tracks. 
+ for _ in range(len(items) - len(item_info_pairs)): + dist.add("unmatched_tracks", 1.0) + + dist.add_data_source(likelies["data_source"], album_info.data_source) + + return dist diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index 5ad832e96..b58a07c06 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -1,614 +1,614 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Glue between metadata sources and the matching logic.""" - -from __future__ import annotations - -from copy import deepcopy -from dataclasses import dataclass, field -from functools import cached_property -from typing import TYPE_CHECKING, Any, ClassVar, TypeVar - -from typing_extensions import Self - -from beets import config, logging, plugins -from beets.util import cached_classproperty, unique_list -from beets.util.deprecation import deprecate_for_maintainers - -if TYPE_CHECKING: - from collections.abc import Sequence - - from beets.library import Album, Item - - from .distance import Distance - -V = TypeVar("V") - -JSONDict = dict[str, Any] - -log = logging.getLogger("beets") - -SYNCHRONISED_LIST_FIELDS = { - ("albumtype", "albumtypes"), - ("artist", "artists"), - ("artist_id", "artists_ids"), - ("artist_sort", "artists_sort"), - ("artist_credit", "artists_credit"), -} - - -def correct_list_fields(input_data: JSONDict) -> JSONDict: - """Synchronise single and list values for certain metadata fields. - - For fields listed in :data:`SYNCHRONISED_LIST_FIELDS`, beets stores both a - scalar value (for example ``artist_id``) and a corresponding list value - (for example ``artists_ids``). Under the current :class:`MediaFile` - implementation, only the list value is actually written to files; the - scalar is effectively mapped to the first element of the list. - - Beets, however, still treats the scalar fields as independent and stores - them in the database. When the scalar value and the first list element - differ (for example, ``artist_id`` != ``artists_ids[0]``), commands like - ``beet write`` can repeatedly report changes that will never be written to - the underlying files. - - This helper reduces such mismatches by keeping the scalar and list values - in sync where appropriate: it usually makes sure that the scalar value is - present (and, when necessary, first) in the corresponding list, or that an - existing list value is copied back into the scalar field. In cases where - the scalar value is already represented in the list (ignoring case and - simple word ordering), the list is left unchanged. 
- """ - data = deepcopy(input_data) - - def ensure_first_value(single_field: str, list_field: str) -> None: - """Ensure the first ``list_field`` item is equal to ``single_field``.""" - list_val: list[str] - single_val, list_val = ( - data.get(single_field) or "", - data.get(list_field) or [], - ) - if single_val not in list_val and set(single_val.lower().split()) & set( - map(str.lower, list_val) - ): - return - - if single_val: - data[list_field] = unique_list([single_val, *list_val]) - elif list_val: - data[single_field] = list_val[0] - - for pair in SYNCHRONISED_LIST_FIELDS: - ensure_first_value(*pair) - - return data - - -# Classes used to represent candidate options. -class AttrDict(dict[str, V]): - """Mapping enabling attribute-style access to stored metadata values.""" - - def copy(self) -> Self: - """Return a detached copy preserving subclass-specific behavior.""" - return deepcopy(self) - - def __getattr__(self, attr: str) -> V: - if attr in self: - return self[attr] - - raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute '{attr}'" - ) - - def __setattr__(self, key: str, value: V) -> None: - self.__setitem__(key, value) - - def __hash__(self) -> int: # type: ignore[override] - return id(self) - - -class Info(AttrDict[Any]): - """Container for metadata about a musical entity.""" - - Identifier = tuple[str | None, str | None] - - type: ClassVar[str] - - IGNORED_FIELDS: ClassVar[set[str]] = {"data_url"} - MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = {} - - @cached_classproperty - def nullable_fields(cls) -> set[str]: - """Return fields that may be cleared when new metadata is applied.""" - return set(config["overwrite_null"][cls.type.lower()].as_str_seq()) - - @property - def id(self) -> str | None: - """Return the provider-specific identifier for this metadata object.""" - raise NotImplementedError - - @property - def identifier(self) -> Identifier: - """Return a cross-provider key in ``(data_source, id)`` form.""" - return (self.data_source, self.id) - - @cached_property - def name(self) -> str: - raise NotImplementedError - - @cached_property - def raw_data(self) -> JSONDict: - """Provide metadata with artist credits applied when configured.""" - data = self.__class__(**self.copy()) - if config["artist_credit"]: - data.update( - artist=self.artist_credit or self.artist, - artists=self.artists_credit or self.artists, - ) - return correct_list_fields(data) - - @cached_property - def item_data(self) -> JSONDict: - """Metadata for items with field mappings and exclusions applied. - - Filters out null values and empty lists except for explicitly nullable - fields, removes ignored fields, and applies media-specific field name - mappings for compatibility with the item model. 
- """ - data = { - k: v - for k, v in self.raw_data.items() - if k not in self.IGNORED_FIELDS - and (v not in [None, []] or k in self.nullable_fields) - } - for info_field, media_field in ( - (k, v) for k, v in self.MEDIA_FIELD_MAP.items() if k in data - ): - data[media_field] = data.pop(info_field) - - return data - - def __init__( - self, - album: str | None = None, - artist_credit: str | None = None, - artist_id: str | None = None, - artist: str | None = None, - artists_credit: list[str] | None = None, - artists_ids: list[str] | None = None, - artists: list[str] | None = None, - artist_sort: str | None = None, - artists_sort: list[str] | None = None, - data_source: str | None = None, - data_url: str | None = None, - genre: str | None = None, - genres: list[str] | None = None, - media: str | None = None, - **kwargs, - ) -> None: - if genre is not None: - deprecate_for_maintainers( - "The 'genre' parameter", "'genres' (list)", stacklevel=3 - ) - if not genres: - try: - sep = next(s for s in ["; ", ", ", " / "] if s in genre) - except StopIteration: - genres = [genre] - else: - genres = list(map(str.strip, genre.split(sep))) - - self.album = album - self.artist = artist - self.artist_credit = artist_credit - self.artist_id = artist_id - self.artists = artists - self.artists_credit = artists_credit - self.artists_ids = artists_ids - self.artist_sort = artist_sort - self.artists_sort = artists_sort - self.data_source = data_source - self.data_url = data_url - self.genre = None - self.genres = genres - self.media = media - self.update(kwargs) - - -class AlbumInfo(Info): - """Metadata snapshot representing a single album candidate. - - Aggregates track entries and album-wide context gathered from an external - provider. Used during matching to evaluate similarity against a group of - user items, and later to drive tagging decisions once selected. 
- """ - - type = "Album" - - IGNORED_FIELDS: ClassVar[set[str]] = {*Info.IGNORED_FIELDS, "tracks"} - MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = { - **Info.MEDIA_FIELD_MAP, - "album_id": "mb_albumid", - "artist": "albumartist", - "artists": "albumartists", - "artist_id": "mb_albumartistid", - "artists_ids": "mb_albumartistids", - "artist_credit": "albumartist_credit", - "artists_credit": "albumartists_credit", - "artist_sort": "albumartist_sort", - "artists_sort": "albumartists_sort", - "mediums": "disctotal", - "releasegroup_id": "mb_releasegroupid", - "va": "comp", - } - - @property - def id(self) -> str | None: - return self.album_id - - @cached_property - def name(self) -> str: - return self.album or "" - - @cached_property - def raw_data(self) -> JSONDict: - """Metadata with month and day reset to 0 when only year is present.""" - data = {**super().raw_data} - if data["year"]: - data["month"] = self.month or 0 - data["day"] = self.day or 0 - - return data - - def __init__( - self, - tracks: list[TrackInfo], - *, - album_id: str | None = None, - albumdisambig: str | None = None, - albumstatus: str | None = None, - albumtype: str | None = None, - albumtypes: list[str] | None = None, - asin: str | None = None, - barcode: str | None = None, - catalognum: str | None = None, - country: str | None = None, - day: int | None = None, - discogs_albumid: str | None = None, - discogs_artistid: str | None = None, - discogs_labelid: str | None = None, - label: str | None = None, - language: str | None = None, - mediums: int | None = None, - month: int | None = None, - original_day: int | None = None, - original_month: int | None = None, - original_year: int | None = None, - release_group_title: str | None = None, - releasegroup_id: str | None = None, - releasegroupdisambig: str | None = None, - script: str | None = None, - style: str | None = None, - va: bool = False, - year: int | None = None, - **kwargs, - ) -> None: - self.tracks = tracks - self.album_id = album_id - self.albumdisambig = albumdisambig - self.albumstatus = albumstatus - self.albumtype = albumtype - self.albumtypes = albumtypes - self.asin = asin - self.barcode = barcode - self.catalognum = catalognum - self.country = country - self.day = day - self.discogs_albumid = discogs_albumid - self.discogs_artistid = discogs_artistid - self.discogs_labelid = discogs_labelid - self.label = label - self.language = language - self.mediums = mediums - self.month = month - self.original_day = original_day - self.original_month = original_month - self.original_year = original_year - self.release_group_title = release_group_title - self.releasegroup_id = releasegroup_id - self.releasegroupdisambig = releasegroupdisambig - self.script = script - self.style = style - self.va = va - self.year = year - super().__init__(**kwargs) - - -class TrackInfo(Info): - """Metadata snapshot for a single track candidate. - - Captures identifying details and creative credits used to compare against - a user's item. Instances often originate within an AlbumInfo but may also - stand alone for singleton matching. 
- """ - - type = "Track" - - IGNORED_FIELDS: ClassVar[set[str]] = { - *Info.IGNORED_FIELDS, - "index", - "medium_total", - } - MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = { - **Info.MEDIA_FIELD_MAP, - "artist_id": "mb_artistid", - "artists_ids": "mb_artistids", - "medium": "disc", - "release_track_id": "mb_releasetrackid", - "track_id": "mb_trackid", - "medium_index": "track", - } - - @property - def id(self) -> str | None: - return self.track_id - - @cached_property - def name(self) -> str: - return self.title or "" - - @cached_property - def raw_data(self) -> JSONDict: - """Provide track metadata with numbering adapted to import settings.""" - data = { - **super().raw_data, - "mb_releasetrackid": self.release_track_id or self.track_id, - "track": self.index, - "medium_index": ( - ( - mindex - if (mindex := self.medium_index) is not None - else self.index - ) - if config["per_disc_numbering"] - else self.index - ), - } - if config["per_disc_numbering"] and self.medium_total is not None: - data["tracktotal"] = self.medium_total - - return data - - def __init__( - self, - *, - arranger: str | None = None, - bpm: str | None = None, - composer: str | None = None, - composer_sort: str | None = None, - disctitle: str | None = None, - index: int | None = None, - initial_key: str | None = None, - length: float | None = None, - lyricist: str | None = None, - mb_workid: str | None = None, - medium: int | None = None, - medium_index: int | None = None, - medium_total: int | None = None, - release_track_id: str | None = None, - title: str | None = None, - track_alt: str | None = None, - track_id: str | None = None, - work: str | None = None, - work_disambig: str | None = None, - **kwargs, - ) -> None: - self.arranger = arranger - self.bpm = bpm - self.composer = composer - self.composer_sort = composer_sort - self.disctitle = disctitle - self.index = index - self.initial_key = initial_key - self.length = length - self.lyricist = lyricist - self.mb_workid = mb_workid - self.medium = medium - self.medium_index = medium_index - self.medium_total = medium_total - self.release_track_id = release_track_id - self.title = title - self.track_alt = track_alt - self.track_id = track_id - self.work = work - self.work_disambig = work_disambig - super().__init__(**kwargs) - - def merge_with_album(self, album_info: AlbumInfo) -> JSONDict: - """Merge track metadata with album-level data as fallback. - - Combines this track's metadata with album-wide values, using album data - to fill missing track fields while preserving track-specific artist - credits. - """ - album = album_info.raw_data - raw_track = self.raw_data - track = self.__class__(**self.copy()) - - # Do not inherit album artist_credit onto tracks. When artist_credit - # mode is enabled, raw_data() uses artist_credit to rewrite artist, and - # inheriting the album credit here would override albumartist fallback - # for tracks that have no track-level credit. - for k in raw_track.keys() - {"artist_credit"}: - if not raw_track[k] and (v := album.get(k)): - track[k] = v - - merged = ( - album_info.item_data - | {"tracktotal": len(album_info.tracks)} - | track.item_data - ) - - # When configured, prefer original release date over album date. - # This keeps logic local and simple; no need to change AlbumInfo. 
- if config["original_date"].get(bool) and ( - original_year := merged.get("original_year") - ): - merged["year"] = original_year - merged["month"] = merged.get("original_month") or 0 - merged["day"] = merged.get("original_day") or 0 - return merged - - -# Structures that compose all the information for a candidate match. -@dataclass -class Match: - """Represent a chosen metadata candidate and its application behavior.""" - - disambig_fields_key: ClassVar[str] - - distance: Distance - info: Info - - def apply_metadata(self) -> None: - """Apply this match's metadata to its target library objects.""" - raise NotImplementedError - - @cached_property - def type(self) -> str: - return self.info.type - - @cached_property - def from_scratch(self) -> bool: - return bool(config["import"]["from_scratch"]) - - @property - def disambig_fields(self) -> Sequence[str]: - """Return configured disambiguation fields that exist on this match.""" - chosen_fields = config["match"][self.disambig_fields_key].as_str_seq() - valid_fields = [f for f in chosen_fields if f in self.info] - if missing_fields := set(chosen_fields) - set(valid_fields): - log.warning( - "Disambiguation string keys {} do not exist.", missing_fields - ) - - return valid_fields - - @property - def base_disambig_data(self) -> JSONDict: - """Return supplemental values used when formatting disambiguation.""" - return {} - - @property - def disambig_string(self) -> str: - """Build a display string from the candidate's disambiguation fields. - - Merges base disambiguation data with instance-specific field values, - then formats them as a comma-separated string in field definition order. - """ - data = { - k: self.info[k] for k in self.disambig_fields - } | self.base_disambig_data - return ", ".join(str(data[k]) for k in self.disambig_fields) - - -@dataclass -class AlbumMatch(Match): - """Represent an album candidate together with its item-to-track mapping.""" - - disambig_fields_key = "album_disambig_fields" - - info: AlbumInfo - mapping: dict[Item, TrackInfo] - extra_items: list[Item] = field(default_factory=list) - extra_tracks: list[TrackInfo] = field(default_factory=list) - - def __post_init__(self) -> None: - """Notify listeners when an album candidate has been matched.""" - plugins.send("album_matched", match=self) - - @property - def item_info_pairs(self) -> list[tuple[Item, TrackInfo]]: - """Return matched items together with their selected track metadata.""" - return list(self.mapping.items()) - - @property - def items(self) -> list[Item]: - """Return the items that participate in this album match.""" - return [i for i, _ in self.item_info_pairs] - - @property - def base_disambig_data(self) -> JSONDict: - """Return album-specific values used in disambiguation displays.""" - return { - "media": ( - f"{mediums}x{self.info.media}" - if (mediums := self.info.mediums) and mediums > 1 - else self.info.media - ), - } - - @property - def merged_pairs(self) -> list[tuple[Item, JSONDict]]: - """Generate item-data pairs with album-level fallback values.""" - return [ - (i, ti.merge_with_album(self.info)) - for i, ti in self.item_info_pairs - ] - - def apply_metadata(self) -> None: - """Apply metadata to each of the items.""" - for item, data in self.merged_pairs: - if self.from_scratch: - item.clear() - - item.update(data) - - def apply_album_metadata(self, album: Album) -> None: - """Apply album-level metadata to the Album object.""" - album.update(self.info.item_data) - - -@dataclass -class TrackMatch(Match): - """Represent a singleton candidate 
and the item it updates.""" - - disambig_fields_key = "singleton_disambig_fields" - - info: TrackInfo - item: Item - - @property - def base_disambig_data(self) -> JSONDict: - """Return singleton-specific values used in disambiguation displays.""" - return { - "index": f"Index {self.info.index}", - "track_alt": f"Track {self.info.track_alt}", - "album": ( - f"[{self.info.album}]" - if ( - config["import"]["singleton_album_disambig"].get() - and self.info.album - ) - else "" - ), - } - - def apply_metadata(self) -> None: - """Apply metadata to the item.""" - if self.from_scratch: - self.item.clear() - - self.item.update(self.info.item_data) +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Glue between metadata sources and the matching logic.""" + +from __future__ import annotations + +from copy import deepcopy +from dataclasses import dataclass, field +from functools import cached_property +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar + +from typing_extensions import Self + +from beets import config, logging, plugins +from beets.util import cached_classproperty, unique_list +from beets.util.deprecation import deprecate_for_maintainers + +if TYPE_CHECKING: + from collections.abc import Sequence + + from beets.library import Album, Item + + from .distance import Distance + +V = TypeVar("V") + +JSONDict = dict[str, Any] + +log = logging.getLogger("beets") + +SYNCHRONISED_LIST_FIELDS = { + ("albumtype", "albumtypes"), + ("artist", "artists"), + ("artist_id", "artists_ids"), + ("artist_sort", "artists_sort"), + ("artist_credit", "artists_credit"), +} + + +def correct_list_fields(input_data: JSONDict) -> JSONDict: + """Synchronise single and list values for certain metadata fields. + + For fields listed in :data:`SYNCHRONISED_LIST_FIELDS`, beets stores both a + scalar value (for example ``artist_id``) and a corresponding list value + (for example ``artists_ids``). Under the current :class:`MediaFile` + implementation, only the list value is actually written to files; the + scalar is effectively mapped to the first element of the list. + + Beets, however, still treats the scalar fields as independent and stores + them in the database. When the scalar value and the first list element + differ (for example, ``artist_id`` != ``artists_ids[0]``), commands like + ``beet write`` can repeatedly report changes that will never be written to + the underlying files. + + This helper reduces such mismatches by keeping the scalar and list values + in sync where appropriate: it usually makes sure that the scalar value is + present (and, when necessary, first) in the corresponding list, or that an + existing list value is copied back into the scalar field. In cases where + the scalar value is already represented in the list (ignoring case and + simple word ordering), the list is left unchanged. 
+ """ + data = deepcopy(input_data) + + def ensure_first_value(single_field: str, list_field: str) -> None: + """Ensure the first ``list_field`` item is equal to ``single_field``.""" + list_val: list[str] + single_val, list_val = ( + data.get(single_field) or "", + data.get(list_field) or [], + ) + if single_val not in list_val and set(single_val.lower().split()) & set( + map(str.lower, list_val) + ): + return + + if single_val: + data[list_field] = unique_list([single_val, *list_val]) + elif list_val: + data[single_field] = list_val[0] + + for pair in SYNCHRONISED_LIST_FIELDS: + ensure_first_value(*pair) + + return data + + +# Classes used to represent candidate options. +class AttrDict(dict[str, V]): + """Mapping enabling attribute-style access to stored metadata values.""" + + def copy(self) -> Self: + """Return a detached copy preserving subclass-specific behavior.""" + return deepcopy(self) + + def __getattr__(self, attr: str) -> V: + if attr in self: + return self[attr] + + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{attr}'" + ) + + def __setattr__(self, key: str, value: V) -> None: + self.__setitem__(key, value) + + def __hash__(self) -> int: # type: ignore[override] + return id(self) + + +class Info(AttrDict[Any]): + """Container for metadata about a musical entity.""" + + Identifier = tuple[str | None, str | None] + + type: ClassVar[str] + + IGNORED_FIELDS: ClassVar[set[str]] = {"data_url"} + MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = {} + + @cached_classproperty + def nullable_fields(cls) -> set[str]: + """Return fields that may be cleared when new metadata is applied.""" + return set(config["overwrite_null"][cls.type.lower()].as_str_seq()) + + @property + def id(self) -> str | None: + """Return the provider-specific identifier for this metadata object.""" + raise NotImplementedError + + @property + def identifier(self) -> Identifier: + """Return a cross-provider key in ``(data_source, id)`` form.""" + return (self.data_source, self.id) + + @cached_property + def name(self) -> str: + raise NotImplementedError + + @cached_property + def raw_data(self) -> JSONDict: + """Provide metadata with artist credits applied when configured.""" + data = self.__class__(**self.copy()) + if config["artist_credit"]: + data.update( + artist=self.artist_credit or self.artist, + artists=self.artists_credit or self.artists, + ) + return correct_list_fields(data) + + @cached_property + def item_data(self) -> JSONDict: + """Metadata for items with field mappings and exclusions applied. + + Filters out null values and empty lists except for explicitly nullable + fields, removes ignored fields, and applies media-specific field name + mappings for compatibility with the item model. 
+ """ + data = { + k: v + for k, v in self.raw_data.items() + if k not in self.IGNORED_FIELDS + and (v not in [None, []] or k in self.nullable_fields) + } + for info_field, media_field in ( + (k, v) for k, v in self.MEDIA_FIELD_MAP.items() if k in data + ): + data[media_field] = data.pop(info_field) + + return data + + def __init__( + self, + album: str | None = None, + artist_credit: str | None = None, + artist_id: str | None = None, + artist: str | None = None, + artists_credit: list[str] | None = None, + artists_ids: list[str] | None = None, + artists: list[str] | None = None, + artist_sort: str | None = None, + artists_sort: list[str] | None = None, + data_source: str | None = None, + data_url: str | None = None, + genre: str | None = None, + genres: list[str] | None = None, + media: str | None = None, + **kwargs, + ) -> None: + if genre is not None: + deprecate_for_maintainers( + "The 'genre' parameter", "'genres' (list)", stacklevel=3 + ) + if not genres: + try: + sep = next(s for s in ["; ", ", ", " / "] if s in genre) + except StopIteration: + genres = [genre] + else: + genres = list(map(str.strip, genre.split(sep))) + + self.album = album + self.artist = artist + self.artist_credit = artist_credit + self.artist_id = artist_id + self.artists = artists + self.artists_credit = artists_credit + self.artists_ids = artists_ids + self.artist_sort = artist_sort + self.artists_sort = artists_sort + self.data_source = data_source + self.data_url = data_url + self.genre = None + self.genres = genres + self.media = media + self.update(kwargs) + + +class AlbumInfo(Info): + """Metadata snapshot representing a single album candidate. + + Aggregates track entries and album-wide context gathered from an external + provider. Used during matching to evaluate similarity against a group of + user items, and later to drive tagging decisions once selected. 
+ """ + + type = "Album" + + IGNORED_FIELDS: ClassVar[set[str]] = {*Info.IGNORED_FIELDS, "tracks"} + MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = { + **Info.MEDIA_FIELD_MAP, + "album_id": "mb_albumid", + "artist": "albumartist", + "artists": "albumartists", + "artist_id": "mb_albumartistid", + "artists_ids": "mb_albumartistids", + "artist_credit": "albumartist_credit", + "artists_credit": "albumartists_credit", + "artist_sort": "albumartist_sort", + "artists_sort": "albumartists_sort", + "mediums": "disctotal", + "releasegroup_id": "mb_releasegroupid", + "va": "comp", + } + + @property + def id(self) -> str | None: + return self.album_id + + @cached_property + def name(self) -> str: + return self.album or "" + + @cached_property + def raw_data(self) -> JSONDict: + """Metadata with month and day reset to 0 when only year is present.""" + data = {**super().raw_data} + if data["year"]: + data["month"] = self.month or 0 + data["day"] = self.day or 0 + + return data + + def __init__( + self, + tracks: list[TrackInfo], + *, + album_id: str | None = None, + albumdisambig: str | None = None, + albumstatus: str | None = None, + albumtype: str | None = None, + albumtypes: list[str] | None = None, + asin: str | None = None, + barcode: str | None = None, + catalognum: str | None = None, + country: str | None = None, + day: int | None = None, + discogs_albumid: str | None = None, + discogs_artistid: str | None = None, + discogs_labelid: str | None = None, + label: str | None = None, + language: str | None = None, + mediums: int | None = None, + month: int | None = None, + original_day: int | None = None, + original_month: int | None = None, + original_year: int | None = None, + release_group_title: str | None = None, + releasegroup_id: str | None = None, + releasegroupdisambig: str | None = None, + script: str | None = None, + style: str | None = None, + va: bool = False, + year: int | None = None, + **kwargs, + ) -> None: + self.tracks = tracks + self.album_id = album_id + self.albumdisambig = albumdisambig + self.albumstatus = albumstatus + self.albumtype = albumtype + self.albumtypes = albumtypes + self.asin = asin + self.barcode = barcode + self.catalognum = catalognum + self.country = country + self.day = day + self.discogs_albumid = discogs_albumid + self.discogs_artistid = discogs_artistid + self.discogs_labelid = discogs_labelid + self.label = label + self.language = language + self.mediums = mediums + self.month = month + self.original_day = original_day + self.original_month = original_month + self.original_year = original_year + self.release_group_title = release_group_title + self.releasegroup_id = releasegroup_id + self.releasegroupdisambig = releasegroupdisambig + self.script = script + self.style = style + self.va = va + self.year = year + super().__init__(**kwargs) + + +class TrackInfo(Info): + """Metadata snapshot for a single track candidate. + + Captures identifying details and creative credits used to compare against + a user's item. Instances often originate within an AlbumInfo but may also + stand alone for singleton matching. 
+ """ + + type = "Track" + + IGNORED_FIELDS: ClassVar[set[str]] = { + *Info.IGNORED_FIELDS, + "index", + "medium_total", + } + MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = { + **Info.MEDIA_FIELD_MAP, + "artist_id": "mb_artistid", + "artists_ids": "mb_artistids", + "medium": "disc", + "release_track_id": "mb_releasetrackid", + "track_id": "mb_trackid", + "medium_index": "track", + } + + @property + def id(self) -> str | None: + return self.track_id + + @cached_property + def name(self) -> str: + return self.title or "" + + @cached_property + def raw_data(self) -> JSONDict: + """Provide track metadata with numbering adapted to import settings.""" + data = { + **super().raw_data, + "mb_releasetrackid": self.release_track_id or self.track_id, + "track": self.index, + "medium_index": ( + ( + mindex + if (mindex := self.medium_index) is not None + else self.index + ) + if config["per_disc_numbering"] + else self.index + ), + } + if config["per_disc_numbering"] and self.medium_total is not None: + data["tracktotal"] = self.medium_total + + return data + + def __init__( + self, + *, + arranger: str | None = None, + bpm: str | None = None, + composer: str | None = None, + composer_sort: str | None = None, + disctitle: str | None = None, + index: int | None = None, + initial_key: str | None = None, + length: float | None = None, + lyricist: str | None = None, + mb_workid: str | None = None, + medium: int | None = None, + medium_index: int | None = None, + medium_total: int | None = None, + release_track_id: str | None = None, + title: str | None = None, + track_alt: str | None = None, + track_id: str | None = None, + work: str | None = None, + work_disambig: str | None = None, + **kwargs, + ) -> None: + self.arranger = arranger + self.bpm = bpm + self.composer = composer + self.composer_sort = composer_sort + self.disctitle = disctitle + self.index = index + self.initial_key = initial_key + self.length = length + self.lyricist = lyricist + self.mb_workid = mb_workid + self.medium = medium + self.medium_index = medium_index + self.medium_total = medium_total + self.release_track_id = release_track_id + self.title = title + self.track_alt = track_alt + self.track_id = track_id + self.work = work + self.work_disambig = work_disambig + super().__init__(**kwargs) + + def merge_with_album(self, album_info: AlbumInfo) -> JSONDict: + """Merge track metadata with album-level data as fallback. + + Combines this track's metadata with album-wide values, using album data + to fill missing track fields while preserving track-specific artist + credits. + """ + album = album_info.raw_data + raw_track = self.raw_data + track = self.__class__(**self.copy()) + + # Do not inherit album artist_credit onto tracks. When artist_credit + # mode is enabled, raw_data() uses artist_credit to rewrite artist, and + # inheriting the album credit here would override albumartist fallback + # for tracks that have no track-level credit. + for k in raw_track.keys() - {"artist_credit"}: + if not raw_track[k] and (v := album.get(k)): + track[k] = v + + merged = ( + album_info.item_data + | {"tracktotal": len(album_info.tracks)} + | track.item_data + ) + + # When configured, prefer original release date over album date. + # This keeps logic local and simple; no need to change AlbumInfo. 
+ if config["original_date"].get(bool) and ( + original_year := merged.get("original_year") + ): + merged["year"] = original_year + merged["month"] = merged.get("original_month") or 0 + merged["day"] = merged.get("original_day") or 0 + return merged + + +# Structures that compose all the information for a candidate match. +@dataclass +class Match: + """Represent a chosen metadata candidate and its application behavior.""" + + disambig_fields_key: ClassVar[str] + + distance: Distance + info: Info + + def apply_metadata(self) -> None: + """Apply this match's metadata to its target library objects.""" + raise NotImplementedError + + @cached_property + def type(self) -> str: + return self.info.type + + @cached_property + def from_scratch(self) -> bool: + return bool(config["import"]["from_scratch"]) + + @property + def disambig_fields(self) -> Sequence[str]: + """Return configured disambiguation fields that exist on this match.""" + chosen_fields = config["match"][self.disambig_fields_key].as_str_seq() + valid_fields = [f for f in chosen_fields if f in self.info] + if missing_fields := set(chosen_fields) - set(valid_fields): + log.warning( + "Disambiguation string keys {} do not exist.", missing_fields + ) + + return valid_fields + + @property + def base_disambig_data(self) -> JSONDict: + """Return supplemental values used when formatting disambiguation.""" + return {} + + @property + def disambig_string(self) -> str: + """Build a display string from the candidate's disambiguation fields. + + Merges base disambiguation data with instance-specific field values, + then formats them as a comma-separated string in field definition order. + """ + data = { + k: self.info[k] for k in self.disambig_fields + } | self.base_disambig_data + return ", ".join(str(data[k]) for k in self.disambig_fields) + + +@dataclass +class AlbumMatch(Match): + """Represent an album candidate together with its item-to-track mapping.""" + + disambig_fields_key = "album_disambig_fields" + + info: AlbumInfo + mapping: dict[Item, TrackInfo] + extra_items: list[Item] = field(default_factory=list) + extra_tracks: list[TrackInfo] = field(default_factory=list) + + def __post_init__(self) -> None: + """Notify listeners when an album candidate has been matched.""" + plugins.send("album_matched", match=self) + + @property + def item_info_pairs(self) -> list[tuple[Item, TrackInfo]]: + """Return matched items together with their selected track metadata.""" + return list(self.mapping.items()) + + @property + def items(self) -> list[Item]: + """Return the items that participate in this album match.""" + return [i for i, _ in self.item_info_pairs] + + @property + def base_disambig_data(self) -> JSONDict: + """Return album-specific values used in disambiguation displays.""" + return { + "media": ( + f"{mediums}x{self.info.media}" + if (mediums := self.info.mediums) and mediums > 1 + else self.info.media + ), + } + + @property + def merged_pairs(self) -> list[tuple[Item, JSONDict]]: + """Generate item-data pairs with album-level fallback values.""" + return [ + (i, ti.merge_with_album(self.info)) + for i, ti in self.item_info_pairs + ] + + def apply_metadata(self) -> None: + """Apply metadata to each of the items.""" + for item, data in self.merged_pairs: + if self.from_scratch: + item.clear() + + item.update(data) + + def apply_album_metadata(self, album: Album) -> None: + """Apply album-level metadata to the Album object.""" + album.update(self.info.item_data) + + +@dataclass +class TrackMatch(Match): + """Represent a singleton candidate 
and the item it updates.""" + + disambig_fields_key = "singleton_disambig_fields" + + info: TrackInfo + item: Item + + @property + def base_disambig_data(self) -> JSONDict: + """Return singleton-specific values used in disambiguation displays.""" + return { + "index": f"Index {self.info.index}", + "track_alt": f"Track {self.info.track_alt}", + "album": ( + f"[{self.info.album}]" + if ( + config["import"]["singleton_album_disambig"].get() + and self.info.album + ) + else "" + ), + } + + def apply_metadata(self) -> None: + """Apply metadata to the item.""" + if self.from_scratch: + self.item.clear() + + self.item.update(self.info.item_data) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 9d8a210cc..e27588a41 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -1,386 +1,386 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Matches existing metadata with canonical information to identify -releases and tracks. -""" - -from __future__ import annotations - -from enum import IntEnum -from typing import TYPE_CHECKING, NamedTuple, TypeVar - -import lap -import numpy as np - -from beets import config, logging, metadata_plugins -from beets.autotag import AlbumMatch, TrackMatch, hooks -from beets.util import get_most_common_tags - -from .distance import VA_ARTISTS, distance, track_distance -from .hooks import Info - -if TYPE_CHECKING: - from collections.abc import Iterable, Sequence - - from beets.autotag import AlbumInfo, TrackInfo - from beets.library import Item - - -AnyMatch = TypeVar("AnyMatch", TrackMatch, AlbumMatch) -Candidates = dict[Info.Identifier, AnyMatch] - -# Global logger. -log = logging.getLogger("beets") - - -# Recommendation enumeration. - - -class Recommendation(IntEnum): - """Indicates a qualitative suggestion to the user about what should - be done with a given match. - """ - - none = 0 - low = 1 - medium = 2 - strong = 3 - - -# A structure for holding a set of possible matches to choose between. This -# consists of a list of possible candidates (i.e., AlbumInfo or TrackInfo -# objects) and a recommendation value. - - -class Proposal(NamedTuple): - candidates: Sequence[AlbumMatch | TrackMatch] - recommendation: Recommendation - - -# Primary matching functionality. - - -def assign_items( - items: Sequence[Item], - tracks: Sequence[TrackInfo], -) -> tuple[list[tuple[Item, TrackInfo]], list[Item], list[TrackInfo]]: - """Given a list of Items and a list of TrackInfo objects, find the - best mapping between them. Returns a mapping from Items to TrackInfo - objects, a set of extra Items, and a set of extra TrackInfo - objects. These "extra" objects occur when there is an unequal number - of objects of the two types. - """ - log.debug("Computing track assignment...") - # Construct the cost matrix. 
- costs = [[float(track_distance(i, t)) for t in tracks] for i in items] - # Assign items to tracks - _, _, assigned_item_idxs = lap.lapjv(np.array(costs), extend_cost=True) - log.debug("...done.") - - # Each item in `assigned_item_idxs` list corresponds to a track in the - # `tracks` list. Each value is either an index into the assigned item in - # `items` list, or -1 if that track has no match. - mapping = { - items[iidx]: t - for iidx, t in zip(assigned_item_idxs, tracks) - if iidx != -1 - } - extra_items = list(set(items) - mapping.keys()) - extra_items.sort(key=lambda i: (i.disc, i.track, i.title)) - extra_tracks = list(set(tracks) - set(mapping.values())) - extra_tracks.sort(key=lambda t: (t.index, t.title)) - return list(mapping.items()), extra_items, extra_tracks - - -def match_by_id(album_id: str | None, consensus: bool) -> Iterable[AlbumInfo]: - """Return album candidates for the given album id. - - Make sure that the ID is present and that there is consensus on it among - the items being tagged. - """ - if not album_id: - log.debug("No album ID found.") - elif not consensus: - log.debug("No album ID consensus.") - else: - log.debug("Searching for discovered album ID: {}", album_id) - return metadata_plugins.albums_for_ids([album_id]) - - return () - - -def _recommendation( - results: Sequence[AlbumMatch | TrackMatch], -) -> Recommendation: - """Given a sorted list of AlbumMatch or TrackMatch objects, return a - recommendation based on the results' distances. - - If the recommendation is higher than the configured maximum for - an applied penalty, the recommendation will be downgraded to the - configured maximum for that penalty. - """ - if not results: - # No candidates: no recommendation. - return Recommendation.none - - # Basic distance thresholding. - min_dist = results[0].distance - if min_dist < config["match"]["strong_rec_thresh"].as_number(): - # Strong recommendation level. - rec = Recommendation.strong - elif min_dist <= config["match"]["medium_rec_thresh"].as_number(): - # Medium recommendation level. - rec = Recommendation.medium - elif len(results) == 1: - # Only a single candidate. - rec = Recommendation.low - elif ( - results[1].distance - min_dist - >= config["match"]["rec_gap_thresh"].as_number() - ): - # Gap between first two candidates is large. - rec = Recommendation.low - else: - # No conclusion. Return immediately. Can't be downgraded any further. - return Recommendation.none - - # Downgrade to the max rec if it is lower than the current rec for an - # applied penalty. - keys = set(min_dist.keys()) - if isinstance(results[0], hooks.AlbumMatch): - for track_dist in min_dist.tracks.values(): - keys.update(list(track_dist.keys())) - max_rec_view = config["match"]["max_rec"] - for key in keys: - if key in list(max_rec_view.keys()): - max_rec = max_rec_view[key].as_choice( - { - "strong": Recommendation.strong, - "medium": Recommendation.medium, - "low": Recommendation.low, - "none": Recommendation.none, - } - ) - rec = min(rec, max_rec) - - return rec - - -def _sort_candidates(candidates: Iterable[AnyMatch]) -> Sequence[AnyMatch]: - """Sort candidates by distance.""" - return sorted(candidates, key=lambda match: match.distance) - - -def _add_candidate( - items: Sequence[Item], - results: Candidates[AlbumMatch], - info: AlbumInfo, -): - """Given a candidate AlbumInfo object, attempt to add the candidate - to the output dictionary of AlbumMatch objects. 
This involves - checking the track count, ordering the items, checking for - duplicates, and calculating the distance. - """ - log.debug( - "Candidate: {0.artist} - {0.album} ({0.album_id}) from {0.data_source}", - info, - ) - - # Discard albums with zero tracks. - if not info.tracks: - log.debug("No tracks.") - return - - # Prevent duplicates. - if info.album_id and info.identifier in results: - log.debug("Duplicate.") - return - - # Discard matches without required tags. - required_tags: Sequence[str] = config["match"]["required"].as_str_seq() - for req_tag in required_tags: - if getattr(info, req_tag) is None: - log.debug("Ignored. Missing required tag: {}", req_tag) - return - - # Find mapping between the items and the track info. - item_info_pairs, extra_items, extra_tracks = assign_items( - items, info.tracks - ) - - # Get the change distance. - dist = distance(items, info, item_info_pairs) - - # Skip matches with ignored penalties. - penalties = [key for key, _ in dist] - ignored_tags: Sequence[str] = config["match"]["ignored"].as_str_seq() - for penalty in ignored_tags: - if penalty in penalties: - log.debug("Ignored. Penalty: {}", penalty) - return - - log.debug("Success. Distance: {}", dist) - results[info.identifier] = hooks.AlbumMatch( - dist, info, dict(item_info_pairs), extra_items, extra_tracks - ) - - -def tag_album( - items, - search_artist: str | None = None, - search_name: str | None = None, - search_ids: list[str] = [], -) -> tuple[str, str, Proposal]: - """Return a tuple of the current artist name, the current album - name, and a `Proposal` containing `AlbumMatch` candidates. - - The artist and album are the most common values of these fields - among `items`. - - The `AlbumMatch` objects are generated by searching the metadata - backends. By default, the metadata of the items is used for the - search. This can be customized by setting the parameters. - `search_ids` is a list of metadata backend IDs: if specified, - it will restrict the candidates to those IDs, ignoring - `search_artist` and `search album`. The `mapping` field of the - album has the matched `items` as keys. - - The recommendation is calculated from the match quality of the - candidates. - """ - # Get current metadata. - likelies, consensus = get_most_common_tags(items) - cur_artist: str = likelies["artist"] - cur_album: str = likelies["album"] - log.debug("Tagging {} - {}", cur_artist, cur_album) - - # The output result, keys are (data_source, album_id) pairs, values are - # AlbumMatch objects. - candidates: Candidates[AlbumMatch] = {} - - # Search by explicit ID. - if search_ids: - log.debug("Searching for album IDs: {}", ", ".join(search_ids)) - for _info in metadata_plugins.albums_for_ids(search_ids): - _add_candidate(items, candidates, _info) - - # Use existing metadata or text search. - else: - # Try search based on current ID. - for info in match_by_id( - likelies["mb_albumid"], consensus["mb_albumid"] - ): - _add_candidate(items, candidates, info) - - rec = _recommendation(list(candidates.values())) - log.debug("Album ID match recommendation is {}", rec) - if candidates and not config["import"]["timid"]: - # If we have a very good MBID match, return immediately. - # Otherwise, this match will compete against metadata-based - # matches. - if rec == Recommendation.strong: - log.debug("ID match.") - return ( - cur_artist, - cur_album, - Proposal(list(candidates.values()), rec), - ) - - # Search terms. - if not (search_artist and search_name): - # No explicit search terms -- use current metadata. 
- search_artist, search_name = cur_artist, cur_album - log.debug("Search terms: {} - {}", search_artist, search_name) - - # Is this album likely to be a "various artist" release? - va_likely = ( - (not consensus["artist"]) - or (search_artist.lower() in VA_ARTISTS) - or any(item.comp for item in items) - ) - log.debug("Album might be VA: {}", va_likely) - - # Get the results from the data sources. - for matched_candidate in metadata_plugins.candidates( - items, search_artist, search_name, va_likely - ): - _add_candidate(items, candidates, matched_candidate) - - log.debug("Evaluating {} candidates.", len(candidates)) - # Sort and get the recommendation. - candidates_sorted = _sort_candidates(candidates.values()) - rec = _recommendation(candidates_sorted) - return cur_artist, cur_album, Proposal(candidates_sorted, rec) - - -def tag_item( - item, - search_artist: str | None = None, - search_name: str | None = None, - search_ids: list[str] | None = None, -) -> Proposal: - """Find metadata for a single track. Return a `Proposal` consisting - of `TrackMatch` objects. - - `search_artist` and `search_title` may be used to override the item - metadata in the search query. `search_ids` may be used for restricting the - search to a list of metadata backend IDs. - """ - # Holds candidates found so far: keys are (data_source, track_id) pairs, - # values TrackMatch objects - candidates: Candidates[TrackMatch] = {} - rec: Recommendation | None = None - - # First, try matching by the external source ID. - trackids = search_ids or [t for t in [item.mb_trackid] if t] - if trackids: - log.debug("Searching for track IDs: {}", ", ".join(trackids)) - for info in metadata_plugins.tracks_for_ids(trackids): - dist = track_distance(item, info, incl_artist=True) - candidates[info.identifier] = hooks.TrackMatch(dist, info, item) - - # If this is a good match, then don't keep searching. - rec = _recommendation(_sort_candidates(candidates.values())) - if rec == Recommendation.strong and not config["import"]["timid"]: - log.debug("Track ID match.") - return Proposal(_sort_candidates(candidates.values()), rec) - - # If we're searching by ID, don't proceed. - if search_ids: - if candidates: - assert rec is not None - return Proposal(_sort_candidates(candidates.values()), rec) - else: - return Proposal([], Recommendation.none) - - # Search terms. - search_artist = search_artist or item.artist - search_name = search_name or item.title - log.debug("Item search terms: {} - {}", search_artist, search_name) - - # Get and evaluate candidate metadata. - for track_info in metadata_plugins.item_candidates( - item, search_artist, search_name - ): - dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.identifier] = hooks.TrackMatch( - dist, track_info, item - ) - - # Sort by distance and return with recommendation. - log.debug("Found {} candidates.", len(candidates)) - candidates_sorted = _sort_candidates(candidates.values()) - rec = _recommendation(candidates_sorted) - return Proposal(candidates_sorted, rec) +# This file is part of beets. +# Copyright 2016, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Matches existing metadata with canonical information to identify +releases and tracks. +""" + +from __future__ import annotations + +from enum import IntEnum +from typing import TYPE_CHECKING, NamedTuple, TypeVar + +import lap +import numpy as np + +from beets import config, logging, metadata_plugins +from beets.autotag import AlbumMatch, TrackMatch, hooks +from beets.util import get_most_common_tags + +from .distance import VA_ARTISTS, distance, track_distance +from .hooks import Info + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + + from beets.autotag import AlbumInfo, TrackInfo + from beets.library import Item + + +AnyMatch = TypeVar("AnyMatch", TrackMatch, AlbumMatch) +Candidates = dict[Info.Identifier, AnyMatch] + +# Global logger. +log = logging.getLogger("beets") + + +# Recommendation enumeration. + + +class Recommendation(IntEnum): + """Indicates a qualitative suggestion to the user about what should + be done with a given match. + """ + + none = 0 + low = 1 + medium = 2 + strong = 3 + + +# A structure for holding a set of possible matches to choose between. This +# consists of a list of possible candidates (i.e., AlbumInfo or TrackInfo +# objects) and a recommendation value. + + +class Proposal(NamedTuple): + candidates: Sequence[AlbumMatch | TrackMatch] + recommendation: Recommendation + + +# Primary matching functionality. + + +def assign_items( + items: Sequence[Item], + tracks: Sequence[TrackInfo], +) -> tuple[list[tuple[Item, TrackInfo]], list[Item], list[TrackInfo]]: + """Given a list of Items and a list of TrackInfo objects, find the + best mapping between them. Returns a mapping from Items to TrackInfo + objects, a set of extra Items, and a set of extra TrackInfo + objects. These "extra" objects occur when there is an unequal number + of objects of the two types. + """ + log.debug("Computing track assignment...") + # Construct the cost matrix. + costs = [[float(track_distance(i, t)) for t in tracks] for i in items] + # Assign items to tracks + _, _, assigned_item_idxs = lap.lapjv(np.array(costs), extend_cost=True) + log.debug("...done.") + + # Each item in `assigned_item_idxs` list corresponds to a track in the + # `tracks` list. Each value is either an index into the assigned item in + # `items` list, or -1 if that track has no match. + mapping = { + items[iidx]: t + for iidx, t in zip(assigned_item_idxs, tracks) + if iidx != -1 + } + extra_items = list(set(items) - mapping.keys()) + extra_items.sort(key=lambda i: (i.disc, i.track, i.title)) + extra_tracks = list(set(tracks) - set(mapping.values())) + extra_tracks.sort(key=lambda t: (t.index, t.title)) + return list(mapping.items()), extra_items, extra_tracks + + +def match_by_id(album_id: str | None, consensus: bool) -> Iterable[AlbumInfo]: + """Return album candidates for the given album id. + + Make sure that the ID is present and that there is consensus on it among + the items being tagged. 
+ """ + if not album_id: + log.debug("No album ID found.") + elif not consensus: + log.debug("No album ID consensus.") + else: + log.debug("Searching for discovered album ID: {}", album_id) + return metadata_plugins.albums_for_ids([album_id]) + + return () + + +def _recommendation( + results: Sequence[AlbumMatch | TrackMatch], +) -> Recommendation: + """Given a sorted list of AlbumMatch or TrackMatch objects, return a + recommendation based on the results' distances. + + If the recommendation is higher than the configured maximum for + an applied penalty, the recommendation will be downgraded to the + configured maximum for that penalty. + """ + if not results: + # No candidates: no recommendation. + return Recommendation.none + + # Basic distance thresholding. + min_dist = results[0].distance + if min_dist < config["match"]["strong_rec_thresh"].as_number(): + # Strong recommendation level. + rec = Recommendation.strong + elif min_dist <= config["match"]["medium_rec_thresh"].as_number(): + # Medium recommendation level. + rec = Recommendation.medium + elif len(results) == 1: + # Only a single candidate. + rec = Recommendation.low + elif ( + results[1].distance - min_dist + >= config["match"]["rec_gap_thresh"].as_number() + ): + # Gap between first two candidates is large. + rec = Recommendation.low + else: + # No conclusion. Return immediately. Can't be downgraded any further. + return Recommendation.none + + # Downgrade to the max rec if it is lower than the current rec for an + # applied penalty. + keys = set(min_dist.keys()) + if isinstance(results[0], hooks.AlbumMatch): + for track_dist in min_dist.tracks.values(): + keys.update(list(track_dist.keys())) + max_rec_view = config["match"]["max_rec"] + for key in keys: + if key in list(max_rec_view.keys()): + max_rec = max_rec_view[key].as_choice( + { + "strong": Recommendation.strong, + "medium": Recommendation.medium, + "low": Recommendation.low, + "none": Recommendation.none, + } + ) + rec = min(rec, max_rec) + + return rec + + +def _sort_candidates(candidates: Iterable[AnyMatch]) -> Sequence[AnyMatch]: + """Sort candidates by distance.""" + return sorted(candidates, key=lambda match: match.distance) + + +def _add_candidate( + items: Sequence[Item], + results: Candidates[AlbumMatch], + info: AlbumInfo, +): + """Given a candidate AlbumInfo object, attempt to add the candidate + to the output dictionary of AlbumMatch objects. This involves + checking the track count, ordering the items, checking for + duplicates, and calculating the distance. + """ + log.debug( + "Candidate: {0.artist} - {0.album} ({0.album_id}) from {0.data_source}", + info, + ) + + # Discard albums with zero tracks. + if not info.tracks: + log.debug("No tracks.") + return + + # Prevent duplicates. + if info.album_id and info.identifier in results: + log.debug("Duplicate.") + return + + # Discard matches without required tags. + required_tags: Sequence[str] = config["match"]["required"].as_str_seq() + for req_tag in required_tags: + if getattr(info, req_tag) is None: + log.debug("Ignored. Missing required tag: {}", req_tag) + return + + # Find mapping between the items and the track info. + item_info_pairs, extra_items, extra_tracks = assign_items( + items, info.tracks + ) + + # Get the change distance. + dist = distance(items, info, item_info_pairs) + + # Skip matches with ignored penalties. 
+ penalties = [key for key, _ in dist] + ignored_tags: Sequence[str] = config["match"]["ignored"].as_str_seq() + for penalty in ignored_tags: + if penalty in penalties: + log.debug("Ignored. Penalty: {}", penalty) + return + + log.debug("Success. Distance: {}", dist) + results[info.identifier] = hooks.AlbumMatch( + dist, info, dict(item_info_pairs), extra_items, extra_tracks + ) + + +def tag_album( + items, + search_artist: str | None = None, + search_name: str | None = None, + search_ids: list[str] = [], +) -> tuple[str, str, Proposal]: + """Return a tuple of the current artist name, the current album + name, and a `Proposal` containing `AlbumMatch` candidates. + + The artist and album are the most common values of these fields + among `items`. + + The `AlbumMatch` objects are generated by searching the metadata + backends. By default, the metadata of the items is used for the + search. This can be customized by setting the parameters. + `search_ids` is a list of metadata backend IDs: if specified, + it will restrict the candidates to those IDs, ignoring + `search_artist` and `search album`. The `mapping` field of the + album has the matched `items` as keys. + + The recommendation is calculated from the match quality of the + candidates. + """ + # Get current metadata. + likelies, consensus = get_most_common_tags(items) + cur_artist: str = likelies["artist"] + cur_album: str = likelies["album"] + log.debug("Tagging {} - {}", cur_artist, cur_album) + + # The output result, keys are (data_source, album_id) pairs, values are + # AlbumMatch objects. + candidates: Candidates[AlbumMatch] = {} + + # Search by explicit ID. + if search_ids: + log.debug("Searching for album IDs: {}", ", ".join(search_ids)) + for _info in metadata_plugins.albums_for_ids(search_ids): + _add_candidate(items, candidates, _info) + + # Use existing metadata or text search. + else: + # Try search based on current ID. + for info in match_by_id( + likelies["mb_albumid"], consensus["mb_albumid"] + ): + _add_candidate(items, candidates, info) + + rec = _recommendation(list(candidates.values())) + log.debug("Album ID match recommendation is {}", rec) + if candidates and not config["import"]["timid"]: + # If we have a very good MBID match, return immediately. + # Otherwise, this match will compete against metadata-based + # matches. + if rec == Recommendation.strong: + log.debug("ID match.") + return ( + cur_artist, + cur_album, + Proposal(list(candidates.values()), rec), + ) + + # Search terms. + if not (search_artist and search_name): + # No explicit search terms -- use current metadata. + search_artist, search_name = cur_artist, cur_album + log.debug("Search terms: {} - {}", search_artist, search_name) + + # Is this album likely to be a "various artist" release? + va_likely = ( + (not consensus["artist"]) + or (search_artist.lower() in VA_ARTISTS) + or any(item.comp for item in items) + ) + log.debug("Album might be VA: {}", va_likely) + + # Get the results from the data sources. + for matched_candidate in metadata_plugins.candidates( + items, search_artist, search_name, va_likely + ): + _add_candidate(items, candidates, matched_candidate) + + log.debug("Evaluating {} candidates.", len(candidates)) + # Sort and get the recommendation. 
+ candidates_sorted = _sort_candidates(candidates.values()) + rec = _recommendation(candidates_sorted) + return cur_artist, cur_album, Proposal(candidates_sorted, rec) + + +def tag_item( + item, + search_artist: str | None = None, + search_name: str | None = None, + search_ids: list[str] | None = None, +) -> Proposal: + """Find metadata for a single track. Return a `Proposal` consisting + of `TrackMatch` objects. + + `search_artist` and `search_title` may be used to override the item + metadata in the search query. `search_ids` may be used for restricting the + search to a list of metadata backend IDs. + """ + # Holds candidates found so far: keys are (data_source, track_id) pairs, + # values TrackMatch objects + candidates: Candidates[TrackMatch] = {} + rec: Recommendation | None = None + + # First, try matching by the external source ID. + trackids = search_ids or [t for t in [item.mb_trackid] if t] + if trackids: + log.debug("Searching for track IDs: {}", ", ".join(trackids)) + for info in metadata_plugins.tracks_for_ids(trackids): + dist = track_distance(item, info, incl_artist=True) + candidates[info.identifier] = hooks.TrackMatch(dist, info, item) + + # If this is a good match, then don't keep searching. + rec = _recommendation(_sort_candidates(candidates.values())) + if rec == Recommendation.strong and not config["import"]["timid"]: + log.debug("Track ID match.") + return Proposal(_sort_candidates(candidates.values()), rec) + + # If we're searching by ID, don't proceed. + if search_ids: + if candidates: + assert rec is not None + return Proposal(_sort_candidates(candidates.values()), rec) + else: + return Proposal([], Recommendation.none) + + # Search terms. + search_artist = search_artist or item.artist + search_name = search_name or item.title + log.debug("Item search terms: {} - {}", search_artist, search_name) + + # Get and evaluate candidate metadata. + for track_info in metadata_plugins.item_candidates( + item, search_artist, search_name + ): + dist = track_distance(item, track_info, incl_artist=True) + candidates[track_info.identifier] = hooks.TrackMatch( + dist, track_info, item + ) + + # Sort by distance and return with recommendation. + log.debug("Found {} candidates.", len(candidates)) + candidates_sorted = _sort_candidates(candidates.values()) + rec = _recommendation(candidates_sorted) + return Proposal(candidates_sorted, rec) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 7164448bd..9b8c7f9e1 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -1,1309 +1,1309 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
- -from __future__ import annotations - -import logging -import os -import re -import shutil -import subprocess -import time -from collections import defaultdict -from collections.abc import Callable -from enum import Enum -from pathlib import Path -from tempfile import mkdtemp -from typing import TYPE_CHECKING, Any - -import mediafile - -from beets import autotag, config, library, plugins, util -from beets.dbcore.query import PathQuery - -from .state import ImportState - -if TYPE_CHECKING: - from collections.abc import Iterable, Sequence - - from beets.autotag.match import Recommendation - - from .session import ImportSession - -# Global logger. -log = logging.getLogger("beets") - - -SINGLE_ARTIST_THRESH = 0.25 - -# Usually flexible attributes are preserved (i.e., not updated) during -# reimports. The following two lists (globally) change this behaviour for -# certain fields. To alter these lists only when a specific plugin is in use, -# something like this can be used within that plugin's code: -# -# from beets import importer -# def extend_reimport_fresh_fields_item(): -# importer.REIMPORT_FRESH_FIELDS_ITEM.extend(['tidal_track_popularity'] -# ) -REIMPORT_FRESH_FIELDS_ITEM = [ - "data_source", - "bandcamp_album_id", - "spotify_album_id", - "deezer_album_id", - "beatport_album_id", - "tidal_album_id", - "data_url", -] -REIMPORT_FRESH_FIELDS_ALBUM = [*REIMPORT_FRESH_FIELDS_ITEM, "media"] - -# Global logger. -log = logging.getLogger("beets") - - -class ImportAbortError(Exception): - """Raised when the user aborts the tagging operation.""" - - pass - - -class Action(Enum): - """Enumeration of possible actions for an import task.""" - - SKIP = "SKIP" - ASIS = "ASIS" - TRACKS = "TRACKS" - APPLY = "APPLY" - ALBUMS = "ALBUMS" - RETAG = "RETAG" - # The RETAG action represents "don't apply any match, but do record - # new metadata". It's not reachable via the standard command prompt but - # can be used by plugins. - - -class BaseImportTask: - """An abstract base class for importer tasks. - - Tasks flow through the importer pipeline. Each stage can update - them.""" - - toppath: util.PathBytes | None - paths: list[util.PathBytes] - items: list[library.Item] - - def __init__( - self, - toppath: util.PathBytes | None, - paths: Iterable[util.PathBytes] | None, - items: Iterable[library.Item] | None, - ): - """Create a task. The primary fields that define a task are: - - * `toppath`: The user-specified base directory that contains the - music for this task. If the task has *no* user-specified base - (for example, when importing based on an -L query), this can - be None. This is used for tracking progress and history. - * `paths`: A list of *specific* paths where the music for this task - came from. These paths can be directories, when their entire - contents are being imported, or files, when the task comprises - individual tracks. This is used for progress/history tracking and - for displaying the task to the user. - * `items`: A list of `Item` objects representing the music being - imported. - - These fields should not change after initialization. - """ - self.toppath = toppath - self.paths = list(paths) if paths is not None else [] - self.items = list(items) if items is not None else [] - - -class ImportTask(BaseImportTask): - """Represents a single set of items to be imported along with its - intermediate state. May represent an album or a single item. - - The import session and stages call the following methods in the - given order. 
- - * `lookup_candidates()` Sets the `common_artist`, `common_album`, - `candidates`, and `rec` attributes. `candidates` is a list of - `AlbumMatch` objects. - - * `choose_match()` Uses the session to set the `match` attribute - from the `candidates` list. - - * `find_duplicates()` Returns a list of albums from `lib` with the - same artist and album name as the task. - - * `apply_metadata()` Sets the attributes of the items from the - task's `match` attribute. - - * `add()` Add the imported items and album to the database. - - * `manipulate_files()` Copy, move, and write files depending on the - session configuration. - - * `set_fields()` Sets the fields given at CLI or configuration to - the specified values. - - * `finalize()` Update the import progress and cleanup the file - system. - """ - - choice_flag: Action | None = None - match: autotag.AlbumMatch | autotag.TrackMatch | None = None - - # Keep track of the current task item - cur_album: str | None = None - cur_artist: str | None = None - candidates: Sequence[autotag.AlbumMatch | autotag.TrackMatch] = [] - rec: Recommendation | None = None - - def __init__( - self, - toppath: util.PathBytes | None, - paths: Iterable[util.PathBytes] | None, - items: Iterable[library.Item] | None, - ): - super().__init__(toppath, paths, items) - self.should_remove_duplicates = False - self.should_merge_duplicates = False - self.is_album = True - - def set_choice( - self, choice: Action | autotag.AlbumMatch | autotag.TrackMatch - ): - """Given an AlbumMatch or TrackMatch object or an action constant, - indicates that an action has been selected for this task. - - Album and trackmatch are implemented as tuples, so we can't - use isinstance to check for them. - """ - # Not part of the task structure: - assert choice != Action.APPLY # Only used internally. - - if choice in ( - Action.SKIP, - Action.ASIS, - Action.TRACKS, - Action.ALBUMS, - Action.RETAG, - ): - # TODO: redesign to stricten the type - self.choice_flag = choice # type: ignore[assignment] - self.match = None - else: - self.choice_flag = Action.APPLY # Implicit choice. - self.match = choice # type: ignore[assignment] - - def save_progress(self): - """Updates the progress state to indicate that this album has - finished. - """ - if self.toppath: - ImportState().progress_add(self.toppath, *self.paths) - - def save_history(self): - """Save the directory in the history for incremental imports.""" - ImportState().history_add(self.paths) - - # Logical decisions. - - @property - def apply(self): - return self.choice_flag == Action.APPLY - - @property - def skip(self): - return self.choice_flag == Action.SKIP - - # Convenient data. - - def chosen_info(self): - """Return a dictionary of metadata about the current choice. - May only be called when the choice flag is ASIS or RETAG - (in which case the data comes from the files' current metadata) - or APPLY (in which case the data comes from the choice). - """ - if self.choice_flag in (Action.ASIS, Action.RETAG): - likelies, _ = util.get_most_common_tags(self.items) - return likelies - elif self.choice_flag is Action.APPLY and self.match: - return self.match.info.copy() - assert False - - def imported_items(self): - """Return a list of Items that should be added to the library. - - If the tasks applies an album match the method only returns the - matched items. 
- """ - if self.choice_flag in (Action.ASIS, Action.RETAG): - return self.items - elif self.choice_flag == Action.APPLY and isinstance( - self.match, autotag.AlbumMatch - ): - return self.match.items - else: - return [] - - def apply_metadata(self) -> None: - """Copy metadata from match info to the items.""" - if self.match: # TODO: redesign to remove the conditional - self.match.apply_metadata() - - def duplicate_items(self, lib: library.Library): - duplicate_items = [] - for album in self.find_duplicates(lib): - duplicate_items += album.items() - return duplicate_items - - def remove_duplicates(self, lib: library.Library): - duplicate_items = self.duplicate_items(lib) - log.debug("removing {} old duplicated items", len(duplicate_items)) - for item in duplicate_items: - item.remove() - if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {.filepath}", item) - util.remove(item.path) - util.prune_dirs(os.path.dirname(item.path), lib.directory) - - def set_fields(self, lib: library.Library): - """Sets the fields given at CLI or configuration to the specified - values, for both the album and all its items. - """ - items = self.imported_items() - for field, view in config["import"]["set_fields"].items(): - value = str(view.get()) - log.debug( - "Set field {}={} for {}", - field, - value, - util.displayable_path(self.paths), - ) - self.album.set_parse(field, format(self.album, value)) - for item in items: - item.set_parse(field, format(item, value)) - with lib.transaction(): - for item in items: - item.store() - self.album.store() - - def finalize(self, session: ImportSession): - """Save progress, clean up files, and emit plugin event.""" - # Update progress. - if session.want_resume: - self.save_progress() - if session.config["incremental"] and not ( - # Should we skip recording to incremental list? - self.skip and session.config["incremental_skip_later"] - ): - self.save_history() - - self.cleanup( - copy=session.config["copy"], - delete=session.config["delete"], - move=session.config["move"], - ) - - if not self.skip: - self._emit_imported(session.lib) - - def cleanup(self, copy=False, delete=False, move=False): - """Remove and prune imported paths.""" - # Do not delete any files or prune directories when skipping. - if self.skip: - return - - items = self.imported_items() - - # When copying and deleting originals, delete old files. - if copy and delete: - new_paths = [os.path.realpath(item.path) for item in items] - for old_path in self.old_paths: - # Only delete files that were actually copied. - if old_path not in new_paths: - util.remove(old_path, False) - self.prune(old_path) - - # When moving, prune empty directories containing the original files. - elif move: - for old_path in self.old_paths: - self.prune(old_path) - - def _emit_imported(self, lib: library.Library): - plugins.send("album_imported", lib=lib, album=self.album) - - def handle_created(self, session: ImportSession): - """Send the `import_task_created` event for this task. Return a list of - tasks that should continue through the pipeline. By default, this is a - list containing only the task itself, but plugins can replace the task - with new ones. - """ - tasks = plugins.send("import_task_created", session=session, task=self) - if not tasks: - tasks = [self] - else: - # The plugins gave us a list of lists of tasks. Flatten it. 
- tasks = [t for inner in tasks for t in inner] - return tasks - - def lookup_candidates(self, search_ids: list[str]) -> None: - """Retrieve and store candidates for this album. - - If User-specified ``search_ids`` list is not empty, the lookup is - restricted to only those IDs. - """ - self.cur_artist, self.cur_album, (self.candidates, self.rec) = ( - autotag.tag_album(self.items, search_ids=search_ids) - ) - - def find_duplicates(self, lib: library.Library) -> list[library.Album]: - """Return a list of albums from `lib` with the same artist and - album name as the task. - """ - info = self.chosen_info() - info["albumartist"] = info["artist"] - - if info["artist"] is None: - # As-is import with no artist. Skip check. - return [] - - # Construct a query to find duplicates with this metadata. We - # use a temporary Album object to generate any computed fields. - tmp_album = library.Album(lib, **info) - keys: list[str] = config["import"]["duplicate_keys"][ - "album" - ].as_str_seq() - dup_query = tmp_album.duplicates_query(keys) - - # Don't count albums with the same files as duplicates. - task_paths = {i.path for i in self.items if i} - - duplicates = [] - for album in lib.albums(dup_query): - # Check whether the album paths are all present in the task - # i.e. album is being completely re-imported by the task, - # in which case it is not a duplicate (will be replaced). - album_paths = {i.path for i in album.items()} - if not (album_paths <= task_paths): - duplicates.append(album) - - return duplicates - - def align_album_level_fields(self): - """Make some album fields equal across `self.items`. For the - RETAG action, we assume that the responsible for returning it - (ie. a plugin) always ensures that the first item contains - valid data on the relevant fields. - """ - changes = {} - - if self.choice_flag == Action.ASIS: - # Taking metadata "as-is". Guess whether this album is VA. - plur_albumartist, freq = util.plurality( - [i.albumartist or i.artist for i in self.items] - ) - if freq == len(self.items) or ( - freq > 1 - and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH - ): - # Single-artist album. - changes["albumartist"] = plur_albumartist - changes["comp"] = False - else: - # VA. - changes["albumartist"] = config["va_name"].as_str() - changes["comp"] = True - - elif self.choice_flag in (Action.APPLY, Action.RETAG): - # Applying autotagged metadata. Just get AA from the first - # item. - first = self.items[0] - if not first.albumartist: - changes["albumartist"] = first.artist - if not first.albumartists: - changes["albumartists"] = first.artists or [first.artist] - if not first.mb_albumartistid: - changes["mb_albumartistid"] = first.mb_artistid - if not first.mb_albumartistids: - changes["mb_albumartistids"] = first.mb_artistids or [ - first.mb_artistid - ] - - # Apply new metadata. - for item in self.items: - item.update(changes) - - def manipulate_files( - self, - session: ImportSession, - operation: util.MoveOperation | None = None, - write=False, - ): - """Copy, move, link, hardlink or reflink (depending on `operation`) - the files as well as write metadata. - - `operation` should be an instance of `util.MoveOperation`. - - If `write` is `True` metadata is written to the files. - # TODO: Introduce a MoveOperation.NONE or SKIP - """ - - items = self.imported_items() - # Save the original paths of all items for deletion and pruning - # in the next step (finalization). 
- self.old_paths: list[util.PathBytes] = [item.path for item in items] - for item in items: - if operation is not None: - # In copy and link modes, treat re-imports specially: - # move in-library files. (Out-of-library files are - # copied/moved as usual). - old_path = item.path - if ( - operation != util.MoveOperation.MOVE - and self.replaced_items[item] - and session.lib.directory in util.ancestry(old_path) - ): - item.move() - # We moved the item, so remove the - # now-nonexistent file from old_paths. - self.old_paths.remove(old_path) - else: - # A normal import. Just copy files and keep track of - # old paths. - item.move(operation) - - if write and (self.apply or self.choice_flag == Action.RETAG): - item.try_write() - - with session.lib.transaction(): - for item in self.imported_items(): - item.store() - - plugins.send("import_task_files", session=session, task=self) - - def add(self, lib: library.Library): - """Add the items as an album to the library and remove replaced items.""" - self.align_album_level_fields() - with lib.transaction(): - self.record_replaced(lib) - self.remove_replaced(lib) - - self.album = lib.add_album(self.imported_items()) - if self.choice_flag == Action.APPLY and isinstance( - self.match, autotag.AlbumMatch - ): - # Copy album flexible fields to the DB - # TODO: change the flow so we create the `Album` object earlier, - # and we can move this into `self.apply_metadata`, just like - # is done for tracks. - self.match.apply_album_metadata(self.album) - self.album.store() - - self.reimport_metadata(lib) - - def record_replaced(self, lib: library.Library): - """Records the replaced items and albums in the `replaced_items` - and `replaced_albums` dictionaries. - """ - self.replaced_items = defaultdict(list) - self.replaced_albums: dict[util.PathBytes, library.Album] = ( - defaultdict() - ) - replaced_album_ids = set() - for item in self.imported_items(): - dup_items = list(lib.items(query=PathQuery("path", item.path))) - self.replaced_items[item] = dup_items - for dup_item in dup_items: - if ( - not dup_item.album_id - or dup_item.album_id in replaced_album_ids - ): - continue - replaced_album = dup_item._cached_album - if replaced_album: - replaced_album_ids.add(dup_item.album_id) - self.replaced_albums[replaced_album.path] = replaced_album - - def reimport_metadata(self, lib: library.Library): - """For reimports, preserves metadata for reimported items and - albums. - """ - - def _reduce_and_log(new_obj, existing_fields, overwrite_keys): - """Some flexible attributes should be overwritten (rather than - preserved) on reimports; Copies existing_fields, logs and removes - entries that should not be preserved and returns a dict containing - those fields left to actually be preserved. - """ - noun = "album" if isinstance(new_obj, library.Album) else "item" - existing_fields = dict(existing_fields) - overwritten_fields = [ - k - for k in existing_fields - if k in overwrite_keys - and new_obj.get(k) - and existing_fields.get(k) != new_obj.get(k) - ] - if overwritten_fields: - log.debug( - "Reimported {0} {1.id}. Not preserving flexible attributes {2}. 
" - "Path: {1.filepath}", - noun, - new_obj, - overwritten_fields, - ) - for key in overwritten_fields: - del existing_fields[key] - return existing_fields - - if self.is_album: - replaced_album = self.replaced_albums.get(self.album.path) - if replaced_album: - album_fields = _reduce_and_log( - self.album, - replaced_album._values_flex, - REIMPORT_FRESH_FIELDS_ALBUM, - ) - self.album.added = replaced_album.added - self.album.update(album_fields) - self.album.artpath = replaced_album.artpath - self.album.store() - log.debug( - "Reimported album {0.album.id}. Preserving attribute ['added']. " - "Path: {0.album.filepath}", - self, - ) - log.debug( - "Reimported album {0.album.id}. Preserving flexible" - " attributes {1}. Path: {0.album.filepath}", - self, - list(album_fields.keys()), - ) - - for item in self.imported_items(): - dup_items = self.replaced_items[item] - for dup_item in dup_items: - if dup_item.added and dup_item.added != item.added: - item.added = dup_item.added - log.debug( - "Reimported item {0.id}. Preserving attribute ['added']. " - "Path: {0.filepath}", - item, - ) - item_fields = _reduce_and_log( - item, dup_item._values_flex, REIMPORT_FRESH_FIELDS_ITEM - ) - item.update(item_fields) - log.debug( - "Reimported item {0.id}. Preserving flexible attributes {1}. " - "Path: {0.filepath}", - item, - list(item_fields.keys()), - ) - item.store() - - def remove_replaced(self, lib): - """Removes all the items from the library that have the same - path as an item from this task. - """ - for item in self.imported_items(): - for dup_item in self.replaced_items[item]: - log.debug("Replacing item {.id}: {.filepath}", dup_item, item) - dup_item.remove() - log.debug( - "{} of {} items replaced", - sum(bool(v) for v in self.replaced_items.values()), - len(self.imported_items()), - ) - - def choose_match(self, session): - """Ask the session which match should apply and apply it.""" - choice = session.choose_match(self) - self.set_choice(choice) - session.log_choice(self) - - def reload(self): - """Reload albums and items from the database.""" - for item in self.imported_items(): - item.load() - self.album.load() - - # Utilities. - - def prune(self, filename): - """Prune any empty directories above the given file. If this - task has no `toppath` or the file path provided is not within - the `toppath`, then this function has no effect. Similarly, if - the file still exists, no pruning is performed, so it's safe to - call when the file in question may not have been removed. - """ - if self.toppath and not os.path.exists(util.syspath(filename)): - util.prune_dirs( - os.path.dirname(filename), - self.toppath, - clutter=config["clutter"].as_str_seq(), - ) - - -class SingletonImportTask(ImportTask): - """ImportTask for a single track that is not associated to an album.""" - - def __init__(self, toppath: util.PathBytes | None, item: library.Item): - super().__init__(toppath, [item.path], [item]) - self.item = item - self.is_album = False - self.paths = [item.path] - - def chosen_info(self): - """Return a dictionary of metadata about the current choice. - May only be called when the choice flag is ASIS or RETAG - (in which case the data comes from the files' current metadata) - or APPLY (in which case the data comes from the choice). 
- """ - assert self.choice_flag in (Action.ASIS, Action.RETAG, Action.APPLY) - if self.choice_flag in (Action.ASIS, Action.RETAG): - return dict(self.item) - elif self.choice_flag is Action.APPLY: - return self.match.info.copy() - - def imported_items(self): - return [self.item] - - def _emit_imported(self, lib): - for item in self.imported_items(): - plugins.send("item_imported", lib=lib, item=item) - - def lookup_candidates(self, search_ids: list[str]) -> None: - self.candidates, self.rec = autotag.tag_item( - self.item, search_ids=search_ids - ) - - def find_duplicates(self, lib: library.Library) -> list[library.Item]: # type: ignore[override] # Need splitting Singleton and Album tasks into separate classes - """Return a list of items from `lib` that have the same artist - and title as the task. - """ - info = self.chosen_info() - - # Query for existing items using the same metadata. We use a - # temporary `Item` object to generate any computed fields. - tmp_item = library.Item(lib, **info) - keys: list[str] = config["import"]["duplicate_keys"][ - "item" - ].as_str_seq() - dup_query = tmp_item.duplicates_query(keys) - - found_items = [] - for other_item in lib.items(dup_query): - # Existing items not considered duplicates. - if other_item.path != self.item.path: - found_items.append(other_item) - return found_items - - duplicate_items = find_duplicates - - def add(self, lib): - with lib.transaction(): - self.record_replaced(lib) - self.remove_replaced(lib) - lib.add(self.item) - self.reimport_metadata(lib) - - def infer_album_fields(self): - raise NotImplementedError - - def choose_match(self, session: ImportSession): - """Ask the session which match should apply and apply it.""" - choice = session.choose_item(self) - self.set_choice(choice) - session.log_choice(self) - - def reload(self): - self.item.load() - - def set_fields(self, lib): - """Sets the fields given at CLI or configuration to the specified - values, for the singleton item. - """ - for field, view in config["import"]["set_fields"].items(): - value = str(view.get()) - log.debug( - "Set field {}={} for {}", - field, - value, - util.displayable_path(self.paths), - ) - self.item.set_parse(field, format(self.item, value)) - self.item.store() - - -# FIXME The inheritance relationships are inverted. This is why there -# are so many methods which pass. More responsibility should be delegated to -# the BaseImportTask class. -class SentinelImportTask(ImportTask): - """A sentinel task marks the progress of an import and does not - import any items itself. - - If only `toppath` is set the task indicates the end of a top-level - directory import. If the `paths` argument is also given, the task - indicates the progress in the `toppath` import. - """ - - def __init__(self, toppath, paths): - super().__init__(toppath, paths, ()) - # TODO Remove the remaining attributes eventually - self.should_remove_duplicates = False - self.is_album = True - self.choice_flag = None - - def save_history(self): - pass - - def save_progress(self): - if not self.paths: - # "Done" sentinel. 
- ImportState().progress_reset(self.toppath) - elif self.toppath: - # "Directory progress" sentinel for singletons - super().save_progress() - - @property - def skip(self) -> bool: - return True - - def set_choice(self, choice): - raise NotImplementedError - - def cleanup(self, copy=False, delete=False, move=False): - pass - - def _emit_imported(self, lib): - pass - - -ArchiveHandler = tuple[ - Callable[[util.StrPath], bool], Callable[[util.StrPath], Any] -] - - -class ArchiveImportTask(SentinelImportTask): - """An import task that represents the processing of an archive. - - `toppath` must be a `zip`, `tar`, or `rar` archive. Archive tasks - serve two purposes: - - First, it will unarchive the files to a temporary directory and - return it. The client should read tasks from the resulting - directory and send them through the pipeline. - - Second, it will clean up the temporary directory when it proceeds - through the pipeline. The client should send the archive task - after sending the rest of the music tasks to make this work. - """ - - def __init__(self, toppath): - super().__init__(toppath, ()) - self.extracted = False - - @classmethod - def is_archive(cls, path): - """Returns true if the given path points to an archive that can - be handled. - """ - if not os.path.isfile(path): - return False - - for path_test, _ in cls.handlers: - if path_test(os.fsdecode(path)): - return True - return False - - @util.cached_classproperty - def handlers(cls) -> list[ArchiveHandler]: - """Returns a list of archive handlers. - - Each handler is a `(path_test, ArchiveClass)` tuple. `path_test` - is a function that returns `True` if the given path can be - handled by `ArchiveClass`. `ArchiveClass` is a class that - implements the same interface as `tarfile.TarFile`. - """ - _handlers: list[ArchiveHandler] = [] - from zipfile import ZipFile, is_zipfile - - _handlers.append((is_zipfile, ZipFile)) - import tarfile - - _handlers.append((tarfile.is_tarfile, tarfile.open)) - try: - from rarfile import RarFile, is_rarfile - except ImportError: - pass - else: - _handlers.append((is_rarfile, RarFile)) - try: - from py7zr import SevenZipFile, is_7zfile - except ImportError: - pass - else: - _handlers.append((is_7zfile, SevenZipFile)) - - return _handlers - - def cleanup(self, copy=False, delete=False, move=False): - """Removes the temporary directory the archive was extracted to.""" - if self.extracted and self.toppath: - log.debug( - "Removing extracted directory: {}", - util.displayable_path(self.toppath), - ) - shutil.rmtree(util.syspath(self.toppath)) - - def extract(self): - """Extracts the archive to a temporary directory and sets - `toppath` to that directory. - """ - assert self.toppath is not None, "toppath must be set" - - for path_test, handler_class in self.handlers: - if path_test(os.fsdecode(self.toppath)): - break - else: - raise ValueError(f"No handler found for archive: {self.toppath}") - extract_to = mkdtemp() - archive = handler_class(os.fsdecode(self.toppath), mode="r") - try: - archive.extractall(extract_to) - - # Adjust the files' mtimes to match the information from the - # archive. Inspired by: https://stackoverflow.com/q/9813243 - for f in archive.infolist(): - # The date_time will need to adjusted otherwise - # the item will have the current date_time of extraction. - # The (0, 0, -1) is added to date_time because the - # function time.mktime expects a 9-element tuple. - # The -1 indicates that the DST flag is unknown. 
- date_time = time.mktime((*f.date_time, 0, 0, -1)) - fullpath = os.path.join(extract_to, f.filename) - os.utime(fullpath, (date_time, date_time)) - - finally: - archive.close() - self.extracted = True - self.toppath = extract_to - - -class ImportTaskFactory: - """Generate album and singleton import tasks for all media files - indicated by a path. - """ - - def __init__(self, toppath: util.PathBytes, session: ImportSession): - """Create a new task factory. - - `toppath` is the user-specified path to search for music to - import. `session` is the `ImportSession`, which controls how - tasks are read from the directory. - """ - self.toppath = toppath - self.session = session - self.skipped = 0 # Skipped due to incremental/resume. - self.imported = 0 # "Real" tasks created. - self.is_archive = ArchiveImportTask.is_archive(util.syspath(toppath)) - - def tasks(self) -> Iterable[ImportTask]: - """Yield all import tasks for music found in the user-specified - path `self.toppath`. Any necessary sentinel tasks are also - produced. - - During generation, update `self.skipped` and `self.imported` - with the number of tasks that were not produced (due to - incremental mode or resumed imports) and the number of concrete - tasks actually produced, respectively. - - If `self.toppath` is an archive, it is adjusted to point to the - extracted data. - """ - # Check whether this is an archive. - archive_task: ArchiveImportTask | None = None - if self.is_archive: - archive_task = self.unarchive() - if not archive_task: - return - - # Search for music in the directory. - for dirs, paths in self.paths(): - if self.session.config["singletons"]: - for path in paths: - tasks = self._create(self.singleton(path)) - yield from tasks - yield self.sentinel(dirs) - - else: - tasks = self._create(self.album(paths, dirs)) - yield from tasks - - # Produce the final sentinel for this toppath to indicate that - # it is finished. This is usually just a SentinelImportTask, but - # for archive imports, send the archive task instead (to remove - # the extracted directory). - yield archive_task or self.sentinel() - - def _create(self, task: ImportTask | None): - """Handle a new task to be emitted by the factory. - - Emit the `import_task_created` event and increment the - `imported` count if the task is not skipped. Return the same - task. If `task` is None, do nothing. - """ - if task: - tasks = task.handle_created(self.session) - self.imported += len(tasks) - return tasks - return [] - - def paths(self): - """Walk `self.toppath` and yield `(dirs, files)` pairs where - `files` are individual music files and `dirs` the set of - containing directories where the music was found. - - This can either be a recursive search in the ordinary case, a - single track when `toppath` is a file, a single directory in - `flat` mode. 
- """ - if not os.path.isdir(util.syspath(self.toppath)): - yield [self.toppath], [self.toppath] - elif self.session.config["flat"]: - paths = [] - for dirs, paths_in_dir in albums_in_dir(self.toppath): - paths += paths_in_dir - yield [self.toppath], paths - else: - for dirs, paths in albums_in_dir(self.toppath): - yield dirs, paths - - def singleton(self, path: util.PathBytes): - """Return a `SingletonImportTask` for the music file.""" - if self.session.already_imported(self.toppath, [path]): - log.debug( - "Skipping previously-imported path: {}", - util.displayable_path(path), - ) - self.skipped += 1 - return None - - item = self.read_item(path) - if item: - return SingletonImportTask(self.toppath, item) - else: - return None - - def album(self, paths: Iterable[util.PathBytes], dirs=None): - """Return a `ImportTask` with all media files from paths. - - `dirs` is a list of parent directories used to record already - imported albums. - """ - - if dirs is None: - dirs = list({os.path.dirname(p) for p in paths}) - - if self.session.already_imported(self.toppath, dirs): - log.debug( - "Skipping previously-imported path: {}", - util.displayable_path(dirs), - ) - self.skipped += 1 - return None - - items: list[library.Item] = [ - item for item in map(self.read_item, paths) if item - ] - - if len(items) > 0: - return ImportTask(self.toppath, dirs, items) - else: - return None - - def sentinel(self, paths: Iterable[util.PathBytes] | None = None): - """Return a `SentinelImportTask` indicating the end of a - top-level directory import. - """ - return SentinelImportTask(self.toppath, paths) - - def unarchive(self): - """Extract the archive for this `toppath`. - - Extract the archive to a new directory, adjust `toppath` to - point to the extracted directory, and return an - `ArchiveImportTask`. If extraction fails, return None. - """ - assert self.is_archive - - if not (self.session.config["move"] or self.session.config["copy"]): - log.warning( - "Archive importing requires either " - "'copy' or 'move' to be enabled." - ) - return - - log.debug("Extracting archive: {}", util.displayable_path(self.toppath)) - archive_task = ArchiveImportTask(self.toppath) - try: - archive_task.extract() - except Exception as exc: - log.error("extraction failed: {}", exc) - return - - # Now read albums from the extracted directory. - self.toppath = archive_task.toppath - log.debug("Archive extracted to: {.toppath}", self) - return archive_task - - def read_item(self, path: util.PathBytes): - """Return an `Item` read from the path. - - If an item cannot be read, return `None` instead and log an - error. - """ - - # Check if the file has an extension, - # Add an extension if there isn't one. - if os.path.isfile(path): - path = self.check_extension(path) - - try: - return library.Item.from_path(path) - except library.ReadError as exc: - if isinstance(exc.reason, mediafile.FileTypeError): - # Silently ignore non-music files. 
- pass - elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warning("unreadable file: {}", util.displayable_path(path)) - else: - log.error( - "error reading {}: {}", util.displayable_path(path), exc - ) - - def check_extension(self, path: util.PathBytes): - path = Path(os.fsdecode(path)) - # if there is an extension, ignore - if path.suffix != "": - return path - - # no extension detected - # use ffprobe to find the format - formats = [] - output = subprocess.run( - [ - "ffprobe", - "-hide_banner", - "-loglevel", - "fatal", - "-show_format", - "--", - str(path), - ], - capture_output=True, - ) - out = output.stdout.decode("utf-8") - err = output.stderr.decode("utf-8") - if err != "": - log.error("ffprobe error: %s", err) - for line in out.split("\n"): - if line.startswith("format_name="): - formats = line.split("=")[1].split(",") - # a list of audio formats I got from wikipedia https://en.wikipedia.org/wiki/Audio_file_format - wiki_formats = [ - "3gp", - "aa", - "aac", - "aax", - "act", - "aiff", - "alac", - "amr", - "ape", - "au", - "awb", - "dss", - "dvf", - "flac", - "gsm", - "iklax", - "ivs", - "m4a", - "m4b", - "m4p", - "mmf", - "movpkg", - "mp1", - "mp2", - "mp3", - "mpc", - "msv", - "nmf", - "ogg", - "oga", - "mogg", - "opus", - "ra", - "rm", - "raw", - "rf64", - "sln", - "tta", - "voc", - "vox", - "wav", - "wma", - "wv", - "webm", - "8svx", - "cda", - ] - detected_format = "" - # The first format from ffprobe that is on this list is taken - for f in formats: - if f in wiki_formats: - detected_format = f - break - - # if ffprobe can't find a format, the file is prob not music - if detected_format == "": - return path - - # cp and add ext. If already exist, use that file - # assume, for example, the only diff between 'asdf.mp3' and 'asdf' is format - new_path = path.with_suffix("." + detected_format) - if not new_path.exists(): - util.move(path, new_path) - else: - log.info("Import file with matching format to original target") - return new_path - - -MULTIDISC_MARKERS = (rb"dis[ck]", rb"cd") -MULTIDISC_PAT_FMT = rb"^(.*%s[\W_]*)\d" - - -def is_subdir_of_any_in_list(path, dirs): - """Returns True if path os a subdirectory of any directory in dirs - (a list). In other case, returns False. - """ - ancestors = util.ancestry(path) - return any(d in ancestors for d in dirs) - - -def albums_in_dir(path: util.PathBytes): - """Recursively searches the given directory and returns an iterable - of (paths, items) where paths is a list of directories and items is - a list of Items that is probably an album. Specifically, any folder - containing any media files is an album. - """ - collapse_paths: list[util.PathBytes] = [] - collapse_items: list[util.PathBytes] = [] - collapse_pat = None - - ignore: list[str] = config["ignore"].as_str_seq() - ignore_hidden: bool = config["ignore_hidden"].get(bool) - - for root, dirs, files in util.sorted_walk( - path, ignore=ignore, ignore_hidden=ignore_hidden, logger=log - ): - items = [os.path.join(root, f) for f in files] - # If we're currently collapsing the constituent directories in a - # multi-disc album, check whether we should continue collapsing - # and add the current directory. If so, just add the directory - # and move on to the next directory. If not, stop collapsing. - if collapse_paths: - if (is_subdir_of_any_in_list(root, collapse_paths)) or ( - collapse_pat and collapse_pat.match(os.path.basename(root)) - ): - # Still collapsing. - collapse_paths.append(root) - collapse_items += items - continue - else: - # Collapse finished. 
Yield the collapsed directory and - # proceed to process the current one. - if collapse_items: - yield collapse_paths, collapse_items - collapse_pat, collapse_paths, collapse_items = None, [], [] - - # Check whether this directory looks like the *first* directory - # in a multi-disc sequence. There are two indicators: the file - # is named like part of a multi-disc sequence (e.g., "Title Disc - # 1") or it contains no items but only directories that are - # named in this way. - start_collapsing = False - for marker in MULTIDISC_MARKERS: - # We're using replace on %s due to lack of .format() on bytestrings - p = MULTIDISC_PAT_FMT.replace(b"%s", marker) - marker_pat = re.compile(p, re.I) - match = marker_pat.match(os.path.basename(root)) - - # Is this directory the root of a nested multi-disc album? - if dirs and not items: - # Check whether all subdirectories have the same prefix. - start_collapsing = True - subdir_pat = None - for subdir in dirs: - subdir = util.bytestring_path(subdir) - # The first directory dictates the pattern for - # the remaining directories. - if not subdir_pat: - match = marker_pat.match(subdir) - if match: - match_group = re.escape(match.group(1)) - subdir_pat = re.compile( - b"".join([b"^", match_group, rb"\d"]), re.I - ) - else: - start_collapsing = False - break - - # Subsequent directories must match the pattern. - elif not subdir_pat.match(subdir): - start_collapsing = False - break - - # If all subdirectories match, don't check other - # markers. - if start_collapsing: - break - - # Is this directory the first in a flattened multi-disc album? - elif match: - start_collapsing = True - # Set the current pattern to match directories with the same - # prefix as this one, followed by a digit. - collapse_pat = re.compile( - b"".join([b"^", re.escape(match.group(1)), rb"\d"]), re.I - ) - break - - # If either of the above heuristics indicated that this is the - # beginning of a multi-disc album, initialize the collapsed - # directory and item lists and check the next directory. - if start_collapsing: - # Start collapsing; continue to the next iteration. - collapse_paths = [root] - collapse_items = items - continue - - # If it's nonempty, yield it. - if items: - yield [root], items - - # Clear out any unfinished collapse. - if collapse_paths and collapse_items: - yield collapse_paths, collapse_items +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +from __future__ import annotations + +import logging +import os +import re +import shutil +import subprocess +import time +from collections import defaultdict +from collections.abc import Callable +from enum import Enum +from pathlib import Path +from tempfile import mkdtemp +from typing import TYPE_CHECKING, Any + +import mediafile + +from beets import autotag, config, library, plugins, util +from beets.dbcore.query import PathQuery + +from .state import ImportState + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + + from beets.autotag.match import Recommendation + + from .session import ImportSession + +# Global logger. +log = logging.getLogger("beets") + + +SINGLE_ARTIST_THRESH = 0.25 + +# Usually flexible attributes are preserved (i.e., not updated) during +# reimports. The following two lists (globally) change this behaviour for +# certain fields. To alter these lists only when a specific plugin is in use, +# something like this can be used within that plugin's code: +# +# from beets import importer +# def extend_reimport_fresh_fields_item(): +# importer.REIMPORT_FRESH_FIELDS_ITEM.extend(['tidal_track_popularity'] +# ) +REIMPORT_FRESH_FIELDS_ITEM = [ + "data_source", + "bandcamp_album_id", + "spotify_album_id", + "deezer_album_id", + "beatport_album_id", + "tidal_album_id", + "data_url", +] +REIMPORT_FRESH_FIELDS_ALBUM = [*REIMPORT_FRESH_FIELDS_ITEM, "media"] + +# Global logger. +log = logging.getLogger("beets") + + +class ImportAbortError(Exception): + """Raised when the user aborts the tagging operation.""" + + pass + + +class Action(Enum): + """Enumeration of possible actions for an import task.""" + + SKIP = "SKIP" + ASIS = "ASIS" + TRACKS = "TRACKS" + APPLY = "APPLY" + ALBUMS = "ALBUMS" + RETAG = "RETAG" + # The RETAG action represents "don't apply any match, but do record + # new metadata". It's not reachable via the standard command prompt but + # can be used by plugins. + + +class BaseImportTask: + """An abstract base class for importer tasks. + + Tasks flow through the importer pipeline. Each stage can update + them.""" + + toppath: util.PathBytes | None + paths: list[util.PathBytes] + items: list[library.Item] + + def __init__( + self, + toppath: util.PathBytes | None, + paths: Iterable[util.PathBytes] | None, + items: Iterable[library.Item] | None, + ): + """Create a task. The primary fields that define a task are: + + * `toppath`: The user-specified base directory that contains the + music for this task. If the task has *no* user-specified base + (for example, when importing based on an -L query), this can + be None. This is used for tracking progress and history. + * `paths`: A list of *specific* paths where the music for this task + came from. These paths can be directories, when their entire + contents are being imported, or files, when the task comprises + individual tracks. This is used for progress/history tracking and + for displaying the task to the user. + * `items`: A list of `Item` objects representing the music being + imported. + + These fields should not change after initialization. + """ + self.toppath = toppath + self.paths = list(paths) if paths is not None else [] + self.items = list(items) if items is not None else [] + + +class ImportTask(BaseImportTask): + """Represents a single set of items to be imported along with its + intermediate state. May represent an album or a single item. + + The import session and stages call the following methods in the + given order. 
+ + * `lookup_candidates()` Sets the `common_artist`, `common_album`, + `candidates`, and `rec` attributes. `candidates` is a list of + `AlbumMatch` objects. + + * `choose_match()` Uses the session to set the `match` attribute + from the `candidates` list. + + * `find_duplicates()` Returns a list of albums from `lib` with the + same artist and album name as the task. + + * `apply_metadata()` Sets the attributes of the items from the + task's `match` attribute. + + * `add()` Add the imported items and album to the database. + + * `manipulate_files()` Copy, move, and write files depending on the + session configuration. + + * `set_fields()` Sets the fields given at CLI or configuration to + the specified values. + + * `finalize()` Update the import progress and cleanup the file + system. + """ + + choice_flag: Action | None = None + match: autotag.AlbumMatch | autotag.TrackMatch | None = None + + # Keep track of the current task item + cur_album: str | None = None + cur_artist: str | None = None + candidates: Sequence[autotag.AlbumMatch | autotag.TrackMatch] = [] + rec: Recommendation | None = None + + def __init__( + self, + toppath: util.PathBytes | None, + paths: Iterable[util.PathBytes] | None, + items: Iterable[library.Item] | None, + ): + super().__init__(toppath, paths, items) + self.should_remove_duplicates = False + self.should_merge_duplicates = False + self.is_album = True + + def set_choice( + self, choice: Action | autotag.AlbumMatch | autotag.TrackMatch + ): + """Given an AlbumMatch or TrackMatch object or an action constant, + indicates that an action has been selected for this task. + + Album and trackmatch are implemented as tuples, so we can't + use isinstance to check for them. + """ + # Not part of the task structure: + assert choice != Action.APPLY # Only used internally. + + if choice in ( + Action.SKIP, + Action.ASIS, + Action.TRACKS, + Action.ALBUMS, + Action.RETAG, + ): + # TODO: redesign to stricten the type + self.choice_flag = choice # type: ignore[assignment] + self.match = None + else: + self.choice_flag = Action.APPLY # Implicit choice. + self.match = choice # type: ignore[assignment] + + def save_progress(self): + """Updates the progress state to indicate that this album has + finished. + """ + if self.toppath: + ImportState().progress_add(self.toppath, *self.paths) + + def save_history(self): + """Save the directory in the history for incremental imports.""" + ImportState().history_add(self.paths) + + # Logical decisions. + + @property + def apply(self): + return self.choice_flag == Action.APPLY + + @property + def skip(self): + return self.choice_flag == Action.SKIP + + # Convenient data. + + def chosen_info(self): + """Return a dictionary of metadata about the current choice. + May only be called when the choice flag is ASIS or RETAG + (in which case the data comes from the files' current metadata) + or APPLY (in which case the data comes from the choice). + """ + if self.choice_flag in (Action.ASIS, Action.RETAG): + likelies, _ = util.get_most_common_tags(self.items) + return likelies + elif self.choice_flag is Action.APPLY and self.match: + return self.match.info.copy() + assert False + + def imported_items(self): + """Return a list of Items that should be added to the library. + + If the tasks applies an album match the method only returns the + matched items. 
+ """ + if self.choice_flag in (Action.ASIS, Action.RETAG): + return self.items + elif self.choice_flag == Action.APPLY and isinstance( + self.match, autotag.AlbumMatch + ): + return self.match.items + else: + return [] + + def apply_metadata(self) -> None: + """Copy metadata from match info to the items.""" + if self.match: # TODO: redesign to remove the conditional + self.match.apply_metadata() + + def duplicate_items(self, lib: library.Library): + duplicate_items = [] + for album in self.find_duplicates(lib): + duplicate_items += album.items() + return duplicate_items + + def remove_duplicates(self, lib: library.Library): + duplicate_items = self.duplicate_items(lib) + log.debug("removing {} old duplicated items", len(duplicate_items)) + for item in duplicate_items: + item.remove() + if lib.directory in util.ancestry(item.path): + log.debug("deleting duplicate {.filepath}", item) + util.remove(item.path) + util.prune_dirs(os.path.dirname(item.path), lib.directory) + + def set_fields(self, lib: library.Library): + """Sets the fields given at CLI or configuration to the specified + values, for both the album and all its items. + """ + items = self.imported_items() + for field, view in config["import"]["set_fields"].items(): + value = str(view.get()) + log.debug( + "Set field {}={} for {}", + field, + value, + util.displayable_path(self.paths), + ) + self.album.set_parse(field, format(self.album, value)) + for item in items: + item.set_parse(field, format(item, value)) + with lib.transaction(): + for item in items: + item.store() + self.album.store() + + def finalize(self, session: ImportSession): + """Save progress, clean up files, and emit plugin event.""" + # Update progress. + if session.want_resume: + self.save_progress() + if session.config["incremental"] and not ( + # Should we skip recording to incremental list? + self.skip and session.config["incremental_skip_later"] + ): + self.save_history() + + self.cleanup( + copy=session.config["copy"], + delete=session.config["delete"], + move=session.config["move"], + ) + + if not self.skip: + self._emit_imported(session.lib) + + def cleanup(self, copy=False, delete=False, move=False): + """Remove and prune imported paths.""" + # Do not delete any files or prune directories when skipping. + if self.skip: + return + + items = self.imported_items() + + # When copying and deleting originals, delete old files. + if copy and delete: + new_paths = [os.path.realpath(item.path) for item in items] + for old_path in self.old_paths: + # Only delete files that were actually copied. + if old_path not in new_paths: + util.remove(old_path, False) + self.prune(old_path) + + # When moving, prune empty directories containing the original files. + elif move: + for old_path in self.old_paths: + self.prune(old_path) + + def _emit_imported(self, lib: library.Library): + plugins.send("album_imported", lib=lib, album=self.album) + + def handle_created(self, session: ImportSession): + """Send the `import_task_created` event for this task. Return a list of + tasks that should continue through the pipeline. By default, this is a + list containing only the task itself, but plugins can replace the task + with new ones. + """ + tasks = plugins.send("import_task_created", session=session, task=self) + if not tasks: + tasks = [self] + else: + # The plugins gave us a list of lists of tasks. Flatten it. 
+ tasks = [t for inner in tasks for t in inner] + return tasks + + def lookup_candidates(self, search_ids: list[str]) -> None: + """Retrieve and store candidates for this album. + + If User-specified ``search_ids`` list is not empty, the lookup is + restricted to only those IDs. + """ + self.cur_artist, self.cur_album, (self.candidates, self.rec) = ( + autotag.tag_album(self.items, search_ids=search_ids) + ) + + def find_duplicates(self, lib: library.Library) -> list[library.Album]: + """Return a list of albums from `lib` with the same artist and + album name as the task. + """ + info = self.chosen_info() + info["albumartist"] = info["artist"] + + if info["artist"] is None: + # As-is import with no artist. Skip check. + return [] + + # Construct a query to find duplicates with this metadata. We + # use a temporary Album object to generate any computed fields. + tmp_album = library.Album(lib, **info) + keys: list[str] = config["import"]["duplicate_keys"][ + "album" + ].as_str_seq() + dup_query = tmp_album.duplicates_query(keys) + + # Don't count albums with the same files as duplicates. + task_paths = {i.path for i in self.items if i} + + duplicates = [] + for album in lib.albums(dup_query): + # Check whether the album paths are all present in the task + # i.e. album is being completely re-imported by the task, + # in which case it is not a duplicate (will be replaced). + album_paths = {i.path for i in album.items()} + if not (album_paths <= task_paths): + duplicates.append(album) + + return duplicates + + def align_album_level_fields(self): + """Make some album fields equal across `self.items`. For the + RETAG action, we assume that the responsible for returning it + (ie. a plugin) always ensures that the first item contains + valid data on the relevant fields. + """ + changes = {} + + if self.choice_flag == Action.ASIS: + # Taking metadata "as-is". Guess whether this album is VA. + plur_albumartist, freq = util.plurality( + [i.albumartist or i.artist for i in self.items] + ) + if freq == len(self.items) or ( + freq > 1 + and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + ): + # Single-artist album. + changes["albumartist"] = plur_albumartist + changes["comp"] = False + else: + # VA. + changes["albumartist"] = config["va_name"].as_str() + changes["comp"] = True + + elif self.choice_flag in (Action.APPLY, Action.RETAG): + # Applying autotagged metadata. Just get AA from the first + # item. + first = self.items[0] + if not first.albumartist: + changes["albumartist"] = first.artist + if not first.albumartists: + changes["albumartists"] = first.artists or [first.artist] + if not first.mb_albumartistid: + changes["mb_albumartistid"] = first.mb_artistid + if not first.mb_albumartistids: + changes["mb_albumartistids"] = first.mb_artistids or [ + first.mb_artistid + ] + + # Apply new metadata. + for item in self.items: + item.update(changes) + + def manipulate_files( + self, + session: ImportSession, + operation: util.MoveOperation | None = None, + write=False, + ): + """Copy, move, link, hardlink or reflink (depending on `operation`) + the files as well as write metadata. + + `operation` should be an instance of `util.MoveOperation`. + + If `write` is `True` metadata is written to the files. + # TODO: Introduce a MoveOperation.NONE or SKIP + """ + + items = self.imported_items() + # Save the original paths of all items for deletion and pruning + # in the next step (finalization). 
+ self.old_paths: list[util.PathBytes] = [item.path for item in items] + for item in items: + if operation is not None: + # In copy and link modes, treat re-imports specially: + # move in-library files. (Out-of-library files are + # copied/moved as usual). + old_path = item.path + if ( + operation != util.MoveOperation.MOVE + and self.replaced_items[item] + and session.lib.directory in util.ancestry(old_path) + ): + item.move() + # We moved the item, so remove the + # now-nonexistent file from old_paths. + self.old_paths.remove(old_path) + else: + # A normal import. Just copy files and keep track of + # old paths. + item.move(operation) + + if write and (self.apply or self.choice_flag == Action.RETAG): + item.try_write() + + with session.lib.transaction(): + for item in self.imported_items(): + item.store() + + plugins.send("import_task_files", session=session, task=self) + + def add(self, lib: library.Library): + """Add the items as an album to the library and remove replaced items.""" + self.align_album_level_fields() + with lib.transaction(): + self.record_replaced(lib) + self.remove_replaced(lib) + + self.album = lib.add_album(self.imported_items()) + if self.choice_flag == Action.APPLY and isinstance( + self.match, autotag.AlbumMatch + ): + # Copy album flexible fields to the DB + # TODO: change the flow so we create the `Album` object earlier, + # and we can move this into `self.apply_metadata`, just like + # is done for tracks. + self.match.apply_album_metadata(self.album) + self.album.store() + + self.reimport_metadata(lib) + + def record_replaced(self, lib: library.Library): + """Records the replaced items and albums in the `replaced_items` + and `replaced_albums` dictionaries. + """ + self.replaced_items = defaultdict(list) + self.replaced_albums: dict[util.PathBytes, library.Album] = ( + defaultdict() + ) + replaced_album_ids = set() + for item in self.imported_items(): + dup_items = list(lib.items(query=PathQuery("path", item.path))) + self.replaced_items[item] = dup_items + for dup_item in dup_items: + if ( + not dup_item.album_id + or dup_item.album_id in replaced_album_ids + ): + continue + replaced_album = dup_item._cached_album + if replaced_album: + replaced_album_ids.add(dup_item.album_id) + self.replaced_albums[replaced_album.path] = replaced_album + + def reimport_metadata(self, lib: library.Library): + """For reimports, preserves metadata for reimported items and + albums. + """ + + def _reduce_and_log(new_obj, existing_fields, overwrite_keys): + """Some flexible attributes should be overwritten (rather than + preserved) on reimports; Copies existing_fields, logs and removes + entries that should not be preserved and returns a dict containing + those fields left to actually be preserved. + """ + noun = "album" if isinstance(new_obj, library.Album) else "item" + existing_fields = dict(existing_fields) + overwritten_fields = [ + k + for k in existing_fields + if k in overwrite_keys + and new_obj.get(k) + and existing_fields.get(k) != new_obj.get(k) + ] + if overwritten_fields: + log.debug( + "Reimported {0} {1.id}. Not preserving flexible attributes {2}. 
" + "Path: {1.filepath}", + noun, + new_obj, + overwritten_fields, + ) + for key in overwritten_fields: + del existing_fields[key] + return existing_fields + + if self.is_album: + replaced_album = self.replaced_albums.get(self.album.path) + if replaced_album: + album_fields = _reduce_and_log( + self.album, + replaced_album._values_flex, + REIMPORT_FRESH_FIELDS_ALBUM, + ) + self.album.added = replaced_album.added + self.album.update(album_fields) + self.album.artpath = replaced_album.artpath + self.album.store() + log.debug( + "Reimported album {0.album.id}. Preserving attribute ['added']. " + "Path: {0.album.filepath}", + self, + ) + log.debug( + "Reimported album {0.album.id}. Preserving flexible" + " attributes {1}. Path: {0.album.filepath}", + self, + list(album_fields.keys()), + ) + + for item in self.imported_items(): + dup_items = self.replaced_items[item] + for dup_item in dup_items: + if dup_item.added and dup_item.added != item.added: + item.added = dup_item.added + log.debug( + "Reimported item {0.id}. Preserving attribute ['added']. " + "Path: {0.filepath}", + item, + ) + item_fields = _reduce_and_log( + item, dup_item._values_flex, REIMPORT_FRESH_FIELDS_ITEM + ) + item.update(item_fields) + log.debug( + "Reimported item {0.id}. Preserving flexible attributes {1}. " + "Path: {0.filepath}", + item, + list(item_fields.keys()), + ) + item.store() + + def remove_replaced(self, lib): + """Removes all the items from the library that have the same + path as an item from this task. + """ + for item in self.imported_items(): + for dup_item in self.replaced_items[item]: + log.debug("Replacing item {.id}: {.filepath}", dup_item, item) + dup_item.remove() + log.debug( + "{} of {} items replaced", + sum(bool(v) for v in self.replaced_items.values()), + len(self.imported_items()), + ) + + def choose_match(self, session): + """Ask the session which match should apply and apply it.""" + choice = session.choose_match(self) + self.set_choice(choice) + session.log_choice(self) + + def reload(self): + """Reload albums and items from the database.""" + for item in self.imported_items(): + item.load() + self.album.load() + + # Utilities. + + def prune(self, filename): + """Prune any empty directories above the given file. If this + task has no `toppath` or the file path provided is not within + the `toppath`, then this function has no effect. Similarly, if + the file still exists, no pruning is performed, so it's safe to + call when the file in question may not have been removed. + """ + if self.toppath and not os.path.exists(util.syspath(filename)): + util.prune_dirs( + os.path.dirname(filename), + self.toppath, + clutter=config["clutter"].as_str_seq(), + ) + + +class SingletonImportTask(ImportTask): + """ImportTask for a single track that is not associated to an album.""" + + def __init__(self, toppath: util.PathBytes | None, item: library.Item): + super().__init__(toppath, [item.path], [item]) + self.item = item + self.is_album = False + self.paths = [item.path] + + def chosen_info(self): + """Return a dictionary of metadata about the current choice. + May only be called when the choice flag is ASIS or RETAG + (in which case the data comes from the files' current metadata) + or APPLY (in which case the data comes from the choice). 
+ """ + assert self.choice_flag in (Action.ASIS, Action.RETAG, Action.APPLY) + if self.choice_flag in (Action.ASIS, Action.RETAG): + return dict(self.item) + elif self.choice_flag is Action.APPLY: + return self.match.info.copy() + + def imported_items(self): + return [self.item] + + def _emit_imported(self, lib): + for item in self.imported_items(): + plugins.send("item_imported", lib=lib, item=item) + + def lookup_candidates(self, search_ids: list[str]) -> None: + self.candidates, self.rec = autotag.tag_item( + self.item, search_ids=search_ids + ) + + def find_duplicates(self, lib: library.Library) -> list[library.Item]: # type: ignore[override] # Need splitting Singleton and Album tasks into separate classes + """Return a list of items from `lib` that have the same artist + and title as the task. + """ + info = self.chosen_info() + + # Query for existing items using the same metadata. We use a + # temporary `Item` object to generate any computed fields. + tmp_item = library.Item(lib, **info) + keys: list[str] = config["import"]["duplicate_keys"][ + "item" + ].as_str_seq() + dup_query = tmp_item.duplicates_query(keys) + + found_items = [] + for other_item in lib.items(dup_query): + # Existing items not considered duplicates. + if other_item.path != self.item.path: + found_items.append(other_item) + return found_items + + duplicate_items = find_duplicates + + def add(self, lib): + with lib.transaction(): + self.record_replaced(lib) + self.remove_replaced(lib) + lib.add(self.item) + self.reimport_metadata(lib) + + def infer_album_fields(self): + raise NotImplementedError + + def choose_match(self, session: ImportSession): + """Ask the session which match should apply and apply it.""" + choice = session.choose_item(self) + self.set_choice(choice) + session.log_choice(self) + + def reload(self): + self.item.load() + + def set_fields(self, lib): + """Sets the fields given at CLI or configuration to the specified + values, for the singleton item. + """ + for field, view in config["import"]["set_fields"].items(): + value = str(view.get()) + log.debug( + "Set field {}={} for {}", + field, + value, + util.displayable_path(self.paths), + ) + self.item.set_parse(field, format(self.item, value)) + self.item.store() + + +# FIXME The inheritance relationships are inverted. This is why there +# are so many methods which pass. More responsibility should be delegated to +# the BaseImportTask class. +class SentinelImportTask(ImportTask): + """A sentinel task marks the progress of an import and does not + import any items itself. + + If only `toppath` is set the task indicates the end of a top-level + directory import. If the `paths` argument is also given, the task + indicates the progress in the `toppath` import. + """ + + def __init__(self, toppath, paths): + super().__init__(toppath, paths, ()) + # TODO Remove the remaining attributes eventually + self.should_remove_duplicates = False + self.is_album = True + self.choice_flag = None + + def save_history(self): + pass + + def save_progress(self): + if not self.paths: + # "Done" sentinel. 
+ ImportState().progress_reset(self.toppath) + elif self.toppath: + # "Directory progress" sentinel for singletons + super().save_progress() + + @property + def skip(self) -> bool: + return True + + def set_choice(self, choice): + raise NotImplementedError + + def cleanup(self, copy=False, delete=False, move=False): + pass + + def _emit_imported(self, lib): + pass + + +ArchiveHandler = tuple[ + Callable[[util.StrPath], bool], Callable[[util.StrPath], Any] +] + + +class ArchiveImportTask(SentinelImportTask): + """An import task that represents the processing of an archive. + + `toppath` must be a `zip`, `tar`, or `rar` archive. Archive tasks + serve two purposes: + - First, it will unarchive the files to a temporary directory and + return it. The client should read tasks from the resulting + directory and send them through the pipeline. + - Second, it will clean up the temporary directory when it proceeds + through the pipeline. The client should send the archive task + after sending the rest of the music tasks to make this work. + """ + + def __init__(self, toppath): + super().__init__(toppath, ()) + self.extracted = False + + @classmethod + def is_archive(cls, path): + """Returns true if the given path points to an archive that can + be handled. + """ + if not os.path.isfile(path): + return False + + for path_test, _ in cls.handlers: + if path_test(os.fsdecode(path)): + return True + return False + + @util.cached_classproperty + def handlers(cls) -> list[ArchiveHandler]: + """Returns a list of archive handlers. + + Each handler is a `(path_test, ArchiveClass)` tuple. `path_test` + is a function that returns `True` if the given path can be + handled by `ArchiveClass`. `ArchiveClass` is a class that + implements the same interface as `tarfile.TarFile`. + """ + _handlers: list[ArchiveHandler] = [] + from zipfile import ZipFile, is_zipfile + + _handlers.append((is_zipfile, ZipFile)) + import tarfile + + _handlers.append((tarfile.is_tarfile, tarfile.open)) + try: + from rarfile import RarFile, is_rarfile + except ImportError: + pass + else: + _handlers.append((is_rarfile, RarFile)) + try: + from py7zr import SevenZipFile, is_7zfile + except ImportError: + pass + else: + _handlers.append((is_7zfile, SevenZipFile)) + + return _handlers + + def cleanup(self, copy=False, delete=False, move=False): + """Removes the temporary directory the archive was extracted to.""" + if self.extracted and self.toppath: + log.debug( + "Removing extracted directory: {}", + util.displayable_path(self.toppath), + ) + shutil.rmtree(util.syspath(self.toppath)) + + def extract(self): + """Extracts the archive to a temporary directory and sets + `toppath` to that directory. + """ + assert self.toppath is not None, "toppath must be set" + + for path_test, handler_class in self.handlers: + if path_test(os.fsdecode(self.toppath)): + break + else: + raise ValueError(f"No handler found for archive: {self.toppath}") + extract_to = mkdtemp() + archive = handler_class(os.fsdecode(self.toppath), mode="r") + try: + archive.extractall(extract_to) + + # Adjust the files' mtimes to match the information from the + # archive. Inspired by: https://stackoverflow.com/q/9813243 + for f in archive.infolist(): + # The date_time will need to adjusted otherwise + # the item will have the current date_time of extraction. + # The (0, 0, -1) is added to date_time because the + # function time.mktime expects a 9-element tuple. + # The -1 indicates that the DST flag is unknown. 
+ date_time = time.mktime((*f.date_time, 0, 0, -1)) + fullpath = os.path.join(extract_to, f.filename) + os.utime(fullpath, (date_time, date_time)) + + finally: + archive.close() + self.extracted = True + self.toppath = extract_to + + +class ImportTaskFactory: + """Generate album and singleton import tasks for all media files + indicated by a path. + """ + + def __init__(self, toppath: util.PathBytes, session: ImportSession): + """Create a new task factory. + + `toppath` is the user-specified path to search for music to + import. `session` is the `ImportSession`, which controls how + tasks are read from the directory. + """ + self.toppath = toppath + self.session = session + self.skipped = 0 # Skipped due to incremental/resume. + self.imported = 0 # "Real" tasks created. + self.is_archive = ArchiveImportTask.is_archive(util.syspath(toppath)) + + def tasks(self) -> Iterable[ImportTask]: + """Yield all import tasks for music found in the user-specified + path `self.toppath`. Any necessary sentinel tasks are also + produced. + + During generation, update `self.skipped` and `self.imported` + with the number of tasks that were not produced (due to + incremental mode or resumed imports) and the number of concrete + tasks actually produced, respectively. + + If `self.toppath` is an archive, it is adjusted to point to the + extracted data. + """ + # Check whether this is an archive. + archive_task: ArchiveImportTask | None = None + if self.is_archive: + archive_task = self.unarchive() + if not archive_task: + return + + # Search for music in the directory. + for dirs, paths in self.paths(): + if self.session.config["singletons"]: + for path in paths: + tasks = self._create(self.singleton(path)) + yield from tasks + yield self.sentinel(dirs) + + else: + tasks = self._create(self.album(paths, dirs)) + yield from tasks + + # Produce the final sentinel for this toppath to indicate that + # it is finished. This is usually just a SentinelImportTask, but + # for archive imports, send the archive task instead (to remove + # the extracted directory). + yield archive_task or self.sentinel() + + def _create(self, task: ImportTask | None): + """Handle a new task to be emitted by the factory. + + Emit the `import_task_created` event and increment the + `imported` count if the task is not skipped. Return the same + task. If `task` is None, do nothing. + """ + if task: + tasks = task.handle_created(self.session) + self.imported += len(tasks) + return tasks + return [] + + def paths(self): + """Walk `self.toppath` and yield `(dirs, files)` pairs where + `files` are individual music files and `dirs` the set of + containing directories where the music was found. + + This can either be a recursive search in the ordinary case, a + single track when `toppath` is a file, a single directory in + `flat` mode. 
+ """ + if not os.path.isdir(util.syspath(self.toppath)): + yield [self.toppath], [self.toppath] + elif self.session.config["flat"]: + paths = [] + for dirs, paths_in_dir in albums_in_dir(self.toppath): + paths += paths_in_dir + yield [self.toppath], paths + else: + for dirs, paths in albums_in_dir(self.toppath): + yield dirs, paths + + def singleton(self, path: util.PathBytes): + """Return a `SingletonImportTask` for the music file.""" + if self.session.already_imported(self.toppath, [path]): + log.debug( + "Skipping previously-imported path: {}", + util.displayable_path(path), + ) + self.skipped += 1 + return None + + item = self.read_item(path) + if item: + return SingletonImportTask(self.toppath, item) + else: + return None + + def album(self, paths: Iterable[util.PathBytes], dirs=None): + """Return a `ImportTask` with all media files from paths. + + `dirs` is a list of parent directories used to record already + imported albums. + """ + + if dirs is None: + dirs = list({os.path.dirname(p) for p in paths}) + + if self.session.already_imported(self.toppath, dirs): + log.debug( + "Skipping previously-imported path: {}", + util.displayable_path(dirs), + ) + self.skipped += 1 + return None + + items: list[library.Item] = [ + item for item in map(self.read_item, paths) if item + ] + + if len(items) > 0: + return ImportTask(self.toppath, dirs, items) + else: + return None + + def sentinel(self, paths: Iterable[util.PathBytes] | None = None): + """Return a `SentinelImportTask` indicating the end of a + top-level directory import. + """ + return SentinelImportTask(self.toppath, paths) + + def unarchive(self): + """Extract the archive for this `toppath`. + + Extract the archive to a new directory, adjust `toppath` to + point to the extracted directory, and return an + `ArchiveImportTask`. If extraction fails, return None. + """ + assert self.is_archive + + if not (self.session.config["move"] or self.session.config["copy"]): + log.warning( + "Archive importing requires either " + "'copy' or 'move' to be enabled." + ) + return + + log.debug("Extracting archive: {}", util.displayable_path(self.toppath)) + archive_task = ArchiveImportTask(self.toppath) + try: + archive_task.extract() + except Exception as exc: + log.error("extraction failed: {}", exc) + return + + # Now read albums from the extracted directory. + self.toppath = archive_task.toppath + log.debug("Archive extracted to: {.toppath}", self) + return archive_task + + def read_item(self, path: util.PathBytes): + """Return an `Item` read from the path. + + If an item cannot be read, return `None` instead and log an + error. + """ + + # Check if the file has an extension, + # Add an extension if there isn't one. + if os.path.isfile(path): + path = self.check_extension(path) + + try: + return library.Item.from_path(path) + except library.ReadError as exc: + if isinstance(exc.reason, mediafile.FileTypeError): + # Silently ignore non-music files. 
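+                # mediafile raised FileTypeError, meaning the file is not
+                # a supported audio type, so the importer just moves on.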
+                pass
+            elif isinstance(exc.reason, mediafile.UnreadableFileError):
+                log.warning("unreadable file: {}", util.displayable_path(path))
+            else:
+                log.error(
+                    "error reading {}: {}", util.displayable_path(path), exc
+                )
+
+    def check_extension(self, path: util.PathBytes) -> Path:
+        """Ensure that `path` has a filename extension.
+
+        If the file has no extension, probe it with ffprobe; when a
+        known audio format is detected, move the file to the same name
+        with that extension. Return the (possibly renamed) path.
+        """
+        path = Path(os.fsdecode(path))
+        # If the path already has an extension, leave it unchanged.
+        if path.suffix != "":
+            return path
+
+        # No extension detected: ask ffprobe for the container format.
+        formats = []
+        output = subprocess.run(
+            [
+                "ffprobe",
+                "-hide_banner",
+                "-loglevel",
+                "fatal",
+                "-show_format",
+                "--",
+                str(path),
+            ],
+            capture_output=True,
+        )
+        out = output.stdout.decode("utf-8")
+        err = output.stderr.decode("utf-8")
+        if err != "":
+            log.error("ffprobe error: {}", err)
+        for line in out.split("\n"):
+            if line.startswith("format_name="):
+                formats = line.split("=")[1].split(",")
+        # Audio filename extensions, taken from Wikipedia:
+        # https://en.wikipedia.org/wiki/Audio_file_format
+        wiki_formats = {
+            "3gp",
+            "aa",
+            "aac",
+            "aax",
+            "act",
+            "aiff",
+            "alac",
+            "amr",
+            "ape",
+            "au",
+            "awb",
+            "dss",
+            "dvf",
+            "flac",
+            "gsm",
+            "iklax",
+            "ivs",
+            "m4a",
+            "m4b",
+            "m4p",
+            "mmf",
+            "movpkg",
+            "mp1",
+            "mp2",
+            "mp3",
+            "mpc",
+            "msv",
+            "nmf",
+            "ogg",
+            "oga",
+            "mogg",
+            "opus",
+            "ra",
+            "rm",
+            "raw",
+            "rf64",
+            "sln",
+            "tta",
+            "voc",
+            "vox",
+            "wav",
+            "wma",
+            "wv",
+            "webm",
+            "8svx",
+            "cda",
+        }
+        detected_format = ""
+        # Take the first format reported by ffprobe that is in this set.
+        for f in formats:
+            if f in wiki_formats:
+                detected_format = f
+                break
+
+        # If no known audio format was detected, the file is probably
+        # not music; leave it untouched.
+        if detected_format == "":
+            return path
+
+        # Move the file to the same name with the detected extension. If
+        # such a file already exists, assume it is the same audio (e.g.
+        # 'asdf' next to 'asdf.mp3') and import that file instead.
+        new_path = path.with_suffix("." + detected_format)
+        if not new_path.exists():
+            util.move(path, new_path)
+        else:
+            log.info(
+                "file with detected extension already exists; using {}",
+                new_path,
+            )
+        return new_path
+
+
+MULTIDISC_MARKERS = (rb"dis[ck]", rb"cd")
+MULTIDISC_PAT_FMT = rb"^(.*%s[\W_]*)\d"
+
+
+def is_subdir_of_any_in_list(path, dirs):
+    """Return True if `path` is a subdirectory of any directory in
+    `dirs` (a list); otherwise return False.
+    """
+    ancestors = util.ancestry(path)
+    return any(d in ancestors for d in dirs)
+
+
+def albums_in_dir(path: util.PathBytes):
+    """Recursively search the given directory and return an iterable
+    of (paths, items) where paths is a list of directories and items is
+    a list of media file paths that together probably form an album.
+    Specifically, any folder containing any media files is an album.
+    """
+    collapse_paths: list[util.PathBytes] = []
+    collapse_items: list[util.PathBytes] = []
+    collapse_pat = None
+
+    ignore: list[str] = config["ignore"].as_str_seq()
+    ignore_hidden: bool = config["ignore_hidden"].get(bool)
+
+    for root, dirs, files in util.sorted_walk(
+        path, ignore=ignore, ignore_hidden=ignore_hidden, logger=log
+    ):
+        items = [os.path.join(root, f) for f in files]
+        # If we're currently collapsing the constituent directories in a
+        # multi-disc album, check whether we should continue collapsing
+        # and add the current directory. If so, just add the directory
+        # and move on to the next directory. If not, stop collapsing.
+        if collapse_paths:
+            if (is_subdir_of_any_in_list(root, collapse_paths)) or (
+                collapse_pat and collapse_pat.match(os.path.basename(root))
+            ):
+                # Still collapsing.
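+                # Either the directory sits inside one we are already
+                # collapsing or its name matches the multi-disc pattern,
+                # so fold its files into the album being collected.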
+ collapse_paths.append(root) + collapse_items += items + continue + else: + # Collapse finished. Yield the collapsed directory and + # proceed to process the current one. + if collapse_items: + yield collapse_paths, collapse_items + collapse_pat, collapse_paths, collapse_items = None, [], [] + + # Check whether this directory looks like the *first* directory + # in a multi-disc sequence. There are two indicators: the file + # is named like part of a multi-disc sequence (e.g., "Title Disc + # 1") or it contains no items but only directories that are + # named in this way. + start_collapsing = False + for marker in MULTIDISC_MARKERS: + # We're using replace on %s due to lack of .format() on bytestrings + p = MULTIDISC_PAT_FMT.replace(b"%s", marker) + marker_pat = re.compile(p, re.I) + match = marker_pat.match(os.path.basename(root)) + + # Is this directory the root of a nested multi-disc album? + if dirs and not items: + # Check whether all subdirectories have the same prefix. + start_collapsing = True + subdir_pat = None + for subdir in dirs: + subdir = util.bytestring_path(subdir) + # The first directory dictates the pattern for + # the remaining directories. + if not subdir_pat: + match = marker_pat.match(subdir) + if match: + match_group = re.escape(match.group(1)) + subdir_pat = re.compile( + b"".join([b"^", match_group, rb"\d"]), re.I + ) + else: + start_collapsing = False + break + + # Subsequent directories must match the pattern. + elif not subdir_pat.match(subdir): + start_collapsing = False + break + + # If all subdirectories match, don't check other + # markers. + if start_collapsing: + break + + # Is this directory the first in a flattened multi-disc album? + elif match: + start_collapsing = True + # Set the current pattern to match directories with the same + # prefix as this one, followed by a digit. + collapse_pat = re.compile( + b"".join([b"^", re.escape(match.group(1)), rb"\d"]), re.I + ) + break + + # If either of the above heuristics indicated that this is the + # beginning of a multi-disc album, initialize the collapsed + # directory and item lists and check the next directory. + if start_collapsing: + # Start collapsing; continue to the next iteration. + collapse_paths = [root] + collapse_items = items + continue + + # If it's nonempty, yield it. + if items: + yield [root], items + + # Clear out any unfinished collapse. 
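+    # The walk can end while a multi-disc album is still being collected,
+    # so yield whatever has been gathered for it.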
+ if collapse_paths and collapse_items: + yield collapse_paths, collapse_items diff --git a/beets/ui/commands/import_/display.py b/beets/ui/commands/import_/display.py index a89b8795f..87a072e06 100644 --- a/beets/ui/commands/import_/display.py +++ b/beets/ui/commands/import_/display.py @@ -1,397 +1,397 @@ -from __future__ import annotations - -import os -import textwrap -from dataclasses import dataclass -from functools import cached_property -from typing import TYPE_CHECKING - -from beets import config, ui -from beets.autotag import hooks -from beets.util import displayable_path -from beets.util.color import colorize -from beets.util.diff import colordiff -from beets.util.layout import Side, get_layout_lines, indent -from beets.util.units import human_seconds_short - -if TYPE_CHECKING: - import confuse - - from beets import autotag - from beets.library.models import Item - from beets.util.color import ColorName - -VARIOUS_ARTISTS = "Various Artists" - - -@dataclass -class ChangeRepresentation: - """Keeps track of all information needed to generate a (colored) text - representation of the changes that will be made if an album or singleton's - tags are changed according to `match`, which must be an AlbumMatch or - TrackMatch object, accordingly. - """ - - cur_artist: str - cur_name: str - match: autotag.hooks.Match - - @cached_property - def changed_prefix(self) -> str: - return colorize("changed", "\u2260") - - @cached_property - def _indentation_config(self) -> confuse.Subview: - return config["ui"]["import"]["indentation"] - - @cached_property - def indent(self) -> int: - return self._indentation_config["match_header"].get(int) - - @cached_property - def indent_header(self) -> str: - return indent(self.indent) - - @cached_property - def indent_detail(self) -> str: - return indent(self._indentation_config["match_details"].get(int)) - - @cached_property - def indent_tracklist(self) -> str: - return indent(self._indentation_config["match_tracklist"].get(int)) - - def print_layout(self, indent: str, left: Side, right: Side) -> None: - for line in get_layout_lines(indent, left, right, ui.term_width()): - ui.print_(line) - - def show_match_header(self) -> None: - """Print out a 'header' identifying the suggested match (album name, - artist name,...) and summarizing the changes that would be made should - the user accept the match. - """ - # Print newline at beginning of change block. - parts = [""] - - # 'Match' line and similarity. - parts.append(f"Match ({self.match.distance.string}):") - parts.append( - ui.colorize( - self.match.distance.color, - f"{self.match.info.artist} - {self.match.info.name}", - ) - ) - - if penalty_keys := self.match.distance.generic_penalty_keys: - parts.append( - ui.colorize("changed", f"\u2260 {', '.join(penalty_keys)}") - ) - - if disambig := self.match.disambig_string: - parts.append(disambig) - - if data_url := self.match.info.data_url: - parts.append(ui.colorize("text_faint", f"{data_url}")) - - ui.print_(textwrap.indent("\n".join(parts), self.indent_header)) - - def show_match_details(self) -> None: - """Print out the details of the match, including changes in album name - and artist name. - """ - # Artist. - artist_l, artist_r = self.cur_artist or "", self.match.info.artist or "" - if artist_r == VARIOUS_ARTISTS: - # Hide artists for VA releases. 
- artist_l, artist_r = "", "" - if artist_l != artist_r: - artist_l, artist_r = colordiff(artist_l, artist_r) - left = Side(f"{self.changed_prefix} Artist: ", artist_l, "") - right = Side("", artist_r, "") - self.print_layout(self.indent_detail, left, right) - - else: - ui.print_(f"{self.indent_detail}*", "Artist:", artist_r) - - if self.cur_name: - type_ = self.match.type - name_l, name_r = self.cur_name or "", self.match.info.name - if self.cur_name != self.match.info.name != VARIOUS_ARTISTS: - name_l, name_r = colordiff(name_l, name_r) - left = Side(f"{self.changed_prefix} {type_}: ", name_l, "") - right = Side("", name_r, "") - self.print_layout(self.indent_detail, left, right) - else: - ui.print_(f"{self.indent_detail}*", f"{type_}:", name_r) - - def make_medium_info_line(self, track_info: hooks.TrackInfo) -> str: - """Construct a line with the current medium's info.""" - track_media = track_info.get("media", "Media") - # Build output string. - if self.match.info.mediums > 1 and track_info.disctitle: - return ( - f"* {track_media} {track_info.medium}: {track_info.disctitle}" - ) - elif self.match.info.mediums > 1: - return f"* {track_media} {track_info.medium}" - elif track_info.disctitle: - return f"* {track_media}: {track_info.disctitle}" - else: - return "" - - def format_index(self, track_info: hooks.TrackInfo | Item) -> str: - """Return a string representing the track index of the given - TrackInfo or Item object. - """ - if isinstance(track_info, hooks.TrackInfo): - index = track_info.index - medium_index = track_info.medium_index - medium = track_info.medium - mediums = self.match.info.mediums - else: - index = medium_index = track_info.track - medium = track_info.disc - mediums = track_info.disctotal - if config["per_disc_numbering"]: - if mediums and mediums > 1: - return f"{medium}-{medium_index}" - else: - return str(medium_index if medium_index is not None else index) - else: - return str(index) - - def make_track_numbers( - self, item: Item, track_info: hooks.TrackInfo - ) -> tuple[str, str, bool]: - """Format colored track indices.""" - cur_track = self.format_index(item) - new_track = self.format_index(track_info) - changed = False - # Choose color based on change. - highlight_color: ColorName - if cur_track != new_track: - changed = True - if item.track in (track_info.index, track_info.medium_index): - highlight_color = "text_highlight_minor" - else: - highlight_color = "text_highlight" - else: - highlight_color = "text_faint" - - lhs_track = colorize(highlight_color, f"(#{cur_track})") - rhs_track = colorize(highlight_color, f"(#{new_track})") - return lhs_track, rhs_track, changed - - @staticmethod - def make_track_titles( - item: Item, track_info: hooks.TrackInfo - ) -> tuple[str, str, bool]: - """Format colored track titles.""" - new_title = track_info.name - if not item.title.strip(): - # If there's no title, we use the filename. Don't colordiff. - cur_title = displayable_path(os.path.basename(item.path)) - return cur_title, new_title, True - else: - # If there is a title, highlight differences. 
- cur_title = item.title.strip() - cur_col, new_col = colordiff(cur_title, new_title) - return cur_col, new_col, cur_title != new_title - - @staticmethod - def make_track_lengths( - item: Item, track_info: hooks.TrackInfo - ) -> tuple[str, str, bool]: - """Format colored track lengths.""" - changed = False - highlight_color: ColorName - if ( - item.length - and track_info.length - and abs(item.length - track_info.length) - >= config["ui"]["length_diff_thresh"].as_number() - ): - highlight_color = "text_highlight" - changed = True - else: - highlight_color = "text_highlight_minor" - - # Handle nonetype lengths by setting to 0 - cur_length0 = item.length if item.length else 0 - new_length0 = track_info.length if track_info.length else 0 - # format into string - cur_length = f"({human_seconds_short(cur_length0)})" - new_length = f"({human_seconds_short(new_length0)})" - # colorize - lhs_length = colorize(highlight_color, cur_length) - rhs_length = colorize(highlight_color, new_length) - - return lhs_length, rhs_length, changed - - def make_line( - self, item: Item, track_info: hooks.TrackInfo - ) -> tuple[Side, Side]: - """Extract changes from item -> new TrackInfo object, and colorize - appropriately. Returns (lhs, rhs) for column printing. - """ - # Track titles. - lhs_title, rhs_title, diff_title = self.make_track_titles( - item, track_info - ) - # Track number change. - lhs_track, rhs_track, diff_track = self.make_track_numbers( - item, track_info - ) - # Length change. - lhs_length, rhs_length, diff_length = self.make_track_lengths( - item, track_info - ) - - changed = diff_title or diff_track or diff_length - - # Construct lhs and rhs dicts. - # Previously, we printed the penalties, however this is no longer - # the case, thus the 'info' dictionary is unneeded. - # penalties = penalty_string(self.match.distance.tracks[track_info]) - - lhs = Side( - f"{self.changed_prefix if changed else '*'} {lhs_track} ", - lhs_title, - f" {lhs_length}", - ) - if not changed: - # Only return the left side, as nothing changed. - return (lhs, Side("", "", "")) - - return (lhs, Side(f"{rhs_track} ", rhs_title, f" {rhs_length}")) - - def print_tracklist(self, lines: list[tuple[Side, Side]]) -> None: - """Calculates column widths for tracks stored as line tuples: - (left, right). Then prints each line of tracklist. - """ - if len(lines) == 0: - # If no lines provided, e.g. details not required, do nothing. - return - - # Check how to fit content into terminal window - indent_width = len(self.indent_tracklist) - terminal_width = ui.term_width() - joiner_width = len("* -> ") - col_width = (terminal_width - indent_width - joiner_width) // 2 - max_width_l = max(left.rendered_width for left, _ in lines) - max_width_r = max(right.rendered_width for _, right in lines) - - if ((max_width_l <= col_width) and (max_width_r <= col_width)) or ( - ((max_width_l > col_width) or (max_width_r > col_width)) - and ((max_width_l + max_width_r) <= col_width * 2) - ): - # All content fits. Either both maximum widths are below column - # widths, or one of the columns is larger than allowed but the - # other is smaller than allowed. - # In this case we can afford to shrink the columns to fit their - # largest string - col_width_l = max_width_l - col_width_r = max_width_r - else: - # Not all content fits - stick with original half/half split - col_width_l = col_width - col_width_r = col_width - - # Print out each line, using the calculated width from above. 
- for left, right in lines: - left = left._replace(width=col_width_l) - right = right._replace(width=col_width_r) - self.print_layout(self.indent_tracklist, left, right) - - -class AlbumChange(ChangeRepresentation): - match: autotag.hooks.AlbumMatch - - def show_match_tracks(self) -> None: - """Print out the tracks of the match, summarizing changes the match - suggests for them. - """ - pairs = sorted( - self.match.item_info_pairs, key=lambda pair: pair[1].index or 0 - ) - # Build up LHS and RHS for track difference display. The `lines` list - # contains `(left, right)` tuples. - lines: list[tuple[Side, Side]] = [] - medium = disctitle = None - for item, track_info in pairs: - # If the track is the first on a new medium, show medium - # number and title. - if medium != track_info.medium or disctitle != track_info.disctitle: - # Create header for new medium - header = self.make_medium_info_line(track_info) - if header != "": - # Print tracks from previous medium - self.print_tracklist(lines) - lines = [] - ui.print_(f"{self.indent_detail}{header}") - # Save new medium details for future comparison. - medium, disctitle = track_info.medium, track_info.disctitle - - # Construct the line tuple for the track. - left, right = self.make_line(item, track_info) - if right.contents != "": - lines.append((left, right)) - else: - if config["import"]["detail"]: - lines.append((left, right)) - self.print_tracklist(lines) - - # Missing and unmatched tracks. - if self.match.extra_tracks: - ui.print_( - "Missing tracks" - f" ({len(self.match.extra_tracks)}/{len(self.match.info.tracks)} -" - f" {len(self.match.extra_tracks) / len(self.match.info.tracks):.1%}):" - ) - for track_info in self.match.extra_tracks: - line = f" ! {track_info.title} (#{self.format_index(track_info)})" - if track_info.length: - line += f" ({human_seconds_short(track_info.length)})" - ui.print_(colorize("text_warning", line)) - if self.match.extra_items: - ui.print_(f"Unmatched tracks ({len(self.match.extra_items)}):") - for item in self.match.extra_items: - line = f" ! {item.title} (#{self.format_index(item)})" - if item.length: - line += f" ({human_seconds_short(item.length)})" - ui.print_(colorize("text_warning", line)) - - -class TrackChange(ChangeRepresentation): - """Track change representation, comparing item with match.""" - - match: autotag.hooks.TrackMatch - - -def show_change( - cur_artist: str, cur_album: str, match: hooks.AlbumMatch -) -> None: - """Print out a representation of the changes that will be made if an - album's tags are changed according to `match`, which must be an AlbumMatch - object. - """ - change = AlbumChange(cur_artist, cur_album, match) - - # Print the match header. - change.show_match_header() - - # Print the match details. - change.show_match_details() - - # Print the match tracks. - change.show_match_tracks() - - -def show_item_change(item: Item, match: hooks.TrackMatch) -> None: - """Print out the change that would occur by tagging `item` with the - metadata from `match`, a TrackMatch object. - """ - change = TrackChange(item.artist, item.title, match) - # Print the match header. - change.show_match_header() - # Print the match details. 
- change.show_match_details() +from __future__ import annotations + +import os +import textwrap +from dataclasses import dataclass +from functools import cached_property +from typing import TYPE_CHECKING + +from beets import config, ui +from beets.autotag import hooks +from beets.util import displayable_path +from beets.util.color import colorize +from beets.util.diff import colordiff +from beets.util.layout import Side, get_layout_lines, indent +from beets.util.units import human_seconds_short + +if TYPE_CHECKING: + import confuse + + from beets import autotag + from beets.library.models import Item + from beets.util.color import ColorName + +VARIOUS_ARTISTS = "Various Artists" + + +@dataclass +class ChangeRepresentation: + """Keeps track of all information needed to generate a (colored) text + representation of the changes that will be made if an album or singleton's + tags are changed according to `match`, which must be an AlbumMatch or + TrackMatch object, accordingly. + """ + + cur_artist: str + cur_name: str + match: autotag.hooks.Match + + @cached_property + def changed_prefix(self) -> str: + return colorize("changed", "\u2260") + + @cached_property + def _indentation_config(self) -> confuse.Subview: + return config["ui"]["import"]["indentation"] + + @cached_property + def indent(self) -> int: + return self._indentation_config["match_header"].get(int) + + @cached_property + def indent_header(self) -> str: + return indent(self.indent) + + @cached_property + def indent_detail(self) -> str: + return indent(self._indentation_config["match_details"].get(int)) + + @cached_property + def indent_tracklist(self) -> str: + return indent(self._indentation_config["match_tracklist"].get(int)) + + def print_layout(self, indent: str, left: Side, right: Side) -> None: + for line in get_layout_lines(indent, left, right, ui.term_width()): + ui.print_(line) + + def show_match_header(self) -> None: + """Print out a 'header' identifying the suggested match (album name, + artist name,...) and summarizing the changes that would be made should + the user accept the match. + """ + # Print newline at beginning of change block. + parts = [""] + + # 'Match' line and similarity. + parts.append(f"Match ({self.match.distance.string}):") + parts.append( + ui.colorize( + self.match.distance.color, + f"{self.match.info.artist} - {self.match.info.name}", + ) + ) + + if penalty_keys := self.match.distance.generic_penalty_keys: + parts.append( + ui.colorize("changed", f"\u2260 {', '.join(penalty_keys)}") + ) + + if disambig := self.match.disambig_string: + parts.append(disambig) + + if data_url := self.match.info.data_url: + parts.append(ui.colorize("text_faint", f"{data_url}")) + + ui.print_(textwrap.indent("\n".join(parts), self.indent_header)) + + def show_match_details(self) -> None: + """Print out the details of the match, including changes in album name + and artist name. + """ + # Artist. + artist_l, artist_r = self.cur_artist or "", self.match.info.artist or "" + if artist_r == VARIOUS_ARTISTS: + # Hide artists for VA releases. 
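+            # Blanking both sides means no artist change is displayed
+            # for Various Artists releases.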
+ artist_l, artist_r = "", "" + if artist_l != artist_r: + artist_l, artist_r = colordiff(artist_l, artist_r) + left = Side(f"{self.changed_prefix} Artist: ", artist_l, "") + right = Side("", artist_r, "") + self.print_layout(self.indent_detail, left, right) + + else: + ui.print_(f"{self.indent_detail}*", "Artist:", artist_r) + + if self.cur_name: + type_ = self.match.type + name_l, name_r = self.cur_name or "", self.match.info.name + if self.cur_name != self.match.info.name != VARIOUS_ARTISTS: + name_l, name_r = colordiff(name_l, name_r) + left = Side(f"{self.changed_prefix} {type_}: ", name_l, "") + right = Side("", name_r, "") + self.print_layout(self.indent_detail, left, right) + else: + ui.print_(f"{self.indent_detail}*", f"{type_}:", name_r) + + def make_medium_info_line(self, track_info: hooks.TrackInfo) -> str: + """Construct a line with the current medium's info.""" + track_media = track_info.get("media", "Media") + # Build output string. + if self.match.info.mediums > 1 and track_info.disctitle: + return ( + f"* {track_media} {track_info.medium}: {track_info.disctitle}" + ) + elif self.match.info.mediums > 1: + return f"* {track_media} {track_info.medium}" + elif track_info.disctitle: + return f"* {track_media}: {track_info.disctitle}" + else: + return "" + + def format_index(self, track_info: hooks.TrackInfo | Item) -> str: + """Return a string representing the track index of the given + TrackInfo or Item object. + """ + if isinstance(track_info, hooks.TrackInfo): + index = track_info.index + medium_index = track_info.medium_index + medium = track_info.medium + mediums = self.match.info.mediums + else: + index = medium_index = track_info.track + medium = track_info.disc + mediums = track_info.disctotal + if config["per_disc_numbering"]: + if mediums and mediums > 1: + return f"{medium}-{medium_index}" + else: + return str(medium_index if medium_index is not None else index) + else: + return str(index) + + def make_track_numbers( + self, item: Item, track_info: hooks.TrackInfo + ) -> tuple[str, str, bool]: + """Format colored track indices.""" + cur_track = self.format_index(item) + new_track = self.format_index(track_info) + changed = False + # Choose color based on change. + highlight_color: ColorName + if cur_track != new_track: + changed = True + if item.track in (track_info.index, track_info.medium_index): + highlight_color = "text_highlight_minor" + else: + highlight_color = "text_highlight" + else: + highlight_color = "text_faint" + + lhs_track = colorize(highlight_color, f"(#{cur_track})") + rhs_track = colorize(highlight_color, f"(#{new_track})") + return lhs_track, rhs_track, changed + + @staticmethod + def make_track_titles( + item: Item, track_info: hooks.TrackInfo + ) -> tuple[str, str, bool]: + """Format colored track titles.""" + new_title = track_info.name + if not item.title.strip(): + # If there's no title, we use the filename. Don't colordiff. + cur_title = displayable_path(os.path.basename(item.path)) + return cur_title, new_title, True + else: + # If there is a title, highlight differences. 
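+            # colordiff returns a colorized (current, new) pair so only
+            # the differing parts of the titles stand out.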
+ cur_title = item.title.strip() + cur_col, new_col = colordiff(cur_title, new_title) + return cur_col, new_col, cur_title != new_title + + @staticmethod + def make_track_lengths( + item: Item, track_info: hooks.TrackInfo + ) -> tuple[str, str, bool]: + """Format colored track lengths.""" + changed = False + highlight_color: ColorName + if ( + item.length + and track_info.length + and abs(item.length - track_info.length) + >= config["ui"]["length_diff_thresh"].as_number() + ): + highlight_color = "text_highlight" + changed = True + else: + highlight_color = "text_highlight_minor" + + # Handle nonetype lengths by setting to 0 + cur_length0 = item.length if item.length else 0 + new_length0 = track_info.length if track_info.length else 0 + # format into string + cur_length = f"({human_seconds_short(cur_length0)})" + new_length = f"({human_seconds_short(new_length0)})" + # colorize + lhs_length = colorize(highlight_color, cur_length) + rhs_length = colorize(highlight_color, new_length) + + return lhs_length, rhs_length, changed + + def make_line( + self, item: Item, track_info: hooks.TrackInfo + ) -> tuple[Side, Side]: + """Extract changes from item -> new TrackInfo object, and colorize + appropriately. Returns (lhs, rhs) for column printing. + """ + # Track titles. + lhs_title, rhs_title, diff_title = self.make_track_titles( + item, track_info + ) + # Track number change. + lhs_track, rhs_track, diff_track = self.make_track_numbers( + item, track_info + ) + # Length change. + lhs_length, rhs_length, diff_length = self.make_track_lengths( + item, track_info + ) + + changed = diff_title or diff_track or diff_length + + # Construct lhs and rhs dicts. + # Previously, we printed the penalties, however this is no longer + # the case, thus the 'info' dictionary is unneeded. + # penalties = penalty_string(self.match.distance.tracks[track_info]) + + lhs = Side( + f"{self.changed_prefix if changed else '*'} {lhs_track} ", + lhs_title, + f" {lhs_length}", + ) + if not changed: + # Only return the left side, as nothing changed. + return (lhs, Side("", "", "")) + + return (lhs, Side(f"{rhs_track} ", rhs_title, f" {rhs_length}")) + + def print_tracklist(self, lines: list[tuple[Side, Side]]) -> None: + """Calculates column widths for tracks stored as line tuples: + (left, right). Then prints each line of tracklist. + """ + if len(lines) == 0: + # If no lines provided, e.g. details not required, do nothing. + return + + # Check how to fit content into terminal window + indent_width = len(self.indent_tracklist) + terminal_width = ui.term_width() + joiner_width = len("* -> ") + col_width = (terminal_width - indent_width - joiner_width) // 2 + max_width_l = max(left.rendered_width for left, _ in lines) + max_width_r = max(right.rendered_width for _, right in lines) + + if ((max_width_l <= col_width) and (max_width_r <= col_width)) or ( + ((max_width_l > col_width) or (max_width_r > col_width)) + and ((max_width_l + max_width_r) <= col_width * 2) + ): + # All content fits. Either both maximum widths are below column + # widths, or one of the columns is larger than allowed but the + # other is smaller than allowed. + # In this case we can afford to shrink the columns to fit their + # largest string + col_width_l = max_width_l + col_width_r = max_width_r + else: + # Not all content fits - stick with original half/half split + col_width_l = col_width + col_width_r = col_width + + # Print out each line, using the calculated width from above. 
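+        # Each Side is rebuilt with the chosen column width before being
+        # handed to the layout printer.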
+ for left, right in lines: + left = left._replace(width=col_width_l) + right = right._replace(width=col_width_r) + self.print_layout(self.indent_tracklist, left, right) + + +class AlbumChange(ChangeRepresentation): + match: autotag.hooks.AlbumMatch + + def show_match_tracks(self) -> None: + """Print out the tracks of the match, summarizing changes the match + suggests for them. + """ + pairs = sorted( + self.match.item_info_pairs, key=lambda pair: pair[1].index or 0 + ) + # Build up LHS and RHS for track difference display. The `lines` list + # contains `(left, right)` tuples. + lines: list[tuple[Side, Side]] = [] + medium = disctitle = None + for item, track_info in pairs: + # If the track is the first on a new medium, show medium + # number and title. + if medium != track_info.medium or disctitle != track_info.disctitle: + # Create header for new medium + header = self.make_medium_info_line(track_info) + if header != "": + # Print tracks from previous medium + self.print_tracklist(lines) + lines = [] + ui.print_(f"{self.indent_detail}{header}") + # Save new medium details for future comparison. + medium, disctitle = track_info.medium, track_info.disctitle + + # Construct the line tuple for the track. + left, right = self.make_line(item, track_info) + if right.contents != "": + lines.append((left, right)) + else: + if config["import"]["detail"]: + lines.append((left, right)) + self.print_tracklist(lines) + + # Missing and unmatched tracks. + if self.match.extra_tracks: + ui.print_( + "Missing tracks" + f" ({len(self.match.extra_tracks)}/{len(self.match.info.tracks)} -" + f" {len(self.match.extra_tracks) / len(self.match.info.tracks):.1%}):" + ) + for track_info in self.match.extra_tracks: + line = f" ! {track_info.title} (#{self.format_index(track_info)})" + if track_info.length: + line += f" ({human_seconds_short(track_info.length)})" + ui.print_(colorize("text_warning", line)) + if self.match.extra_items: + ui.print_(f"Unmatched tracks ({len(self.match.extra_items)}):") + for item in self.match.extra_items: + line = f" ! {item.title} (#{self.format_index(item)})" + if item.length: + line += f" ({human_seconds_short(item.length)})" + ui.print_(colorize("text_warning", line)) + + +class TrackChange(ChangeRepresentation): + """Track change representation, comparing item with match.""" + + match: autotag.hooks.TrackMatch + + +def show_change( + cur_artist: str, cur_album: str, match: hooks.AlbumMatch +) -> None: + """Print out a representation of the changes that will be made if an + album's tags are changed according to `match`, which must be an AlbumMatch + object. + """ + change = AlbumChange(cur_artist, cur_album, match) + + # Print the match header. + change.show_match_header() + + # Print the match details. + change.show_match_details() + + # Print the match tracks. + change.show_match_tracks() + + +def show_item_change(item: Item, match: hooks.TrackMatch) -> None: + """Print out the change that would occur by tagging `item` with the + metadata from `match`, a TrackMatch object. + """ + change = TrackChange(item.artist, item.title, match) + # Print the match header. + change.show_match_header() + # Print the match details. 
+ change.show_match_details() diff --git a/beets/ui/commands/import_/session.py b/beets/ui/commands/import_/session.py index 8c3404bea..47dd0ce6f 100644 --- a/beets/ui/commands/import_/session.py +++ b/beets/ui/commands/import_/session.py @@ -1,545 +1,545 @@ -from __future__ import annotations - -from collections import Counter -from itertools import chain - -from beets import autotag, config, importer, logging, plugins, ui -from beets.autotag import Recommendation -from beets.util import PromptChoice, displayable_path -from beets.util.color import colorize -from beets.util.units import human_bytes, human_seconds_short - -from .display import show_change, show_item_change - -# Global logger. -log = logging.getLogger("beets") - - -class TerminalImportSession(importer.ImportSession): - """An import session that runs in a terminal.""" - - def choose_match(self, task): - """Given an initial autotagging of items, go through an interactive - dance with the user to ask for a choice of metadata. Returns an - AlbumMatch object, ASIS, or SKIP. - """ - # Show what we're tagging. - ui.print_() - - path_str0 = displayable_path(task.paths, "\n") - path_str = colorize("import_path", path_str0) - items_str0 = f"({len(task.items)} items)" - items_str = colorize("import_path_items", items_str0) - ui.print_(" ".join([path_str, items_str])) - - # Let plugins display info or prompt the user before we go through the - # process of selecting candidate. - results = plugins.send( - "import_task_before_choice", session=self, task=task - ) - actions = [action for action in results if action] - - if len(actions) == 1: - return actions[0] - elif len(actions) > 1: - raise plugins.PluginConflictError( - "Only one handler for `import_task_before_choice` may return " - "an action." - ) - - # Take immediate action if appropriate. - action = _summary_judgment(task.rec) - if action == importer.Action.APPLY: - match = task.candidates[0] - show_change(task.cur_artist, task.cur_album, match) - return match - elif action is not None: - return action - - # Loop until we have a choice. - while True: - # Ask for a choice from the user. The result of - # `choose_candidate` may be an `importer.Action`, an - # `AlbumMatch` object for a specific selection, or a - # `PromptChoice`. - choices = self._get_choices(task) - choice = choose_candidate( - task.candidates, - False, - task.rec, - task.cur_artist, - task.cur_album, - itemcount=len(task.items), - choices=choices, - ) - - # Basic choices that require no more action here. - if choice in (importer.Action.SKIP, importer.Action.ASIS): - # Pass selection to main control flow. - return choice - - # Plugin-provided choices. We invoke the associated callback - # function. - elif choice in choices: - post_choice = choice.callback(self, task) - if isinstance(post_choice, importer.Action): - return post_choice - elif isinstance(post_choice, autotag.Proposal): - # Use the new candidates and continue around the loop. - task.candidates = post_choice.candidates - task.rec = post_choice.recommendation - - # Otherwise, we have a specific match selection. - else: - # We have a candidate! Finish tagging. Here, choice is an - # AlbumMatch object. - assert isinstance(choice, autotag.AlbumMatch) - return choice - - def choose_item(self, task): - """Ask the user for a choice about tagging a single item. Returns - either an action constant or a TrackMatch object. 
- """ - ui.print_() - ui.print_(displayable_path(task.item.path)) - candidates, rec = task.candidates, task.rec - - # Take immediate action if appropriate. - action = _summary_judgment(task.rec) - if action == importer.Action.APPLY: - match = candidates[0] - show_item_change(task.item, match) - return match - elif action is not None: - return action - - while True: - # Ask for a choice. - choices = self._get_choices(task) - choice = choose_candidate( - candidates, True, rec, item=task.item, choices=choices - ) - - if choice in (importer.Action.SKIP, importer.Action.ASIS): - return choice - - elif choice in choices: - post_choice = choice.callback(self, task) - if isinstance(post_choice, importer.Action): - return post_choice - elif isinstance(post_choice, autotag.Proposal): - candidates = post_choice.candidates - rec = post_choice.recommendation - - else: - # Chose a candidate. - assert isinstance(choice, autotag.TrackMatch) - return choice - - def resolve_duplicate(self, task, found_duplicates): - """Decide what to do when a new album or item seems similar to one - that's already in the library. - """ - log.warning( - "This {} is already in the library!", - ("album" if task.is_album else "item"), - ) - - if config["import"]["quiet"]: - # In quiet mode, don't prompt -- just skip. - log.info("Skipping.") - sel = "s" - else: - # Print some detail about the existing and new items so the - # user can make an informed decision. - for duplicate in found_duplicates: - ui.print_( - "Old: " - + summarize_items( - ( - list(duplicate.items()) - if task.is_album - else [duplicate] - ), - not task.is_album, - ) - ) - if config["import"]["duplicate_verbose_prompt"]: - if task.is_album: - for dup in duplicate.items(): - print(f" {dup}") - else: - print(f" {duplicate}") - - ui.print_( - "New: " - + summarize_items( - task.imported_items(), - not task.is_album, - ) - ) - if config["import"]["duplicate_verbose_prompt"]: - for item in task.imported_items(): - print(f" {item}") - - sel = ui.input_options( - ("Skip new", "Keep all", "Remove old", "Merge all") - ) - - if sel == "s": - # Skip new. - task.set_choice(importer.Action.SKIP) - elif sel == "k": - # Keep both. Do nothing; leave the choice intact. - pass - elif sel == "r": - # Remove old. - task.should_remove_duplicates = True - elif sel == "m": - task.should_merge_duplicates = True - else: - assert False - - def should_resume(self, path): - return ui.input_yn( - f"Import of the directory:\n{displayable_path(path)}\n" - "was interrupted. Resume (Y/n)?" - ) - - def _get_choices(self, task): - """Get the list of prompt choices that should be presented to the - user. This consists of both built-in choices and ones provided by - plugins. - - The `before_choose_candidate` event is sent to the plugins, with - session and task as its parameters. Plugins are responsible for - checking the right conditions and returning a list of `PromptChoice`s, - which is flattened and checked for conflicts. - - If two or more choices have the same short letter, a warning is - emitted and all but one choices are discarded, giving preference - to the default importer choices. - - Returns a list of `PromptChoice`s. - """ - # Standard, built-in choices. 
- choices = [ - PromptChoice("s", "Skip", lambda s, t: importer.Action.SKIP), - PromptChoice("u", "Use as-is", lambda s, t: importer.Action.ASIS), - ] - if task.is_album: - choices += [ - PromptChoice( - "t", "as Tracks", lambda s, t: importer.Action.TRACKS - ), - PromptChoice( - "g", "Group albums", lambda s, t: importer.Action.ALBUMS - ), - ] - choices += [ - PromptChoice("e", "Enter search", manual_search), - PromptChoice("i", "enter Id", manual_id), - PromptChoice("b", "aBort", abort_action), - ] - - # Send the before_choose_candidate event and flatten list. - extra_choices = list( - chain( - *plugins.send( - "before_choose_candidate", session=self, task=task - ) - ) - ) - - # Add a "dummy" choice for the other baked-in option, for - # duplicate checking. - all_choices = [ - PromptChoice("a", "Apply", None), - *choices, - *extra_choices, - ] - - # Check for conflicts. - short_letters = [c.short for c in all_choices] - if len(short_letters) != len(set(short_letters)): - # Duplicate short letter has been found. - duplicates = [ - i for i, count in Counter(short_letters).items() if count > 1 - ] - for short in duplicates: - # Keep the first of the choices, removing the rest. - dup_choices = [c for c in all_choices if c.short == short] - for c in dup_choices[1:]: - log.warning( - "Prompt choice '{0.long}' removed due to conflict " - "with '{1[0].long}' (short letter: '{0.short}')", - c, - dup_choices, - ) - extra_choices.remove(c) - - return choices + extra_choices - - -def summarize_items(items, singleton): - """Produces a brief summary line describing a set of items. Used for - manually resolving duplicates during import. - - `items` is a list of `Item` objects. `singleton` indicates whether - this is an album or single-item import (if the latter, them `items` - should only have one element). - """ - summary_parts = [] - if not singleton: - summary_parts.append(f"{len(items)} items") - - format_counts = {} - for item in items: - format_counts[item.format] = format_counts.get(item.format, 0) + 1 - if len(format_counts) == 1: - # A single format. - summary_parts.append(items[0].format) - else: - # Enumerate all the formats by decreasing frequencies: - for fmt, count in sorted( - format_counts.items(), - key=lambda fmt_and_count: (-fmt_and_count[1], fmt_and_count[0]), - ): - summary_parts.append(f"{fmt} {count}") - - if items: - average_bitrate = sum([item.bitrate for item in items]) / len(items) - total_duration = sum([item.length for item in items]) - total_filesize = sum([item.filesize for item in items]) - summary_parts.append(f"{int(average_bitrate / 1000)}kbps") - if items[0].format == "FLAC": - sample_bits = ( - f"{round(int(items[0].samplerate) / 1000, 1)}kHz" - f"/{items[0].bitdepth} bit" - ) - summary_parts.append(sample_bits) - summary_parts.append(human_seconds_short(total_duration)) - summary_parts.append(human_bytes(total_filesize)) - - return ", ".join(summary_parts) - - -def _summary_judgment(rec: Recommendation) -> importer.Action | None: - """Determines whether a decision should be made without even asking - the user. This occurs in quiet mode and when an action is chosen for - NONE recommendations. Return None if the user should be queried. - Otherwise, returns an action. May also print to the console if a - summary judgment is made. 
- """ - - action: importer.Action | None - if config["import"]["quiet"]: - if rec == Recommendation.strong: - return importer.Action.APPLY - else: - action = config["import"]["quiet_fallback"].as_choice( - { - "skip": importer.Action.SKIP, - "asis": importer.Action.ASIS, - } - ) - elif config["import"]["timid"]: - return None - elif rec == Recommendation.none: - action = config["import"]["none_rec_action"].as_choice( - { - "skip": importer.Action.SKIP, - "asis": importer.Action.ASIS, - "ask": None, - } - ) - else: - return None - - if action == importer.Action.SKIP: - ui.print_("Skipping.") - elif action == importer.Action.ASIS: - ui.print_("Importing as-is.") - return action - - -def choose_candidate( - candidates, - singleton, - rec, - cur_artist=None, - cur_album=None, - item=None, - itemcount=None, - choices=[], -): - """Given a sorted list of candidates, ask the user for a selection - of which candidate to use. Applies to both full albums and - singletons (tracks). Candidates are either AlbumMatch or TrackMatch - objects depending on `singleton`. for albums, `cur_artist`, - `cur_album`, and `itemcount` must be provided. For singletons, - `item` must be provided. - - `choices` is a list of `PromptChoice`s to be used in each prompt. - - Returns one of the following: - * the result of the choice, which may be SKIP or ASIS - * a candidate (an AlbumMatch/TrackMatch object) - * a chosen `PromptChoice` from `choices` - """ - # Sanity check. - if singleton: - assert item is not None - else: - assert cur_artist is not None - assert cur_album is not None - - # Build helper variables for the prompt choices. - choice_opts = tuple(c.long for c in choices) - choice_actions = {c.short: c for c in choices} - - # Zero candidates. - if not candidates: - if singleton: - ui.print_("No matching recordings found.") - else: - ui.print_(f"No matching release found for {itemcount} tracks.") - ui.print_( - "For help, see: " - "https://beets.readthedocs.org/en/latest/faq.html#nomatch" - ) - sel = ui.input_options(choice_opts) - if sel in choice_actions: - return choice_actions[sel] - else: - assert False - - # Is the change good enough? - bypass_candidates = False - if rec != Recommendation.none: - match = candidates[0] - bypass_candidates = True - - while True: - # Display and choose from candidates. - require = rec <= Recommendation.low - - if not bypass_candidates: - # Display list of candidates. - ui.print_("") - ui.print_( - f"Finding tags for {'track' if singleton else 'album'} " - f'"{item.artist if singleton else cur_artist} -' - f' {item.title if singleton else cur_album}".' - ) - - ui.print_(" Candidates:") - for i, match in enumerate(candidates): - # Index, metadata, and distance. - dist_color = match.distance.color - line_parts = [ - colorize(dist_color, f"{i + 1}."), - match.distance.string, - colorize( - dist_color if i == 0 else "text_highlight_minor", - f"{match.info.artist} - {match.info.name}", - ), - ] - ui.print_(f" {' '.join(line_parts)}") - - # Penalties. - if penalty_keys := match.distance.generic_penalty_keys: - if len(penalty_keys) > 3: - penalty_keys = [*penalty_keys[:3], "..."] - penalty_text = colorize( - "changed", f"\u2260 {', '.join(penalty_keys)}" - ) - ui.print_(f"{' ' * 13}{penalty_text}") - - # Disambiguation - if disambig := match.disambig_string: - ui.print_(f"{' ' * 13}{disambig}") - - # Ask the user for a choice. 
- sel = ui.input_options(choice_opts, numrange=(1, len(candidates))) - if sel == "m": - pass - elif sel in choice_actions: - return choice_actions[sel] - else: # Numerical selection. - match = candidates[sel - 1] - if sel != 1: - # When choosing anything but the first match, - # disable the default action. - require = True - bypass_candidates = False - - # Show what we're about to do. - if singleton: - show_item_change(item, match) - else: - show_change(cur_artist, cur_album, match) - - # Exact match => tag automatically if we're not in timid mode. - if rec == Recommendation.strong and not config["import"]["timid"]: - return match - - # Ask for confirmation. - default = config["import"]["default_action"].as_choice( - { - "apply": "a", - "skip": "s", - "asis": "u", - "none": None, - } - ) - if default is None: - require = True - # Bell ring when user interaction is needed. - if config["import"]["bell"]: - ui.print_("\a", end="") - sel = ui.input_options( - ("Apply", "More candidates", *choice_opts), - require=require, - default=default, - ) - if sel == "a": - return match - elif sel in choice_actions: - return choice_actions[sel] - - -def manual_search(session, task): - """Get a new `Proposal` using manual search criteria. - - Input either an artist and album (for full albums) or artist and - track name (for singletons) for manual search. - """ - artist = ui.input_("Artist:").strip() - name = ui.input_("Album:" if task.is_album else "Track:").strip() - - if task.is_album: - _, _, prop = autotag.tag_album(task.items, artist, name) - return prop - else: - return autotag.tag_item(task.item, artist, name) - - -def manual_id(session, task): - """Get a new `Proposal` using a manually-entered ID. - - Input an ID, either for an album ("release") or a track ("recording"). - """ - prompt = f"Enter {'release' if task.is_album else 'recording'} ID:" - search_id = ui.input_(prompt).strip() - - if task.is_album: - _, _, prop = autotag.tag_album(task.items, search_ids=search_id.split()) - return prop - else: - return autotag.tag_item(task.item, search_ids=search_id.split()) - - -def abort_action(session, task): - """A prompt choice callback that aborts the importer.""" - raise importer.ImportAbortError() +from __future__ import annotations + +from collections import Counter +from itertools import chain + +from beets import autotag, config, importer, logging, plugins, ui +from beets.autotag import Recommendation +from beets.util import PromptChoice, displayable_path +from beets.util.color import colorize +from beets.util.units import human_bytes, human_seconds_short + +from .display import show_change, show_item_change + +# Global logger. +log = logging.getLogger("beets") + + +class TerminalImportSession(importer.ImportSession): + """An import session that runs in a terminal.""" + + def choose_match(self, task): + """Given an initial autotagging of items, go through an interactive + dance with the user to ask for a choice of metadata. Returns an + AlbumMatch object, ASIS, or SKIP. + """ + # Show what we're tagging. + ui.print_() + + path_str0 = displayable_path(task.paths, "\n") + path_str = colorize("import_path", path_str0) + items_str0 = f"({len(task.items)} items)" + items_str = colorize("import_path_items", items_str0) + ui.print_(" ".join([path_str, items_str])) + + # Let plugins display info or prompt the user before we go through the + # process of selecting candidate. 
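+        # A plugin handling `import_task_before_choice` may return an
+        # action here, which short-circuits the interactive prompt below.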
+ results = plugins.send( + "import_task_before_choice", session=self, task=task + ) + actions = [action for action in results if action] + + if len(actions) == 1: + return actions[0] + elif len(actions) > 1: + raise plugins.PluginConflictError( + "Only one handler for `import_task_before_choice` may return " + "an action." + ) + + # Take immediate action if appropriate. + action = _summary_judgment(task.rec) + if action == importer.Action.APPLY: + match = task.candidates[0] + show_change(task.cur_artist, task.cur_album, match) + return match + elif action is not None: + return action + + # Loop until we have a choice. + while True: + # Ask for a choice from the user. The result of + # `choose_candidate` may be an `importer.Action`, an + # `AlbumMatch` object for a specific selection, or a + # `PromptChoice`. + choices = self._get_choices(task) + choice = choose_candidate( + task.candidates, + False, + task.rec, + task.cur_artist, + task.cur_album, + itemcount=len(task.items), + choices=choices, + ) + + # Basic choices that require no more action here. + if choice in (importer.Action.SKIP, importer.Action.ASIS): + # Pass selection to main control flow. + return choice + + # Plugin-provided choices. We invoke the associated callback + # function. + elif choice in choices: + post_choice = choice.callback(self, task) + if isinstance(post_choice, importer.Action): + return post_choice + elif isinstance(post_choice, autotag.Proposal): + # Use the new candidates and continue around the loop. + task.candidates = post_choice.candidates + task.rec = post_choice.recommendation + + # Otherwise, we have a specific match selection. + else: + # We have a candidate! Finish tagging. Here, choice is an + # AlbumMatch object. + assert isinstance(choice, autotag.AlbumMatch) + return choice + + def choose_item(self, task): + """Ask the user for a choice about tagging a single item. Returns + either an action constant or a TrackMatch object. + """ + ui.print_() + ui.print_(displayable_path(task.item.path)) + candidates, rec = task.candidates, task.rec + + # Take immediate action if appropriate. + action = _summary_judgment(task.rec) + if action == importer.Action.APPLY: + match = candidates[0] + show_item_change(task.item, match) + return match + elif action is not None: + return action + + while True: + # Ask for a choice. + choices = self._get_choices(task) + choice = choose_candidate( + candidates, True, rec, item=task.item, choices=choices + ) + + if choice in (importer.Action.SKIP, importer.Action.ASIS): + return choice + + elif choice in choices: + post_choice = choice.callback(self, task) + if isinstance(post_choice, importer.Action): + return post_choice + elif isinstance(post_choice, autotag.Proposal): + candidates = post_choice.candidates + rec = post_choice.recommendation + + else: + # Chose a candidate. + assert isinstance(choice, autotag.TrackMatch) + return choice + + def resolve_duplicate(self, task, found_duplicates): + """Decide what to do when a new album or item seems similar to one + that's already in the library. + """ + log.warning( + "This {} is already in the library!", + ("album" if task.is_album else "item"), + ) + + if config["import"]["quiet"]: + # In quiet mode, don't prompt -- just skip. + log.info("Skipping.") + sel = "s" + else: + # Print some detail about the existing and new items so the + # user can make an informed decision. 
+ for duplicate in found_duplicates: + ui.print_( + "Old: " + + summarize_items( + ( + list(duplicate.items()) + if task.is_album + else [duplicate] + ), + not task.is_album, + ) + ) + if config["import"]["duplicate_verbose_prompt"]: + if task.is_album: + for dup in duplicate.items(): + print(f" {dup}") + else: + print(f" {duplicate}") + + ui.print_( + "New: " + + summarize_items( + task.imported_items(), + not task.is_album, + ) + ) + if config["import"]["duplicate_verbose_prompt"]: + for item in task.imported_items(): + print(f" {item}") + + sel = ui.input_options( + ("Skip new", "Keep all", "Remove old", "Merge all") + ) + + if sel == "s": + # Skip new. + task.set_choice(importer.Action.SKIP) + elif sel == "k": + # Keep both. Do nothing; leave the choice intact. + pass + elif sel == "r": + # Remove old. + task.should_remove_duplicates = True + elif sel == "m": + task.should_merge_duplicates = True + else: + assert False + + def should_resume(self, path): + return ui.input_yn( + f"Import of the directory:\n{displayable_path(path)}\n" + "was interrupted. Resume (Y/n)?" + ) + + def _get_choices(self, task): + """Get the list of prompt choices that should be presented to the + user. This consists of both built-in choices and ones provided by + plugins. + + The `before_choose_candidate` event is sent to the plugins, with + session and task as its parameters. Plugins are responsible for + checking the right conditions and returning a list of `PromptChoice`s, + which is flattened and checked for conflicts. + + If two or more choices have the same short letter, a warning is + emitted and all but one choices are discarded, giving preference + to the default importer choices. + + Returns a list of `PromptChoice`s. + """ + # Standard, built-in choices. + choices = [ + PromptChoice("s", "Skip", lambda s, t: importer.Action.SKIP), + PromptChoice("u", "Use as-is", lambda s, t: importer.Action.ASIS), + ] + if task.is_album: + choices += [ + PromptChoice( + "t", "as Tracks", lambda s, t: importer.Action.TRACKS + ), + PromptChoice( + "g", "Group albums", lambda s, t: importer.Action.ALBUMS + ), + ] + choices += [ + PromptChoice("e", "Enter search", manual_search), + PromptChoice("i", "enter Id", manual_id), + PromptChoice("b", "aBort", abort_action), + ] + + # Send the before_choose_candidate event and flatten list. + extra_choices = list( + chain( + *plugins.send( + "before_choose_candidate", session=self, task=task + ) + ) + ) + + # Add a "dummy" choice for the other baked-in option, for + # duplicate checking. + all_choices = [ + PromptChoice("a", "Apply", None), + *choices, + *extra_choices, + ] + + # Check for conflicts. + short_letters = [c.short for c in all_choices] + if len(short_letters) != len(set(short_letters)): + # Duplicate short letter has been found. + duplicates = [ + i for i, count in Counter(short_letters).items() if count > 1 + ] + for short in duplicates: + # Keep the first of the choices, removing the rest. + dup_choices = [c for c in all_choices if c.short == short] + for c in dup_choices[1:]: + log.warning( + "Prompt choice '{0.long}' removed due to conflict " + "with '{1[0].long}' (short letter: '{0.short}')", + c, + dup_choices, + ) + extra_choices.remove(c) + + return choices + extra_choices + + +def summarize_items(items, singleton): + """Produces a brief summary line describing a set of items. Used for + manually resolving duplicates during import. + + `items` is a list of `Item` objects. 
`singleton` indicates whether + this is an album or single-item import (if the latter, them `items` + should only have one element). + """ + summary_parts = [] + if not singleton: + summary_parts.append(f"{len(items)} items") + + format_counts = {} + for item in items: + format_counts[item.format] = format_counts.get(item.format, 0) + 1 + if len(format_counts) == 1: + # A single format. + summary_parts.append(items[0].format) + else: + # Enumerate all the formats by decreasing frequencies: + for fmt, count in sorted( + format_counts.items(), + key=lambda fmt_and_count: (-fmt_and_count[1], fmt_and_count[0]), + ): + summary_parts.append(f"{fmt} {count}") + + if items: + average_bitrate = sum([item.bitrate for item in items]) / len(items) + total_duration = sum([item.length for item in items]) + total_filesize = sum([item.filesize for item in items]) + summary_parts.append(f"{int(average_bitrate / 1000)}kbps") + if items[0].format == "FLAC": + sample_bits = ( + f"{round(int(items[0].samplerate) / 1000, 1)}kHz" + f"/{items[0].bitdepth} bit" + ) + summary_parts.append(sample_bits) + summary_parts.append(human_seconds_short(total_duration)) + summary_parts.append(human_bytes(total_filesize)) + + return ", ".join(summary_parts) + + +def _summary_judgment(rec: Recommendation) -> importer.Action | None: + """Determines whether a decision should be made without even asking + the user. This occurs in quiet mode and when an action is chosen for + NONE recommendations. Return None if the user should be queried. + Otherwise, returns an action. May also print to the console if a + summary judgment is made. + """ + + action: importer.Action | None + if config["import"]["quiet"]: + if rec == Recommendation.strong: + return importer.Action.APPLY + else: + action = config["import"]["quiet_fallback"].as_choice( + { + "skip": importer.Action.SKIP, + "asis": importer.Action.ASIS, + } + ) + elif config["import"]["timid"]: + return None + elif rec == Recommendation.none: + action = config["import"]["none_rec_action"].as_choice( + { + "skip": importer.Action.SKIP, + "asis": importer.Action.ASIS, + "ask": None, + } + ) + else: + return None + + if action == importer.Action.SKIP: + ui.print_("Skipping.") + elif action == importer.Action.ASIS: + ui.print_("Importing as-is.") + return action + + +def choose_candidate( + candidates, + singleton, + rec, + cur_artist=None, + cur_album=None, + item=None, + itemcount=None, + choices=[], +): + """Given a sorted list of candidates, ask the user for a selection + of which candidate to use. Applies to both full albums and + singletons (tracks). Candidates are either AlbumMatch or TrackMatch + objects depending on `singleton`. for albums, `cur_artist`, + `cur_album`, and `itemcount` must be provided. For singletons, + `item` must be provided. + + `choices` is a list of `PromptChoice`s to be used in each prompt. + + Returns one of the following: + * the result of the choice, which may be SKIP or ASIS + * a candidate (an AlbumMatch/TrackMatch object) + * a chosen `PromptChoice` from `choices` + """ + # Sanity check. + if singleton: + assert item is not None + else: + assert cur_artist is not None + assert cur_album is not None + + # Build helper variables for the prompt choices. + choice_opts = tuple(c.long for c in choices) + choice_actions = {c.short: c for c in choices} + + # Zero candidates. 
+ if not candidates: + if singleton: + ui.print_("No matching recordings found.") + else: + ui.print_(f"No matching release found for {itemcount} tracks.") + ui.print_( + "For help, see: " + "https://beets.readthedocs.org/en/latest/faq.html#nomatch" + ) + sel = ui.input_options(choice_opts) + if sel in choice_actions: + return choice_actions[sel] + else: + assert False + + # Is the change good enough? + bypass_candidates = False + if rec != Recommendation.none: + match = candidates[0] + bypass_candidates = True + + while True: + # Display and choose from candidates. + require = rec <= Recommendation.low + + if not bypass_candidates: + # Display list of candidates. + ui.print_("") + ui.print_( + f"Finding tags for {'track' if singleton else 'album'} " + f'"{item.artist if singleton else cur_artist} -' + f' {item.title if singleton else cur_album}".' + ) + + ui.print_(" Candidates:") + for i, match in enumerate(candidates): + # Index, metadata, and distance. + dist_color = match.distance.color + line_parts = [ + colorize(dist_color, f"{i + 1}."), + match.distance.string, + colorize( + dist_color if i == 0 else "text_highlight_minor", + f"{match.info.artist} - {match.info.name}", + ), + ] + ui.print_(f" {' '.join(line_parts)}") + + # Penalties. + if penalty_keys := match.distance.generic_penalty_keys: + if len(penalty_keys) > 3: + penalty_keys = [*penalty_keys[:3], "..."] + penalty_text = colorize( + "changed", f"\u2260 {', '.join(penalty_keys)}" + ) + ui.print_(f"{' ' * 13}{penalty_text}") + + # Disambiguation + if disambig := match.disambig_string: + ui.print_(f"{' ' * 13}{disambig}") + + # Ask the user for a choice. + sel = ui.input_options(choice_opts, numrange=(1, len(candidates))) + if sel == "m": + pass + elif sel in choice_actions: + return choice_actions[sel] + else: # Numerical selection. + match = candidates[sel - 1] + if sel != 1: + # When choosing anything but the first match, + # disable the default action. + require = True + bypass_candidates = False + + # Show what we're about to do. + if singleton: + show_item_change(item, match) + else: + show_change(cur_artist, cur_album, match) + + # Exact match => tag automatically if we're not in timid mode. + if rec == Recommendation.strong and not config["import"]["timid"]: + return match + + # Ask for confirmation. + default = config["import"]["default_action"].as_choice( + { + "apply": "a", + "skip": "s", + "asis": "u", + "none": None, + } + ) + if default is None: + require = True + # Bell ring when user interaction is needed. + if config["import"]["bell"]: + ui.print_("\a", end="") + sel = ui.input_options( + ("Apply", "More candidates", *choice_opts), + require=require, + default=default, + ) + if sel == "a": + return match + elif sel in choice_actions: + return choice_actions[sel] + + +def manual_search(session, task): + """Get a new `Proposal` using manual search criteria. + + Input either an artist and album (for full albums) or artist and + track name (for singletons) for manual search. + """ + artist = ui.input_("Artist:").strip() + name = ui.input_("Album:" if task.is_album else "Track:").strip() + + if task.is_album: + _, _, prop = autotag.tag_album(task.items, artist, name) + return prop + else: + return autotag.tag_item(task.item, artist, name) + + +def manual_id(session, task): + """Get a new `Proposal` using a manually-entered ID. + + Input an ID, either for an album ("release") or a track ("recording"). 
+ """ + prompt = f"Enter {'release' if task.is_album else 'recording'} ID:" + search_id = ui.input_(prompt).strip() + + if task.is_album: + _, _, prop = autotag.tag_album(task.items, search_ids=search_id.split()) + return prop + else: + return autotag.tag_item(task.item, search_ids=search_id.split()) + + +def abort_action(session, task): + """A prompt choice callback that aborts the importer.""" + raise importer.ImportAbortError() diff --git a/beets/util/color.py b/beets/util/color.py index 8e83ba7cb..7f264a88b 100644 --- a/beets/util/color.py +++ b/beets/util/color.py @@ -1,215 +1,215 @@ -from __future__ import annotations - -import os -import re -from functools import cache -from typing import Literal - -import confuse - -from beets import config - -# ANSI terminal colorization code heavily inspired by pygments: -# https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/console.py -# (pygments is by Tim Hatch, Armin Ronacher, et al.) -COLOR_ESCAPE = "\x1b" -LEGACY_COLORS = { - "black": ["black"], - "darkred": ["red"], - "darkgreen": ["green"], - "brown": ["yellow"], - "darkyellow": ["yellow"], - "darkblue": ["blue"], - "purple": ["magenta"], - "darkmagenta": ["magenta"], - "teal": ["cyan"], - "darkcyan": ["cyan"], - "lightgray": ["white"], - "darkgray": ["bold", "black"], - "red": ["bold", "red"], - "green": ["bold", "green"], - "yellow": ["bold", "yellow"], - "blue": ["bold", "blue"], - "fuchsia": ["bold", "magenta"], - "magenta": ["bold", "magenta"], - "turquoise": ["bold", "cyan"], - "cyan": ["bold", "cyan"], - "white": ["bold", "white"], -} -# All ANSI Colors. -CODE_BY_COLOR = { - # Styles. - "normal": 0, - "bold": 1, - "faint": 2, - "italic": 3, - "underline": 4, - "blink_slow": 5, - "blink_rapid": 6, - "inverse": 7, - "conceal": 8, - "crossed_out": 9, - # Text colors. - "black": 30, - "red": 31, - "green": 32, - "yellow": 33, - "blue": 34, - "magenta": 35, - "cyan": 36, - "white": 37, - "bright_black": 90, - "bright_red": 91, - "bright_green": 92, - "bright_yellow": 93, - "bright_blue": 94, - "bright_magenta": 95, - "bright_cyan": 96, - "bright_white": 97, - # Background colors. - "bg_black": 40, - "bg_red": 41, - "bg_green": 42, - "bg_yellow": 43, - "bg_blue": 44, - "bg_magenta": 45, - "bg_cyan": 46, - "bg_white": 47, - "bg_bright_black": 100, - "bg_bright_red": 101, - "bg_bright_green": 102, - "bg_bright_yellow": 103, - "bg_bright_blue": 104, - "bg_bright_magenta": 105, - "bg_bright_cyan": 106, - "bg_bright_white": 107, -} -RESET_COLOR = f"{COLOR_ESCAPE}[39;49;00m" -# Precompile common ANSI-escape regex patterns -ANSI_CODE_REGEX = re.compile(rf"({COLOR_ESCAPE}\[[;0-9]*m)") -ESC_TEXT_REGEX = re.compile( - rf"""(?P[^{COLOR_ESCAPE}]*) - (?P(?:{ANSI_CODE_REGEX.pattern})+) - (?P[^{COLOR_ESCAPE}]+)(?P{re.escape(RESET_COLOR)}) - (?P[^{COLOR_ESCAPE}]*)""", - re.VERBOSE, -) -ColorName = Literal[ - "text_success", - "text_warning", - "text_error", - "text_highlight", - "text_highlight_minor", - "action_default", - "action", - # New Colors - "text_faint", - "import_path", - "import_path_items", - "action_description", - "changed", - "text_diff_added", - "text_diff_removed", -] - - -@cache -def get_color_config() -> dict[ColorName, str]: - """Parse and validate color configuration, converting names to ANSI codes. - - Processes the UI color configuration, handling both new list format and - legacy single-color format. Validates all color names against known codes - and raises an error for any invalid entries. 
- """ - template_dict: dict[ColorName, confuse.OneOf[str | list[str]]] = { - n: confuse.OneOf( - [ - confuse.Choice(sorted(LEGACY_COLORS)), - confuse.Sequence(confuse.Choice(sorted(CODE_BY_COLOR))), - ] - ) - for n in ColorName.__args__ # type: ignore[attr-defined] - } - template = confuse.MappingTemplate(template_dict) - colors_by_color_name = { - k: (v if isinstance(v, list) else LEGACY_COLORS.get(v, [v])) - for k, v in config["ui"]["colors"].get(template).items() - } - - return { - n: ";".join(str(CODE_BY_COLOR[c]) for c in colors) - for n, colors in colors_by_color_name.items() - } - - -def _colorize(color_name: ColorName, text: str) -> str: - """Apply ANSI color formatting to text based on configuration settings.""" - color_code = get_color_config()[color_name] - return f"{COLOR_ESCAPE}[{color_code}m{text}{RESET_COLOR}" - - -def colorize(color_name: ColorName, text: str) -> str: - """Colorize text when color output is enabled.""" - if config["ui"]["color"] and "NO_COLOR" not in os.environ: - return _colorize(color_name, text) - - return text - - -def uncolorize(colored_text: str) -> str: - """Remove colors from a string.""" - # Define a regular expression to match ANSI codes. - # See: http://stackoverflow.com/a/2187024/1382707 - # Explanation of regular expression: - # \x1b - matches ESC character - # \[ - matches opening square bracket - # [;\d]* - matches a sequence consisting of one or more digits or - # semicola - # [A-Za-z] - matches a letter - return ANSI_CODE_REGEX.sub("", colored_text) - - -def color_split(colored_text: str, index: int) -> tuple[str, str]: - length = 0 - pre_split = "" - post_split = "" - found_color_code = None - found_split = False - for part in ANSI_CODE_REGEX.split(colored_text): - # Count how many real letters we have passed - length += color_len(part) - if found_split: - post_split += part - else: - if ANSI_CODE_REGEX.match(part): - # This is a color code - if part == RESET_COLOR: - found_color_code = None - else: - found_color_code = part - pre_split += part - else: - if index < length: - # Found part with our split in. - split_index = index - (length - color_len(part)) - found_split = True - if found_color_code: - pre_split += f"{part[:split_index]}{RESET_COLOR}" - post_split += f"{found_color_code}{part[split_index:]}" - else: - pre_split += part[:split_index] - post_split += part[split_index:] - else: - # Not found, add this part to the pre split - pre_split += part - return pre_split, post_split - - -def color_len(colored_text: str) -> int: - """Measure the length of a string while excluding ANSI codes from the - measurement. The standard `len(my_string)` method also counts ANSI codes - to the string length, which is counterproductive when layouting a - Terminal interface. - """ - # Return the length of the uncolored string. - return len(uncolorize(colored_text)) +from __future__ import annotations + +import os +import re +from functools import cache +from typing import Literal + +import confuse + +from beets import config + +# ANSI terminal colorization code heavily inspired by pygments: +# https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/console.py +# (pygments is by Tim Hatch, Armin Ronacher, et al.) 
+COLOR_ESCAPE = "\x1b" +LEGACY_COLORS = { + "black": ["black"], + "darkred": ["red"], + "darkgreen": ["green"], + "brown": ["yellow"], + "darkyellow": ["yellow"], + "darkblue": ["blue"], + "purple": ["magenta"], + "darkmagenta": ["magenta"], + "teal": ["cyan"], + "darkcyan": ["cyan"], + "lightgray": ["white"], + "darkgray": ["bold", "black"], + "red": ["bold", "red"], + "green": ["bold", "green"], + "yellow": ["bold", "yellow"], + "blue": ["bold", "blue"], + "fuchsia": ["bold", "magenta"], + "magenta": ["bold", "magenta"], + "turquoise": ["bold", "cyan"], + "cyan": ["bold", "cyan"], + "white": ["bold", "white"], +} +# All ANSI Colors. +CODE_BY_COLOR = { + # Styles. + "normal": 0, + "bold": 1, + "faint": 2, + "italic": 3, + "underline": 4, + "blink_slow": 5, + "blink_rapid": 6, + "inverse": 7, + "conceal": 8, + "crossed_out": 9, + # Text colors. + "black": 30, + "red": 31, + "green": 32, + "yellow": 33, + "blue": 34, + "magenta": 35, + "cyan": 36, + "white": 37, + "bright_black": 90, + "bright_red": 91, + "bright_green": 92, + "bright_yellow": 93, + "bright_blue": 94, + "bright_magenta": 95, + "bright_cyan": 96, + "bright_white": 97, + # Background colors. + "bg_black": 40, + "bg_red": 41, + "bg_green": 42, + "bg_yellow": 43, + "bg_blue": 44, + "bg_magenta": 45, + "bg_cyan": 46, + "bg_white": 47, + "bg_bright_black": 100, + "bg_bright_red": 101, + "bg_bright_green": 102, + "bg_bright_yellow": 103, + "bg_bright_blue": 104, + "bg_bright_magenta": 105, + "bg_bright_cyan": 106, + "bg_bright_white": 107, +} +RESET_COLOR = f"{COLOR_ESCAPE}[39;49;00m" +# Precompile common ANSI-escape regex patterns +ANSI_CODE_REGEX = re.compile(rf"({COLOR_ESCAPE}\[[;0-9]*m)") +ESC_TEXT_REGEX = re.compile( + rf"""(?P[^{COLOR_ESCAPE}]*) + (?P(?:{ANSI_CODE_REGEX.pattern})+) + (?P[^{COLOR_ESCAPE}]+)(?P{re.escape(RESET_COLOR)}) + (?P[^{COLOR_ESCAPE}]*)""", + re.VERBOSE, +) +ColorName = Literal[ + "text_success", + "text_warning", + "text_error", + "text_highlight", + "text_highlight_minor", + "action_default", + "action", + # New Colors + "text_faint", + "import_path", + "import_path_items", + "action_description", + "changed", + "text_diff_added", + "text_diff_removed", +] + + +@cache +def get_color_config() -> dict[ColorName, str]: + """Parse and validate color configuration, converting names to ANSI codes. + + Processes the UI color configuration, handling both new list format and + legacy single-color format. Validates all color names against known codes + and raises an error for any invalid entries. 
+ """ + template_dict: dict[ColorName, confuse.OneOf[str | list[str]]] = { + n: confuse.OneOf( + [ + confuse.Choice(sorted(LEGACY_COLORS)), + confuse.Sequence(confuse.Choice(sorted(CODE_BY_COLOR))), + ] + ) + for n in ColorName.__args__ # type: ignore[attr-defined] + } + template = confuse.MappingTemplate(template_dict) + colors_by_color_name = { + k: (v if isinstance(v, list) else LEGACY_COLORS.get(v, [v])) + for k, v in config["ui"]["colors"].get(template).items() + } + + return { + n: ";".join(str(CODE_BY_COLOR[c]) for c in colors) + for n, colors in colors_by_color_name.items() + } + + +def _colorize(color_name: ColorName, text: str) -> str: + """Apply ANSI color formatting to text based on configuration settings.""" + color_code = get_color_config()[color_name] + return f"{COLOR_ESCAPE}[{color_code}m{text}{RESET_COLOR}" + + +def colorize(color_name: ColorName, text: str) -> str: + """Colorize text when color output is enabled.""" + if config["ui"]["color"] and "NO_COLOR" not in os.environ: + return _colorize(color_name, text) + + return text + + +def uncolorize(colored_text: str) -> str: + """Remove colors from a string.""" + # Define a regular expression to match ANSI codes. + # See: http://stackoverflow.com/a/2187024/1382707 + # Explanation of regular expression: + # \x1b - matches ESC character + # \[ - matches opening square bracket + # [;\d]* - matches a sequence consisting of one or more digits or + # semicola + # [A-Za-z] - matches a letter + return ANSI_CODE_REGEX.sub("", colored_text) + + +def color_split(colored_text: str, index: int) -> tuple[str, str]: + length = 0 + pre_split = "" + post_split = "" + found_color_code = None + found_split = False + for part in ANSI_CODE_REGEX.split(colored_text): + # Count how many real letters we have passed + length += color_len(part) + if found_split: + post_split += part + else: + if ANSI_CODE_REGEX.match(part): + # This is a color code + if part == RESET_COLOR: + found_color_code = None + else: + found_color_code = part + pre_split += part + else: + if index < length: + # Found part with our split in. + split_index = index - (length - color_len(part)) + found_split = True + if found_color_code: + pre_split += f"{part[:split_index]}{RESET_COLOR}" + post_split += f"{found_color_code}{part[split_index:]}" + else: + pre_split += part[:split_index] + post_split += part[split_index:] + else: + # Not found, add this part to the pre split + pre_split += part + return pre_split, post_split + + +def color_len(colored_text: str) -> int: + """Measure the length of a string while excluding ANSI codes from the + measurement. The standard `len(my_string)` method also counts ANSI codes + to the string length, which is counterproductive when layouting a + Terminal interface. + """ + # Return the length of the uncolored string. + return len(uncolorize(colored_text)) diff --git a/beetsplug/bpsync.py b/beetsplug/bpsync.py index db107d0f7..47595f1b9 100644 --- a/beetsplug/bpsync.py +++ b/beetsplug/bpsync.py @@ -1,187 +1,187 @@ -# This file is part of beets. -# Copyright 2019, Rahul Ahuja. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Update library's tags using Beatport.""" - -from beets import library, ui, util -from beets.autotag.distance import Distance -from beets.autotag.hooks import AlbumMatch, TrackMatch -from beets.plugins import BeetsPlugin, apply_item_changes -from beets.util.deprecation import deprecate_for_user - -from .beatport import BeatportPlugin - - -class BPSyncPlugin(BeetsPlugin): - def __init__(self): - super().__init__() - deprecate_for_user(self._log, "The 'bpsync' plugin") - self.beatport_plugin = BeatportPlugin() - self.beatport_plugin.setup() - - def commands(self): - cmd = ui.Subcommand("bpsync", help="update metadata from Beatport") - cmd.parser.add_option( - "-p", - "--pretend", - action="store_true", - help="show all changes but do nothing", - ) - cmd.parser.add_option( - "-m", - "--move", - action="store_true", - dest="move", - help="move files in the library directory", - ) - cmd.parser.add_option( - "-M", - "--nomove", - action="store_false", - dest="move", - help="don't move files in library", - ) - cmd.parser.add_option( - "-W", - "--nowrite", - action="store_false", - default=None, - dest="write", - help="don't write updated metadata to files", - ) - cmd.parser.add_format_option() - cmd.func = self.func - return [cmd] - - def func(self, lib, opts, args): - """Command handler for the bpsync function.""" - move = ui.should_move(opts.move) - pretend = opts.pretend - write = ui.should_write(opts.write) - - self.singletons(lib, args, move, pretend, write) - self.albums(lib, args, move, pretend, write) - - def singletons(self, lib, query, move, pretend, write): - """Retrieve and apply info from the autotagger for items matched by - query. - """ - for item in lib.items([*query, "singleton:true"]): - if not item.mb_trackid: - self._log.info( - "Skipping singleton with no mb_trackid: {}", item - ) - continue - - if not self.is_beatport_track(item): - self._log.info( - "Skipping non-{.beatport_plugin.data_source} singleton: {}", - self, - item, - ) - continue - - # Apply. 
- trackinfo = self.beatport_plugin.track_for_id(item.mb_trackid) - with lib.transaction(): - TrackMatch(Distance(), trackinfo, item).apply_metadata() - apply_item_changes(lib, item, move, pretend, write) - - @staticmethod - def is_beatport_track(item): - return ( - item.get("data_source") == BeatportPlugin.data_source - and item.mb_trackid.isnumeric() - ) - - def get_album_tracks(self, album): - if not album.mb_albumid: - self._log.info("Skipping album with no mb_albumid: {}", album) - return False - if not album.mb_albumid.isnumeric(): - self._log.info( - "Skipping album with invalid {.beatport_plugin.data_source} ID: {}", - self, - album, - ) - return False - items = list(album.items()) - if album.get("data_source") == self.beatport_plugin.data_source: - return items - if not all(self.is_beatport_track(item) for item in items): - self._log.info( - "Skipping non-{.beatport_plugin.data_source} release: {}", - self, - album, - ) - return False - return items - - def albums(self, lib, query, move, pretend, write): - """Retrieve and apply info from the autotagger for albums matched by - query and their items. - """ - # Process matching albums. - for album in lib.albums(query): - # Do we have a valid Beatport album? - items = self.get_album_tracks(album) - if not items: - continue - - # Get the Beatport album information. - albuminfo = self.beatport_plugin.album_for_id(album.mb_albumid) - if not albuminfo: - self._log.info( - "Release ID {0.mb_albumid} not found for album {0}", album - ) - continue - - beatport_trackid_to_trackinfo = { - track.track_id: track for track in albuminfo.tracks - } - library_trackid_to_item = { - int(item.mb_trackid): item for item in items - } - item_info_pairs = [ - (item, beatport_trackid_to_trackinfo[track_id]) - for track_id, item in library_trackid_to_item.items() - ] - - self._log.info("applying changes to {}", album) - with lib.transaction(): - AlbumMatch( - Distance(), albuminfo, dict(item_info_pairs) - ).apply_metadata() - changed = False - # Find any changed item to apply Beatport changes to album. - any_changed_item = items[0] - for item in items: - item_changed = ui.show_model_changes(item) - changed |= item_changed - if item_changed: - any_changed_item = item - apply_item_changes(lib, item, move, pretend, write) - - if pretend or not changed: - continue - - # Update album structure to reflect an item in it. - for key in library.Album.item_keys: - album[key] = any_changed_item[key] - album.store() - - # Move album art (and any inconsistent items). - if move and lib.directory in util.ancestry(items[0].path): - self._log.debug("moving album {}", album) - album.move() +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Update library's tags using Beatport.""" + +from beets import library, ui, util +from beets.autotag.distance import Distance +from beets.autotag.hooks import AlbumMatch, TrackMatch +from beets.plugins import BeetsPlugin, apply_item_changes +from beets.util.deprecation import deprecate_for_user + +from .beatport import BeatportPlugin + + +class BPSyncPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + deprecate_for_user(self._log, "The 'bpsync' plugin") + self.beatport_plugin = BeatportPlugin() + self.beatport_plugin.setup() + + def commands(self): + cmd = ui.Subcommand("bpsync", help="update metadata from Beatport") + cmd.parser.add_option( + "-p", + "--pretend", + action="store_true", + help="show all changes but do nothing", + ) + cmd.parser.add_option( + "-m", + "--move", + action="store_true", + dest="move", + help="move files in the library directory", + ) + cmd.parser.add_option( + "-M", + "--nomove", + action="store_false", + dest="move", + help="don't move files in library", + ) + cmd.parser.add_option( + "-W", + "--nowrite", + action="store_false", + default=None, + dest="write", + help="don't write updated metadata to files", + ) + cmd.parser.add_format_option() + cmd.func = self.func + return [cmd] + + def func(self, lib, opts, args): + """Command handler for the bpsync function.""" + move = ui.should_move(opts.move) + pretend = opts.pretend + write = ui.should_write(opts.write) + + self.singletons(lib, args, move, pretend, write) + self.albums(lib, args, move, pretend, write) + + def singletons(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for items matched by + query. + """ + for item in lib.items([*query, "singleton:true"]): + if not item.mb_trackid: + self._log.info( + "Skipping singleton with no mb_trackid: {}", item + ) + continue + + if not self.is_beatport_track(item): + self._log.info( + "Skipping non-{.beatport_plugin.data_source} singleton: {}", + self, + item, + ) + continue + + # Apply. + trackinfo = self.beatport_plugin.track_for_id(item.mb_trackid) + with lib.transaction(): + TrackMatch(Distance(), trackinfo, item).apply_metadata() + apply_item_changes(lib, item, move, pretend, write) + + @staticmethod + def is_beatport_track(item): + return ( + item.get("data_source") == BeatportPlugin.data_source + and item.mb_trackid.isnumeric() + ) + + def get_album_tracks(self, album): + if not album.mb_albumid: + self._log.info("Skipping album with no mb_albumid: {}", album) + return False + if not album.mb_albumid.isnumeric(): + self._log.info( + "Skipping album with invalid {.beatport_plugin.data_source} ID: {}", + self, + album, + ) + return False + items = list(album.items()) + if album.get("data_source") == self.beatport_plugin.data_source: + return items + if not all(self.is_beatport_track(item) for item in items): + self._log.info( + "Skipping non-{.beatport_plugin.data_source} release: {}", + self, + album, + ) + return False + return items + + def albums(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for albums matched by + query and their items. + """ + # Process matching albums. + for album in lib.albums(query): + # Do we have a valid Beatport album? + items = self.get_album_tracks(album) + if not items: + continue + + # Get the Beatport album information. 
+ albuminfo = self.beatport_plugin.album_for_id(album.mb_albumid) + if not albuminfo: + self._log.info( + "Release ID {0.mb_albumid} not found for album {0}", album + ) + continue + + beatport_trackid_to_trackinfo = { + track.track_id: track for track in albuminfo.tracks + } + library_trackid_to_item = { + int(item.mb_trackid): item for item in items + } + item_info_pairs = [ + (item, beatport_trackid_to_trackinfo[track_id]) + for track_id, item in library_trackid_to_item.items() + ] + + self._log.info("applying changes to {}", album) + with lib.transaction(): + AlbumMatch( + Distance(), albuminfo, dict(item_info_pairs) + ).apply_metadata() + changed = False + # Find any changed item to apply Beatport changes to album. + any_changed_item = items[0] + for item in items: + item_changed = ui.show_model_changes(item) + changed |= item_changed + if item_changed: + any_changed_item = item + apply_item_changes(lib, item, move, pretend, write) + + if pretend or not changed: + continue + + # Update album structure to reflect an item in it. + for key in library.Album.item_keys: + album[key] = any_changed_item[key] + album.store() + + # Move album art (and any inconsistent items). + if move and lib.directory in util.ancestry(items[0].path): + self._log.debug("moving album {}", album) + album.move() diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index e0156aec6..0d4874bd6 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -1,1142 +1,1142 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
- -"""Fetches, embeds, and displays lyrics.""" - -from __future__ import annotations - -import itertools -import math -import re -import textwrap -from contextlib import contextmanager, suppress -from dataclasses import dataclass -from functools import cached_property, partial, total_ordering -from html import unescape -from itertools import filterfalse, groupby -from pathlib import Path -from typing import TYPE_CHECKING, ClassVar, NamedTuple -from urllib.parse import quote, quote_plus, urlencode, urlparse - -import requests -from bs4 import BeautifulSoup -from unidecode import unidecode - -from beets import plugins, ui -from beets.autotag.distance import string_dist -from beets.dbcore import types -from beets.dbcore.query import FalseQuery -from beets.library import Item, parse_query_string -from beets.util.config import sanitize_choices -from beets.util.lyrics import INSTRUMENTAL_LYRICS, Lyrics - -from ._utils.requests import HTTPNotFoundError, RequestHandler - -if TYPE_CHECKING: - from collections.abc import Iterable, Iterator - - import confuse - - from beets.importer import ImportTask - from beets.library import Library - from beets.logging import BeetsLogger as Logger - - from ._typing import ( - GeniusAPI, - GoogleCustomSearchAPI, - JSONDict, - LRCLibAPI, - TranslatorAPI, - ) - - -class CaptchaError(requests.exceptions.HTTPError): - def __init__(self, *args, **kwargs) -> None: - super().__init__("Captcha is required", *args, **kwargs) - - -class GeniusHTTPError(requests.exceptions.HTTPError): - pass - - -# Utilities. - - -def search_pairs(item): - """Yield a pairs of artists and titles to search for. - - The first item in the pair is the name of the artist, the second - item is a list of song names. - - In addition to the artist and title obtained from the `item` the - method tries to strip extra information like paranthesized suffixes - and featured artists from the strings and add them as candidates. - The artist sort name is added as a fallback candidate to help in - cases where artist name includes special characters or is in a - non-latin script. - The method also tries to split multiple titles separated with `/`. - """ - - def generate_alternatives(string, patterns): - """Generate string alternatives by extracting first matching group for - each given pattern. - """ - alternatives = [string] - for pattern in patterns: - match = re.search(pattern, string, re.IGNORECASE) - if match: - alternatives.append(match.group(1)) - return alternatives - - title, artist, artist_sort = ( - item.title.strip(), - item.artist.strip(), - item.artist_sort.strip(), - ) - if not title or not artist: - return () - - patterns = [ - # Remove any featuring artists from the artists name - rf"(.*?) {plugins.feat_tokens()}" - ] - - # Skip various artists - artists = [] - lower_artist = artist.lower() - if "various" not in lower_artist: - artists.extend(generate_alternatives(artist, patterns)) - # Use the artist_sort as fallback only if it differs from artist to avoid - # repeated remote requests with the same search terms - artist_sort_lower = artist_sort.lower() - if ( - artist_sort - and lower_artist != artist_sort_lower - and "various" not in artist_sort_lower - ): - artists.append(artist_sort) - - patterns = [ - # Remove a parenthesized suffix from a title string. Common - # examples include (live), (remix), and (acoustic). - r"(.+?)\s+[(].*[)]$", - # Remove any featuring artists from the title - rf"(.*?) 
{plugins.feat_tokens(for_artist=False)}", - # Remove part of title after colon ':' for songs with subtitles - r"(.+?)\s*:.*", - ] - titles = generate_alternatives(title, patterns) - - # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) - # and each of them. - multi_titles = [] - for title in titles: - multi_titles.append([title]) - if " / " in title: - multi_titles.append([x.strip() for x in title.split(" / ")]) - - return itertools.product(artists, multi_titles) - - -def slug(text: str) -> str: - """Make a URL-safe, human-readable version of the given text - - This will do the following: - - 1. decode unicode characters into ASCII - 2. shift everything to lowercase - 3. strip whitespace - 4. replace other non-word characters with dashes - 5. strip extra dashes - """ - return re.sub(r"\W+", "-", unidecode(text).lower().strip()).strip("-") - - -class LyricsRequestHandler(RequestHandler): - _log: Logger - - def status_to_error(self, code: int) -> type[requests.HTTPError] | None: - if err := super().status_to_error(code): - return err - - if 300 <= code < 400: - return CaptchaError - - return None - - def debug(self, message: str, *args) -> None: - """Log a debug message with the class name.""" - self._log.debug(f"{self.__class__.__name__}: {message}", *args) - - def info(self, message: str, *args) -> None: - """Log an info message with the class name.""" - self._log.info(f"{self.__class__.__name__}: {message}", *args) - - def warn(self, message: str, *args) -> None: - """Log warning with the class name.""" - self._log.warning(f"{self.__class__.__name__}: {message}", *args) - - @staticmethod - def format_url(url: str, params: JSONDict | None) -> str: - if not params: - return url - - return f"{url}?{urlencode(params)}" - - def get_text( - self, url: str, params: JSONDict | None = None, **kwargs - ) -> str: - """Return text / HTML data from the given URL. - - Set the encoding to None to let requests handle it because some sites - set it incorrectly. 
- """ - url = self.format_url(url, params) - self.debug("Fetching HTML from {}", url) - r = self.get(url, **kwargs) - r.encoding = None - return r.text - - def get_json(self, url: str, params: JSONDict | None = None, **kwargs): - """Return JSON data from the given URL.""" - url = self.format_url(url, params) - self.debug("Fetching JSON from {}", url) - return super().get_json(url, **kwargs) - - def post_json(self, url: str, params: JSONDict | None = None, **kwargs): - """Send POST request and return JSON response.""" - url = self.format_url(url, params) - self.debug("Posting JSON to {}", url) - return self.request("post", url, **kwargs).json() - - @contextmanager - def handle_request(self) -> Iterator[None]: - try: - yield - except requests.JSONDecodeError: - self.warn("Could not decode response JSON data") - except requests.RequestException as exc: - self.warn("Request error: {}", exc) - - -class BackendClass(type): - @property - def name(cls) -> str: - """Return lowercase name of the backend class.""" - return cls.__name__.lower() - - -class Backend(LyricsRequestHandler, metaclass=BackendClass): - config: confuse.Subview - - def __init__(self, config: confuse.Subview, log: Logger) -> None: - self._log = log - self.config = config - - def fetch( - self, artist: str, title: str, album: str, length: int - ) -> Lyrics | None: - """Return lyrics for a song, or ``None`` when no match is found.""" - raise NotImplementedError - - -@dataclass -@total_ordering -class LRCLyrics: - """Hold LRCLib candidate data and ranking helpers for matching.""" - - #: Percentage tolerance for max duration difference between lyrics and item. - DURATION_DIFF_TOLERANCE = 0.05 - - target_duration: float - id: int - duration: float - instrumental: bool - plain: str - synced: str | None - - def __le__(self, other: LRCLyrics) -> bool: - """Compare two lyrics items by their score.""" - return self.dist < other.dist - - @classmethod - def verify_synced_lyrics( - cls, duration: float, lyrics: str | None - ) -> str | None: - """Accept synced lyrics only when the final timestamp fits duration.""" - if lyrics and ( - m := Lyrics.LINE_PARTS_PAT.match(lyrics.splitlines()[-1]) - ): - ts, _ = m.groups() - if ts: - mm, ss = map(float, ts.strip("[]").split(":")) - if 60 * mm + ss <= duration: - return lyrics - - return None - - @classmethod - def make( - cls, candidate: LRCLibAPI.Item, target_duration: float - ) -> LRCLyrics: - """Build a scored candidate from LRCLib payload data.""" - duration = candidate["duration"] or 0.0 - return cls( - target_duration, - candidate["id"], - duration, - candidate["instrumental"], - candidate["plainLyrics"], - cls.verify_synced_lyrics( - target_duration, candidate["syncedLyrics"] - ), - ) - - @cached_property - def duration_dist(self) -> float: - """Return the absolute difference between lyrics and target duration.""" - return abs(self.duration - self.target_duration) - - @cached_property - def is_valid(self) -> bool: - """Return whether the lyrics item is valid. - Lyrics duration must be within the tolerance defined by - :attr:`DURATION_DIFF_TOLERANCE`. - """ - return ( - self.duration_dist - <= self.target_duration * self.DURATION_DIFF_TOLERANCE - ) - - @cached_property - def dist(self) -> tuple[bool, float]: - """Distance/score of the given lyrics item. - - Return a tuple with the following values: - 1. Absolute difference between lyrics and target duration - 2. Boolean telling whether synced lyrics are available. 
- - Best lyrics match is the one that has the closest duration to - ``target_duration`` and has synced lyrics available. - """ - return not self.synced, self.duration_dist - - def get_text(self, want_synced: bool) -> str: - """Return the preferred text form for this candidate.""" - if self.instrumental: - return INSTRUMENTAL_LYRICS - - if want_synced and self.synced: - return "\n".join(map(str.strip, self.synced.splitlines())) - - return self.plain - - -class LRCLib(Backend): - """Fetch lyrics from the LRCLib API.""" - - BASE_URL = "https://lrclib.net/api" - GET_URL = f"{BASE_URL}/get" - SEARCH_URL = f"{BASE_URL}/search" - - def fetch_candidates( - self, artist: str, title: str, album: str, length: int - ) -> Iterator[list[LRCLibAPI.Item]]: - """Yield lyrics candidates for the given song data. - - I found that the ``/get`` endpoint sometimes returns inaccurate or - unsynced lyrics, while ``search`` yields more suitable candidates. - Therefore, we prioritize the latter and rank the results using our own - algorithm. If the search does not give suitable lyrics, we fall back to - the ``/get`` endpoint. - - Return an iterator over lists of candidates. - """ - base_params = {"artist_name": artist, "track_name": title} - get_params = {**base_params, "duration": length} - if album: - get_params["album_name"] = album - - yield self.get_json(self.SEARCH_URL, params=base_params) - - with suppress(HTTPNotFoundError): - yield [self.get_json(self.GET_URL, params=get_params)] - - @classmethod - def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None: - """Return best matching lyrics item from the given list.""" - return min((li for li in lyrics if li.is_valid), default=None) - - def fetch( - self, artist: str, title: str, album: str, length: int - ) -> Lyrics | None: - """Fetch lyrics text for the given song data.""" - evaluate_item = partial(LRCLyrics.make, target_duration=length) - - for group in self.fetch_candidates(artist, title, album, length): - candidates = [evaluate_item(item) for item in group] - if item := self.pick_best_match(candidates): - lyrics = item.get_text(self.config["synced"].get(bool)) - return Lyrics( - lyrics, self.__class__.name, f"{self.GET_URL}/{item.id}" - ) - - return None - - -class MusiXmatch(Backend): - URL_TEMPLATE = "https://www.musixmatch.com/lyrics/{}/{}" - - REPLACEMENTS: ClassVar[dict[str, str]] = { - r"\s+": "-", - "<": "Less_Than", - ">": "Greater_Than", - "#": "Number_", - r"[\[\{]": "(", - r"[\]\}]": ")", - } - - @classmethod - def encode(cls, text: str) -> str: - for old, new in cls.REPLACEMENTS.items(): - text = re.sub(old, new, text) - - return quote(unidecode(text)) - - @classmethod - def build_url(cls, *args: str) -> str: - return cls.URL_TEMPLATE.format(*map(cls.encode, args)) - - def fetch(self, artist: str, title: str, *_) -> Lyrics | None: - url = self.build_url(artist, title) - - html = self.get_text(url) - if "We detected that your IP is blocked" in html: - self.warn("Failed: Blocked IP address") - return None - html_parts = html.split('
<p class="mxm-lyrics__content')
-        # Sometimes lyrics come in 2 or more parts
-        lyrics_parts = []
-        for html_part in html_parts:
-            lyrics_parts.append(re.sub(r"^[^>]+>|<p(.*", "", html_part))
-        lyrics = "\n".join(lyrics_parts)
-        lyrics = lyrics.strip(',"').replace("\\n", "\n")
-        # another odd case: sometimes only that string remains, for
-        # missing songs. this seems to happen after being blocked
-        # above, when filling in the CAPTCHA.
-        if "Instant lyrics for all your music." in lyrics:
-            return None
-        # sometimes there are non-existent lyrics with some content
-        if "Lyrics | Musixmatch" in lyrics:
-            return None
-        return Lyrics(lyrics, self.__class__.name, url)
-
-
-class Html:
-    collapse_space = partial(re.compile(r"(^| ) +", re.M).sub, r"\1")
-    expand_br = partial(re.compile(r"\s*<br[^>]*>\s*", re.I).sub, "\n")
-    #: two newlines between paragraphs on the same line (musica, letras.mus.br)
-    merge_blocks = partial(re.compile(r"(?<!>)</p><p[^>]*>").sub, "\n\n")
-    #: a single new line between paragraphs on separate lines
-    #: (paroles.net, sweetslyrics.com, lacoccinelle.net)
-    merge_lines = partial(re.compile(r"</p>\s+<p[^>]*>(?!___)").sub, "\n")
-    #: remove empty divs (lacoccinelle.net)
-    remove_empty_tags = partial(
-        re.compile(r"(<(div|span)[^>]*>\s*</\2>)").sub, ""
-    )
-    #: remove Google Ads tags (musica.com)
-    remove_aside = partial(re.compile("