From 483d79a548276fe6c8259c73aaef652eabcd1ba8 Mon Sep 17 00:00:00 2001 From: Jan Vonka Date: Thu, 9 Feb 2017 16:04:00 +0000 Subject: [PATCH] REPO-1986: Upload Failing due to Metadata Extraction Issue (MNT-17436) - part 2 - part 2 - enable "addTags" to handle configurable list of separators (when using "enableStringTagging" option of "extract-metadata" action) - initial default separators/delimiters => comma, semi-colon & vertical bar (pipe) - also means we can re-enable & fix ContentMetadataExtractorTagMappingTest git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135061 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- config/quick/quickIPTC3.jpg | Bin 0 -> 17438 bytes .../executer/ContentMetadataExtracter.java | 89 +++++++++++++++--- .../TikaPoweredMetadataExtracter.java | 51 +++++----- ...ontentMetadataExtracterTagMappingTest.java | 83 +++++++++++++--- 4 files changed, 171 insertions(+), 52 deletions(-) create mode 100644 config/quick/quickIPTC3.jpg diff --git a/config/quick/quickIPTC3.jpg b/config/quick/quickIPTC3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..380f8306cd8b012c64866ebb2fcb9286ff904db7 GIT binary patch literal 17438 zcmdVB2UJwg(l~gB3<8pK7;=Uga?Vl7StP>{hcskFBnU{(NhC@}5hRNem7oLx13_{U zP?CV4gdI@O_xs-e?f3TVIeYdp!|krFs_v>=U0q#$=XC6J79hB$tfmZrKp^1k4>+Ae zA5`*lv;zPQ4S*8>04x9-gb$z~OFYPL0Eh@cMc&VGK~wjSt3B8X zY>ja9bp_kG`GKumZNYG>0DrKpn>~{5Y&3s;Rn#=J!Th|Uy!;S=9|9E-hwzI-M8Hr{ zaY3lK5ERga`8c|QHQnHFH+Li*dLjT|BJXGretu~FMHvc#`6w471ZCx; zUX%e-Bpm=?fKY$Si-8=*rA|OTe{=#0`dcR`pbHM52$W0CXtL+r06-OaKa=SEe){(_ zI)k4*qe(ykz{0@5#6ZWw#KgqL#=^lR!Na|B1(%YD_$mo46+Im-6%7rTnU@XB$jwAU z!!E+X4S@;@3evNQNs01H@(Kv@pD_VpV`Jl9!KJ{%qu^(tVc`F-uhVvb5DOpyOi@9M z016=pl@N6L5}-$p6CG($XGZn=0->Oyp<`fTVdGpuBGeNAC?Hf+6f{(HbTs6n2i-=t z189WkL<~?l3}PKCOhyk9{*a^sEGGGy4pQCWEoK30L?|{6894jHI|rwrkg$lT zn7D$XlCp}b+BH3W14AR@8zwfkFgtq(M<-7&Zy#Sj|A4T&;SrJdqN0;iQq$5iGPCX% zJ}fFODJ?6ncv@Rm-_Y39{H(L<<*V+V-oDo(qwn5-_&7E`F*m=kxU{_Td39}j=iBby z{`Z5!A7^qQE#jA0my-QME<&VSC}?P?Xqabmflz#rUsOUgbOtB}k(>^ul?O2+e+U+d zd{RM82R4&{?iQ&vVi<>vS#XYJ`%JWR$^LhOh5kR1>{75la!mlZs37FSLnQ=cfbZnh ziEo&Sk6*i;;FcS#jxsToKJnD2`>-!&OXpLxmha-7ER#()wm!6avrb!Z(BI^9HT7nr zN?iK@ng1Sr8fX28CdDW71`B@$W@FT!`2ij#BdZ@gC7!1b3q1R3$H}i!atEZJrx2^g z5KS$^T{e*bh%#+_`^w}w zBn<)FG)FpU%Uh4=7jAyFii7J*F3bd9Bz$K-`0hK-^!vaBz6T8=FqwUeZ?iJqB%FwV z;b@J2OZx1;8ZA{UJ@3{#hD%{f5R_V<;pQk+git~4pmF(T#n4SnF=SzC(Bhk=6W&$X z;Kv^B_loWV`6nUnVbw{?1j;mBsxj}so_nmqI5Np{`dv7qPsq?NVLf* zk265bS-jZHG=~GZQr|WlHFK~{{ZozumwVS|q<3{>H`-Tg4#SVuA0(@x*!4)iTd9s# zg4t)|ef<7-J+CvGopdZr>z)K8Ik39pc(r4JK9^(bM)C7oL~7UerX(0QMIUPU! z3TSIPxOusGI=H!m1$ZHVsJxoS*`z~Wf;1?U0Q+^AzmXfl7AgV$UE;qi30#&0!4fn` zcaHQ{r*l`89lX5U#rgPLJ$bEc-K=4}Hf}C_epc>$P+kZhASvVLZe`;P^8#DL>>XXD z*fv|**uajqQfx*-8W0V41(<`QY5)SJ7oe$c6X0wkX3HicjU(wN?&sp}0`sy0`?3^~pO2rP z2Z_Ps>F?@g<;Uad$%N#913u@W2=lZ-IJ$c|y18C-=i;uRamnr9O6KBnHU=()P!U0h2tPzv6bcnpfC`F=@J_O9u%^P6@lVbb7?QgBK+hf%vgYOJ1&93^vK-v&yz)x`xZ|0Pio>m3?WNfKc#tccl{egr zP5QFI$=4UECd^RN%idJL7HpXIA~; z|NpKm|LEeZS5I$iCz#DIg`H1sc`N&W(VjFiX#5vJE=5HKkpI5nEWG^pEl%Dp?#O`j zUmAZ^es_DW@;_tx)%L$LEB6aWeQ90);)gDq&;9H#S9A#zSBKeK+4!qz>nkWa+QU4( zq@lv{0#F5E5d|S(1%6RMF$Fn@h=M%70#sN|6so|_FUj{Gh|lcuPqTs>+zp}OW($)R z_?zkv_+QcgKOV||qjID_{`bnhSk5jzzPQ5K%z-H)ztYHk1wRi&n1`QVAIdK-01<}> z@jwK{A&_&o@;Bz0Vvv1^BbDc9=l>V62oFR+A0i?yC?*a?mj4j{g?UMgT>fmWysZ9L z|FH8-rpeGDw*RZm2bUh0aIR290Hvw|V{4ZRiJKWpz zZ;g6;!p=bFE)s5QceVijZNA7RaYGow6S)veLwWgmks3Thzl5C|*582th@^JrV}A?# z#{*sXgMUoZ^M3wGA$bl}aC7%ZuEq{tTE4EZzkKC?;`Bdhe(_Rww1K&L{_S!96PLfR z{~N8VxqI0NB2}y8=XI%kWO!q9u69o^TfV;#`1m0ZQ630C55L&&Me46cadkHvtFy33 z&jGpJ{-eW-RpQs6F4utnMET#0ABIdhTn_wSoZeZ>Uj}nl6+eGyMvk^#4$^`UF-g9E z0?&-*4@@2AYLCPeLpr?w0ldVNcl7eqh9UHASh} zz+rY?zlr{Cy2SsbfHPHSJNm)kXa4?NkblB235<1-@g&^)TyA9ekGy_U{M~SgedGUx z{ddD9_8(*eS4S^tHCG#l-^~Bfa!H}*X6NN=g@FCvd%0gyU7%kwICHW3jxN7kJTh-2 z4iNxfcxVa$B#Pi(UUDH-l0V8)#7l7aF|3$tlY7H5o{A#_VIJaP9 zMdZ_TJy-Z2U-8eDe}oKyVO@b4GJUoH>1K}Bv=`7SrK$jarG80L!H2P4il zyysaw3^X(}bTkZfbPQ|^?)mlm_N(&G|L0jeWDXDy7mtE~ zfPw@cAD`sxA3h1;IV$0=8}9GhKgoa9Pdm@Dc&4B%R1h+Ob&PBNt^NXI{V8UHK3}Z)(`?84a%-PPwJDf_tJ8npzyCz|t6fyf1_O!a z{raKdkB|#e05i{Aq{L-~J2s%<)p)Lbslx*)XM>|Pn)*y2bWfvOg!Da z0zR3`IF{mNwz?S%@6xbeu;3IJ?|jT1J3g59wSo4HbbgJs=e7L7zAB{vvor(FiURU^ zh2kFj#v%FGH+!-dM3@<|{(dW;*Zn>3KccRBsy08n_{e@!xFS5bvwVwnqp?b3X}_eJ|NX@85p-Ay4{)g{-Q zvYn@2CGsyQnEEX8RF~C1s!1o)-S(}MyVB40L1Cy$B|y#h!HWNYeTC6d?XIeS9ot&J zO0v~V_p0&lH8tg~d2fDvOM?b;(UGcmF{`T^hc`>S-*0~I!#baj(o54i_`z2v z+Fe*iMLQym}Co+p43<9R-9*ip?0+ zU7cmv1K@cRVMv8yHT##050UCRBf3xOR%5Hn9V(((`1Va{b@nlMx9euygU4?_P_MRY zqnY>iGDA3Ne29J0D;h^m;bIfbn~~@Rs~xBBYm9x;n^?6yw*HA`Y69Q!P`?|Umj6|c{N?B)J*I_buBwKdYmcYnRjljro@@K#lbcm)H-P98)*M|%-CoAWek?}v3J>*IGi8PK8cU5R z4bVM@t5Nq(f#<}wvk zA;oy_kzLLGA0Grv35{(v38L=F#yZpGx9Oh(QL!P1+>>Gr?$(}e-7`7Og!eUD5T5YG zZkGW8SC_toxY{Jj22b_OgGsT{p|3MGPh*l^3X^lj$Thh=XEgBN1U86S#Ynb5x*6&047T(|awu}oP*XWbhWdD4(n+QOKqti-Vty?{^E&Ngb^ zuGvkhlY%2pw6--Ktw22`9pp3<`gNj#ijIkb`TH;jM2NzGN(7Z7 z(6J(x4Jp_{BVqJNs_6i4L-^&j3y0^d&u1NS<&Xg#bA_HvMEgnMz3Onc`56C+tZ?-G zK!WNg<#}Q0Pa?vbA{POkJQ-CDRYT$ha$JXJ?%uLrf&5rurQ-J5H zZv|W48)t#3wal62R-1xi$ycGLz)XB-N99NP=TzlJM_14lzv=BnVzzvQzWL7HHE?}} zQ_|L$r)gWE%lV_gRUG|R!z~G_Hgx@Gw+8P-NcMcixBrmHty;-n7x1Z)6sGffcjiIm zDe!QbuT*TEMRcoBn!I8U5v!V3d*pDdtMSCXN4V5!|LF*3oPfvxtsqBFtolh^SVLp# z?xI1|eQ~$9e%ml_*Syy<4rwJRCkp%FPxdsPnP5h3wQm=D`?x)D@EL?ndz+qw8@!IU zNU|{zySa{d9yswNLnBtww6&SGO~a+!><*4e&x)1xgxJkBZvP6i#>&dc5oo1>jLyo8 zb`&*pkrMCAWQ}euY!T*c;vNqHS0Y!9mQQ%@{n3`8qQ)+_{d#b`j@{)TvImXR)5Y@> z(UzLW1SR5WaQ&)g4rxB36KV!lXO$|uQ^2iH<;xu-IjtxBUVPc_WBU9!s9LsojVdVW zKO3~&4Z41?dBA?BF))O%8*BC>y1M(Dwk-o@0$8Z1N`PWAqZ(yqvT`4@VpK|}8X=SI zmU`|f;J{h9Yy^SPvfd;qS5}7crw;RCsu@6F`~pl={8=jMlyD#THJ#Mgoa&F9B>Kk& zIjh7Ue;0_Z5s*pB99Xu0o)xj!Zzpi9Myu>m3t3OTpWiQ1lWAaDKJ?uUX|^JS00OKzV97#0`s3VXnAq^)7zlYsW+L?sREyS%4OuP31X&d9W<%`U>$L-RyU(JVo`g{8~bFD zg~~8ui@Lw_mTdbFR)VB~iLMsibz8{E!;CS%RE{?bQ=QjtLlhT*6)vl|11&xypF}Gc z+LZ)!mtjg81I`Bghw4zm7dY`ZZSGD*^D(=g7!HpO;tb#GyEc*Hn^)N2Iu$xWnaeKC zrk6LpRPR$}z|^IM=9KK>51z+eF7YeSnKnKvHN<~!7F+7pSciW#d^hm@b6Ec7O(Uml z(}?kJGve{hUu}r)cD4@g-A5Z-g#Ik8d`J4F)cpOIAFNz+_RYe6C1q?MN5_l~oU)@) zcE<P6Sb=$mzG7w!H4evye)4yey%Pm<9fp0ko=MOQV-C>8 z!j&IvZYzh>c}W%s6n1PZ3oV!H@YGIa&F3h6i2A@3-Zw?j9(yyA#=yCka=Hy?9lA+s z7}dkD7LJW}tkWl+)ii%oq9mv{$akrKs|V zSNO4uj){EF3fkAHs|k1wIIeRi_`W8gm5#9|75d20GP=XA^Bzb1;cP+#3HwoKz>=2! z-cX0k*x5=TO@NH>(LreF*cced^Ww89g+d6Ro=quYMjb1Dd5@4JG+pb0n&G*vZ2~3% z+0YI!L_r%dqGyv__&X58kU=RcDs%euglJU0`yElT(j)q@pb^>1#Bg=+25Vtbw!9%E zp9B-m>LdrU8e2IKW?)hD>g_ZYy>qqNHF-tLZTnqe*VRdfn<(r$a(9&JAyMVknVi7# zDFE+AgMZMrxcB44$w`;?ni}A9Z%02%t2Vvpk>~))_m}mW8G2}h0*=ER8qC6tp6#u> zyiQ$jSTzS+MJRn~k7~nkOUlUwOSgR-sTqSl&CYe*#pQz$g5)^S;{Rq($Lut^O zx$z!O_RXU`<`p$mS4YlfR6;iYjw*WYcL%{D7NIw$#qouDBMP3a+|GO`edA_mci!!l zKIig1@_$(mFQdPG!y~*3$(v3GiiKNz%`kgaMNR7Hi^}-9doVr!vm>ENzhAw zewp%m`G$b%p@?Tn)L;|Cp(4R-?F~14LLoZ?cK??dwCLAQ0q4d*{kv~j8THUom!zCG zGTbS4gIi1_9}tkhQnN-PNGR`JZU2PY$&&58G=Ommcs zd(F|s_26RyC0kj#OEDZ*#wV4+>9jI+EXIP4&!MBtpoF7$ zYy0lXt%t5!U&6#t@?~8QuyiZ*YJ)$%@u?H1ZT=~tT%E6GbX`k?X$A6eR7+Ge(la`) zn)ZcBXQFs>MC+r8)J1%E(gGmf4aDwQh2N%Hgn^YwP~}SZHu;#(Fn3I0mt9BrMfMCj z+?dRW2^Zwcv2~Y$@-c^5QwMKH1?IhZ@MwX}|-xn)60@ zxos#JN%+Li)NBP8Qp>~?dlJhZa{{=pT!<*S8p-$Os^6yR>lVG}5j8O#sW5hbh)4a= zz#z@D=jkvvb5bpH{3-D0DqC5#%U2h)X8Xr4cxBq)JZjAEQ@<@tSeDxsD3&#{U^pBp zq~?f+6ozW{7DUxgX}-1hzUP)J*(9r%uEu%IkJNr5(2@m*ox?y?dvxxhdpK{Q6JU|? z0cVZj9oeUL2Q~Fqc%*T&n`^fj);MfaCr5rLL`8I$SPpHCkPAkuHCRx6?=Vx;&wnab zf}^{XtizYA8Ki|}y}apj$7sdSG+K@!#__devbCCSYB;fOj29T~`lqQ0Qkc0rGf7hsFcG|{aIgV+AaP#Gv0`I9gbG++hZKBwlLW#{F36)UW9{0GaS9ILJx@BX*ig-7IA6GF^uV{Vi0 zsnx(hK59HlS0|2-b((F!aZO#3(Ta-7t>Sp$peZm52a1!N_VJBvh;C8z%9S7bKMRwC z6?8wvTboUoKE;#l=0jg%J!)=i*m~N5vD}I-7qt}}j1pYK{ZXSuQCk-W1Jf2DKCt`H zv2;Zz{eB6?4S2kVeAvTCTLW2N&P}oHXcjO7o16o^SGIt!O_yq+-gUL7T@s~{?Bo4T zMlEXlDs!bmzH>(moCYlLRdf-MB!1RUC8Y|%_|e1XR6hRBc7UvfY%=xe@Lus`EO%jH zORBy|tDn^Dh*7P2HGQy#H74&-vTC{OzMo3K2S5< z19PnUHZ_XgV6n57=P50jApSW|%i^W4o481&FVN#{bk!$H=mVa5H9d$~#{W4^Xlyq= z@5c6rr-uy#FWQb}*maF=R2N(eO8ALi#}oZpzJzsV!OdJ$md!EC)Xy!@kD_?ls8;aG zBA-|8@}MGbDLed=2rp(5?N)cuCOcm^t>H?n1s^u&;|{DWjW?Lns9847b#9zi*zzWo z<{LjGL_Uzi%n_vJ^^oAl`Lwzi1!is|;|<<-%NZ1-oIh{Z!3W>Er5jiDHu{TWh9(=x z&YbLFuK-vvx)ZbXg(E{bT?t= zW~^@Z)u#pi-k%)Xj^)@` zxUbL{-a~PnMY%O0+`M=4%~ea4K;1%p^ZQv(?%%qty^7wvJvSi~jSJOB8E27?&ma-hNHKik>DYNfnnE0{-T{m2&IzSj#Od zX&4T((3j~0E*qO`Eq=G|Q^vg0y;Z=W?0n70Ra}m>^M1#CF5?yN>jZu$_VA_E;<|`( zLy`olygCa04~3<#zdyKC}amq$t7RiXvM=R;@rr%_(NH&H;pv9p;FcB6n3L6TuD z)|ujCz%8m15z{PWXwV6C0%t6MI=Lt?mcWbvT^YywFy@ZwIF)M;JJ2_ey`I z#zXfeQpIlLdqk?QymiwPluz>5>XCost%J|qyoDmyTvl&0biTh|*N4~DNe+o%^W}@X zHCs_gE2oIharm61U~p9)G|VQq?PskS(0}q|V+I+Ru@fLuTqtK32<>-ZM#umK)+DxMOm^q=U_=pmVkrv2e-g` zg&7iURf;RPZ@%*dJ`Pfq(`wb)CQQiEnqkTv)Vr@FIu1{@hT|4zw6 zzu%bQH@;y$(qhxo>kWaT+%j1P7)vp)$kDZrk`Rz#H^=(eP!pGT*HdP~;ZyTMpM@GJ z?%f~C`{`&Te-S;|YHg8OaF4^DnK^psDRnnwZy9%?nS4sc9HK-n3U za390O$bZ4v&o{N}ZjqDE(qxikqMrO-@pPb=VtTAcWvr-bm0E{^=Qt~FHyZ{tET%O1 zk!91n<=nBA=zc|Prv}c@%1BP#t_c(Ng3{HaYH(@W>b~a-8=1b9c6_yaF7V2h(LlH<(0AU<(=w%5H)Us`6-$-mv$FXX zofaV?3!~Q}`L4L_b62K-QCAuA$&9|R)d-ap)A4HlXVB8m7JiGjgL|`vK3jbxq!3`W z{z);qHM?b~@%W_2Lh@Q7PgO^d!s_d%O}tO+2Q{`0wWv-M341)!C4Cc$OfZ}Dw_isw zAONU-ch8`xaHVMN2WNYNYl(gLeMH7KP2WnZH$H3&_UNrjN|e84@Uk%ifyJ6~bG>U? za(nC+7Jg%iO6&8jP)L)2R(0*P78}hI!zfIPqtYGwfCHkNft%}D8`W|`fmoU^Jn`>v zbM*sSQA01iKo~8by>`eoo)LY1lQ_?2ik&T|ko0Z`-swq>MzBtMWOFyldTRc|51MH; z-BLNZ-E90pnnrVYk((T5{`M;cU=((+p6Qa@P0Vy8)CigH0hour=LF2vU^1qWy@}C0 z#9+tCMXmaItf?bM=O;Setf?3C7DG~?XkzhjwuXA5BL0}xs0=-QfTJmn!UVh1pfYJC zFa3#mlVnZxDNrgq2W{`EMvc{th0fr%7I@>#h`Os^dq9NJH@7v9zmUNY-$W?Y{+v`( zF%iGV^6Gkcz&$%h^seCCkgxNRg1IE&&`Mr( zO)Z$^4LDb=ynQ1@waG+!U?$?Q7O|M%#ZI%p6`5L-ujosKp%odwJ5^{;&AIqh#g&+K zjLNWZ5ob^_@EcyH&Migu0qfO8Ks|;+1|!22ff&bNHFt2B(YxPFBg?)J*{fV+=Pm@+ zEwF2CAM423c{mP_DphG9&UR|BlQr?6vhfSt-pbTjZZRh2d6+`;`eScc6G<&cudru} zVn3HJj*{RbXr~GygQ#DG6#Z(ZvqJK)97!@r`o)Et4QzkC<^OYFLj!O!@H3V9Izn5r|26_91km0dI zk^y+(yN2&k=ogc^1XG<|rCCKOR%ub0DH8FkVNYlp!7yEHk+E;lgZ6g?PV%N^I4f-U zH>Jf;=@>J9f+NFX+PV)sf4B&FzLK{FQFgbH=JMfKX2>wK7}Gx5Yli`Nm>CId8EJ1k zt`oOD(;RH-^RBaUqGw#%*-RN>R6lA!eJaw|Uv4WMcQsPE%0JgvuvI?IecG?@X~op% zMOI1g_Sy&{=>pYBjw{_1 z=FV_N*+4pCt^XT~6zZc6kb_QFb9eVw97)s%_B%~Qs!3t3;@ThGFj*8^9r$`FhO00b zk}=Xz?B>+N+JPwJdtO~PzqO(?LBkb#fW9i)L>81ZyPkuq{CMT>?6K{@bA8#JOt^Lg zbpqeIbDv2Z?%xizB-M5mZxIb5NL2rz{lLi}$kaM{kvUB2&f9xC9egPaDi7i%L`$yd z$PufcrlD4^&RN5Tug6KL+|Hp%Yyd`Wji5dJi;kl1u&1}1BxSNYbiNjsQTtmJBC%Kg`$lhTyp4v^5IYfAk-8RfVa$f-i zE|n5(i_@F&q<3^7x%h5PICkHM!9!tN{sT76Q(*Wt-VJY7XR7`pzlrw`A2Hj3wvhV*_x5fPnc54KBVdaDIE$BdEW z8m`H;xwRd-Aya1OzsorPX%T^|`U-#r2C&GI)R>cheH0C2kE2&pd-D76e?!jTin|Qz zhmwl)?8*EWm>F-4F@G3Re`&xz1ulrynB!EqyhLxYi6?4tTmq%l*>2#qu1mgqt}Av4 zgvgpU$d)3Pl;DXAj>)PlcoR7;6XwgWc$F`JNrgg;^3+s&Dp0MWKLb(~mE^h8RaecE zFL(iDrN4JYr&(e3lGQ(G7+A=UvPG`i}L$;7X%=KS~&t)9WXMig=i<8 zXB5v4nF#P25AfpOXq;dV9ps4-?i>!)ZI9d13flMY zO!Kj_TR^L0H*e$0=0L07`u7)HD-*JTArw&^B-m5?6%zY9a{Wh2QU$RbOrE{h=$~FG z3?>-jDkD&2n*7w7jEciN&e7@6dk?HN@PY2!l6V(#kZoRt7S+eiuD9z5{9UR z;o;DxlMH;3XPS*ena4*{U<$JoV#xEN>OzQEs@&i~1SS&4J@_{DIkH*tpg!y;^-J!_ zaerIhMNXGZ0emN;#HL=gzD(47_uRYseF0??qyY1+S(4QEXw)8ujvGs zF_amR%>Kr;`A5Od`$r;Z3CbFsg1PaNlBib?#vg-m$LVryp7oABqzw-b+~GpIQ|RrE zG}vd(zIUt_c^uSMF&dW`df&MG430kO3Zk@CFf{#TAJiv^>$uYON6AQ($QnK3O_Fm?f5ceYY`tO^-F4 z7fqPXCHb+qYE8o+wNIadRrLy0$NK(i`pOrfKDkC7*KwKgfZ~l_hcFN+yB(rOYtr=o z9?w*$jC1!q<v^vXd#b; zFn+x>aA{RmTM%vJDdt?q@Sjs40@K>}84Stx+nP3yGnj`AN`6*lNcCxqZ49bp7d1#& zWcRhkqe+<&kC;|s-u2I8mqHECX>U$h@jiCNToX5+oBH0J&Zd5~P@VR=K?A*yEnYC9 zM=AcQhgWU3r+ucXc@dE+8Ns`QU7F{6Y$7koc?nFfqd0CTRnj|7vrOCYZVC+fW_#^o z#yuD4$zK?y8TUl>7k;A_-6l*<&}(J<+UET*y;_9L`~Egz-@xnn8|?NkOtsR91D}=p zUOiMJy~3xngn}N>#5f(Yw_2UfJu-_ z|Jv zUPb=^P10gpp7o&E_$1Ltwn=~&ke9CDzqW#-k_<$B)@JC4IN*_IUdcoBOFR>VlD%E& zG6{Q6@!UY)0vGECmV7trCrFygo?I$q*FC?FR}0!zhZmbb7)mCnnwwch_~4$A9ptGN z7~HXZYgQmiY`#yl#a5x@{Z6tNEE)E|?IHI9JlQJWb$Ap#QH`y6ns?Hro7^Qfz-iKT z65g<#f-%iAKW^2<5XrMk_c-IQ5ow()$bJ0Tb^czq7^;0VR?jSXZQ{B4v587|8K^!f^yHVrV8Mu=N zX*?+)=&Cw`y*1a_EwcB-enMhj>^qGLx_#Cz@sv(l`o0WJj-#(7Cty&HFIZHbWydK}N1ke~~x3X)a zHV6vY7LOf@)UM`Zjokyk2$VhrSQY11WebLXhSqo8v+Wlxd?Zfo{Y;HOV^<893CkTz zIYJ|_(QiqjuTU_K>zYS*UiU$_0SQT0?F%0&72gM9N?A-}FH?`$k2x)%{ZJIwC_v5~ z!&McvD2Q7}`zGgcL2Q`>QTuvuxIMf46>q%Nal)wTvT76QPI}^yn<{p$KJb?tY(g?| z6yMU&Wej3Ie_E-|*PwepDZ~J%n7?5kczJ&_%hGXEKy^Y5q~%=Gmk>_|zAg^sLyz7l zXiDy<*dneFTmMuCl&cj`1b3>a$5d0ky-nV!FtpTu)dB4k$lEnToiBJ|af}z&Q5kSl zx&ax?Ro6qyGbsnD(xrR@>;?7H#xiGBA5~-dSuVWFr{TW3fPR zn7Z06n5={Q&{E+oE}eBKR+8%O-IIwbu#S2W=P`623Qa;q!Sgz>c+WOy2|T#Z%G}Gy zRV)S`o$YYkWpml3L(+tI3Mk+9>b#4sCVJ;a5B7(q{&1Fn zF&>t6^*3Ql_e<%Dk3~sdX%~tl``Xrb%xaV+*ojLRYW@&sl1Y|8_lW0H-24ek@ki~T zqKy6Ufh$c;n_f^Dm{&B30KoU+F@`7=#q}9dIw*vb~KV+7@7Mv(fA6F|N8TEjrxok|7K$Q9X2d4-xza=Yg>e5?8vS^LhTya#&P?!ci(sG_xcbSViL3HT;~qKp?)1gzbg#On2JMs&aPPNez6|Lm zV9Q4ht{>#%Oov3hPQpn-K6G{Gwk!+A#?XwHdX};7gG?gnLKX z8dWB(m~C*IbDT+%(kGoU&7PA2a1L&}qqoSHk~l&HVv0#g^TO^XU4333R%j_}6^!Y+ zQu%E5VZN~pek$<-xHI^mp_z`CfZaFD|7MTX)wVZR^;eR<2kH&x@A}=Kcsi@lsUnY? zqjk^yb+x?MBpo64rz=|W8p7rY%unk_1I6Cj|48&Kg_G}weffAOJ3DAG`XnXJ7K`E@ zSokeNS0AGWbl@p|h#G$x`5e<1$Z?0%mS6a>E;mqg;{~xLb@w-jXoMbv(K5CtOqieT z-QdA|e9tx7MK~7A&OFxK_VvxU$ylI@oGeeKG*pqcnWMiZ0ho*{wzXD5!9G* z6y(eH^=Lq0^xMhLefpeNg>+5m#zDS~G+f8F`sTSy^};O2GF+6a1}l59&`yKz8()mQ zbXzMv;hUnTR&Q6BvJT2nWQy>xq4OLieKGvV4o_`bfVu+BK~ZjxS9ABU7pdZs$uYA>pe60~=3*r}x!ANTvN}zgH_43e*oN<(sMf}uST{44-s6f^byDofByoCs z^C85d9$P@9X>5D{NIF=Z;05_Kq-#Mu0+ec{$no}=U;WU^a?Wec{Ka(w%SV>l)Y(hQ zc5*D3uM)Jcq+`%O%A?TBdMC`?uaDi7Ka`FsN}LuJw_>`l`hG4Z5-G+YSHP|oOmFsecX!|rUXV&zpR)`}a+mo4-Wr%FnNlm<)b n0V0;2AGTn?cu3?^_GeTAGzL8^inn6|kg_{gm?!TCPRIW*5(}V~ literal 0 HcmV?d00001 diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 4639fdd4c1..48d7fddec7 100644 --- a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -45,6 +45,7 @@ package org.alfresco.repo.action.executer; import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -94,8 +95,13 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase private TaggingService taggingService; private MetadataExtracterRegistry metadataExtracterRegistry; private boolean carryAspectProperties = true; + + private boolean enableStringTagging = false; + // Default list of separators (when enableStringTagging is enabled) + protected List stringTaggingSeparators = Arrays.asList(",", ";", "\\|"); + public ContentMetadataExtracter() { } @@ -164,6 +170,16 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase this.enableStringTagging = enableStringTagging; } + /** + * List of string separators - note: all will be applied to a given string + * + * @param stringTaggingSeparators + */ + public void setStringTaggingSeparators(List stringTaggingSeparators) + { + this.stringTaggingSeparators = stringTaggingSeparators; + } + /** * Iterates the values of the taggable property which the metadata * extractor should have already attempted to convert values to {@link NodeRef}s. @@ -182,11 +198,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue) { List tags = new ArrayList(); + if (logger.isDebugEnabled()) { - logger.debug("converting " + rawValue.toString() + " of type " + - rawValue.getClass().getCanonicalName() + " to tags"); + logger.debug("converting " + rawValue.toString() + " of type " + rawValue.getClass().getCanonicalName() + " to tags"); } + if (rawValue instanceof Collection) { for (Object singleValue : (Collection) rawValue) @@ -201,16 +218,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase (String) singleValue); try { - String tagName = (String) nodeService.getProperty((NodeRef) convertedPropertyValue, ContentModel.PROP_NAME); + NodeRef nodeRef = (NodeRef) convertedPropertyValue; + String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME); + if (logger.isTraceEnabled()) { - logger.trace("found tag '" + tagName + "' from tag nodeRef '" + (String) singleValue + "', " + - "adding to " + actionedUponNodeRef.toString()); - } - if (tagName != null && !tagName.equals("")) - { - tags.add(tagName); + logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef); } + + tags.addAll(splitTag(tagName)); } catch (InvalidNodeRefException e) { @@ -223,17 +239,26 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase else { // Must be a simple string + if (logger.isTraceEnabled()) { - logger.trace("adding string tag '" + (String) singleValue + "' to " + actionedUponNodeRef.toString()); + logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef); } - tags.add((String) singleValue); + + tags.addAll(splitTag((String)singleValue)); } } else if (singleValue instanceof NodeRef) { - String tagName = (String) nodeService.getProperty((NodeRef) singleValue, ContentModel.PROP_NAME); - tags.add(tagName); + NodeRef nodeRef = (NodeRef)singleValue; + String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME); + + if (logger.isTraceEnabled()) + { + logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef); + } + + tags.addAll(splitTag(tagName)); } } } @@ -241,9 +266,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { if (logger.isTraceEnabled()) { - logger.trace("adding tag '" + (String) rawValue + "' to " + actionedUponNodeRef.toString()); + logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef); } - tags.add((String) rawValue); + + tags.addAll(splitTag((String)rawValue)); + } + + if (logger.isDebugEnabled()) + { + logger.debug("adding tags '" + tags + "' to " + actionedUponNodeRef.toString()); } try @@ -259,6 +290,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } + + protected List splitTag(String str) + { + List result = new ArrayList<>(); + if ((str != null) && (!str.equals(""))) + { + result.add(str.trim()); + + if (stringTaggingSeparators != null) + { + for (String sep : stringTaggingSeparators) + { + List splitTags = new ArrayList<>(result.size()); + for (String tag : result) + { + String[] parts = tag.split(sep); + for (String part : parts) + { + splitTags.add(part.trim()); + } + } + result = splitTags; + } + } + } + + return result; + } /** * @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action, diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index db9f51ac47..cb3369c510 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import java.io.IOException; @@ -413,6 +413,7 @@ public abstract class TikaPoweredMetadataExtracter // keys onto their own content model for(String tikaKey : metadata.names()) { + // TODO review this change (part of MNT-15267) - should we really force string concatenation here !? putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties); } diff --git a/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java b/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java index 2f1447a3a6..b2f12a8cff 100644 --- a/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java +++ b/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java @@ -97,6 +97,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase protected static final String QUICK_FILENAME = "quickIPTC.jpg"; // Keywords separated with comma (,) protected static final String QUICK_FILENAME2 = "quickIPTC2.jpg"; // Keywords separated with pipe (|) + protected static final String QUICK_FILENAME3 = "quickIPTC3.jpg"; // Keywords separated with semi-colon (;) protected static final String QUICK_KEYWORD = "fox"; protected static final String TAG_1 = "tag one"; @@ -356,9 +357,12 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase /** * Test execution of mapping strings to tags */ - // TODO ignored until we investigate when/why this regressed - start with MNT-13655 ? - public void XtestTagMapping() throws Exception + public void testTagMapping() throws Exception { + // explicitly set here (rather than rely on defaults) in case another test method nullified + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|")); + // Create the folders and documents to be tagged NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME); NodeRef document = nodes[0]; @@ -375,25 +379,33 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase executer.execute(action, document); // Test extracted properties + assertEquals(ContentMetadataExtracterTest.QUICK_DESCRIPTION, nodeService.getProperty(document, ContentModel.PROP_DESCRIPTION)); + assertTrue("storeRef tags should contain '" + QUICK_KEYWORD + "'", taggingService.getTags(storeRef).contains(QUICK_KEYWORD)); - assertTrue("document's tags should contain '" + QUICK_KEYWORD + "'", - taggingService.getTags(document).contains(QUICK_KEYWORD)); + + List tags = taggingService.getTags(document); + assertTrue("doc tags '"+tags+"' should contain '" + QUICK_KEYWORD + "'", + tags.contains(QUICK_KEYWORD)); // Test manually added keyword - assertTrue("tags should contain '" + TAG_2 + "'", - taggingService.getTags(document).contains(TAG_2)); + assertTrue("doc tags '"+tags+"' should contain '" + TAG_2 + "'", + tags.contains(TAG_2)); + + // Test manually added keyword - note: lower-case tag name + assertTrue("doc tags '"+tags+"' should contain '" + TAG_3.toLowerCase() + "'", + tags.contains(TAG_3.toLowerCase())); // Test manually added nodeRef keyword - assertTrue("tags should contain '" + TAG_1 + "'", - taggingService.getTags(document).contains(TAG_1)); + assertTrue("doc tags '"+tags+"' should contain '" + TAG_1 + "'", + tags.contains(TAG_1)); + + // Test that there are no extra tags created by the non-existent nodeRef + assertEquals("Unexpected number of doc tags '"+tags+"'", 7, + tags.size()); - // Test that there are no empty tags created by the non-existent nodeRef - assertEquals("tags should contain '" + TAG_1 + "'", 4, - taggingService.getTags(document).size() ); - return null; } }); @@ -408,6 +420,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase */ public void testIgnoreInvalidTag() throws Exception { + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(null); + // Create the folders and documents to be tagged NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME2); NodeRef document = nodes[0]; @@ -428,4 +443,48 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase removeTestFolderAndDocument(nodes); } + + public void testTagMappingSeparators() throws Exception + { + // explicitly set here (rather than rely on defaults) in case another test method nullified + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|")); + + // IPTC Keywords with comma + NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME); + extractAndCheckTags(nodes[0], Arrays.asList("fox", "dog", "lazy", "jumping")); + removeTestFolderAndDocument(nodes); + + // IPTC Keywords with vertical bar (pipe) + nodes = createTestFolderAndDocument(QUICK_FILENAME2); + extractAndCheckTags(nodes[0], Arrays.asList("k1", "k2", "k3")); + removeTestFolderAndDocument(nodes); + + // IPTC Keywords with semi-colon + nodes = createTestFolderAndDocument(QUICK_FILENAME3); + extractAndCheckTags(nodes[0], Arrays.asList("keyword1", "keyword2", "keyword3", "keyword4")); + removeTestFolderAndDocument(nodes); + } + + private void extractAndCheckTags(NodeRef document, List expectedTags) + { + this.transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionCallback(){ + + @Override + public Void execute() throws Throwable + { + ActionImpl action = new ActionImpl(document, ID, ContentMetadataExtracter.EXECUTOR_NAME, null); + executer.execute(action, document); + + List tags = taggingService.getTags(document); + + for (String expectedTag : expectedTags) + { + assertTrue("Expected tag '"+expectedTag+"' not in "+tags, tags.contains(expectedTag)); + } + + return null; + } + }); + } }