diff --git a/config/quick/quickIPTC3.jpg b/config/quick/quickIPTC3.jpg new file mode 100644 index 0000000000..380f8306cd Binary files /dev/null and b/config/quick/quickIPTC3.jpg differ diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 4639fdd4c1..48d7fddec7 100644 --- a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -45,6 +45,7 @@ package org.alfresco.repo.action.executer; import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -94,8 +95,13 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase private TaggingService taggingService; private MetadataExtracterRegistry metadataExtracterRegistry; private boolean carryAspectProperties = true; + + private boolean enableStringTagging = false; + // Default list of separators (when enableStringTagging is enabled) + protected List stringTaggingSeparators = Arrays.asList(",", ";", "\\|"); + public ContentMetadataExtracter() { } @@ -164,6 +170,16 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase this.enableStringTagging = enableStringTagging; } + /** + * List of string separators - note: all will be applied to a given string + * + * @param stringTaggingSeparators + */ + public void setStringTaggingSeparators(List stringTaggingSeparators) + { + this.stringTaggingSeparators = stringTaggingSeparators; + } + /** * Iterates the values of the taggable property which the metadata * extractor should have already attempted to convert values to {@link NodeRef}s. @@ -182,11 +198,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue) { List tags = new ArrayList(); + if (logger.isDebugEnabled()) { - logger.debug("converting " + rawValue.toString() + " of type " + - rawValue.getClass().getCanonicalName() + " to tags"); + logger.debug("converting " + rawValue.toString() + " of type " + rawValue.getClass().getCanonicalName() + " to tags"); } + if (rawValue instanceof Collection) { for (Object singleValue : (Collection) rawValue) @@ -201,16 +218,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase (String) singleValue); try { - String tagName = (String) nodeService.getProperty((NodeRef) convertedPropertyValue, ContentModel.PROP_NAME); + NodeRef nodeRef = (NodeRef) convertedPropertyValue; + String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME); + if (logger.isTraceEnabled()) { - logger.trace("found tag '" + tagName + "' from tag nodeRef '" + (String) singleValue + "', " + - "adding to " + actionedUponNodeRef.toString()); - } - if (tagName != null && !tagName.equals("")) - { - tags.add(tagName); + logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef); } + + tags.addAll(splitTag(tagName)); } catch (InvalidNodeRefException e) { @@ -223,17 +239,26 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase else { // Must be a simple string + if (logger.isTraceEnabled()) { - logger.trace("adding string tag '" + (String) singleValue + "' to " + actionedUponNodeRef.toString()); + logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef); } - tags.add((String) singleValue); + + tags.addAll(splitTag((String)singleValue)); } } else if (singleValue instanceof NodeRef) { - String tagName = (String) nodeService.getProperty((NodeRef) singleValue, ContentModel.PROP_NAME); - tags.add(tagName); + NodeRef nodeRef = (NodeRef)singleValue; + String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME); + + if (logger.isTraceEnabled()) + { + logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef); + } + + tags.addAll(splitTag(tagName)); } } } @@ -241,9 +266,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { if (logger.isTraceEnabled()) { - logger.trace("adding tag '" + (String) rawValue + "' to " + actionedUponNodeRef.toString()); + logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef); } - tags.add((String) rawValue); + + tags.addAll(splitTag((String)rawValue)); + } + + if (logger.isDebugEnabled()) + { + logger.debug("adding tags '" + tags + "' to " + actionedUponNodeRef.toString()); } try @@ -259,6 +290,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } + + protected List splitTag(String str) + { + List result = new ArrayList<>(); + if ((str != null) && (!str.equals(""))) + { + result.add(str.trim()); + + if (stringTaggingSeparators != null) + { + for (String sep : stringTaggingSeparators) + { + List splitTags = new ArrayList<>(result.size()); + for (String tag : result) + { + String[] parts = tag.split(sep); + for (String part : parts) + { + splitTags.add(part.trim()); + } + } + result = splitTags; + } + } + } + + return result; + } /** * @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action, diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index db9f51ac47..cb3369c510 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import java.io.IOException; @@ -413,6 +413,7 @@ public abstract class TikaPoweredMetadataExtracter // keys onto their own content model for(String tikaKey : metadata.names()) { + // TODO review this change (part of MNT-15267) - should we really force string concatenation here !? putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties); } diff --git a/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java b/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java index 2f1447a3a6..b2f12a8cff 100644 --- a/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java +++ b/source/test-java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java @@ -97,6 +97,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase protected static final String QUICK_FILENAME = "quickIPTC.jpg"; // Keywords separated with comma (,) protected static final String QUICK_FILENAME2 = "quickIPTC2.jpg"; // Keywords separated with pipe (|) + protected static final String QUICK_FILENAME3 = "quickIPTC3.jpg"; // Keywords separated with semi-colon (;) protected static final String QUICK_KEYWORD = "fox"; protected static final String TAG_1 = "tag one"; @@ -356,9 +357,12 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase /** * Test execution of mapping strings to tags */ - // TODO ignored until we investigate when/why this regressed - start with MNT-13655 ? - public void XtestTagMapping() throws Exception + public void testTagMapping() throws Exception { + // explicitly set here (rather than rely on defaults) in case another test method nullified + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|")); + // Create the folders and documents to be tagged NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME); NodeRef document = nodes[0]; @@ -375,25 +379,33 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase executer.execute(action, document); // Test extracted properties + assertEquals(ContentMetadataExtracterTest.QUICK_DESCRIPTION, nodeService.getProperty(document, ContentModel.PROP_DESCRIPTION)); + assertTrue("storeRef tags should contain '" + QUICK_KEYWORD + "'", taggingService.getTags(storeRef).contains(QUICK_KEYWORD)); - assertTrue("document's tags should contain '" + QUICK_KEYWORD + "'", - taggingService.getTags(document).contains(QUICK_KEYWORD)); + + List tags = taggingService.getTags(document); + assertTrue("doc tags '"+tags+"' should contain '" + QUICK_KEYWORD + "'", + tags.contains(QUICK_KEYWORD)); // Test manually added keyword - assertTrue("tags should contain '" + TAG_2 + "'", - taggingService.getTags(document).contains(TAG_2)); + assertTrue("doc tags '"+tags+"' should contain '" + TAG_2 + "'", + tags.contains(TAG_2)); + + // Test manually added keyword - note: lower-case tag name + assertTrue("doc tags '"+tags+"' should contain '" + TAG_3.toLowerCase() + "'", + tags.contains(TAG_3.toLowerCase())); // Test manually added nodeRef keyword - assertTrue("tags should contain '" + TAG_1 + "'", - taggingService.getTags(document).contains(TAG_1)); + assertTrue("doc tags '"+tags+"' should contain '" + TAG_1 + "'", + tags.contains(TAG_1)); + + // Test that there are no extra tags created by the non-existent nodeRef + assertEquals("Unexpected number of doc tags '"+tags+"'", 7, + tags.size()); - // Test that there are no empty tags created by the non-existent nodeRef - assertEquals("tags should contain '" + TAG_1 + "'", 4, - taggingService.getTags(document).size() ); - return null; } }); @@ -408,6 +420,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase */ public void testIgnoreInvalidTag() throws Exception { + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(null); + // Create the folders and documents to be tagged NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME2); NodeRef document = nodes[0]; @@ -428,4 +443,48 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase removeTestFolderAndDocument(nodes); } + + public void testTagMappingSeparators() throws Exception + { + // explicitly set here (rather than rely on defaults) in case another test method nullified + this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata"); + executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|")); + + // IPTC Keywords with comma + NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME); + extractAndCheckTags(nodes[0], Arrays.asList("fox", "dog", "lazy", "jumping")); + removeTestFolderAndDocument(nodes); + + // IPTC Keywords with vertical bar (pipe) + nodes = createTestFolderAndDocument(QUICK_FILENAME2); + extractAndCheckTags(nodes[0], Arrays.asList("k1", "k2", "k3")); + removeTestFolderAndDocument(nodes); + + // IPTC Keywords with semi-colon + nodes = createTestFolderAndDocument(QUICK_FILENAME3); + extractAndCheckTags(nodes[0], Arrays.asList("keyword1", "keyword2", "keyword3", "keyword4")); + removeTestFolderAndDocument(nodes); + } + + private void extractAndCheckTags(NodeRef document, List expectedTags) + { + this.transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionCallback(){ + + @Override + public Void execute() throws Throwable + { + ActionImpl action = new ActionImpl(document, ID, ContentMetadataExtracter.EXECUTOR_NAME, null); + executer.execute(action, document); + + List tags = taggingService.getTags(document); + + for (String expectedTag : expectedTags) + { + assertTrue("Expected tag '"+expectedTag+"' not in "+tags, tags.contains(expectedTag)); + } + + return null; + } + }); + } }