REPO-1986: Upload Failing due to Metadata Extraction Issue (MNT-17436) - part 2

- part 2 - enable "addTags" to handle configurable list of separators (when using "enableStringTagging" option of "extract-metadata" action)
- initial default separators/delimiters => comma, semi-colon & vertical bar (pipe)
- also means we can re-enable & fix ContentMetadataExtracterTagMappingTest

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135061 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jan Vonka
2017-02-09 16:04:00 +00:00
parent a6d88afc6e
commit 483d79a548
4 changed files with 171 additions and 52 deletions

BIN
config/quick/quickIPTC3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -45,6 +45,7 @@ package org.alfresco.repo.action.executer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -94,8 +95,13 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
private TaggingService taggingService;
private MetadataExtracterRegistry metadataExtracterRegistry;
private boolean carryAspectProperties = true;
private boolean enableStringTagging = false;
// Default list of separators (when enableStringTagging is enabled)
protected List<String> stringTaggingSeparators = Arrays.asList(",", ";", "\\|");
public ContentMetadataExtracter()
{
}
@@ -164,6 +170,16 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
this.enableStringTagging = enableStringTagging;
}
/**
 * Sets the separators used to split a string value into individual tag names.
 * <p>
 * Every separator in the list is applied, in list order, to each string being
 * split. Each entry is passed to {@link String#split(String)} and is therefore
 * interpreted as a regular expression, so regex metacharacters must be escaped
 * (the default list uses <code>"\\|"</code> for the vertical bar).
 * <p>
 * Passing <code>null</code> disables splitting: the string is then used as a
 * single (trimmed) tag name.
 *
 * @param stringTaggingSeparators separator regular expressions, or <code>null</code>
 *                                to disable splitting
 */
public void setStringTaggingSeparators(List<String> stringTaggingSeparators)
{
this.stringTaggingSeparators = stringTaggingSeparators;
}
/**
* Iterates the values of the taggable property which the metadata
* extractor should have already attempted to convert values to {@link NodeRef}s.
@@ -182,11 +198,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue)
{
List<String> tags = new ArrayList<String>();
if (logger.isDebugEnabled())
{
logger.debug("converting " + rawValue.toString() + " of type " +
rawValue.getClass().getCanonicalName() + " to tags");
logger.debug("converting " + rawValue.toString() + " of type " + rawValue.getClass().getCanonicalName() + " to tags");
}
if (rawValue instanceof Collection<?>)
{
for (Object singleValue : (Collection<?>) rawValue)
@@ -201,16 +218,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
(String) singleValue);
try
{
String tagName = (String) nodeService.getProperty((NodeRef) convertedPropertyValue, ContentModel.PROP_NAME);
NodeRef nodeRef = (NodeRef) convertedPropertyValue;
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
if (logger.isTraceEnabled())
{
logger.trace("found tag '" + tagName + "' from tag nodeRef '" + (String) singleValue + "', " +
"adding to " + actionedUponNodeRef.toString());
}
if (tagName != null && !tagName.equals(""))
{
tags.add(tagName);
logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
}
catch (InvalidNodeRefException e)
{
@@ -223,17 +239,26 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
else
{
// Must be a simple string
if (logger.isTraceEnabled())
{
logger.trace("adding string tag '" + (String) singleValue + "' to " + actionedUponNodeRef.toString());
logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef);
}
tags.add((String) singleValue);
tags.addAll(splitTag((String)singleValue));
}
}
else if (singleValue instanceof NodeRef)
{
String tagName = (String) nodeService.getProperty((NodeRef) singleValue, ContentModel.PROP_NAME);
tags.add(tagName);
NodeRef nodeRef = (NodeRef)singleValue;
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
if (logger.isTraceEnabled())
{
logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
}
}
}
@@ -241,9 +266,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
if (logger.isTraceEnabled())
{
logger.trace("adding tag '" + (String) rawValue + "' to " + actionedUponNodeRef.toString());
logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef);
}
tags.add((String) rawValue);
tags.addAll(splitTag((String)rawValue));
}
if (logger.isDebugEnabled())
{
logger.debug("adding tags '" + tags + "' to " + actionedUponNodeRef.toString());
}
try
@@ -260,6 +291,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
}
}
/**
 * Splits a raw tag string into individual tag names using every configured
 * separator in turn.
 * <p>
 * Each entry of <code>stringTaggingSeparators</code> is passed to
 * {@link String#split(String)} and is therefore treated as a regular expression.
 * All resulting parts are trimmed. Parts that are empty after trimming (for example
 * from <code>"a,,b"</code>, <code>",a"</code> or a whitespace-only input) are
 * dropped so that no empty tag name is ever returned.
 *
 * @param str the raw tag string; may be <code>null</code>
 * @return the non-empty, trimmed tag names in order of appearance; an empty list
 *         if <code>str</code> is <code>null</code> or blank. If no separators are
 *         configured the list holds the trimmed input as its only element.
 */
protected List<String> splitTag(String str)
{
    List<String> result = new ArrayList<>();
    if (str == null)
    {
        return result;
    }

    String trimmed = str.trim();
    if (trimmed.isEmpty())
    {
        // whitespace-only input must not produce an empty tag
        return result;
    }
    result.add(trimmed);

    if (stringTaggingSeparators == null)
    {
        return result;
    }

    for (String sep : stringTaggingSeparators)
    {
        if (sep == null || sep.isEmpty())
        {
            // a null separator would throw and an empty one would split the tag
            // into single characters - neither is a usable separator
            continue;
        }

        // re-split everything produced by the previous separators
        List<String> splitTags = new ArrayList<>(result.size());
        for (String tag : result)
        {
            for (String part : tag.split(sep))
            {
                String name = part.trim();
                if (!name.isEmpty())
                {
                    // String.split keeps leading/interior empty strings - skip them
                    splitTags.add(name);
                }
            }
        }
        result = splitTags;
    }
    return result;
}
/**
* @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action,
* NodeRef)

View File

@@ -413,6 +413,7 @@ public abstract class TikaPoweredMetadataExtracter
// keys onto their own content model
for(String tikaKey : metadata.names())
{
// TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties);
}

View File

@@ -97,6 +97,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
protected static final String QUICK_FILENAME = "quickIPTC.jpg"; // Keywords separated with comma (,)
protected static final String QUICK_FILENAME2 = "quickIPTC2.jpg"; // Keywords separated with pipe (|)
protected static final String QUICK_FILENAME3 = "quickIPTC3.jpg"; // Keywords separated with semi-colon (;)
protected static final String QUICK_KEYWORD = "fox";
protected static final String TAG_1 = "tag one";
@@ -356,9 +357,12 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
/**
* Test execution of mapping strings to tags
*/
// TODO ignored until we investigate when/why this regressed - start with MNT-13655 ?
public void XtestTagMapping() throws Exception
public void testTagMapping() throws Exception
{
// explicitly set here (rather than rely on defaults) in case another test method nullified
this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|"));
// Create the folders and documents to be tagged
NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME);
NodeRef document = nodes[0];
@@ -375,24 +379,32 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
executer.execute(action, document);
// Test extracted properties
assertEquals(ContentMetadataExtracterTest.QUICK_DESCRIPTION,
nodeService.getProperty(document, ContentModel.PROP_DESCRIPTION));
assertTrue("storeRef tags should contain '" + QUICK_KEYWORD + "'",
taggingService.getTags(storeRef).contains(QUICK_KEYWORD));
assertTrue("document's tags should contain '" + QUICK_KEYWORD + "'",
taggingService.getTags(document).contains(QUICK_KEYWORD));
List<String> tags = taggingService.getTags(document);
assertTrue("doc tags '"+tags+"' should contain '" + QUICK_KEYWORD + "'",
tags.contains(QUICK_KEYWORD));
// Test manually added keyword
assertTrue("tags should contain '" + TAG_2 + "'",
taggingService.getTags(document).contains(TAG_2));
assertTrue("doc tags '"+tags+"' should contain '" + TAG_2 + "'",
tags.contains(TAG_2));
// Test manually added keyword - note: lower-case tag name
assertTrue("doc tags '"+tags+"' should contain '" + TAG_3.toLowerCase() + "'",
tags.contains(TAG_3.toLowerCase()));
// Test manually added nodeRef keyword
assertTrue("tags should contain '" + TAG_1 + "'",
taggingService.getTags(document).contains(TAG_1));
assertTrue("doc tags '"+tags+"' should contain '" + TAG_1 + "'",
tags.contains(TAG_1));
// Test that there are no empty tags created by the non-existent nodeRef
assertEquals("tags should contain '" + TAG_1 + "'", 4,
taggingService.getTags(document).size() );
// Test that there are no extra tags created by the non-existent nodeRef
assertEquals("Unexpected number of doc tags '"+tags+"'", 7,
tags.size());
return null;
}
@@ -408,6 +420,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
*/
public void testIgnoreInvalidTag() throws Exception
{
this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
executer.setStringTaggingSeparators(null);
// Create the folders and documents to be tagged
NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME2);
NodeRef document = nodes[0];
@@ -428,4 +443,48 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
removeTestFolderAndDocument(nodes);
}
/**
 * Checks that keywords are split into separate tags for each of the three
 * separator styles used by the quick IPTC test images (comma, vertical bar
 * and semi-colon).
 */
public void testTagMappingSeparators() throws Exception
{
    // explicitly set here (rather than rely on defaults) in case another test method nullified
    ContentMetadataExtracter extracter = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
    this.executer = extracter;
    extracter.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|"));

    // fixture files, in order: comma, vertical bar (pipe), semi-colon
    final String[] quickFiles = { QUICK_FILENAME, QUICK_FILENAME2, QUICK_FILENAME3 };

    // expected tags, index-aligned with quickFiles
    final List<List<String>> expectedKeywords = Arrays.asList(
            Arrays.asList("fox", "dog", "lazy", "jumping"),
            Arrays.asList("k1", "k2", "k3"),
            Arrays.asList("keyword1", "keyword2", "keyword3", "keyword4"));

    for (int i = 0; i < quickFiles.length; i++)
    {
        NodeRef[] created = createTestFolderAndDocument(quickFiles[i]);
        extractAndCheckTags(created[0], expectedKeywords.get(i));
        removeTestFolderAndDocument(created);
    }
}
/**
 * Runs the metadata extraction action against the given document via the
 * retrying transaction helper and asserts that every expected tag is present
 * on the document afterwards. Additional tags on the document are not
 * treated as a failure.
 *
 * @param document     the node to run the extraction action on
 * @param expectedTags tag names that must all be present after extraction
 */
private void extractAndCheckTags(NodeRef document, List<String> expectedTags)
{
    RetryingTransactionCallback<Void> extractAndVerify = new RetryingTransactionCallback<Void>()
    {
        @Override
        public Void execute() throws Throwable
        {
            executer.execute(new ActionImpl(document, ID, ContentMetadataExtracter.EXECUTOR_NAME, null), document);

            List<String> actualTags = taggingService.getTags(document);
            for (String expectedTag : expectedTags)
            {
                boolean present = actualTags.contains(expectedTag);
                assertTrue("Expected tag '"+expectedTag+"' not in "+actualTags, present);
            }
            return null;
        }
    };

    this.transactionService.getRetryingTransactionHelper().doInTransaction(extractAndVerify);
}
}