mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
REPO-1986: Upload Failing due to Metadata Extraction Issue (MNT-17436) - part 2
- part 2 - enable "addTags" to handle configurable list of separators (when using "enableStringTagging" option of "extract-metadata" action) - initial default separators/delimiters => comma, semi-colon & vertical bar (pipe) - also means we can re-enable & fix ContentMetadataExtractorTagMappingTest git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135061 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
BIN
config/quick/quickIPTC3.jpg
Normal file
BIN
config/quick/quickIPTC3.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 17 KiB |
@@ -45,6 +45,7 @@ package org.alfresco.repo.action.executer;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@@ -94,8 +95,13 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
private TaggingService taggingService;
|
||||
private MetadataExtracterRegistry metadataExtracterRegistry;
|
||||
private boolean carryAspectProperties = true;
|
||||
|
||||
|
||||
private boolean enableStringTagging = false;
|
||||
|
||||
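// Default separators used to split string tag values when enableStringTagging is enabled; each entry is applied as a regular expression via String.split (hence the escaped pipe)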
|
||||
protected List<String> stringTaggingSeparators = Arrays.asList(",", ";", "\\|");
|
||||
|
||||
/**
 * No-argument constructor; the body is empty, so no initialisation is performed here.
 */
public ContentMetadataExtracter()
|
||||
{
|
||||
}
|
||||
@@ -164,6 +170,16 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
this.enableStringTagging = enableStringTagging;
|
||||
}
|
||||
|
||||
/**
 * Sets the separators used to split a string tag value into individual tag names.
 * <p>
 * Every separator in the list is applied, in list order, to a given string. Each entry
 * is handed to {@link String#split(String)} and is therefore interpreted as a regular
 * expression, so regex metacharacters must be escaped (e.g. <code>"\\|"</code> for a pipe).
 *
 * @param stringTaggingSeparators the separators to apply; may be <code>null</code>, in which
 *                                case string values are not split
 */
|
||||
public void setStringTaggingSeparators(List<String> stringTaggingSeparators)
|
||||
{
|
||||
this.stringTaggingSeparators = stringTaggingSeparators;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates the values of the taggable property which the metadata
|
||||
* extractor should have already attempted to convert values to {@link NodeRef}s.
|
||||
@@ -182,11 +198,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue)
|
||||
{
|
||||
List<String> tags = new ArrayList<String>();
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("converting " + rawValue.toString() + " of type " +
|
||||
rawValue.getClass().getCanonicalName() + " to tags");
|
||||
logger.debug("converting " + rawValue.toString() + " of type " + rawValue.getClass().getCanonicalName() + " to tags");
|
||||
}
|
||||
|
||||
if (rawValue instanceof Collection<?>)
|
||||
{
|
||||
for (Object singleValue : (Collection<?>) rawValue)
|
||||
@@ -201,16 +218,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
(String) singleValue);
|
||||
try
|
||||
{
|
||||
String tagName = (String) nodeService.getProperty((NodeRef) convertedPropertyValue, ContentModel.PROP_NAME);
|
||||
NodeRef nodeRef = (NodeRef) convertedPropertyValue;
|
||||
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
|
||||
|
||||
if (logger.isTraceEnabled())
|
||||
{
|
||||
logger.trace("found tag '" + tagName + "' from tag nodeRef '" + (String) singleValue + "', " +
|
||||
"adding to " + actionedUponNodeRef.toString());
|
||||
}
|
||||
if (tagName != null && !tagName.equals(""))
|
||||
{
|
||||
tags.add(tagName);
|
||||
logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef);
|
||||
}
|
||||
|
||||
tags.addAll(splitTag(tagName));
|
||||
}
|
||||
catch (InvalidNodeRefException e)
|
||||
{
|
||||
@@ -223,17 +239,26 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
else
|
||||
{
|
||||
// Must be a simple string
|
||||
|
||||
if (logger.isTraceEnabled())
|
||||
{
|
||||
logger.trace("adding string tag '" + (String) singleValue + "' to " + actionedUponNodeRef.toString());
|
||||
logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef);
|
||||
}
|
||||
tags.add((String) singleValue);
|
||||
|
||||
tags.addAll(splitTag((String)singleValue));
|
||||
}
|
||||
}
|
||||
else if (singleValue instanceof NodeRef)
|
||||
{
|
||||
String tagName = (String) nodeService.getProperty((NodeRef) singleValue, ContentModel.PROP_NAME);
|
||||
tags.add(tagName);
|
||||
NodeRef nodeRef = (NodeRef)singleValue;
|
||||
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
|
||||
|
||||
if (logger.isTraceEnabled())
|
||||
{
|
||||
logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef);
|
||||
}
|
||||
|
||||
tags.addAll(splitTag(tagName));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,9 +266,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
{
|
||||
if (logger.isTraceEnabled())
|
||||
{
|
||||
logger.trace("adding tag '" + (String) rawValue + "' to " + actionedUponNodeRef.toString());
|
||||
logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef);
|
||||
}
|
||||
tags.add((String) rawValue);
|
||||
|
||||
tags.addAll(splitTag((String)rawValue));
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("adding tags '" + tags + "' to " + actionedUponNodeRef.toString());
|
||||
}
|
||||
|
||||
try
|
||||
@@ -260,6 +291,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
}
|
||||
}
|
||||
|
||||
protected List<String> splitTag(String str)
|
||||
{
|
||||
List<String> result = new ArrayList<>();
|
||||
if ((str != null) && (!str.equals("")))
|
||||
{
|
||||
result.add(str.trim());
|
||||
|
||||
if (stringTaggingSeparators != null)
|
||||
{
|
||||
for (String sep : stringTaggingSeparators)
|
||||
{
|
||||
List<String> splitTags = new ArrayList<>(result.size());
|
||||
for (String tag : result)
|
||||
{
|
||||
String[] parts = tag.split(sep);
|
||||
for (String part : parts)
|
||||
{
|
||||
splitTags.add(part.trim());
|
||||
}
|
||||
}
|
||||
result = splitTags;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action,
|
||||
* NodeRef)
|
||||
|
@@ -413,6 +413,7 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
// keys onto their own content model
|
||||
for(String tikaKey : metadata.names())
|
||||
{
|
||||
// TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
|
||||
putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties);
|
||||
}
|
||||
|
||||
|
@@ -97,6 +97,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
|
||||
|
||||
protected static final String QUICK_FILENAME = "quickIPTC.jpg"; // Keywords separated with comma (,)
|
||||
protected static final String QUICK_FILENAME2 = "quickIPTC2.jpg"; // Keywords separated with pipe (|)
|
||||
protected static final String QUICK_FILENAME3 = "quickIPTC3.jpg"; // Keywords separated with semi-colon (;)
|
||||
|
||||
protected static final String QUICK_KEYWORD = "fox";
|
||||
protected static final String TAG_1 = "tag one";
|
||||
@@ -356,9 +357,12 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
|
||||
/**
|
||||
* Test execution of mapping strings to tags
|
||||
*/
|
||||
// TODO ignored until we investigate when/why this regressed - start with MNT-13655 ?
|
||||
public void XtestTagMapping() throws Exception
|
||||
public void testTagMapping() throws Exception
|
||||
{
|
||||
// explicitly set the separators here (rather than relying on the defaults) in case another test method has set them to null
|
||||
this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
|
||||
executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|"));
|
||||
|
||||
// Create the folders and documents to be tagged
|
||||
NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME);
|
||||
NodeRef document = nodes[0];
|
||||
@@ -375,24 +379,32 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
|
||||
executer.execute(action, document);
|
||||
|
||||
// Test extracted properties
|
||||
|
||||
assertEquals(ContentMetadataExtracterTest.QUICK_DESCRIPTION,
|
||||
nodeService.getProperty(document, ContentModel.PROP_DESCRIPTION));
|
||||
|
||||
assertTrue("storeRef tags should contain '" + QUICK_KEYWORD + "'",
|
||||
taggingService.getTags(storeRef).contains(QUICK_KEYWORD));
|
||||
assertTrue("document's tags should contain '" + QUICK_KEYWORD + "'",
|
||||
taggingService.getTags(document).contains(QUICK_KEYWORD));
|
||||
|
||||
List<String> tags = taggingService.getTags(document);
|
||||
assertTrue("doc tags '"+tags+"' should contain '" + QUICK_KEYWORD + "'",
|
||||
tags.contains(QUICK_KEYWORD));
|
||||
|
||||
// Test manually added keyword
|
||||
assertTrue("tags should contain '" + TAG_2 + "'",
|
||||
taggingService.getTags(document).contains(TAG_2));
|
||||
assertTrue("doc tags '"+tags+"' should contain '" + TAG_2 + "'",
|
||||
tags.contains(TAG_2));
|
||||
|
||||
// Test manually added keyword - note: lower-case tag name
|
||||
assertTrue("doc tags '"+tags+"' should contain '" + TAG_3.toLowerCase() + "'",
|
||||
tags.contains(TAG_3.toLowerCase()));
|
||||
|
||||
// Test manually added nodeRef keyword
|
||||
assertTrue("tags should contain '" + TAG_1 + "'",
|
||||
taggingService.getTags(document).contains(TAG_1));
|
||||
assertTrue("doc tags '"+tags+"' should contain '" + TAG_1 + "'",
|
||||
tags.contains(TAG_1));
|
||||
|
||||
// Test that there are no empty tags created by the non-existent nodeRef
|
||||
assertEquals("tags should contain '" + TAG_1 + "'", 4,
|
||||
taggingService.getTags(document).size() );
|
||||
// Test that there are no extra tags created by the non-existent nodeRef
|
||||
assertEquals("Unexpected number of doc tags '"+tags+"'", 7,
|
||||
tags.size());
|
||||
|
||||
return null;
|
||||
}
|
||||
@@ -408,6 +420,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
|
||||
*/
|
||||
public void testIgnoreInvalidTag() throws Exception
|
||||
{
|
||||
this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
|
||||
executer.setStringTaggingSeparators(null);
|
||||
|
||||
// Create the folders and documents to be tagged
|
||||
NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME2);
|
||||
NodeRef document = nodes[0];
|
||||
@@ -428,4 +443,48 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
|
||||
|
||||
removeTestFolderAndDocument(nodes);
|
||||
}
|
||||
|
||||
public void testTagMappingSeparators() throws Exception
|
||||
{
|
||||
// explicitly set here (rather than rely on defaults) in case another test method nullified
|
||||
this.executer = (ContentMetadataExtracter) ctx.getBean("extract-metadata");
|
||||
executer.setStringTaggingSeparators(Arrays.asList(",", ";", "\\|"));
|
||||
|
||||
// IPTC Keywords with comma
|
||||
NodeRef[] nodes = createTestFolderAndDocument(QUICK_FILENAME);
|
||||
extractAndCheckTags(nodes[0], Arrays.asList("fox", "dog", "lazy", "jumping"));
|
||||
removeTestFolderAndDocument(nodes);
|
||||
|
||||
// IPTC Keywords with vertical bar (pipe)
|
||||
nodes = createTestFolderAndDocument(QUICK_FILENAME2);
|
||||
extractAndCheckTags(nodes[0], Arrays.asList("k1", "k2", "k3"));
|
||||
removeTestFolderAndDocument(nodes);
|
||||
|
||||
// IPTC Keywords with semi-colon
|
||||
nodes = createTestFolderAndDocument(QUICK_FILENAME3);
|
||||
extractAndCheckTags(nodes[0], Arrays.asList("keyword1", "keyword2", "keyword3", "keyword4"));
|
||||
removeTestFolderAndDocument(nodes);
|
||||
}
|
||||
|
||||
private void extractAndCheckTags(NodeRef document, List<String> expectedTags)
|
||||
{
|
||||
this.transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionCallback<Void>(){
|
||||
|
||||
@Override
|
||||
public Void execute() throws Throwable
|
||||
{
|
||||
ActionImpl action = new ActionImpl(document, ID, ContentMetadataExtracter.EXECUTOR_NAME, null);
|
||||
executer.execute(action, document);
|
||||
|
||||
List<String> tags = taggingService.getTags(document);
|
||||
|
||||
for (String expectedTag : expectedTags)
|
||||
{
|
||||
assertTrue("Expected tag '"+expectedTag+"' not in "+tags, tags.contains(expectedTag));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user