REPO-1986: Upload Failing due to Metadata Extraction Issue (MNT-17436) - part 2

- part 2 - enable "addTags" to handle a configurable list of separators (when using the "enableStringTagging" option of the "extract-metadata" action)
- initial default separators/delimiters => comma, semicolon & vertical bar (pipe)
- also means we can re-enable & fix ContentMetadataExtractorTagMappingTest

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@135061 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jan Vonka
2017-02-09 16:04:00 +00:00
parent a6d88afc6e
commit 483d79a548
4 changed files with 171 additions and 52 deletions

View File

@@ -45,6 +45,7 @@ package org.alfresco.repo.action.executer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -94,8 +95,13 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
private TaggingService taggingService;
private MetadataExtracterRegistry metadataExtracterRegistry;
private boolean carryAspectProperties = true;
private boolean enableStringTagging = false;
// Default list of separators (when enableStringTagging is enabled): comma, semi-colon and vertical bar.
// Each entry is handed to String.split(), so it is interpreted as a regular expression -
// which is why the vertical bar is escaped here.
protected List<String> stringTaggingSeparators = Arrays.asList(",", ";", "\\|");
/**
* No-argument constructor; the body is empty, so the fields keep the
* default values given at their declarations.
*/
public ContentMetadataExtracter()
{
}
@@ -164,6 +170,16 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
this.enableStringTagging = enableStringTagging;
}
/**
* Sets the list of string separators used when splitting string tag values -
* note: all will be applied to a given string, one after the other.
* <p>
* Each separator is passed to {@link String#split(String)} and is therefore treated as a
* regular expression; regex metacharacters (for example the vertical bar) must be escaped.
*
* @param stringTaggingSeparators the separators to apply; if <code>null</code> then
*                                string values are not split at all
*/
public void setStringTaggingSeparators(List<String> stringTaggingSeparators)
{
this.stringTaggingSeparators = stringTaggingSeparators;
}
/**
* Iterates the values of the taggable property which the metadata
* extractor should have already attempted to convert values to {@link NodeRef}s.
@@ -182,11 +198,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue)
{
List<String> tags = new ArrayList<String>();
if (logger.isDebugEnabled())
{
logger.debug("converting " + rawValue.toString() + " of type " +
rawValue.getClass().getCanonicalName() + " to tags");
logger.debug("converting " + rawValue.toString() + " of type " + rawValue.getClass().getCanonicalName() + " to tags");
}
if (rawValue instanceof Collection<?>)
{
for (Object singleValue : (Collection<?>) rawValue)
@@ -201,16 +218,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
(String) singleValue);
try
{
String tagName = (String) nodeService.getProperty((NodeRef) convertedPropertyValue, ContentModel.PROP_NAME);
NodeRef nodeRef = (NodeRef) convertedPropertyValue;
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
if (logger.isTraceEnabled())
{
logger.trace("found tag '" + tagName + "' from tag nodeRef '" + (String) singleValue + "', " +
"adding to " + actionedUponNodeRef.toString());
}
if (tagName != null && !tagName.equals(""))
{
tags.add(tagName);
logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
}
catch (InvalidNodeRefException e)
{
@@ -223,17 +239,26 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
else
{
// Must be a simple string
if (logger.isTraceEnabled())
{
logger.trace("adding string tag '" + (String) singleValue + "' to " + actionedUponNodeRef.toString());
logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef);
}
tags.add((String) singleValue);
tags.addAll(splitTag((String)singleValue));
}
}
else if (singleValue instanceof NodeRef)
{
String tagName = (String) nodeService.getProperty((NodeRef) singleValue, ContentModel.PROP_NAME);
tags.add(tagName);
NodeRef nodeRef = (NodeRef)singleValue;
String tagName = (String) nodeService.getProperty(nodeRef, ContentModel.PROP_NAME);
if (logger.isTraceEnabled())
{
logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
}
}
}
@@ -241,9 +266,15 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
if (logger.isTraceEnabled())
{
logger.trace("adding tag '" + (String) rawValue + "' to " + actionedUponNodeRef.toString());
logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef);
}
tags.add((String) rawValue);
tags.addAll(splitTag((String)rawValue));
}
if (logger.isDebugEnabled())
{
logger.debug("adding tags '" + tags + "' to " + actionedUponNodeRef.toString());
}
try
@@ -259,6 +290,34 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
}
}
}
/**
 * Splits a raw tag string into individual, trimmed tag names by applying every
 * configured separator in turn (each separator is applied to the output of the
 * previous one).
 * <p>
 * Separators are passed to {@link String#split(String)}, i.e. they are regular expressions.
 * Blank fragments - produced by whitespace-only input, or by leading / consecutive
 * separators such as <code>"a,,b"</code> - are dropped so that empty tag names are
 * never returned. Null or empty separator entries are ignored.
 *
 * @param str the raw tag value, may be <code>null</code>
 * @return the non-blank, trimmed tag names in order of appearance; an empty list
 *         (never <code>null</code>) if there is nothing to add
 */
protected List<String> splitTag(String str)
{
    List<String> result = new ArrayList<>();
    if (str == null)
    {
        return result;
    }

    // Trim before testing for emptiness so that whitespace-only input yields no tag
    String trimmed = str.trim();
    if (trimmed.isEmpty())
    {
        return result;
    }
    result.add(trimmed);

    if (stringTaggingSeparators != null)
    {
        for (String sep : stringTaggingSeparators)
        {
            // A null separator would throw in String.split() and an empty one would
            // break the tag into single characters - neither is a usable separator
            if (sep == null || sep.isEmpty())
            {
                continue;
            }

            List<String> splitTags = new ArrayList<>(result.size());
            for (String tag : result)
            {
                for (String part : tag.split(sep))
                {
                    String candidate = part.trim();
                    // Skip the empty fragments left by leading or consecutive separators
                    if (!candidate.isEmpty())
                    {
                        splitTags.add(candidate);
                    }
                }
            }
            result = splitTags;
        }
    }

    return result;
}
/**
* @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action,

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.IOException;
@@ -413,6 +413,7 @@ public abstract class TikaPoweredMetadataExtracter
// keys onto their own content model
for(String tikaKey : metadata.names())
{
// TODO review this change (part of MNT-15267) - should we really force string concatenation here !?
putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties);
}