mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Fixed NPE when extracter action hits properties not in the dictionary.
Exposed extracter OverwritePolicy, but it still defaults to the OverwritePolicy specified in the extracters configuration. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6013 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -25,9 +25,10 @@
|
||||
package org.alfresco.repo.action.executer;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||
@@ -46,40 +47,15 @@ import org.alfresco.service.namespace.QName;
|
||||
/**
|
||||
* Extract metadata from any added content.
|
||||
* <p>
|
||||
* The metadata is extracted from the content and compared to the current
|
||||
* property values. Missing or zero-length properties are replaced,
|
||||
* otherwise they are left as is.<br/>
|
||||
* <i>This may change if the action gets parameterized in future</i>.
|
||||
* Currently, the default {@linkplain MetadataExtracter.OverwritePolicy overwrite policy}
|
||||
* for each extracter is used. (TODO: Add overwrite policy as a parameter.)
|
||||
*
|
||||
* @see MetadataExtracter.OverwritePolicy
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
{
|
||||
/**
|
||||
* Action constants
|
||||
*/
|
||||
public static final String NAME = "extract-metadata";
|
||||
|
||||
/*
|
||||
* TODO: Action parameters.
|
||||
*
|
||||
* Currently none exist, but it may be nice to add a 'policy' parameter for
|
||||
* overwriting the extracted properties, with the following possible values:
|
||||
* 1) Never: Never overwrite node properties that
|
||||
* exist (i.e. preserve values, nulls, and blanks)
|
||||
* 2) Pragmatic: Write
|
||||
* extracted properties if they didn't exist before, are null, or evaluate
|
||||
* to an empty string.
|
||||
* 3) Always: Always store the extracted properes.
|
||||
*
|
||||
* Policies 1 and 2 will preserve previously set properties in case nodes
|
||||
* are moved/copied, making this action run on the same content several
|
||||
* times. However, if a property is deliberately cleared (e.g. by putting
|
||||
* the empty string into the "decription" field), the pragmatic policy would
|
||||
* indeed overwrite it. The current implementation matches the 'pragmatic'
|
||||
* policy.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The node service
|
||||
*/
|
||||
@@ -140,68 +116,77 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
|
||||
*/
|
||||
public void executeImpl(Action ruleAction, NodeRef actionedUponNodeRef)
|
||||
{
|
||||
if (this.nodeService.exists(actionedUponNodeRef) == true)
|
||||
if (!nodeService.exists(actionedUponNodeRef))
|
||||
{
|
||||
ContentReader cr = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT);
|
||||
// Node is gone
|
||||
return;
|
||||
}
|
||||
ContentReader reader = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT);
|
||||
// The reader may be null, e.g. for folders and the like
|
||||
if (reader == null || reader.getMimetype() == null)
|
||||
{
|
||||
// No content to extract data from
|
||||
return;
|
||||
}
|
||||
String mimetype = reader.getMimetype();
|
||||
MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype);
|
||||
if (extracter == null)
|
||||
{
|
||||
// There is no extracter to use
|
||||
return;
|
||||
}
|
||||
|
||||
// Get all the node's properties
|
||||
Map<QName, Serializable> nodeProperties = nodeService.getProperties(actionedUponNodeRef);
|
||||
|
||||
// TODO: The override policy should be a parameter here. Instead, we'll use the default policy
|
||||
// set on the extracter.
|
||||
// Give the node's properties to the extracter to be modified
|
||||
Map<QName, Serializable> modifiedProperties = extracter.extract(
|
||||
reader,
|
||||
/*OverwritePolicy.PRAGMATIC,*/
|
||||
nodeProperties);
|
||||
|
||||
// 'cr' may be null, e.g. for folders and the like
|
||||
if (cr != null && cr.getMimetype() != null)
|
||||
// If none of the properties where changed, then there is nothing more to do
|
||||
if (modifiedProperties.size() == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Check that all properties have the appropriate aspect applied
|
||||
Set<QName> requiredAspectQNames = new HashSet<QName>(3);
|
||||
|
||||
for (QName propertyQName : modifiedProperties.keySet())
|
||||
{
|
||||
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
|
||||
if (propertyDef == null)
|
||||
{
|
||||
MetadataExtracter me = metadataExtracterRegistry.getExtracter(cr.getMimetype());
|
||||
if (me != null)
|
||||
{
|
||||
Map<QName, Serializable> newProps = new HashMap<QName, Serializable>(7, 0.5f);
|
||||
me.extract(cr, newProps);
|
||||
|
||||
Map<QName, Serializable> allProps = nodeService.getProperties(actionedUponNodeRef);
|
||||
|
||||
/*
|
||||
* The code below implements a modestly conservative
|
||||
* 'preserve' policy which shouldn't override values
|
||||
* accidentally.
|
||||
*/
|
||||
|
||||
boolean changed = false;
|
||||
for (QName key : newProps.keySet())
|
||||
{
|
||||
// check if we need to add an aspect for the prop
|
||||
ClassDefinition propClass = dictionaryService.getProperty(key).getContainerClass();
|
||||
if (propClass.isAspect() &&
|
||||
nodeService.hasAspect(actionedUponNodeRef, propClass.getName()) == false)
|
||||
{
|
||||
Map<QName, Serializable> aspectProps = new HashMap<QName, Serializable>(3, 1.0f);
|
||||
for (QName defKey : propClass.getProperties().keySet())
|
||||
{
|
||||
if (dictionaryService.getProperty(defKey).isMandatory())
|
||||
{
|
||||
aspectProps.put(defKey, allProps.get(defKey));
|
||||
allProps.remove(defKey);
|
||||
}
|
||||
}
|
||||
nodeService.addAspect(actionedUponNodeRef, propClass.getName(), aspectProps);
|
||||
}
|
||||
|
||||
Serializable value = newProps.get(key);
|
||||
if (value == null)
|
||||
{
|
||||
continue; // Content extracters shouldn't do this
|
||||
}
|
||||
|
||||
// Look up the old value, and check for nulls
|
||||
Serializable oldValue = allProps.get(key);
|
||||
if (oldValue == null || oldValue.toString().length() == 0)
|
||||
{
|
||||
allProps.put(key, value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (changed)
|
||||
{
|
||||
nodeService.setProperties(actionedUponNodeRef, allProps);
|
||||
}
|
||||
}
|
||||
// The property is not defined in the model
|
||||
continue;
|
||||
}
|
||||
ClassDefinition propertyContainerDef = propertyDef.getContainerClass();
|
||||
if (propertyContainerDef.isAspect())
|
||||
{
|
||||
QName aspectQName = propertyContainerDef.getName();
|
||||
requiredAspectQNames.add(aspectQName);
|
||||
}
|
||||
}
|
||||
|
||||
// Add all the properties to the node BEFORE we add the aspects
|
||||
nodeService.setProperties(actionedUponNodeRef, nodeProperties);
|
||||
|
||||
// Add each of the aspects, as required
|
||||
for (QName requiredAspectQName : requiredAspectQNames)
|
||||
{
|
||||
if (nodeService.hasAspect(actionedUponNodeRef, requiredAspectQName))
|
||||
{
|
||||
// The node has the aspect already
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
nodeService.addAspect(actionedUponNodeRef, requiredAspectQName, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -25,18 +25,25 @@
|
||||
package org.alfresco.repo.action.executer;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.action.ActionImpl;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.repo.security.authentication.AuthenticationComponent;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentService;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.namespace.NamespaceService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.BaseSpringTest;
|
||||
import org.alfresco.util.GUID;
|
||||
@@ -45,7 +52,7 @@ import org.alfresco.util.GUID;
|
||||
* Test of the ActionExecuter for extracting metadata. Note: This test makes
|
||||
* assumptions about the PDF test data for PdfBoxExtracter.
|
||||
*
|
||||
* @author Jesper Steen M<EFBFBD>ller
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class ContentMetadataExtracterTest extends BaseSpringTest
|
||||
{
|
||||
@@ -90,7 +97,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
|
||||
cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf"));
|
||||
|
||||
// Get the executer instance
|
||||
this.executer = (ContentMetadataExtracter) this.applicationContext.getBean(ContentMetadataExtracter.NAME);
|
||||
this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -119,6 +126,60 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
|
||||
assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION));
|
||||
assertEquals(QUICK_CREATOR, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_AUTHOR));
|
||||
}
|
||||
|
||||
private static final QName PROP_UNKNOWN_1 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown1");
|
||||
private static final QName PROP_UNKNOWN_2 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown2");
|
||||
private static class UnknownMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
public UnknownMetadataExtracter()
|
||||
{
|
||||
Properties mappingProperties = new Properties();
|
||||
mappingProperties.put("unknown1", PROP_UNKNOWN_1.toString());
|
||||
mappingProperties.put("unknown2", PROP_UNKNOWN_2.toString());
|
||||
setMappingProperties(mappingProperties);
|
||||
}
|
||||
@Override
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
// No need to give anything back as we have explicitly set the mapping already
|
||||
return new HashMap<String, Set<QName>>(0);
|
||||
}
|
||||
@Override
|
||||
public boolean isSupported(String sourceMimetype)
|
||||
{
|
||||
return sourceMimetype.equals(MimetypeMap.MIMETYPE_BINARY);
|
||||
}
|
||||
|
||||
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
Map<String, Serializable> rawMap = newRawMap();
|
||||
rawMap.put("unknown1", new Integer(1));
|
||||
rawMap.put("unknown2", "TWO");
|
||||
return rawMap;
|
||||
}
|
||||
}
|
||||
|
||||
public void testUnknownProperties()
|
||||
{
|
||||
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
|
||||
UnknownMetadataExtracter extracterUnknown = new UnknownMetadataExtracter();
|
||||
extracterUnknown.setRegistry(registry);
|
||||
extracterUnknown.register();
|
||||
// Now add some content with a binary mimetype
|
||||
ContentWriter cw = this.contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);
|
||||
cw.setMimetype(MimetypeMap.MIMETYPE_BINARY);
|
||||
cw.putContent("Content for " + getName());
|
||||
|
||||
ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null);
|
||||
executer.execute(action, this.nodeRef);
|
||||
|
||||
// The unkown properties should be present
|
||||
Serializable prop1 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_1);
|
||||
Serializable prop2 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_2);
|
||||
|
||||
assertNotNull("Unknown property is null", prop1);
|
||||
assertNotNull("Unknown property is null", prop2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test execution of the pragmatic approach
|
||||
@@ -148,9 +209,5 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
|
||||
|
||||
// But this one should have been set
|
||||
assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION));
|
||||
|
||||
}
|
||||
|
||||
// If we implement other policies than "pragmatic", they should be tested as
|
||||
// well...
|
||||
}
|
||||
|
Reference in New Issue
Block a user