Fixed NPE when extracter action hits properties not in the dictionary.

Exposed extracter OverwritePolicy, but it still defaults to the OverwritePolicy specified in the extracters configuration.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6013 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2007-06-19 09:12:56 +00:00
parent a6e250cdac
commit 08fd123241
5 changed files with 247 additions and 139 deletions

View File

@@ -25,9 +25,10 @@
package org.alfresco.repo.action.executer;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.metadata.MetadataExtracter;
@@ -46,40 +47,15 @@ import org.alfresco.service.namespace.QName;
/**
* Extract metadata from any added content.
* <p>
* The metadata is extracted from the content and compared to the current
* property values. Missing or zero-length properties are replaced,
* otherwise they are left as is.<br/>
* <i>This may change if the action gets parameterized in future</i>.
* Currently, the default {@linkplain MetadataExtracter.OverwritePolicy overwrite policy}
* for each extracter is used. (TODO: Add overwrite policy as a parameter.)
*
* @see MetadataExtracter.OverwritePolicy
*
* @author Jesper Steen Møller
*/
public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
/**
* Action constants
*/
public static final String NAME = "extract-metadata";
/*
* TODO: Action parameters.
*
* Currently none exist, but it may be nice to add a 'policy' parameter for
* overwriting the extracted properties, with the following possible values:
* 1) Never: Never overwrite node properties that
* exist (i.e. preserve values, nulls, and blanks)
* 2) Pragmatic: Write
* extracted properties if they didn't exist before, are null, or evaluate
* to an empty string.
* 3) Always: Always store the extracted properes.
*
* Policies 1 and 2 will preserve previously set properties in case nodes
* are moved/copied, making this action run on the same content several
* times. However, if a property is deliberately cleared (e.g. by putting
* the empty string into the "decription" field), the pragmatic policy would
* indeed overwrite it. The current implementation matches the 'pragmatic'
* policy.
*/
/**
* The node service
*/
@@ -140,68 +116,77 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
*/
public void executeImpl(Action ruleAction, NodeRef actionedUponNodeRef)
{
if (this.nodeService.exists(actionedUponNodeRef) == true)
if (!nodeService.exists(actionedUponNodeRef))
{
ContentReader cr = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT);
// Node is gone
return;
}
ContentReader reader = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT);
// The reader may be null, e.g. for folders and the like
if (reader == null || reader.getMimetype() == null)
{
// No content to extract data from
return;
}
String mimetype = reader.getMimetype();
MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype);
if (extracter == null)
{
// There is no extracter to use
return;
}
// Get all the node's properties
Map<QName, Serializable> nodeProperties = nodeService.getProperties(actionedUponNodeRef);
// TODO: The override policy should be a parameter here. Instead, we'll use the default policy
// set on the extracter.
// Give the node's properties to the extracter to be modified
Map<QName, Serializable> modifiedProperties = extracter.extract(
reader,
/*OverwritePolicy.PRAGMATIC,*/
nodeProperties);
// 'cr' may be null, e.g. for folders and the like
if (cr != null && cr.getMimetype() != null)
// If none of the properties where changed, then there is nothing more to do
if (modifiedProperties.size() == 0)
{
return;
}
// Check that all properties have the appropriate aspect applied
Set<QName> requiredAspectQNames = new HashSet<QName>(3);
for (QName propertyQName : modifiedProperties.keySet())
{
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef == null)
{
MetadataExtracter me = metadataExtracterRegistry.getExtracter(cr.getMimetype());
if (me != null)
{
Map<QName, Serializable> newProps = new HashMap<QName, Serializable>(7, 0.5f);
me.extract(cr, newProps);
Map<QName, Serializable> allProps = nodeService.getProperties(actionedUponNodeRef);
/*
* The code below implements a modestly conservative
* 'preserve' policy which shouldn't override values
* accidentally.
*/
boolean changed = false;
for (QName key : newProps.keySet())
{
// check if we need to add an aspect for the prop
ClassDefinition propClass = dictionaryService.getProperty(key).getContainerClass();
if (propClass.isAspect() &&
nodeService.hasAspect(actionedUponNodeRef, propClass.getName()) == false)
{
Map<QName, Serializable> aspectProps = new HashMap<QName, Serializable>(3, 1.0f);
for (QName defKey : propClass.getProperties().keySet())
{
if (dictionaryService.getProperty(defKey).isMandatory())
{
aspectProps.put(defKey, allProps.get(defKey));
allProps.remove(defKey);
}
}
nodeService.addAspect(actionedUponNodeRef, propClass.getName(), aspectProps);
}
Serializable value = newProps.get(key);
if (value == null)
{
continue; // Content extracters shouldn't do this
}
// Look up the old value, and check for nulls
Serializable oldValue = allProps.get(key);
if (oldValue == null || oldValue.toString().length() == 0)
{
allProps.put(key, value);
changed = true;
}
}
if (changed)
{
nodeService.setProperties(actionedUponNodeRef, allProps);
}
}
// The property is not defined in the model
continue;
}
ClassDefinition propertyContainerDef = propertyDef.getContainerClass();
if (propertyContainerDef.isAspect())
{
QName aspectQName = propertyContainerDef.getName();
requiredAspectQNames.add(aspectQName);
}
}
// Add all the properties to the node BEFORE we add the aspects
nodeService.setProperties(actionedUponNodeRef, nodeProperties);
// Add each of the aspects, as required
for (QName requiredAspectQName : requiredAspectQNames)
{
if (nodeService.hasAspect(actionedUponNodeRef, requiredAspectQName))
{
// The node has the aspect already
continue;
}
else
{
nodeService.addAspect(actionedUponNodeRef, requiredAspectQName, null);
}
}
}

View File

@@ -25,18 +25,25 @@
package org.alfresco.repo.action.executer;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ActionImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.GUID;
@@ -45,7 +52,7 @@ import org.alfresco.util.GUID;
* Test of the ActionExecuter for extracting metadata. Note: This test makes
* assumptions about the PDF test data for PdfBoxExtracter.
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class ContentMetadataExtracterTest extends BaseSpringTest
{
@@ -90,7 +97,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf"));
// Get the executer instance
this.executer = (ContentMetadataExtracter) this.applicationContext.getBean(ContentMetadataExtracter.NAME);
this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata");
}
/**
@@ -119,6 +126,60 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION));
assertEquals(QUICK_CREATOR, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_AUTHOR));
}
private static final QName PROP_UNKNOWN_1 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown1");
private static final QName PROP_UNKNOWN_2 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown2");
private static class UnknownMetadataExtracter extends AbstractMappingMetadataExtracter
{
public UnknownMetadataExtracter()
{
Properties mappingProperties = new Properties();
mappingProperties.put("unknown1", PROP_UNKNOWN_1.toString());
mappingProperties.put("unknown2", PROP_UNKNOWN_2.toString());
setMappingProperties(mappingProperties);
}
@Override
protected Map<String, Set<QName>> getDefaultMapping()
{
// No need to give anything back as we have explicitly set the mapping already
return new HashMap<String, Set<QName>>(0);
}
@Override
public boolean isSupported(String sourceMimetype)
{
return sourceMimetype.equals(MimetypeMap.MIMETYPE_BINARY);
}
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
Map<String, Serializable> rawMap = newRawMap();
rawMap.put("unknown1", new Integer(1));
rawMap.put("unknown2", "TWO");
return rawMap;
}
}
public void testUnknownProperties()
{
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
UnknownMetadataExtracter extracterUnknown = new UnknownMetadataExtracter();
extracterUnknown.setRegistry(registry);
extracterUnknown.register();
// Now add some content with a binary mimetype
ContentWriter cw = this.contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);
cw.setMimetype(MimetypeMap.MIMETYPE_BINARY);
cw.putContent("Content for " + getName());
ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null);
executer.execute(action, this.nodeRef);
// The unkown properties should be present
Serializable prop1 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_1);
Serializable prop2 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_2);
assertNotNull("Unknown property is null", prop1);
assertNotNull("Unknown property is null", prop2);
}
/**
* Test execution of the pragmatic approach
@@ -148,9 +209,5 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
// But this one should have been set
assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION));
}
// If we implement other policies than "pragmatic", they should be tested as
// well...
}

View File

@@ -354,8 +354,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
while (tokenizer.hasMoreTokens())
{
String qnameStr = tokenizer.nextToken().trim();
// Check if we need to resolve a namespace reference
int index = qnameStr.indexOf(QName.NAMESPACE_PREFIX);
if (index > -1)
if (index > -1 && qnameStr.charAt(0) != QName.NAMESPACE_BEGIN)
{
String prefix = qnameStr.substring(0, index);
String suffix = qnameStr.substring(index + 1);
@@ -490,7 +491,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
public final boolean extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
public final Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination)
{
return extract(reader, this.overwritePolicy, destination, this.mapping);
}
@@ -498,11 +499,22 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
public final boolean extract(
public final Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination)
{
return extract(reader, overwritePolicy, destination, this.mapping);
}
/**
* {@inheritDoc}
*/
public final Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,
Map<String, Set<QName>> mapping) throws ContentIOException
Map<String, Set<QName>> mapping)
{
// Done
if (logger.isDebugEnabled())
@@ -522,14 +534,14 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
// check the reliability
checkIsSupported(reader);
boolean changed = false;
Map<QName, Serializable> changedProperties = null;
try
{
Map<String, Serializable> rawMetadata = extractRaw(reader);
// Convert to system properties (standalone)
Map<QName, Serializable> systemProperties = mapRawToSystem(rawMetadata);
// Now use the proper overwrite policy
changed = overwritePolicy.applyProperties(systemProperties, destination);
changedProperties = overwritePolicy.applyProperties(systemProperties, destination);
}
catch (Throwable e)
{
@@ -539,8 +551,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
}
finally
{
// check that the reader was closed
if (!reader.isClosed())
// check that the reader was closed (if used)
if (reader.isChannelOpen())
{
logger.error("Content reader not closed by metadata extracter: \n" +
" reader: " + reader + "\n" +
@@ -554,9 +566,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
logger.debug("Completed metadata extraction: \n" +
" reader: " + reader + "\n" +
" extracter: " + this + "\n" +
" changed: " + changed);
" changed: " + changedProperties);
}
return changed;
return changedProperties;
}
/**

View File

@@ -26,6 +26,7 @@ package org.alfresco.repo.content.metadata;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -43,6 +44,7 @@ import org.apache.commons.logging.LogFactory;
* @deprecated Use the {@link org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter}
*
* @author Jesper Steen Møller
* @author Derek Hulley
*/
abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
@@ -164,15 +166,43 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
/**
* {@inheritDoc}
* <p>
* A {@linkplain OverwritePolicy#PRAGMATIC pragmatic} overwrite policy will be applied.
*/
public boolean extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
public Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination)
{
return extract(reader, OverwritePolicy.PRAGMATIC, destination);
}
/**
* {@inheritDoc}
*
* @param propertyMapping ignored
*
* @see #extract(ContentReader, Map)
*/
public final Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination) throws ContentIOException
{
// check the reliability
checkReliability(reader);
Map<QName, Serializable> newProperties = new HashMap<QName, Serializable>(13);
try
{
extractInternal(reader, destination);
extractInternal(reader, newProperties);
// Apply the overwrite policy
Map<QName, Serializable> modifiedProperties = overwritePolicy.applyProperties(newProperties, destination);
// done
if (logger.isDebugEnabled())
{
logger.debug("Completed metadata extraction: \n" +
" reader: " + reader + "\n" +
" extracter: " + this);
}
return modifiedProperties;
}
catch (Throwable e)
{
@@ -182,23 +212,14 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
}
finally
{
// check that the reader was closed
if (!reader.isClosed())
// check that the reader was closed (if used)
if (reader.isChannelOpen())
{
logger.error("Content reader not closed by metadata extracter: \n" +
" reader: " + reader + "\n" +
" extracter: " + this);
}
}
// done
if (logger.isDebugEnabled())
{
logger.debug("Completed metadata extraction: \n" +
" reader: " + reader + "\n" +
" extracter: " + this);
}
return true;
}
/**
@@ -209,7 +230,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter
*
* @see #extract(ContentReader, Map)
*/
public final boolean extract(
public final Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,

View File

@@ -25,6 +25,7 @@
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -61,9 +62,9 @@ public interface MetadataExtracter extends ContentWorker
EAGER
{
@Override
public boolean applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
public Map<QName, Serializable> applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
{
boolean modified = false;
Map<QName, Serializable> modifiedProperties = new HashMap<QName, Serializable>(7);
for (Map.Entry<QName, Serializable> entry : extractedProperties.entrySet())
{
QName propertyQName = entry.getKey();
@@ -74,9 +75,9 @@ public interface MetadataExtracter extends ContentWorker
continue;
}
targetProperties.put(propertyQName, extractedValue);
modified = true;
modifiedProperties.put(propertyQName, extractedValue);
}
return modified;
return modifiedProperties;
}
},
/**
@@ -91,12 +92,12 @@ public interface MetadataExtracter extends ContentWorker
PRAGMATIC
{
@Override
public boolean applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
public Map<QName, Serializable> applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
{
/*
* Negative and positive checks are mixed in the loop.
*/
boolean modified = false;
Map<QName, Serializable> modifiedProperties = new HashMap<QName, Serializable>(7);
for (Map.Entry<QName, Serializable> entry : extractedProperties.entrySet())
{
QName propertyQName = entry.getKey();
@@ -111,7 +112,7 @@ public interface MetadataExtracter extends ContentWorker
{
// There is nothing currently
targetProperties.put(propertyQName, extractedValue);
modified = true;
modifiedProperties.put(propertyQName, extractedValue);
continue;
}
Serializable originalValue = targetProperties.get(propertyQName);
@@ -119,7 +120,7 @@ public interface MetadataExtracter extends ContentWorker
{
// The current value is null
targetProperties.put(propertyQName, extractedValue);
modified = true;
modifiedProperties.put(propertyQName, extractedValue);
continue;
}
// Check the string representation
@@ -135,13 +136,13 @@ public interface MetadataExtracter extends ContentWorker
{
// The original string is trivial
targetProperties.put(propertyQName, extractedValue);
modified = true;
modifiedProperties.put(propertyQName, extractedValue);
continue;
}
}
// We have some other object as the original value, so keep it
}
return modified;
return modifiedProperties;
}
},
/**
@@ -157,9 +158,9 @@ public interface MetadataExtracter extends ContentWorker
CAUTIOUS
{
@Override
public boolean applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
public Map<QName, Serializable> applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
{
boolean modified = false;
Map<QName, Serializable> modifiedProperties = new HashMap<QName, Serializable>(7);
for (Map.Entry<QName, Serializable> entry : extractedProperties.entrySet())
{
QName propertyQName = entry.getKey();
@@ -176,18 +177,20 @@ public interface MetadataExtracter extends ContentWorker
continue;
}
targetProperties.put(propertyQName, extractedValue);
modified = true;
modifiedProperties.put(propertyQName, extractedValue);
}
return modified;
return modifiedProperties;
}
};
/**
* Apply the overwrite policy for the extracted properties.
*
* @return Returns true if <i>any</i> properties were set on the target properties
* @return
* Returns a map of all properties that were applied to the target map. If the result is
* an empty map, then the target map remains unchanged.
*/
public boolean applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
public Map<QName, Serializable> applyProperties(Map<QName, Serializable> extractedProperties, Map<QName, Serializable> targetProperties)
{
throw new UnsupportedOperationException("Override this method");
}
@@ -235,12 +238,40 @@ public interface MetadataExtracter extends ContentWorker
*
* @param reader the source of the content
* @param destination the map of properties to populate (essentially a return value)
* @return Returns <tt>true</tt> if the destination map was modified
* @return Returns a map of all properties on the destination map that were
* added or modified. If the return map is empty, then no properties
* were modified.
* @throws ContentIOException if a detectable error occurs
*
* @see #extract(ContentReader, OverwritePolicy, Map, Map)
*/
public boolean extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
public Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination);
/**
* Extracts the metadata values from the content provided by the reader and source
* mimetype to the supplied map.
* <p>
* The extraction viability can be determined by an up front call to {@link #isSupported(String)}.
* <p>
* The source mimetype <b>must</b> be available on the
* {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
* of the reader.
*
* @param reader the source of the content
* @param overwritePolicy the policy stipulating how the system properties must be
* overwritten if present
* @param destination the map of properties to populate (essentially a return value)
* @return Returns a map of all properties on the destination map that were
* added or modified. If the return map is empty, then no properties
* were modified.
* @throws ContentIOException if a detectable error occurs
*
* @see #extract(ContentReader, OverwritePolicy, Map, Map)
*/
public Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination);
/**
* Extracts the metadata from the content provided by the reader and source
@@ -260,14 +291,16 @@ public interface MetadataExtracter extends ContentWorker
* overwritten if present
* @param destination the map of properties to populate (essentially a return value)
* @param mapping a mapping of document-specific properties to system properties.
* @return Returns <tt>true</tt> if the destination map was modified
* @return Returns a map of all properties on the destination map that were
* added or modified. If the return map is empty, then no properties
* were modified.
* @throws ContentIOException if a detectable error occurs
*
* @see #extract(ContentReader, Map)
*/
public boolean extract(
public Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,
Map<String, Set<QName>> mapping) throws ContentIOException;
Map<String, Set<QName>> mapping);
}