From 08fd12324180d519fae71e93ac0a2407337ba58e Mon Sep 17 00:00:00 2001 From: Derek Hulley Date: Tue, 19 Jun 2007 09:12:56 +0000 Subject: [PATCH] Fixed NPE when extracter action hits properties not in the dictionary. Exposed extracter OverwritePolicy, but it still defaults to the OverwritePolicy specified in the extracters configuration. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6013 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../executer/ContentMetadataExtracter.java | 161 ++++++++---------- .../ContentMetadataExtracterTest.java | 69 +++++++- .../AbstractMappingMetadataExtracter.java | 32 ++-- .../metadata/AbstractMetadataExtracter.java | 49 ++++-- .../content/metadata/MetadataExtracter.java | 75 +++++--- 5 files changed, 247 insertions(+), 139 deletions(-) diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 18939e1b19..f4e29ba44e 100644 --- a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -25,9 +25,10 @@ package org.alfresco.repo.action.executer; import java.io.Serializable; -import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.alfresco.model.ContentModel; import org.alfresco.repo.content.metadata.MetadataExtracter; @@ -46,40 +47,15 @@ import org.alfresco.service.namespace.QName; /** * Extract metadata from any added content. *

- * The metadata is extracted from the content and compared to the current - * property values. Missing or zero-length properties are replaced, - * otherwise they are left as is.
- * This may change if the action gets parameterized in future. + * Currently, the default {@linkplain MetadataExtracter.OverwritePolicy overwrite policy} + * for each extracter is used. (TODO: Add overwrite policy as a parameter.) + * + * @see MetadataExtracter.OverwritePolicy * * @author Jesper Steen Møller */ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { - /** - * Action constants - */ - public static final String NAME = "extract-metadata"; - - /* - * TODO: Action parameters. - * - * Currently none exist, but it may be nice to add a 'policy' parameter for - * overwriting the extracted properties, with the following possible values: - * 1) Never: Never overwrite node properties that - * exist (i.e. preserve values, nulls, and blanks) - * 2) Pragmatic: Write - * extracted properties if they didn't exist before, are null, or evaluate - * to an empty string. - * 3) Always: Always store the extracted properes. - * - * Policies 1 and 2 will preserve previously set properties in case nodes - * are moved/copied, making this action run on the same content several - * times. However, if a property is deliberately cleared (e.g. by putting - * the empty string into the "decription" field), the pragmatic policy would - * indeed overwrite it. The current implementation matches the 'pragmatic' - * policy. - */ - /** * The node service */ @@ -140,68 +116,77 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase */ public void executeImpl(Action ruleAction, NodeRef actionedUponNodeRef) { - if (this.nodeService.exists(actionedUponNodeRef) == true) + if (!nodeService.exists(actionedUponNodeRef)) { - ContentReader cr = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT); + // Node is gone + return; + } + ContentReader reader = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT); + // The reader may be null, e.g. for folders and the like + if (reader == null || reader.getMimetype() == null) + { + // No content to extract data from + return; + } + String mimetype = reader.getMimetype(); + MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype); + if (extracter == null) + { + // There is no extracter to use + return; + } + + // Get all the node's properties + Map nodeProperties = nodeService.getProperties(actionedUponNodeRef); + + // TODO: The override policy should be a parameter here. Instead, we'll use the default policy + // set on the extracter. + // Give the node's properties to the extracter to be modified + Map modifiedProperties = extracter.extract( + reader, + /*OverwritePolicy.PRAGMATIC,*/ + nodeProperties); - // 'cr' may be null, e.g. for folders and the like - if (cr != null && cr.getMimetype() != null) + // If none of the properties where changed, then there is nothing more to do + if (modifiedProperties.size() == 0) + { + return; + } + + // Check that all properties have the appropriate aspect applied + Set requiredAspectQNames = new HashSet(3); + + for (QName propertyQName : modifiedProperties.keySet()) + { + PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName); + if (propertyDef == null) { - MetadataExtracter me = metadataExtracterRegistry.getExtracter(cr.getMimetype()); - if (me != null) - { - Map newProps = new HashMap(7, 0.5f); - me.extract(cr, newProps); - - Map allProps = nodeService.getProperties(actionedUponNodeRef); - - /* - * The code below implements a modestly conservative - * 'preserve' policy which shouldn't override values - * accidentally. - */ - - boolean changed = false; - for (QName key : newProps.keySet()) - { - // check if we need to add an aspect for the prop - ClassDefinition propClass = dictionaryService.getProperty(key).getContainerClass(); - if (propClass.isAspect() && - nodeService.hasAspect(actionedUponNodeRef, propClass.getName()) == false) - { - Map aspectProps = new HashMap(3, 1.0f); - for (QName defKey : propClass.getProperties().keySet()) - { - if (dictionaryService.getProperty(defKey).isMandatory()) - { - aspectProps.put(defKey, allProps.get(defKey)); - allProps.remove(defKey); - } - } - nodeService.addAspect(actionedUponNodeRef, propClass.getName(), aspectProps); - } - - Serializable value = newProps.get(key); - if (value == null) - { - continue; // Content extracters shouldn't do this - } - - // Look up the old value, and check for nulls - Serializable oldValue = allProps.get(key); - if (oldValue == null || oldValue.toString().length() == 0) - { - allProps.put(key, value); - changed = true; - } - } - - if (changed) - { - nodeService.setProperties(actionedUponNodeRef, allProps); - } - } + // The property is not defined in the model + continue; } + ClassDefinition propertyContainerDef = propertyDef.getContainerClass(); + if (propertyContainerDef.isAspect()) + { + QName aspectQName = propertyContainerDef.getName(); + requiredAspectQNames.add(aspectQName); + } + } + + // Add all the properties to the node BEFORE we add the aspects + nodeService.setProperties(actionedUponNodeRef, nodeProperties); + + // Add each of the aspects, as required + for (QName requiredAspectQName : requiredAspectQNames) + { + if (nodeService.hasAspect(actionedUponNodeRef, requiredAspectQName)) + { + // The node has the aspect already + continue; + } + else + { + nodeService.addAspect(actionedUponNodeRef, requiredAspectQName, null); + } } } diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java index af55a98d61..23276a2461 100644 --- a/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java @@ -25,18 +25,25 @@ package org.alfresco.repo.action.executer; import java.io.Serializable; +import java.util.HashMap; import java.util.Map; +import java.util.Properties; +import java.util.Set; import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; +import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.StoreRef; +import org.alfresco.service.namespace.NamespaceService; import org.alfresco.service.namespace.QName; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; @@ -45,7 +52,7 @@ import org.alfresco.util.GUID; * Test of the ActionExecuter for extracting metadata. Note: This test makes * assumptions about the PDF test data for PdfBoxExtracter. * - * @author Jesper Steen M�ller + * @author Jesper Steen Møller */ public class ContentMetadataExtracterTest extends BaseSpringTest { @@ -90,7 +97,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf")); // Get the executer instance - this.executer = (ContentMetadataExtracter) this.applicationContext.getBean(ContentMetadataExtracter.NAME); + this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata"); } /** @@ -119,6 +126,60 @@ public class ContentMetadataExtracterTest extends BaseSpringTest assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION)); assertEquals(QUICK_CREATOR, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_AUTHOR)); } + + private static final QName PROP_UNKNOWN_1 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown1"); + private static final QName PROP_UNKNOWN_2 = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "unkown2"); + private static class UnknownMetadataExtracter extends AbstractMappingMetadataExtracter + { + public UnknownMetadataExtracter() + { + Properties mappingProperties = new Properties(); + mappingProperties.put("unknown1", PROP_UNKNOWN_1.toString()); + mappingProperties.put("unknown2", PROP_UNKNOWN_2.toString()); + setMappingProperties(mappingProperties); + } + @Override + protected Map> getDefaultMapping() + { + // No need to give anything back as we have explicitly set the mapping already + return new HashMap>(0); + } + @Override + public boolean isSupported(String sourceMimetype) + { + return sourceMimetype.equals(MimetypeMap.MIMETYPE_BINARY); + } + + public Map extractRaw(ContentReader reader) throws Throwable + { + Map rawMap = newRawMap(); + rawMap.put("unknown1", new Integer(1)); + rawMap.put("unknown2", "TWO"); + return rawMap; + } + } + + public void testUnknownProperties() + { + MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); + UnknownMetadataExtracter extracterUnknown = new UnknownMetadataExtracter(); + extracterUnknown.setRegistry(registry); + extracterUnknown.register(); + // Now add some content with a binary mimetype + ContentWriter cw = this.contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + cw.setMimetype(MimetypeMap.MIMETYPE_BINARY); + cw.putContent("Content for " + getName()); + + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + executer.execute(action, this.nodeRef); + + // The unkown properties should be present + Serializable prop1 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_1); + Serializable prop2 = nodeService.getProperty(nodeRef, PROP_UNKNOWN_2); + + assertNotNull("Unknown property is null", prop1); + assertNotNull("Unknown property is null", prop2); + } /** * Test execution of the pragmatic approach @@ -148,9 +209,5 @@ public class ContentMetadataExtracterTest extends BaseSpringTest // But this one should have been set assertEquals(QUICK_DESCRIPTION, this.nodeService.getProperty(this.nodeRef, ContentModel.PROP_DESCRIPTION)); - } - - // If we implement other policies than "pragmatic", they should be tested as - // well... } diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index 31ac7acc5b..825a0208b3 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -354,8 +354,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac while (tokenizer.hasMoreTokens()) { String qnameStr = tokenizer.nextToken().trim(); + // Check if we need to resolve a namespace reference int index = qnameStr.indexOf(QName.NAMESPACE_PREFIX); - if (index > -1) + if (index > -1 && qnameStr.charAt(0) != QName.NAMESPACE_BEGIN) { String prefix = qnameStr.substring(0, index); String suffix = qnameStr.substring(index + 1); @@ -490,7 +491,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac /** * {@inheritDoc} */ - public final boolean extract(ContentReader reader, Map destination) throws ContentIOException + public final Map extract(ContentReader reader, Map destination) { return extract(reader, this.overwritePolicy, destination, this.mapping); } @@ -498,11 +499,22 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac /** * {@inheritDoc} */ - public final boolean extract( + public final Map extract( + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination) + { + return extract(reader, overwritePolicy, destination, this.mapping); + } + + /** + * {@inheritDoc} + */ + public final Map extract( ContentReader reader, OverwritePolicy overwritePolicy, Map destination, - Map> mapping) throws ContentIOException + Map> mapping) { // Done if (logger.isDebugEnabled()) @@ -522,14 +534,14 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // check the reliability checkIsSupported(reader); - boolean changed = false; + Map changedProperties = null; try { Map rawMetadata = extractRaw(reader); // Convert to system properties (standalone) Map systemProperties = mapRawToSystem(rawMetadata); // Now use the proper overwrite policy - changed = overwritePolicy.applyProperties(systemProperties, destination); + changedProperties = overwritePolicy.applyProperties(systemProperties, destination); } catch (Throwable e) { @@ -539,8 +551,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } finally { - // check that the reader was closed - if (!reader.isClosed()) + // check that the reader was closed (if used) + if (reader.isChannelOpen()) { logger.error("Content reader not closed by metadata extracter: \n" + " reader: " + reader + "\n" + @@ -554,9 +566,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac logger.debug("Completed metadata extraction: \n" + " reader: " + reader + "\n" + " extracter: " + this + "\n" + - " changed: " + changed); + " changed: " + changedProperties); } - return changed; + return changedProperties; } /** diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java index b30a12ed75..0c7c0f8a9f 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java @@ -26,6 +26,7 @@ package org.alfresco.repo.content.metadata; import java.io.Serializable; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -43,6 +44,7 @@ import org.apache.commons.logging.LogFactory; * @deprecated Use the {@link org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter} * * @author Jesper Steen Møller + * @author Derek Hulley */ abstract public class AbstractMetadataExtracter implements MetadataExtracter { @@ -164,15 +166,43 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter /** * {@inheritDoc} + *

+ * A {@linkplain OverwritePolicy#PRAGMATIC pragmatic} overwrite policy will be applied. */ - public boolean extract(ContentReader reader, Map destination) throws ContentIOException + public Map extract(ContentReader reader, Map destination) + { + return extract(reader, OverwritePolicy.PRAGMATIC, destination); + } + + /** + * {@inheritDoc} + * + * @param propertyMapping ignored + * + * @see #extract(ContentReader, Map) + */ + public final Map extract( + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination) throws ContentIOException { // check the reliability checkReliability(reader); + Map newProperties = new HashMap(13); try { - extractInternal(reader, destination); + extractInternal(reader, newProperties); + // Apply the overwrite policy + Map modifiedProperties = overwritePolicy.applyProperties(newProperties, destination); + // done + if (logger.isDebugEnabled()) + { + logger.debug("Completed metadata extraction: \n" + + " reader: " + reader + "\n" + + " extracter: " + this); + } + return modifiedProperties; } catch (Throwable e) { @@ -182,23 +212,14 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter } finally { - // check that the reader was closed - if (!reader.isClosed()) + // check that the reader was closed (if used) + if (reader.isChannelOpen()) { logger.error("Content reader not closed by metadata extracter: \n" + " reader: " + reader + "\n" + " extracter: " + this); } } - - // done - if (logger.isDebugEnabled()) - { - logger.debug("Completed metadata extraction: \n" + - " reader: " + reader + "\n" + - " extracter: " + this); - } - return true; } /** @@ -209,7 +230,7 @@ abstract public class AbstractMetadataExtracter implements MetadataExtracter * * @see #extract(ContentReader, Map) */ - public final boolean extract( + public final Map extract( ContentReader reader, OverwritePolicy overwritePolicy, Map destination, diff --git a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java index 5fa6e36400..20a528472c 100644 --- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java @@ -25,6 +25,7 @@ package org.alfresco.repo.content.metadata; import java.io.Serializable; +import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -61,9 +62,9 @@ public interface MetadataExtracter extends ContentWorker EAGER { @Override - public boolean applyProperties(Map extractedProperties, Map targetProperties) + public Map applyProperties(Map extractedProperties, Map targetProperties) { - boolean modified = false; + Map modifiedProperties = new HashMap(7); for (Map.Entry entry : extractedProperties.entrySet()) { QName propertyQName = entry.getKey(); @@ -74,9 +75,9 @@ public interface MetadataExtracter extends ContentWorker continue; } targetProperties.put(propertyQName, extractedValue); - modified = true; + modifiedProperties.put(propertyQName, extractedValue); } - return modified; + return modifiedProperties; } }, /** @@ -91,12 +92,12 @@ public interface MetadataExtracter extends ContentWorker PRAGMATIC { @Override - public boolean applyProperties(Map extractedProperties, Map targetProperties) + public Map applyProperties(Map extractedProperties, Map targetProperties) { /* * Negative and positive checks are mixed in the loop. */ - boolean modified = false; + Map modifiedProperties = new HashMap(7); for (Map.Entry entry : extractedProperties.entrySet()) { QName propertyQName = entry.getKey(); @@ -111,7 +112,7 @@ public interface MetadataExtracter extends ContentWorker { // There is nothing currently targetProperties.put(propertyQName, extractedValue); - modified = true; + modifiedProperties.put(propertyQName, extractedValue); continue; } Serializable originalValue = targetProperties.get(propertyQName); @@ -119,7 +120,7 @@ public interface MetadataExtracter extends ContentWorker { // The current value is null targetProperties.put(propertyQName, extractedValue); - modified = true; + modifiedProperties.put(propertyQName, extractedValue); continue; } // Check the string representation @@ -135,13 +136,13 @@ public interface MetadataExtracter extends ContentWorker { // The original string is trivial targetProperties.put(propertyQName, extractedValue); - modified = true; + modifiedProperties.put(propertyQName, extractedValue); continue; } } // We have some other object as the original value, so keep it } - return modified; + return modifiedProperties; } }, /** @@ -157,9 +158,9 @@ public interface MetadataExtracter extends ContentWorker CAUTIOUS { @Override - public boolean applyProperties(Map extractedProperties, Map targetProperties) + public Map applyProperties(Map extractedProperties, Map targetProperties) { - boolean modified = false; + Map modifiedProperties = new HashMap(7); for (Map.Entry entry : extractedProperties.entrySet()) { QName propertyQName = entry.getKey(); @@ -176,18 +177,20 @@ public interface MetadataExtracter extends ContentWorker continue; } targetProperties.put(propertyQName, extractedValue); - modified = true; + modifiedProperties.put(propertyQName, extractedValue); } - return modified; + return modifiedProperties; } }; /** * Apply the overwrite policy for the extracted properties. * - * @return Returns true if any properties were set on the target properties + * @return + * Returns a map of all properties that were applied to the target map. If the result is + * an empty map, then the target map remains unchanged. */ - public boolean applyProperties(Map extractedProperties, Map targetProperties) + public Map applyProperties(Map extractedProperties, Map targetProperties) { throw new UnsupportedOperationException("Override this method"); } @@ -235,12 +238,40 @@ public interface MetadataExtracter extends ContentWorker * * @param reader the source of the content * @param destination the map of properties to populate (essentially a return value) - * @return Returns true if the destination map was modified + * @return Returns a map of all properties on the destination map that were + * added or modified. If the return map is empty, then no properties + * were modified. * @throws ContentIOException if a detectable error occurs * * @see #extract(ContentReader, OverwritePolicy, Map, Map) */ - public boolean extract(ContentReader reader, Map destination) throws ContentIOException; + public Map extract(ContentReader reader, Map destination); + + /** + * Extracts the metadata values from the content provided by the reader and source + * mimetype to the supplied map. + *

+ * The extraction viability can be determined by an up front call to {@link #isSupported(String)}. + *

+ * The source mimetype must be available on the + * {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method + * of the reader. + * + * @param reader the source of the content + * @param overwritePolicy the policy stipulating how the system properties must be + * overwritten if present + * @param destination the map of properties to populate (essentially a return value) + * @return Returns a map of all properties on the destination map that were + * added or modified. If the return map is empty, then no properties + * were modified. + * @throws ContentIOException if a detectable error occurs + * + * @see #extract(ContentReader, OverwritePolicy, Map, Map) + */ + public Map extract( + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination); /** * Extracts the metadata from the content provided by the reader and source @@ -260,14 +291,16 @@ public interface MetadataExtracter extends ContentWorker * overwritten if present * @param destination the map of properties to populate (essentially a return value) * @param mapping a mapping of document-specific properties to system properties. - * @return Returns true if the destination map was modified + * @return Returns a map of all properties on the destination map that were + * added or modified. If the return map is empty, then no properties + * were modified. * @throws ContentIOException if a detectable error occurs * * @see #extract(ContentReader, Map) */ - public boolean extract( + public Map extract( ContentReader reader, OverwritePolicy overwritePolicy, Map destination, - Map> mapping) throws ContentIOException; + Map> mapping); }