diff --git a/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java b/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java index d157927d09..c2550a09e3 100644 --- a/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java +++ b/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.action.executer; import java.io.Serializable; @@ -30,7 +30,6 @@ import java.util.List; import java.util.Map; import org.alfresco.model.ContentModel; -import org.alfresco.repo.action.executer.ActionExecuterAbstractBase; import org.alfresco.repo.content.metadata.MetadataEmbedder; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.service.cmr.action.Action; @@ -108,7 +107,8 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase return; } String mimetype = reader.getMimetype(); - MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype); + long sourceSizeInBytes = reader.getSize(); + MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype, sourceSizeInBytes); if (embedder == null) { if(logger.isDebugEnabled()) @@ -136,7 +136,7 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase try { - embedder.embed(nodeProperties, reader, writer); + embedder.embed(actionedUponNodeRef, nodeProperties, reader, writer); } catch (Throwable e) { diff --git a/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 
1752fc9775..47cba6d035 100644 --- a/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -86,22 +86,23 @@ import org.apache.commons.logging.LogFactory; public class ContentMetadataExtracter extends ActionExecuterAbstractBase { private static Log logger = LogFactory.getLog(ContentMetadataExtracter.class); - + public static final String EXECUTOR_NAME = "extract-metadata"; - + private NodeService nodeService; private ContentService contentService; private DictionaryService dictionaryService; private TaggingService taggingService; private MetadataExtracterRegistry metadataExtracterRegistry; private boolean carryAspectProperties = true; - - + + private boolean enableStringTagging = false; - + // Default list of separators (when enableStringTagging is enabled) - protected List stringTaggingSeparators = Arrays.asList(",", ";", "\\|"); - + public final static List DEFAULT_STRING_TAGGING_SEPARATORS = Arrays.asList(",", ";", "\\|"); + protected List stringTaggingSeparators = DEFAULT_STRING_TAGGING_SEPARATORS; + public ContentMetadataExtracter() { } @@ -121,7 +122,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { this.contentService = contentService; } - + /** * @param dictService The DictionaryService to set. */ @@ -148,7 +149,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase /** * Whether or not aspect-related properties must be carried to the new version of the node - * + * * @param carryAspectProperties true (default) to carry all aspect-linked * properties forward. false will clean the * aspect of any unextracted values. 
@@ -157,12 +158,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { this.carryAspectProperties = carryAspectProperties; } - + /** * Whether or not to enable mapping of simple strings to cm:taggable tags - * - * @param enableStringTagging true find or create tags for each string - * mapped to cm:taggable. false (default) + * + * @param enableStringTagging true find or create tags for each string + * mapped to cm:taggable. false (default) * ignore mapping strings to tags. */ public void setEnableStringTagging(boolean enableStringTagging) @@ -172,7 +173,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase /** * List of string separators - note: all will be applied to a given string - * + * * @param stringTaggingSeparators */ public void setStringTaggingSeparators(List stringTaggingSeparators) @@ -188,14 +189,21 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase * the taggable property should still contain raw string values. *

* Mixing of NodeRefs and string values is permitted so each raw value is - * checked for a valid NodeRef representation and if so, converts to a NodeRef, + * checked for a valid NodeRef representation and if so, converts to a NodeRef, * if not, adds as a tag via the {@link TaggingService}. - * + * * @param actionedUponNodeRef The NodeRef being actioned upon * @param propertyDef the PropertyDefinition of the taggable property * @param rawValue the raw value from the metadata extracter */ protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue) + { + addTags(actionedUponNodeRef, propertyDef, rawValue, nodeService, stringTaggingSeparators, taggingService); + } + + private static void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue, + NodeService nodeService, List stringTaggingSeparators, + TaggingService taggingService) { if (rawValue == null) { @@ -231,7 +239,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef); } - tags.addAll(splitTag(tagName)); + tags.addAll(splitTag(tagName, stringTaggingSeparators)); } catch (InvalidNodeRefException e) { @@ -250,7 +258,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef); } - tags.addAll(splitTag((String)singleValue)); + tags.addAll(splitTag((String)singleValue, stringTaggingSeparators)); } } else if (singleValue instanceof NodeRef) @@ -263,7 +271,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef); } - tags.addAll(splitTag(tagName)); + tags.addAll(splitTag(tagName, stringTaggingSeparators)); } } } @@ -273,8 +281,8 @@ public class ContentMetadataExtracter 
extends ActionExecuterAbstractBase { logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef); } - - tags.addAll(splitTag((String)rawValue)); + + tags.addAll(splitTag((String)rawValue, stringTaggingSeparators)); } if (logger.isDebugEnabled()) @@ -297,6 +305,11 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } protected List splitTag(String str) + { + return splitTag(str, stringTaggingSeparators); + } + + private static List splitTag(String str, List stringTaggingSeparators) { List result = new ArrayList<>(); if ((str != null) && (!str.equals(""))) @@ -323,7 +336,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase return result; } - + /** * @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action, * NodeRef) @@ -347,7 +360,8 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase return; } String mimetype = reader.getMimetype(); - MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype); + long sourceSizeInBytes = reader.getSize(); + MetadataExtracter extracter = metadataExtracterRegistry.getExtractor(mimetype, sourceSizeInBytes); if (extracter == null) { if(logger.isDebugEnabled()) @@ -372,6 +386,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase try { modifiedProperties = extracter.extract( + actionedUponNodeRef, reader, /*OverwritePolicy.PRAGMATIC,*/ nodeProperties); @@ -408,11 +423,22 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { return; } - + + addExtractedMetadataToNode(actionedUponNodeRef, nodeProperties, modifiedProperties, + nodeService, dictionaryService, taggingService, enableStringTagging, carryAspectProperties, + stringTaggingSeparators); + } + + public static void addExtractedMetadataToNode(NodeRef actionedUponNodeRef, Map nodeProperties, + Map modifiedProperties, + NodeService nodeService, DictionaryService dictionaryService, + TaggingService 
taggingService, boolean enableStringTagging, + boolean carryAspectProperties, List stringTaggingSeparators) + { // Check that all properties have the appropriate aspect applied Set requiredAspectQNames = new HashSet(3); Set aspectPropertyQNames = new HashSet(17); - + /** * The modified properties contain null values as well. As we are only interested * in the keys, this will force aspect aspect properties to be removed even if there @@ -432,9 +458,10 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase if (enableStringTagging && propertyContainerDef.getName().equals(ContentModel.ASPECT_TAGGABLE)) { Serializable oldValue = nodeProperties.get(propertyQName); - addTags(actionedUponNodeRef, propertyDef, oldValue); + addTags(actionedUponNodeRef, propertyDef, oldValue, + nodeService, stringTaggingSeparators, taggingService); // Replace the raw value with the created tag NodeRefs - nodeProperties.put(ContentModel.PROP_TAGS, + nodeProperties.put(ContentModel.PROP_TAGS, nodeService.getProperty(actionedUponNodeRef, ContentModel.PROP_TAGS)); } else @@ -447,7 +474,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } - + if (!carryAspectProperties) { // Remove any node properties that are defined on the aspects but were not extracted @@ -465,10 +492,14 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } - + + // The following code can result in a postCommit to extract the metadata again via JavaBehaviour + // (such as ImapContentPolicy.onAddAspect). Not very efficient, but I cannot think of a way to + // avoid it that does not risk memory leaks or disabling behaviour we want. 
+ // Add all the properties to the node BEFORE we add the aspects nodeService.setProperties(actionedUponNodeRef, nodeProperties); - + // Add each of the aspects, as required for (QName requiredAspectQName : requiredAspectQNames) { diff --git a/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index 78865f8768..ece13df5b5 100644 --- a/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -25,6 +25,33 @@ */ package org.alfresco.repo.content.metadata; +import org.alfresco.api.AlfrescoPublicApi; +import org.alfresco.error.AlfrescoRuntimeException; +import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.StreamAwareContentReaderProxy; +import org.alfresco.service.cmr.dictionary.DataTypeDefinition; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.cmr.dictionary.PropertyDefinition; +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.MalformedNodeRefException; +import org.alfresco.service.cmr.repository.MimetypeService; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.cmr.repository.datatype.TypeConversionException; +import org.alfresco.service.namespace.InvalidQNameException; +import 
org.alfresco.service.namespace.QName; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.springframework.beans.factory.BeanNameAware; +import org.springframework.context.ApplicationContext; +import org.springframework.context.ApplicationContextAware; +import org.springframework.extensions.surf.util.ISO8601DateFormat; + import java.io.InputStream; import java.io.Serializable; import java.lang.reflect.Array; @@ -50,32 +77,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; -import org.alfresco.api.AlfrescoPublicApi; -import org.alfresco.error.AlfrescoRuntimeException; -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.StreamAwareContentReaderProxy; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; -import org.alfresco.service.cmr.dictionary.DictionaryService; -import org.alfresco.service.cmr.dictionary.PropertyDefinition; -import org.alfresco.service.cmr.repository.ContentIOException; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.ContentWriter; -import org.alfresco.service.cmr.repository.MalformedNodeRefException; -import org.alfresco.service.cmr.repository.MimetypeService; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.cmr.repository.datatype.TypeConversionException; -import org.alfresco.service.namespace.InvalidQNameException; -import org.alfresco.service.namespace.QName; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.joda.time.DateTime; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.springframework.beans.factory.BeanNameAware; -import 
org.springframework.context.ApplicationContext; -import org.springframework.context.ApplicationContextAware; -import org.springframework.extensions.surf.util.ISO8601DateFormat; - /** * Support class for metadata extracters that support dynamic and config-driven * mapping between extracted values and model properties. Extraction is broken @@ -131,7 +132,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac private MetadataExtracterRegistry registry; private MimetypeService mimetypeService; - private DictionaryService dictionaryService; + protected DictionaryService dictionaryService; private boolean initialized; private Set supportedMimetypes; @@ -232,6 +233,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac this.dictionaryService = dictionaryService; } + public Set getSupportedMimetypes() + { + return supportedMimetypes; + } + /** * Set the mimetypes that are supported by the extracter. * @@ -278,7 +284,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac return supportedEmbedMimetypes.contains(sourceMimetype); } - private boolean isEnabled(String mimetype) + protected boolean isEnabled(String mimetype) { return properties == null || mimetypeService == null || (getBooleanProperty(beanName+".enabled", true) && @@ -714,10 +720,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { return readMappingProperties(mappingProperties.entrySet()); } - + /** * A utility method to convert mapping properties entries to the Map form. 
- * + * * @see #setMappingProperties(Properties) */ private Map> readMappingProperties(Set> mappingPropertiesEntries) @@ -765,8 +771,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { throw new AlfrescoRuntimeException( "No prefix mapping for extracter property mapping: \n" + - " Extracter: " + this + "\n" + - " Mapping: " + entry); + " Extracter: " + this + "\n" + + " Mapping: " + entry); } qnameStr = QName.NAMESPACE_BEGIN + uri + QName.NAMESPACE_END + suffix; } @@ -780,8 +786,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { throw new AlfrescoRuntimeException( "Can't create metadata extracter property mapping: \n" + - " Extracter: " + this + "\n" + - " Mapping: " + entry); + " Extracter: " + this + "\n" + + " Mapping: " + entry); } } if (logger.isTraceEnabled()) @@ -1132,7 +1138,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac @Override public final Map extract(ContentReader reader, Map destination) { - return extract(reader, this.overwritePolicy, destination, this.mapping); + return extract(null, reader, this.overwritePolicy, destination, this.mapping); } /** @@ -1144,7 +1150,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac OverwritePolicy overwritePolicy, Map destination) { - return extract(reader, overwritePolicy, destination, this.mapping); + return extract(null, reader, overwritePolicy, destination, this.mapping); } /** @@ -1156,6 +1162,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac OverwritePolicy overwritePolicy, Map destination, Map> mapping) + { + return extract(null, reader, overwritePolicy, destination, mapping); + } + + /** + * {@inheritDoc} + */ + @Override + public Map extract(NodeRef nodeRef, ContentReader reader, Map destination) + { + return extract(nodeRef, reader, overwritePolicy, destination, mapping); + } + + /** + * {@inheritDoc} + */ + @Override + public 
Map extract( + NodeRef nodeRef, + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination, + Map> mapping) { // Done if (logger.isDebugEnabled()) @@ -1182,12 +1211,13 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // Check that the content has some meat if (reader.getSize() > 0 && reader.exists()) { - rawMetadata = extractRaw(reader, getLimits(reader.getMimetype())); + rawMetadata = extractRaw(nodeRef, reader, getLimits(reader.getMimetype())); } else { rawMetadata = new HashMap(1); } + // Convert to system properties (standalone) Map systemProperties = mapRawToSystem(rawMetadata); // Convert the properties according to the dictionary types @@ -1215,7 +1245,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // the current mime type is plausible String typeErrorMessage = null; String differentType = null; - if(mimetypeService != null) + if (mimetypeService != null) { differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader()); } @@ -1224,7 +1254,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac logger.info("Unable to verify mimetype of " + reader.getReader() + " as no MimetypeService available to " + getClass().getName()); } - if(differentType != null) + if (differentType != null) { typeErrorMessage = "\n" + " claimed mime type: " + reader.getMimetype() + "\n" + @@ -1285,6 +1315,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac Map properties, ContentReader reader, ContentWriter writer) + { + embed(null, properties, reader, writer); + } + + /** + * {@inheritDoc} + */ + @Override + public void embed( + NodeRef nodeRef, + Map properties, + ContentReader reader, + ContentWriter writer) { // Done if (logger.isDebugEnabled()) @@ -1307,7 +1350,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac try { - embedInternal(mapSystemToRaw(properties), reader, writer); + 
embedInternal(nodeRef, mapSystemToRaw(properties), reader, writer); if(logger.isDebugEnabled()) { logger.debug("Embedded Metadata into " + writer); @@ -1472,7 +1515,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac * @return Returns a modified map of properties that have been converted. */ @SuppressWarnings("unchecked") - private Map convertSystemPropertyValues(Map systemProperties) + protected Map convertSystemPropertyValues(Map systemProperties) { Map convertedProperties = new HashMap(systemProperties.size() + 7); for (Map.Entry entry : systemProperties.entrySet()) @@ -1500,6 +1543,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { convertedPropertyValue = propertyValue; } + else if (propertyValue instanceof Long) + { + convertedPropertyValue = new Date((Long)propertyValue); + } else if (propertyValue instanceof Collection) { convertedPropertyValue = (Serializable) makeDates((Collection) propertyValue); @@ -1518,7 +1565,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { StringBuilder mesg = new StringBuilder(); mesg.append("Unable to convert Date property: ").append(propertyQName) - .append(", value: ").append(propertyValue).append(", type: ").append(propertyTypeDef.getName()); + .append(", value: ").append(propertyValue).append(" (") + .append(propertyValue.getClass().getSimpleName()) + .append("), type: ").append(propertyTypeDef.getName()); logger.warn(mesg.toString()); } } @@ -1688,6 +1737,21 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } } + // Try milliseconds. This was introduced with T-Engine extractors. Previously Dates would have been + // created and then converted to a Alfresco Date property in a single operation. T-Engines do not know + // about Alfresco Date property formats. 
+ try + { + long ms = Long.parseLong(dateStr); + if (Long.toString(ms).equals(dateStr)) + { + date = new Date(ms); + } + } + catch (NumberFormatException ignore) + { + } + if (date == null) { // Still no luck @@ -1982,7 +2046,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac return limits; } - + /** * Callable wrapper for the * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method @@ -2026,7 +2090,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac /** * Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits} - * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)} + * {@link AbstractMappingMetadataExtracter#extractRaw(NodeRef, ContentReader, MetadataExtracterLimits)} */ private class LimitExceededException extends Exception { @@ -2047,19 +2111,17 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac *

* If no timeout limit is defined or is unlimited (-1), * the extractRaw method is called directly. - * + * + * @param nodeRef the node being acted on. * @param reader the document to extract the values from. This stream provided by * the reader must be closed if accessed directly. * @param limits the limits to impose on the extraction * @return Returns a map of document property values keyed by property name. * @throws Throwable All exception conditions can be handled. */ - private Map extractRaw( + private Map extractRaw(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) throws Throwable { - FutureTask> task = null; - StreamAwareContentReaderProxy proxiedReader = null; - if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE) { throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB"); @@ -2084,7 +2146,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount()); } } - + + return extractRawInThread(nodeRef, reader, limits); + } + + protected Map extractRawInThread(NodeRef nodeRef, ContentReader reader, + MetadataExtracterLimits limits) + throws Throwable + { + FutureTask> task = null; + StreamAwareContentReaderProxy proxiedReader = null; try { proxiedReader = new StreamAwareContentReaderProxy(reader); @@ -2119,14 +2190,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } finally { - int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet(); - if (logger.isDebugEnabled()) - { - logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount); - } + extractRawThreadFinished(); } } - + + protected void extractRawThreadFinished() + { + int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet(); + if (logger.isDebugEnabled()) + { + logger.debug("Extraction finalized. 
Remaining concurrent extraction : " + totalDocCount); + } + } + /** * Override to provide the raw extracted metadata values. An extracter should extract * as many of the available properties as is realistically possible. Even if the @@ -2162,6 +2238,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac */ protected abstract Map extractRaw(ContentReader reader) throws Throwable; + protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) throws Throwable + { + embedInternal(metadata, reader, writer); + } + /** * Override to embed metadata values. An extracter should embed * as many of the available properties as is realistically possible. Even if the @@ -2182,4 +2263,46 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { // TODO make this an abstract method once more extracters support embedding } + + // Originally in TikaPoweredMetadataExtracter + public static Map convertMetadataToStrings(Map properties) + { + Map propertiesAsStrings = new HashMap<>(); + for (String metadataKey : properties.keySet()) + { + Serializable value = properties.get(metadataKey); + if (value == null) + { + continue; + } + if (value instanceof Collection) + { + for (Object singleValue : (Collection) value) + { + try + { + // Convert to a string value + propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); + } + catch (TypeConversionException e) + { + TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + } + } + } + else + { + try + { + // Convert to a string value + propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); + } + catch (TypeConversionException e) + { + TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + } + } + } + return propertiesAsStrings; + } } diff --git 
a/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java b/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java new file mode 100644 index 0000000000..f2d9469bf6 --- /dev/null +++ b/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java @@ -0,0 +1,537 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ +package org.alfresco.repo.content.metadata; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.alfresco.model.ContentModel; +import org.alfresco.repo.action.executer.ContentMetadataExtracter; +import org.alfresco.repo.content.transform.TransformerDebug; +import org.alfresco.repo.rendition2.RenditionService2; +import org.alfresco.repo.rendition2.TransformDefinition; +import org.alfresco.repo.security.authentication.AuthenticationUtil; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.tagging.TaggingService; +import org.alfresco.service.namespace.NamespaceException; +import org.alfresco.service.namespace.NamespacePrefixResolver; +import org.alfresco.service.namespace.QName; +import org.alfresco.service.transaction.TransactionService; +import org.alfresco.transform.client.registry.TransformServiceRegistry; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.springframework.dao.ConcurrencyFailureException; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.StringJoiner; +import java.util.concurrent.ExecutorService; + +import static org.alfresco.repo.rendition2.RenditionDefinition2.TIMEOUT; +import static org.alfresco.repo.rendition2.TransformDefinition.getTransformName; + +/** + * Requests an 
extract of metadata via a remote async transform using + * {@link RenditionService2#transform(NodeRef, TransformDefinition)}. The properties that will be extracted are defined + * by the transform. This allows out of process metadata extracts to be defined without the need to apply an AMP. + * The actual transform is a request to go from the source mimetype to {@code "alfresco-metadata-extract"}. The + * resulting transform is a Map in json of properties and values to be set on the source node. + *

+ * As with other sub-classes of {@link AbstractMappingMetadataExtracter} it also supports embedding of metadata in + * a source node. In this case the remote async transform states that it supports a transform from a source mimetype + * to {@code "alfresco-metadata-embed"}. The resulting transform is a replacement for the content of the node. + * + * @author adavis + */ +public class AsynchronousExtractor extends AbstractMappingMetadataExtracter +{ + private static final String EXTRACT = "extract"; + private static final String EMBED = "embed"; + private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract"; + private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed"; + private static final String METADATA = "metadata"; + private static final Map EMPTY_METADATA = Collections.emptyMap(); + + private final ObjectMapper jsonObjectMapper = new ObjectMapper(); + + private NodeService nodeService; + private NamespacePrefixResolver namespacePrefixResolver; + private TransformerDebug transformerDebug; + private RenditionService2 renditionService2; + private ContentService contentService; + private TransactionService transactionService; + private TransformServiceRegistry transformServiceRegistry; + private TaggingService taggingService; + + public void setNodeService(NodeService nodeService) + { + this.nodeService = nodeService; + } + + public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver) + { + this.namespacePrefixResolver = namespacePrefixResolver; + } + + public void setTransformerDebug(TransformerDebug transformerDebug) + { + this.transformerDebug = transformerDebug; + } + + public void setRenditionService2(RenditionService2 renditionService2) + { + this.renditionService2 = renditionService2; + } + + public void setContentService(ContentService contentService) + { + this.contentService = contentService; + } + + public void setTransactionService(TransactionService transactionService) + { + 
this.transactionService = transactionService; + } + + public void setTransformServiceRegistry(TransformServiceRegistry transformServiceRegistry) + { + this.transformServiceRegistry = transformServiceRegistry; + } + + public void setTaggingService(TaggingService taggingService) + { + this.taggingService = taggingService; + } + + @Override + protected Map> getDefaultMapping() + { + return Collections.emptyMap(); // Mappings are done by the transform, but a non null value must be returned. + } + + public boolean isSupported(String sourceMimetype, long sourceSizeInBytes) + { + return isEnabled(sourceMimetype) && isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EXTRACT); + } + + public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes) + { + return isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EMBED); + } + + private boolean isSupported(String sourceMimetype, long sourceSizeInBytes, String targetMimetype) + { + return transformServiceRegistry.isSupported(sourceMimetype, sourceSizeInBytes, targetMimetype, Collections.emptyMap(), targetMimetype); + } + + public static boolean isMetadataExtractMimetype(String targetMimetype) + { + return MIMETYPE_METADATA_EXTRACT.equals(targetMimetype); + } + + public static boolean isMetadataEmbedMimetype(String targetMimetype) + { + return MIMETYPE_METADATA_EMBED.equals(targetMimetype); + } + + /** + * Returns a file extension used as the target in a transform. The normal extension is changed if the + * {@code targetMimetype} is an extraction or embedding type. + * + * @param targetMimetype the target mimetype + * @param sourceExtension normal source extension + * @param targetExtension current target extension (normally {@code "bin" for embedding and extraction}) + * @return the extension to be used. + */ + public static String getExtension(String targetMimetype, String sourceExtension, String targetExtension) + { + return isMetadataExtractMimetype(targetMimetype) + ? 
"json" + : isMetadataEmbedMimetype(targetMimetype) + ? sourceExtension + : targetExtension; + } + + /** + * Returns a rendition name used in {@link TransformerDebug}. The normal name is changed if it is a metadata + * extract or embed. The name in this case is actually the {@code "alfresco-metadata-extract/"} + * {@code "alfresco-metadata-embed/"} followed by the source mimetype. + * + * @param renditionName the normal name, or a special one based on the source mimetype and a prefixed. + * @return the renditionName to be used. + */ + public static String getRenditionName(String renditionName) + { + String transformName = getTransformName(renditionName); + return transformName != null && transformName.startsWith(MIMETYPE_METADATA_EXTRACT) + ? "metadataExtract" + : transformName != null && transformName.startsWith(MIMETYPE_METADATA_EMBED) + ? "metadataEmbed" + : renditionName; + } + + @Override + protected void checkIsSupported(ContentReader reader) + { + // Just return, as we have already checked when this extractor was selected. + } + + @Override + protected void checkIsEmbedSupported(ContentWriter writer) + { + // Just return, as we have already checked when this embedder was selected. + } + + @Override + // Not called. Overloaded method with the NodeRef is called. 
+ protected Map extractRaw(ContentReader reader) + { + return null; + } + + @Override + protected Map extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) + throws Throwable + { + long timeoutMs = limits.getTimeoutMs(); + Map options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs)); + transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options); + return EMPTY_METADATA; + } + + @Override + protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) + { + String metadataAsJson = metadataToString(metadata); + Map options = Collections.singletonMap(METADATA, metadataAsJson); + transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EMBED, EMBED, options); + } + + private void transformInBackground(NodeRef nodeRef, ContentReader reader, String targetMimetype, + String embedOrExtract, Map options) + { + ExecutorService executorService = getExecutorService(); + executorService.execute(() -> + { + try + { + transform(nodeRef, reader, targetMimetype, embedOrExtract, options); + } + finally + { + extractRawThreadFinished(); + } + }); + } + + private void transform(NodeRef nodeRef, ContentReader reader, String targetMimetype, + String embedOrExtract, Map options) + { + String sourceMimetype = reader.getMimetype(); + + // This needs to be specific to each source mimetype and the extract or embed as the name + // is used to cache the transform name that will be used. 
+ String transformName = targetMimetype + '/' + sourceMimetype; + + TransformDefinition transformDefinition = new TransformDefinition(transformName, targetMimetype, + options, null, null, null); + + if (logger.isTraceEnabled()) + { + StringJoiner sj = new StringJoiner("\n"); + sj.add("Request " + embedOrExtract + " transform on " + nodeRef); + options.forEach((k,v)->sj.add(" "+k+"="+v)); + logger.trace(sj); + } + + AuthenticationUtil.runAs( + (AuthenticationUtil.RunAsWork) () -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + try + { + renditionService2.transform(nodeRef, transformDefinition); + } + catch (IllegalArgumentException e) + { + if (e.getMessage().endsWith("The supplied sourceNodeRef "+nodeRef+" does not exist.")) + { + throw new ConcurrencyFailureException( + "The original transaction may not have finished. " + e.getMessage()); + } + } + return null; + }), AuthenticationUtil.getSystemUserName()); + } + + public void setMetadata(NodeRef nodeRef, InputStream transformInputStream) + { + if (logger.isTraceEnabled()) + { + logger.trace("Update metadata on " + nodeRef); + } + + Map metadata = readMetadata(transformInputStream); + if (metadata == null) + { + return; // Error state. + } + + // Remove well know entries from the map that drive how the real metadata is applied. + OverwritePolicy overwritePolicy = removeOverwritePolicy(metadata, "sys:overwritePolicy", OverwritePolicy.PRAGMATIC); + Boolean enableStringTagging = removeBoolean(metadata, "sys:enableStringTagging", false); + Boolean carryAspectProperties = removeBoolean(metadata, "sys:carryAspectProperties", true); + List stringTaggingSeparators = removeTaggingSeparators(metadata, "sys:stringTaggingSeparators", + ContentMetadataExtracter.DEFAULT_STRING_TAGGING_SEPARATORS); + if (overwritePolicy == null || + enableStringTagging == null || + carryAspectProperties == null || + stringTaggingSeparators == null) + { + return; // Error state. 
+ } + + AuthenticationUtil.runAsSystem((AuthenticationUtil.RunAsWork) () -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + // Based on: AbstractMappingMetadataExtracter.extract + Map nodeProperties = nodeService.getProperties(nodeRef); + // Convert to system properties (standalone) + Map systemProperties = convertKeysToQNames(metadata); + // Convert the properties according to the dictionary types + systemProperties = convertSystemPropertyValues(systemProperties); + // There is no last filter in the AsynchronousExtractor. + // Now use the proper overwrite policy + Map changedProperties = overwritePolicy.applyProperties(systemProperties, nodeProperties); + + // Based on: ContentMetadataExtracter.executeImpl + // If none of the properties where changed, then there is nothing more to do + if (changedProperties.size() == 0) + { + return null; + } + boolean transformerDebugEnabled = transformerDebug.isEnabled(); + boolean debugEnabled = logger.isDebugEnabled(); + if (transformerDebugEnabled || debugEnabled) + { + for (Map.Entry entry : changedProperties.entrySet()) + { + QName qname = entry.getKey(); + Serializable value = entry.getValue(); + String prefixString = qname.toPrefixString(namespacePrefixResolver); + String debugMessage = prefixString + "=" + (value == null ? 
"" : value); + if (transformerDebugEnabled) + { + transformerDebug.debugUsingPreviousReference(" "+debugMessage); + } + if (debugEnabled) + { + logger.debug(debugMessage); + } + } + } + ContentMetadataExtracter.addExtractedMetadataToNode(nodeRef, nodeProperties, changedProperties, + nodeService, dictionaryService, taggingService, + enableStringTagging, carryAspectProperties, stringTaggingSeparators); + + if (logger.isTraceEnabled()) + { + logger.trace("Extraction of Metadata from " + nodeRef + " complete " + changedProperties); + } + + return null; + }, false, true)); + } + + private Map readMetadata(InputStream transformInputStream) + { + try + { + TypeReference> typeRef = new TypeReference>() {}; + return jsonObjectMapper.readValue(transformInputStream, typeRef); + } + catch (IOException e) + { + logger.error("Failed to read metadata from transform result", e); + return null; + } + } + + private String metadataToString(Map metadata) + { + Map metadataAsStrings = AbstractMappingMetadataExtracter.convertMetadataToStrings(metadata); + try + { + return jsonObjectMapper.writeValueAsString(metadataAsStrings); + } + catch (JsonProcessingException e) + { + logger.error("Failed to save metadata as Json", e); + return null; + } + } + + private OverwritePolicy removeOverwritePolicy(Map map, String key, OverwritePolicy defaultValue) + { + Serializable value = map.remove(key); + if (value == null) + { + return defaultValue; + } + try + { + return OverwritePolicy.valueOf((String)value); + } + catch (IllegalArgumentException|ClassCastException e) + { + logger.error(key + "=" + value + " is invalid"); + return null; + } + } + + private Boolean removeBoolean(Map map, Serializable key, boolean defaultValue) + { + @SuppressWarnings("SuspiciousMethodCalls") Serializable value = map.remove(key); + if (value != null && + (!(value instanceof String) || + (!(Boolean.FALSE.toString().equals(value) || Boolean.TRUE.toString().equals(value))))) + { + logger.error(key + "=" + value + " is 
invalid. Must be " + Boolean.TRUE + " or " + Boolean.FALSE); + return null; // no flexibility of parseBoolean(...). It is just invalid + } + return value == null ? defaultValue : Boolean.parseBoolean((String)value); + } + + private List removeTaggingSeparators(Map map, String key, List defaultValue) + { + Serializable value = map.remove(key); + if (value == null) + { + return defaultValue; + } + if (!(value instanceof String)) + { + logger.error(key + "=" + value + " is invalid."); + return null; + } + + List list = new ArrayList<>(); + try (CSVParser parser = CSVParser.parse((String)value, CSVFormat.RFC4180)) + { + Iterator iterator = parser.iterator(); + CSVRecord record = iterator.next(); + if (iterator.hasNext()) + { + logger.error(key + "=" + value + " is invalid. Should only have one record"); + return null; + } + record.forEach(list::add); + } + catch (IOException|NoSuchElementException e) + { + logger.error(key + "=" + value + " is invalid. Must be a CSV using CSVFormat.RFC4180"); + return null; + } + return list; + } + + private Map convertKeysToQNames(Map documentMetadata) + { + Map properties = new HashMap<>(); + for (Map.Entry entry : documentMetadata.entrySet()) + { + String key = entry.getKey(); + Serializable value = entry.getValue(); + try + { + QName qName = QName.createQName(key); + try + { + qName.toPrefixString(namespacePrefixResolver); + properties.put(qName, value); + } + catch (NamespaceException e) + { + logger.error("Error unregistered namespace in " + qName); + } + } + catch (NamespaceException e) + { + logger.error("Error creating qName from "+key); + } + } + return properties; + } + + public void setEmbeddedMetadata(NodeRef nodeRef, InputStream transformInputStream) + { + if (logger.isDebugEnabled()) + { + logger.debug("Update of content to include metadata on " + nodeRef); + } + AuthenticationUtil.runAsSystem(() -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + try + { + // Set or replace content + 
ContentReader reader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + String mimetype = reader.getMimetype(); + String encoding = reader.getEncoding(); + ContentWriter writer = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + writer.setMimetype(mimetype); + writer.setEncoding(encoding); + writer.putContent(transformInputStream); + + if (logger.isTraceEnabled()) + { + logger.trace("Embedded Metadata on " + nodeRef + " complete"); + } + } + catch (Exception e) + { + logger.error("Failed to copy embedded metadata transform InputStream into " + nodeRef); + throw e; + } + + return null; + }, false, true)); + } +} diff --git a/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java index a26ead0f9c..0ef186435d 100644 --- a/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -36,6 +36,8 @@ import org.apache.tika.parser.dwg.DWGParser; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * * Metadata extractor for the * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_APP_DWG MIMETYPE_APP_DWG} * and @@ -55,6 +57,7 @@ import org.apache.tika.parser.dwg.DWGParser; * @since 3.4 * @author Nick Burch */ +@Deprecated public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter { private static final String KEY_KEYWORD = "keyword"; diff --git a/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java index 5355ccdbd7..7e58d1faef 100644 --- a/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -24,7 +24,7 @@ * #L% */ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 - 2020 Jesper Steen Møller * * This file is part of Alfresco * @@ -61,6 +61,8 @@ import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentReader; /** + * @deprecated extractor has been moved to a T-Engine. + * * Extracts the following values from HTML documents: *

  *   author:                 --      cm:author
@@ -75,6 +77,7 @@ import org.alfresco.service.cmr.repository.ContentReader;
  * @author Jesper Steen Møller
  * @author Derek Hulley
  */
+@Deprecated
 public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter
 {
     private static final String KEY_AUTHOR = "author";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java
index b7400bfe54..d55dda69f6 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2017 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -25,20 +25,17 @@
  */
 package org.alfresco.repo.content.metadata;
 
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
-import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.util.PropertyCheck;
 
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
 /**
+ * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
+ *
  * Extracts values from Open Office documents into the following:
  * 
  *   author:                 --      cm:author
@@ -48,6 +45,7 @@ import org.alfresco.util.PropertyCheck;
  * 
  * @author Neil McErlean
  */
+@Deprecated
 public class JodConverterMetadataExtracter extends AbstractMappingMetadataExtracter implements OpenOfficeMetadataWorker
 {
     private OpenOfficeMetadataWorker worker;
diff --git a/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java b/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java
index 299680cb1d..b882af4a98 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2017 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -35,7 +35,6 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.alfresco.repo.content.JodConverter;
-import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.MimetypeService;
 import org.alfresco.util.TempFileProvider;
@@ -59,6 +58,19 @@ import com.sun.star.util.CloseVetoException;
 import com.sun.star.util.XCloseable;
 import com.sun.star.util.XRefreshable;
 
+/**
+ * Extracts values from Open Office documents into the following:
+ * <pre>
+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   description:            --      cm:description
+ * </pre>
+ * + * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 + * + * @author Neil McErlean + */ +@Deprecated public class JodConverterMetadataExtracterWorker implements OpenOfficeMetadataWorker { diff --git a/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java index fa1bfbde76..a2a1cfef5a 100644 --- a/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import java.io.Serializable; @@ -36,6 +36,8 @@ import org.apache.tika.parser.Parser; import org.apache.tika.parser.mp3.Mp3Parser; /** + * @deprecated extractor has been moved to a T-Engine. + * * Extracts the following values from MP3 files: *
  *   songTitle:              --      cm:title
@@ -57,6 +59,7 @@ import org.apache.tika.parser.mp3.Mp3Parser;
  * 
  * @author Nick Burch
  */
+@Deprecated
 public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
 {
     private static final String KEY_SONG_TITLE = "songTitle";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
index 6467f1b067..ddddd49fc0 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2016 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
 package org.alfresco.repo.content.metadata;
 
 import java.io.Serializable;
@@ -35,6 +35,8 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.microsoft.OfficeParser;
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * Outlook MAPI format email meta-data extractor extracting the following values:
  * 
  *   sentDate:               --      cm:sentdate
@@ -53,6 +55,7 @@ import org.apache.tika.parser.microsoft.OfficeParser;
  * @since 2.1
  * @author Kevin Roast
  */
+@Deprecated
 public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     private static final String KEY_SENT_DATE = "sentDate";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java b/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java
index 3fbba6cf74..79eed186a1 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
 package org.alfresco.repo.content.metadata;
 
 import java.io.Serializable;
@@ -33,6 +33,7 @@ import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentWriter;
+import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.namespace.QName;
 
 /**
@@ -69,5 +70,18 @@ public interface MetadataEmbedder extends ContentWorker {
      */
     public void embed(Map properties, ContentReader reader, ContentWriter writer) throws ContentIOException;
 
-
+    /**
+     * Embeds the given properties like {@link #embed(Map, ContentReader, ContentWriter)}, with the addition of
+     * the {@code NodeRef} being acted on. This default implementation ignores {@code nodeRef} and delegates.
+     *
+     * @param nodeRef the node being acted on; not used by this default implementation
+     * @param properties the model properties to embed
+     * @param reader the reader for the original source content file
+     * @param writer the writer for the content after metadata has been embedded
+     * @throws ContentIOException propagated from {@link #embed(Map, ContentReader, ContentWriter)}
+     */
+    public default void embed(NodeRef nodeRef, Map<QName, Serializable> properties, ContentReader reader, ContentWriter writer) throws ContentIOException
+    {
+        embed(properties, reader, writer);
+    }
 }
diff --git a/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
index 051184fd10..054c0077df 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
 /*
  * Copyright (C) 2005 Jesper Steen Møller
  *
@@ -52,6 +52,7 @@ import org.alfresco.api.AlfrescoPublicApi;
 import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.namespace.NamespaceService;
 import org.alfresco.service.namespace.QName;
 
@@ -402,4 +403,46 @@ public interface MetadataExtracter extends ContentWorker
             OverwritePolicy overwritePolicy,
             Map destination,
             Map> mapping);
+
+    /**
+     * Identical to {@link #extract(ContentReader, Map)} but with the addition of the {@code NodeRef} being acted on.
+     * By default, the method without the {@code NodeRef} is called.
+     *
+     * @param nodeRef               the node being acted on.
+     * @param reader                the source of the content
+     * @param destination           the map of properties to populate (essentially a return value)
+     * @return                      Returns a map of all properties on the destination map that were
+     *                              added or modified.  If the return map is empty, then no properties
+     *                              were modified.
+     * @throws ContentIOException   if a detectable error occurs
+     */
+    public default Map extract(NodeRef nodeRef, ContentReader reader, Map destination)
+    {
+        return extract(reader, destination);
+    }
+
+    /**
+     * Identical to {@link #extract(ContentReader, OverwritePolicy, Map, Map)} but with the addition of the
+     * {@code NodeRef} being acted on. By default, the method without the {@code NodeRef} is called.
+     *
+     * @param nodeRef               the node being acted on.
+     * @param reader                the source of the content
+     * @param overwritePolicy       the policy stipulating how the system properties must be
+     *                              overwritten if present
+     * @param destination           the map of properties to populate (essentially a return value)
+     * @param mapping               a mapping of document-specific properties to system properties.
+     * @return                      Returns a map of all properties on the destination map that were
+     *                              added or modified.  If the return map is empty, then no properties
+     *                              were modified.
+     * @throws ContentIOException   if a detectable error occurs
+     */
+    public default Map extract(
+            NodeRef nodeRef,
+            ContentReader reader,
+            OverwritePolicy overwritePolicy,
+            Map destination,
+            Map> mapping)
+    {
+        return extract(reader, overwritePolicy, destination, mapping);
+    }
 }
diff --git a/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java b/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
index 945cd49781..0db88f7400 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
 /*
  * Copyright (C) 2005-2012 Jesper Steen Møller
  *
@@ -73,18 +73,22 @@ public class MetadataExtracterRegistry
     private List extracters;
     private Map> extracterCache;
     private Map> embedderCache;
+    private AsynchronousExtractor asynchronousExtractor;
 
     /** Controls read access to the cache */
     private Lock extracterCacheReadLock;
     /** controls write access to the cache */
     private Lock extracterCacheWriteLock;
 
+    private boolean asyncExtractEnabled = true;
+    private boolean asyncEmbedEnabled = true;
+
     public MetadataExtracterRegistry()
     {
         // initialise lists
-        extracters = new ArrayList(10);
-        extracterCache = new HashMap>(17);
-        embedderCache = new HashMap>(17);
+        extracters = new ArrayList<>(11);
+        extracterCache = new HashMap<>(18);
+        embedderCache = new HashMap<>(18);
 
         // create lock objects for access to the cache
         ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
@@ -125,7 +129,14 @@ public class MetadataExtracterRegistry
         extracterCacheWriteLock.lock();
         try
         {
-            extracters.add(extracter);
+            if (extracter instanceof AsynchronousExtractor)
+            {
+                asynchronousExtractor = (AsynchronousExtractor)extracter;
+            }
+            else
+            {
+                extracters.add(extracter);
+            }
             extracterCache.clear();
             embedderCache.clear();
         }
@@ -135,16 +146,42 @@ public class MetadataExtracterRegistry
         }
     }
 
+    public void setAsyncExtractEnabled(boolean asyncExtractEnabled)
+    {
+        this.asyncExtractEnabled = asyncExtractEnabled;
+    }
+
+    public void setAsyncEmbedEnabled(boolean asyncEmbedEnabled)
+    {
+        this.asyncEmbedEnabled = asyncEmbedEnabled;
+    }
+
+    /**
+     * Returns the {@link AsynchronousExtractor} if it is able to perform the extraction and is enabled. Failing that it
+     * calls {@link #getExtracter(String)}.
+     *
+     * @param sourceMimetype the source MIME type of the extraction
+     * @param sourceSizeInBytes size of the source content.
+     * @return Returns a metadata extractor that can extract metadata from the chosen MIME type.
+     */
+    public MetadataExtracter getExtractor(String sourceMimetype, long sourceSizeInBytes)
+    {
+        return asyncExtractEnabled && asynchronousExtractor != null &&
+               asynchronousExtractor.isSupported(sourceMimetype, sourceSizeInBytes)
+            ? asynchronousExtractor
+            : getExtracter(sourceMimetype);
+    }
+
     /**
      * Gets the best metadata extracter. This is a combination of the most
      * reliable and the most performant extracter.
-     * 

- * The result is cached for quicker access next time. - * - * @param sourceMimetype the source MIME of the extraction - * @return Returns a metadata extracter that can extract metadata from the - * chosen MIME type. - */ + *

+ * The result is cached for quicker access next time. + * + * @param sourceMimetype the source MIME of the extraction + * @return Returns a metadata extracter that can extract metadata from the + * chosen MIME type. + */ public MetadataExtracter getExtracter(String sourceMimetype) { logger.debug("Get extractors for " + sourceMimetype); @@ -202,18 +239,18 @@ public class MetadataExtracterRegistry } private String getName(MetadataExtracter extractor) - { - if (extractor == null) - { - return null; - } - else if (extractor instanceof AbstractMappingMetadataExtracter) - { - return ((AbstractMappingMetadataExtracter)extractor).getBeanName(); - } - else - { - return extractor.getClass().getSimpleName(); + { + if (extractor == null) + { + return null; + } + else if (extractor instanceof AbstractMappingMetadataExtracter) + { + return ((AbstractMappingMetadataExtracter)extractor).getBeanName(); + } + else + { + return extractor.getClass().getSimpleName(); } } @@ -222,48 +259,64 @@ public class MetadataExtracterRegistry * @return Returns a set of extractors that will work for the given mimetype */ private List findBestExtracters(String sourceMimetype) - { - if (logger.isDebugEnabled()) + { + if (logger.isDebugEnabled()) { - logger.debug("Finding extractors for " + sourceMimetype); + logger.debug("Finding extractors for " + sourceMimetype); } - List extractors = new ArrayList(1); + List extractors = new ArrayList<>(1); for (MetadataExtracter extractor : extracters) { if (!extractor.isSupported(sourceMimetype)) { - // extraction not achievable - if (logger.isDebugEnabled()) + // extraction not achievable + if (logger.isDebugEnabled()) { - logger.debug("Find unsupported: "+getName(extractor)); + logger.debug("Find unsupported: "+getName(extractor)); } continue; - } - if (logger.isDebugEnabled()) + } + if (logger.isDebugEnabled()) { - logger.debug("Find supported: "+getName(extractor)); + logger.debug("Find supported: "+getName(extractor)); } extractors.add(extractor); - } - if 
(logger.isDebugEnabled()) + } + if (logger.isDebugEnabled()) { - logger.debug("Find returning: "+extractors); + logger.debug("Find returning: "+extractors); } return extractors; } - + + /** + * Returns the {@link AsynchronousExtractor} if it is able to perform the embedding and is enabled. Failing that it + * calls {@link #getEmbedder(String)}. + * + * @param sourceSizeInBytes size of the source content. + * @param sourceMimetype the source MIMETYPE of the extraction + * @return Returns a metadata extractor that can extract metadata from the chosen MIME type. + */ + public MetadataEmbedder getEmbedder(String sourceMimetype, long sourceSizeInBytes) + { + return asyncEmbedEnabled && asynchronousExtractor != null && + asynchronousExtractor.isEmbedderSupported(sourceMimetype, sourceSizeInBytes) + ? asynchronousExtractor + : getEmbedder(sourceMimetype); + } + /** * Gets the best metadata embedder. This is a combination of the most * reliable and the most performant embedder. - *

- * The result is cached for quicker access next time. - * - * @param sourceMimetype the source MIME of the extraction - * @return Returns a metadata embedder that can embed metadata in the - * chosen MIME type. - */ + *

+ * The result is cached for quicker access next time. + * + * @param sourceMimetype the source MIME of the extraction + * @return Returns a metadata embedder that can embed metadata in the + * chosen MIME type. + */ public MetadataEmbedder getEmbedder(String sourceMimetype) { List embedders = null; diff --git a/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java index 17cc11ae6d..283de8cd40 100644 --- a/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java +++ b/src/main/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ /* * Copyright (C) 2005 Jesper Steen Møller * @@ -53,6 +53,8 @@ import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; /** + * @deprecated extractor has been moved to a T-Engine. + * * Office file format Metadata Extracter. This extracter uses the POI library to extract * the following: *

@@ -78,6 +80,7 @@ import org.apache.tika.parser.microsoft.OfficeParser;
  * @author Derek Hulley
  * @author Nick Burch
  */
+@Deprecated
 public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     public static final String KEY_CREATE_DATETIME = "createDateTime";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
index ca3d764d7c..68f91815cb 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
@@ -1,30 +1,30 @@
 /*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
-/*
- * Copyright (C) 2005 Antti Jokipii
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
+/*
+ * Copyright (C) 2005 Antti Jokipii
  *
  * This file is part of Alfresco
  *
@@ -59,6 +59,8 @@ import org.joda.time.format.DateTimeFormatter;
 
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * Metadata extractor for the
  * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_OPENDOCUMENT_TEXT MIMETYPE_OPENDOCUMENT_XXX}
  * mimetypes.
@@ -86,6 +88,7 @@ import org.joda.time.format.DateTimeFormatter;
  * @author Antti Jokipii
  * @author Derek Hulley
  */
+@Deprecated
 public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     private static final String KEY_CREATION_DATE = "creationDate";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java b/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java
index a8ef8f8a4a..5adb16e0d6 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataWorker.java
@@ -23,28 +23,31 @@
  * along with Alfresco. If not, see .
  * #L%
  */
-package org.alfresco.repo.content.metadata;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.alfresco.service.cmr.repository.ContentReader;
-
-/**
- * An interface that allows separation between the metadata extractor registry and the third party subsystem owning the
- * open office connection.
- * 
- * @author dward
- */
-public interface OpenOfficeMetadataWorker
-{
-    /**
-     * @return Returns true if a connection to the Uno server could be established
-     */
-    public boolean isConnected();
-
-    /**
-     * @see AbstractMappingMetadataExtracter#extractRaw(ContentReader)
-     */
-    public Map extractRaw(ContentReader reader) throws Throwable;
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.alfresco.service.cmr.repository.ContentReader;
+
+/**
+ * An interface that allows separation between the metadata extractor registry and the third party subsystem owning the
+ * open office connection.
+ * 
+ * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
+ *
+ * @author dward
+ */
+@Deprecated
+public interface OpenOfficeMetadataWorker
+{
+    /**
+     * @return Returns true if a connection to the Uno server could be established
+     */
+    public boolean isConnected();
+
+    /**
+     * @see AbstractMappingMetadataExtracter#extractRaw(ContentReader)
+     */
+    public Map extractRaw(ContentReader reader) throws Throwable;
 }
\ No newline at end of file
diff --git a/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
index 3c0ca7e126..9ba0a5630f 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -1,30 +1,30 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
 /*
- * Copyright (C) 2005 Jesper Steen Møller
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
  *
  * This file is part of Alfresco
  *
@@ -52,6 +52,8 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.pdf.PDFParser;
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * Metadata extractor for the PDF documents.
  * 
  *   author:                 --      cm:author
@@ -66,6 +68,7 @@ import org.apache.tika.parser.pdf.PDFParser;
  * @author Jesper Steen Møller
  * @author Derek Hulley
  */
+@Deprecated
 public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
diff --git a/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java
index d4e93e9942..cea98d2c73 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/PoiMetadataExtracter.java
@@ -36,6 +36,8 @@ import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
 import org.springframework.beans.factory.InitializingBean;
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * POI-based metadata extractor for Office 07 documents.
  * See http://poi.apache.org/ for information on POI.
  * 
@@ -52,6 +54,7 @@ import org.springframework.beans.factory.InitializingBean;
  * @author Neil McErlean
  * @author Dmitry Velichkevich
  */
+@Deprecated
 public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     protected static Log logger = LogFactory.getLog(PoiMetadataExtracter.class);
diff --git a/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java
index d0ed7c9e10..0875fbc749 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracter.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
 package org.alfresco.repo.content.metadata;
 
 import java.io.IOException;
@@ -66,6 +66,7 @@ import org.alfresco.service.namespace.QName;
  * @author Derek Hulley
  * @since 3.2
  */
+@Deprecated
 public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
 {
 
diff --git a/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java
index a17cbb51a3..c837e790f3 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracter.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -42,6 +42,8 @@ import org.gagravarr.tika.FlacParser;
 import org.gagravarr.tika.VorbisParser;
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * A Metadata Extractor which makes use of the Apache
  *  Tika Audio Parsers to extract metadata from your
  *  media files. 
@@ -64,6 +66,7 @@ import org.gagravarr.tika.VorbisParser;
  * @since 4.0
  * @author Nick Burch
  */
+@Deprecated
 public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     protected static final String KEY_LYRICS = "lyrics";
diff --git a/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
index 369527875c..e2e1018869 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracter.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -40,6 +40,8 @@ import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
 
 /**
+ * @deprecated extractor has been moved to a T-Engine.
+ *
  * A Metadata Extractor which makes use of the Apache
  *  Tika auto-detection to select the best parser
  *  to extract the metadata from your document.
@@ -60,6 +62,7 @@ import org.apache.tika.parser.Parser;
  * @since 3.4
  * @author Nick Burch
  */
+@Deprecated
 public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class);
diff --git a/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
index abd7854354..0c89f63cfe 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -31,7 +31,6 @@ import java.io.OutputStream;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -45,8 +44,6 @@ import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.repo.content.filestore.FileContentReader;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentWriter;
-import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
-import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.tika.embedder.Embedder;
@@ -74,6 +71,8 @@ import org.xml.sax.SAXException;
 
 
 /**
+ * @deprecated extractors have been moved to a T-Engine.
+ *
  * The parent of all Metadata Extractors which use
  * Apache Tika under the hood. This handles all the
  * common parts of processing the files, and the common
@@ -92,6 +91,7 @@ import org.xml.sax.SAXException;
  * @author Nick Burch
  */
 @AlfrescoPublicApi
+@Deprecated
 public abstract class TikaPoweredMetadataExtracter
         extends AbstractMappingMetadataExtracter
         implements MetadataEmbedder
@@ -473,43 +473,11 @@ public abstract class TikaPoweredMetadataExtracter
         {
             return;
         }
-        
+
+        Map metadataAsStrings = convertMetadataToStrings(properties);
         Metadata metadataToEmbed = new Metadata();
-        for (String metadataKey : properties.keySet())
-        {
-            Serializable value = properties.get(metadataKey);
-            if (value == null)
-            {
-                continue;
-            }
-            if (value instanceof Collection)
-            {
-                for (Object singleValue : (Collection) value)
-                {
-                    try
-                    {
-                        // Convert to a string value for Tika
-                        metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue));
-                    }
-                    catch (TypeConversionException e)
-                    {
-                        logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
-                    }
-                }
-            }
-            else
-            {
-                try
-                {
-                    // Convert to a string value for Tika
-                    metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value));
-                }
-                catch (TypeConversionException e)
-                {
-                    logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
-                }
-            }
-        }
+        metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v));
+
         InputStream inputStream = getInputStream(reader);
         OutputStream outputStream = writer.getContentOutputStream();
         embedder.embed(metadataToEmbed, inputStream, outputStream, null);
diff --git a/src/main/java/org/alfresco/repo/content/metadata/TikaSpringConfiguredMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/TikaSpringConfiguredMetadataExtracter.java
index 176b09549b..74e0e226da 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/TikaSpringConfiguredMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/TikaSpringConfiguredMetadataExtracter.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
 package org.alfresco.repo.content.metadata;
 
 import java.util.ArrayList;
@@ -37,6 +37,8 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 
 /**
+ * @deprecated Tika extractors have been moved to a T-Engine.
+ *
  * A Metadata Extractor which makes use of Apache Tika,
  *  and allows the selection of the Tika parser to be
  *  sprung-in to extract the metadata from your document.
@@ -56,6 +58,7 @@ import org.apache.tika.parser.Parser;
  * @author Nick Burch
  */
 @AlfrescoPublicApi
+@Deprecated
 public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter
 {
     protected static Log logger = LogFactory.getLog(TikaSpringConfiguredMetadataExtracter.class);
diff --git a/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java b/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java
index 775e6ee9f6..bd8cc89d7f 100644
--- a/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java
+++ b/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java
@@ -60,6 +60,7 @@ import org.alfresco.util.PropertyCheck;
  * @since 2.1
  * @author Derek Hulley
  */
+@Deprecated
 public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter
 {
     public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_XML };
diff --git a/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java b/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java
index c4ed028550..8e0ffe5e5f 100644
--- a/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java
+++ b/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java
@@ -249,7 +249,7 @@ public class AdminUiTransformerDebug extends TransformerDebug implements Applica
                                      boolean firstTransformer)
     {
         String mimetypes = firstTransformer
-                ? getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)
+                ? getSourceAndTargetExt(sourceMimetype, targetMimetype)
                 : spaces(10);
         char c = (char)('a'+transformerCount);
         log(mimetypes+
diff --git a/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java b/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java
index db85c40e9b..7831333627 100644
--- a/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java
+++ b/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java
@@ -25,6 +25,7 @@
  */
 package org.alfresco.repo.content.transform;
 
+import org.alfresco.repo.content.metadata.AsynchronousExtractor;
 import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.cmr.repository.TransformationOptions;
 import org.alfresco.transform.client.registry.SupportedTransform;
@@ -99,10 +100,9 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug
     public void blacklistTransform(ContentTransformer transformer, String sourceMimetype,
                                    String targetMimetype, TransformationOptions options)
     {
-        log("Blacklist "+getName(transformer)+" "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype));
+        log("Blacklist "+getName(transformer)+" "+ getSourceAndTargetExt(sourceMimetype, targetMimetype));
     }
 
-
     @Deprecated
     public void pushTransform(ContentTransformer transformer, String fromUrl, String sourceMimetype,
                               String targetMimetype, long sourceSize, TransformationOptions options)
@@ -265,7 +265,10 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug
         }
         String i = Integer.toString(mimetypePairCount);
         String priority = gePriority(transformer, sourceMimetype, targetMimetype);
-        log(spaces(5-i.length())+mimetypePairCount+") "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)+
+        String sourceExt = getMimetypeExt(sourceMimetype);
+        String targetExt = getMimetypeExt(targetMimetype);
+        targetExt = AsynchronousExtractor.getExtension(targetMimetype, sourceExt, targetExt);
+        log(spaces(5-i.length())+mimetypePairCount+") "+ sourceExt + targetExt +
                 priority +
                 ' '+fileSize((maxSourceSizeKBytes > 0) ? maxSourceSizeKBytes*1024 : maxSourceSizeKBytes)+
                 (maxSourceSizeKBytes == 0 ? " disabled" : ""));
diff --git a/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java b/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java
index ffefc4ba15..ef0ab8b3d5 100644
--- a/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java
+++ b/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2019 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * If the software was purchased under a paid Alfresco license, the terms of
@@ -25,6 +25,7 @@
  */
 package org.alfresco.repo.content.transform;
 
+import org.alfresco.repo.content.metadata.AsynchronousExtractor;
 import org.alfresco.repo.rendition2.RenditionDefinition2;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentWriter;
@@ -188,6 +189,7 @@ public class LocalTransformImpl extends AbstractLocalTransform
         args[i++] = "targetMimetype";
         args[i++] = targetMimetype;
 
+        targetExtension = AsynchronousExtractor.getExtension(targetMimetype, sourceExtension, targetExtension);
         remoteTransformerClient.request(reader, writer, sourceMimetype, sourceExtension, targetExtension,
                 timeoutMs, log, args);
     }
diff --git a/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java b/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java
index c13e76f949..bd6d656f89 100644
--- a/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java
+++ b/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software. 
  * If the software was purchased under a paid Alfresco license, the terms of 
@@ -26,6 +26,7 @@
 package org.alfresco.repo.content.transform;
 
 import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.metadata.AsynchronousExtractor;
 import org.alfresco.service.cmr.repository.MimetypeService;
 import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.cmr.repository.NodeService;
@@ -68,6 +69,7 @@ public class TransformerDebug
     protected Log logger;
     protected NodeService nodeService;
     protected MimetypeService mimetypeService;
+    private final ThreadLocal<Integer> previousTransformId = ThreadLocal.withInitial(()->-1); // -1 until a transform id is recorded on this thread
 
     protected enum Call
     {
@@ -280,6 +282,16 @@ public class TransformerDebug
         this.mimetypeService = mimetypeService;
     }
 
+    public void setPreviousTransformId(int id)
+    {
+        previousTransformId.set(id);
+    }
+
+    private int getPreviousTransformId()
+    {
+        return previousTransformId.get();
+    }
+
     public void afterPropertiesSet() throws Exception
     {
         PropertyCheck.mandatory(this, "nodeService", nodeService);
@@ -351,7 +363,7 @@ public class TransformerDebug
         log(frame.sourceMimetype+' '+frame.targetMimetype, false);
         
         String fileName = getFileName(frame.sourceNodeRef, firstLevel, sourceSize);
-        log(getMimetypeExt(frame.sourceMimetype)+getMimetypeExt(frame.targetMimetype) +
+        log(getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype) +
                 ((fileName != null) ? fileName+' ' : "")+
                 ((sourceSize >= 0) ? fileSize(sourceSize)+' ' : "") +
                 (firstLevel ? getRenditionName(renditionName) : "") + message);
@@ -370,7 +382,7 @@ public class TransformerDebug
                       Map options, String renditionName, String message)
     {
         String fileName = getFileName(sourceNodeRef, true, -1);
-        log("              "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype) +
+        log("              "+ getSourceAndTargetExt(sourceMimetype, targetMimetype) +
                 ((fileName != null) ? fileName+' ' : "")+
                 ((sourceSize >= 0) ? fileSize(sourceSize)+' ' : "") +
                 (getRenditionName(renditionName)) + message);
@@ -453,6 +465,7 @@ public class TransformerDebug
                 ourStack.pop();
             }
         }
+        setPreviousTransformId(id);
         return id;
     }
 
@@ -462,8 +475,7 @@ public class TransformerDebug
         {
             String failureReason = frame.getFailureReason();
             boolean firstLevel = size == 1;
-            String sourceExt = getMimetypeExt(frame.sourceMimetype);
-            String targetExt = getMimetypeExt(frame.targetMimetype);
+            String sourceAndTargetExt = getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype);
             String fileName = getFileName(frame.sourceNodeRef, firstLevel, frame.sourceSize);
             long sourceSize = frame.getSourceSize();
             String transformerName = frame.getTransformerName();
@@ -506,19 +518,18 @@ public class TransformerDebug
 
             if (level != null)
             {
-                infoLog(getReference(debug, false), sourceExt, targetExt, level, fileName, sourceSize,
+                infoLog(getReference(debug, false, false), sourceAndTargetExt, level, fileName, sourceSize,
                         transformerName, renditionName, failureReason, ms, debug);
             }
         }
     }
     
-    private void infoLog(String reference, String sourceExt, String targetExt, String level, String fileName,
+    private void infoLog(String reference, String sourceAndTargetExt, String level, String fileName,
             long sourceSize, String transformerName, String renditionName, String failureReason, String ms, boolean debug)
     {
         String message =
                 reference +
-                sourceExt +
-                targetExt +
+                sourceAndTargetExt +
                 (level == null ? "" : level+' ') +
                 (fileName == null ? "" : fileName) +
                 (sourceSize >= 0 ? ' '+fileSize(sourceSize) : "") +
@@ -569,6 +580,18 @@ public class TransformerDebug
         }
     }
 
+    /**
+     * Log a message prefixed with the reference of the previous transformation performed by this Thread.
+     * @param message the text to log; nothing is logged if it is {@code null}
+     */
+    public void debugUsingPreviousReference(String message)
+    {
+        if (isEnabled() && message != null)
+        {
+            log(message, null,true, true);
+        }
+    }
+
     /**
      * Log a message prefixed with the current transformation reference
      * and include a exception, suppressing the stack trace if repeated
@@ -631,16 +654,21 @@ public class TransformerDebug
     {
         log(message, null, debug);
     }
-    
+
     private void log(String message, Throwable t, boolean debug)
+    {
+        log(message, t, debug, false);
+    }
+
+    private void log(String message, Throwable t, boolean debug, boolean usePreviousRef)
     {
         if (debug && ThreadInfo.getDebugOutput() && logger.isDebugEnabled())
         {
-            logger.debug(getReference(false, false)+message, t);
+            logger.debug(getReference(false, false, usePreviousRef)+message, t);
         }
         else if (logger.isTraceEnabled())
         {
-            logger.trace(getReference(false, false)+message, t);
+            logger.trace(getReference(false, false, usePreviousRef)+message, t);
         }
 
         if (debug)
@@ -648,7 +676,7 @@ public class TransformerDebug
             StringBuilder sb = ThreadInfo.getStringBuilder();
             if (sb != null)
             {
-                sb.append(getReference(false, true));
+                sb.append(getReference(false, true, usePreviousRef));
                 sb.append(message);
                 if (t != null)
                 {
@@ -691,10 +719,21 @@ public class TransformerDebug
      * Returns a N.N.N style reference to the transformation.
      * @param firstLevelOnly indicates if only the top level should be included and no extra padding.
      * @param overrideFirstLevel if the first level id should just be set to 1 (used in test methods)
+     * @param usePreviousRef if the reference of the last transform performed by this Thread should be used.
      * @return a padded (fixed length) reference.
      */
-    private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel)
+    private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel, boolean usePreviousRef)
     {
+        if (usePreviousRef)
+        {
+            int id = getPreviousTransformId();
+            String ref = "";
+            if (id >= 0)
+            {
+                ref = Integer.toString(id)+spaces(13);
+            }
+            return ref;
+        }
         StringBuilder sb = new StringBuilder("");
         Frame frame = null;
         Iterator iterator = ThreadInfo.getStack().descendingIterator();
@@ -737,7 +776,7 @@ public class TransformerDebug
             }
             else
             {
-            sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7
+                sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7
             }
         }
         return sb.toString();
@@ -783,6 +822,14 @@ public class TransformerDebug
         return result;
     }
 
+    protected String getSourceAndTargetExt(String sourceMimetype, String targetMimetype)
+    {
+        String sourceExt = getMimetypeExt(sourceMimetype);
+        String targetExt = getMimetypeExt(targetMimetype);
+        targetExt = AsynchronousExtractor.getExtension(targetMimetype, sourceExt, targetExt);
+        return sourceExt + targetExt + spaces(1+4-targetExt.length());
+    }
+
     protected String getMimetypeExt(String mimetype)
     {
         StringBuilder sb = new StringBuilder("");
@@ -867,16 +914,15 @@ public class TransformerDebug
         if (isEnabled())
         {
             pushMisc();
-            String sourceExt = getMimetypeExt(sourceMimetype);
-            String targetExt = getMimetypeExt(targetMimetype);
-            debug(sourceExt + targetExt +
+            String sourceAndTargetExt = getSourceAndTargetExt(sourceMimetype, targetMimetype);
+            debug(sourceAndTargetExt +
                     ((fileName != null) ? fileName + ' ' : "") +
                     ((sourceSize >= 0) ? fileSize(sourceSize) + ' ' : "") +
                     getRenditionName(renditionName) + " "+ TRANSFORM_SERVICE_NAME);
             log(options);
             log(sourceNodeRef.toString() + ' ' + contentHashcode);
-            String reference = getReference(true, false);
-            infoLog(reference, sourceExt, targetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME,
+            String reference = getReference(true, false, false);
+            infoLog(reference, sourceAndTargetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME,
                     renditionName, null, "", true);
         }
         return pop(Call.AVAILABLE, true, false);
@@ -884,19 +930,21 @@ public class TransformerDebug
 
     private String getRenditionName(String renditionName)
     {
-        return renditionName != null ? "-- "+renditionName+" -- " : "";
+        return renditionName != null
+            ? "-- "+ AsynchronousExtractor.getRenditionName(renditionName)+" -- "
+            : "";
     }
 
     /**
      * Debugs a response to the Transform Service
      */
     public void debugTransformServiceResponse(NodeRef sourceNodeRef, int contentHashcode,
-                                              long requested, int seq, String sourceExt, String targetExt, String msg)
+                                              long requested, int id, String sourceExt, String targetExt, String msg)
     {
         pushMisc();
         Frame frame = ThreadInfo.getStack().getLast();
-        frame.id = seq;
-        boolean suppressFinish = seq == -1 || requested == -1;
+        frame.id = id;
+        boolean suppressFinish = id == -1 || requested == -1;
         if (!suppressFinish)
         {
             frame.start = requested;
diff --git a/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java b/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java
index 298723c76d..350411d903 100644
--- a/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java
+++ b/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * If the software was purchased under a paid Alfresco license, the terms of
@@ -29,6 +29,7 @@ import org.alfresco.model.ContentModel;
 import org.alfresco.model.RenditionModel;
 import org.alfresco.repo.content.ContentServicePolicies;
 import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.metadata.AsynchronousExtractor;
 import org.alfresco.repo.policy.BehaviourFilter;
 import org.alfresco.repo.policy.PolicyComponent;
 import org.alfresco.repo.rendition.RenditionPreventionRegistry;
@@ -112,6 +113,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
     private RuleService ruleService;
     private PostTxnCallbackScheduler renditionRequestSheduler;
     private TransformReplyProvider transformReplyProvider;
+    private AsynchronousExtractor asynchronousExtractor;
     private boolean enabled;
     private boolean thumbnailsEnabled;
 
@@ -176,6 +178,11 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
         this.transformReplyProvider = transformReplyProvider;
     }
 
+    public void setAsynchronousExtractor(AsynchronousExtractor asynchronousExtractor)
+    {
+        this.asynchronousExtractor = asynchronousExtractor;
+    }
+
     public void setEnabled(boolean enabled)
     {
         this.enabled = enabled;
@@ -203,6 +210,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
         PropertyCheck.mandatory(this, "policyComponent", policyComponent);
         PropertyCheck.mandatory(this, "behaviourFilter", behaviourFilter);
         PropertyCheck.mandatory(this, "ruleService", ruleService);
+        PropertyCheck.mandatory(this, "asynchronousExtractor", asynchronousExtractor);
     }
 
     @Override
@@ -374,41 +382,115 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
     public void consume(NodeRef sourceNodeRef, InputStream transformInputStream, RenditionDefinition2 renditionDefinition,
                         int transformContentHashCode)
     {
+        int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef);
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes");
+        }
+
         if (renditionDefinition instanceof TransformDefinition)
         {
-            if (logger.isDebugEnabled())
+            TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition;
+            String targetMimetype = transformDefinition.getTargetMimetype();
+            if (AsynchronousExtractor.isMetadataExtractMimetype(targetMimetype))
             {
-                TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition;
-                String transformName = transformDefinition.getTransformName();
-                String replyQueue = transformDefinition.getReplyQueue();
-                String clientData = transformDefinition.getClientData();
-                boolean success = transformInputStream != null;
-                logger.info("Reply to " + replyQueue + " that the transform " + transformName +
-                        " with the client data " + clientData + " " + (success ? "was successful" : "failed."));
+                consumeExtractedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode);
+            }
+            else if (AsynchronousExtractor.isMetadataEmbedMimetype(targetMimetype))
+            {
+                consumeEmbeddedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode);
+            }
+            else
+            {
+                consumeTransformReply(sourceNodeRef, transformInputStream, transformDefinition, transformContentHashCode);
             }
-            transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream,
-                    (TransformDefinition)renditionDefinition, transformContentHashCode);
         }
         else
         {
-            consumeRendition(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode);
+            consumeRendition(sourceNodeRef, sourceContentHashCode, transformInputStream, renditionDefinition, transformContentHashCode);
         }
     }
 
+    private void consumeExtractedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream,
+                                          TransformDefinition transformDefinition, int transformContentHashCode)
+    {
+        if (transformInputStream == null)
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it failed");
+            }
+        }
+        else if (transformContentHashCode != sourceContentHashCode)
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it is no longer needed");
+            }
+        }
+        else
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Set the metadata extraction on " + nodeRef);
+            }
+            asynchronousExtractor.setMetadata(nodeRef, transformInputStream);
+        }
+    }
+
+    private void consumeEmbeddedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream,
+                                         TransformDefinition transformDefinition, int transformContentHashCode)
+    {
+        if (transformInputStream == null)
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it failed");
+            }
+        }
+        else if (transformContentHashCode != sourceContentHashCode)
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it is no longer needed");
+            }
+        }
+        else
+        {
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Set the content with embedded metadata on " + nodeRef);
+            }
+
+            asynchronousExtractor.setEmbeddedMetadata(nodeRef, transformInputStream);
+        }
+    }
+
+    private void consumeTransformReply(NodeRef sourceNodeRef, InputStream transformInputStream,
+                                       TransformDefinition transformDefinition, int transformContentHashCode)
+    {
+        if (logger.isDebugEnabled())
+        {
+            String transformName = transformDefinition.getTransformName();
+            String replyQueue = transformDefinition.getReplyQueue();
+            String clientData = transformDefinition.getClientData();
+            boolean success = transformInputStream != null;
+            logger.info("Reply to " + replyQueue + " that the transform " + transformName +
+                    " with the client data " + clientData + " " + (success ? "was successful" : "failed."));
+        }
+        transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream,
+                transformDefinition, transformContentHashCode);
+    }
+
     /**
      *  Takes a transformation (InputStream) and attaches it as a rendition to the source node.
      *  Does nothing if there is already a newer rendition.
      *  If the transformInputStream is null, this is taken to be a transform failure.
      */
-    private void consumeRendition(NodeRef sourceNodeRef, InputStream transformInputStream,
+    private void consumeRendition(NodeRef sourceNodeRef, int sourceContentHashCode, InputStream transformInputStream,
                                   RenditionDefinition2 renditionDefinition, int transformContentHashCode)
     {
         String renditionName = renditionDefinition.getRenditionName();
-        int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef);
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes");
-        }
         if (transformContentHashCode != sourceContentHashCode)
         {
             if (logger.isDebugEnabled())
@@ -475,7 +557,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
                                 }
                                 catch (Exception e)
                                 {
-                                    logger.error("Failed to read transform InputStream into rendition " + renditionName + " on " + sourceNodeRef);
+                                    logger.error("Failed to copy transform InputStream into rendition " + renditionName + " on " + sourceNodeRef);
                                     throw e;
                                 }
                             }
diff --git a/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java b/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java
index 26df001b31..3c0b1522f6 100644
--- a/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java
+++ b/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * If the software was purchased under a paid Alfresco license, the terms of
@@ -80,7 +80,14 @@ public class TransformDefinition extends RenditionDefinition2Impl
     public String getTransformName()
     {
         String renditionName = getRenditionName();
-        return renditionName == null ? null : renditionName.substring(TRANSFORM_NAMESPACE.length());
+        return getTransformName(renditionName); // null unless the name starts with TRANSFORM_NAMESPACE
+    }
+
+    public static String getTransformName(String renditionName)
+    {
+        // A null name, or one without the TRANSFORM_NAMESPACE prefix, has no transform name.
+        boolean hasPrefix = renditionName != null && renditionName.startsWith(TRANSFORM_NAMESPACE);
+        return hasPrefix ? renditionName.substring(TRANSFORM_NAMESPACE.length()) : null;
     }
 
     public String getClientData()
diff --git a/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java b/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java
index 478da899bc..e9f2fc17ae 100644
--- a/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java
+++ b/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java
@@ -1,28 +1,28 @@
-/*
- * #%L
- * Alfresco Repository
- * %%
- * Copyright (C) 2005 - 2016 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software. 
- * If the software was purchased under a paid Alfresco license, the terms of 
- * the paid license agreement will prevail.  Otherwise, the software is 
- * provided under the following open source license terms:
- * 
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
+/*
+ * #%L
+ * Alfresco Repository
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of 
+ * the paid license agreement will prevail.  Otherwise, the software is 
+ * provided under the following open source license terms:
+ * 
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
 package org.alfresco.repo.tagging;
 
 import java.io.BufferedReader;
@@ -74,9 +74,9 @@ import org.alfresco.service.cmr.repository.NodeService;
 import org.alfresco.service.cmr.repository.Path;
 import org.alfresco.service.cmr.repository.StoreRef;
 import org.alfresco.service.cmr.search.CategoryService;
-import org.alfresco.service.cmr.search.ResultSet;
-import org.alfresco.service.cmr.search.SearchParameters;
-import org.alfresco.service.cmr.search.SearchParameters.FieldFacet;
+import org.alfresco.service.cmr.search.ResultSet;
+import org.alfresco.service.cmr.search.SearchParameters;
+import org.alfresco.service.cmr.search.SearchParameters.FieldFacet;
 import org.alfresco.service.cmr.search.SearchService;
 import org.alfresco.service.cmr.tagging.TagDetails;
 import org.alfresco.service.cmr.tagging.TagScope;
@@ -1544,35 +1544,35 @@ public class TaggingServiceImpl implements TaggingService,
         {
             updateAllScopeTags(workingCopy, Boolean.FALSE);
         }
-    }
-
-    /**
-     * @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef)
-     */
-    @Override
-    public List> findTaggedNodesAndCountByTagName(StoreRef storeRef)
-    {
-        String queryTaggeble = "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" + "-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\"";
-        SearchParameters sp = new SearchParameters();
-        sp.setQuery(queryTaggeble);
-        sp.setLanguage(SearchService.LANGUAGE_LUCENE);
-        sp.addStore(storeRef);
-        sp.addFieldFacet(new FieldFacet("TAG"));
-
-        ResultSet resultSet = null;
-        try
-        {
-            // Do the search for nodes
-            resultSet = this.searchService.query(sp);
-            return resultSet.getFieldFacet("TAG");
-        }
-        finally
-        {
-            if (resultSet != null)
-            {
-                resultSet.close();
-            }
-        }
+    }
+
+    /** Runs a Lucene query for ASPECT_TAGGABLE with a '-' clause on ASPECT_WORKING_COPY, faceted on "TAG".
+     * @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef)
+     */
+    @Override
+    public List> findTaggedNodesAndCountByTagName(StoreRef storeRef)
+    {
+        String queryTaggeble = "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" + "-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\"";
+        SearchParameters sp = new SearchParameters();
+        sp.setQuery(queryTaggeble);
+        sp.setLanguage(SearchService.LANGUAGE_LUCENE);
+        sp.addStore(storeRef);
+        sp.addFieldFacet(new FieldFacet("TAG"));
+
+        ResultSet resultSet = null;
+        try
+        {
+            // Run the search; the "TAG" facet is read before the finally block closes the result set
+            resultSet = this.searchService.query(sp);
+            return resultSet.getFieldFacet("TAG");
+        }
+        finally
+        {
+            if (resultSet != null)
+            {
+                resultSet.close();
+            }
+        }
     }
 
 }
diff --git a/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java b/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java
index 6a5b192901..dcd0b8f9d0 100644
--- a/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java
+++ b/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Repository
  * %%
- * Copyright (C) 2019 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * If the software was purchased under a paid Alfresco license, the terms of
@@ -48,7 +48,6 @@ import org.apache.http.util.EntityUtils;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -56,6 +55,9 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataEmbedMimetype;
+import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataExtractMimetype;
+
 /**
  * This class reads multiple T-Engine config and local files and registers them all with a registry as if they were all
  * in one file. Transform options are shared between all sources.

@@ -398,7 +400,13 @@ public class CombinedConfig // the source matches the last intermediate. Set supportedSourceAndTargets = sourceMediaTypesAndMaxSizes.stream(). flatMap(s -> stepTransformer.getSupportedSourceAndTargetList().stream(). - filter(st -> st.getSourceMediaType().equals(src)). + filter(st -> + { + String targetMimetype = st.getTargetMediaType(); + return st.getSourceMediaType().equals(src) && + !(isMetadataExtractMimetype(targetMimetype) || + isMetadataEmbedMimetype(targetMimetype)); + }). map(t -> t.getTargetMediaType()). map(trg -> SupportedSourceAndTarget.builder(). withSourceMediaType(s.getSourceMediaType()). diff --git a/src/main/resources/alfresco/content-services-context.xml b/src/main/resources/alfresco/content-services-context.xml index 87d3080df3..b0f96902d6 100644 --- a/src/main/resources/alfresco/content-services-context.xml +++ b/src/main/resources/alfresco/content-services-context.xml @@ -255,7 +255,10 @@ - + + + + + + + + + + + + + + diff --git a/src/main/resources/alfresco/rendition-services2-context.xml b/src/main/resources/alfresco/rendition-services2-context.xml index 7a88f3b2f1..4280e29e6f 100644 --- a/src/main/resources/alfresco/rendition-services2-context.xml +++ b/src/main/resources/alfresco/rendition-services2-context.xml @@ -59,6 +59,7 @@ + diff --git a/src/main/resources/alfresco/repository.properties b/src/main/resources/alfresco/repository.properties index 53e41fd25e..b603a3b0a4 100644 --- a/src/main/resources/alfresco/repository.properties +++ b/src/main/resources/alfresco/repository.properties @@ -608,6 +608,9 @@ system.thumbnail.quietPeriod=604800 system.thumbnail.quietPeriodRetriesEnabled=true system.thumbnail.redeployStaticDefsOnStartup=true +content.metadata.async.extract.enabled=true +content.metadata.async.embed.enabled=true + # The default timeout for metadata mapping extracters content.metadataExtracter.default.timeoutMs=20000 diff --git a/src/test/java/org/alfresco/AppContext01TestSuite.java 
b/src/test/java/org/alfresco/AppContext01TestSuite.java index 3dc93230d4..8492621e79 100644 --- a/src/test/java/org/alfresco/AppContext01TestSuite.java +++ b/src/test/java/org/alfresco/AppContext01TestSuite.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2017 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -45,9 +45,12 @@ import org.junit.runners.Suite; org.alfresco.repo.action.evaluator.HasAspectEvaluatorTest.class, org.alfresco.repo.action.executer.SetPropertyValueActionExecuterTest.class, org.alfresco.repo.action.executer.AddFeaturesActionExecuterTest.class, + org.alfresco.repo.action.executer.ContentMetadataExtracterTest.class, org.alfresco.repo.action.executer.ContentMetadataExtracterTagMappingTest.class, org.alfresco.repo.action.executer.ContentMetadataEmbedderTest.class, + org.alfresco.repo.action.executer.AsynchronousExtractorTest.class, + org.alfresco.repo.rule.RuleLinkTest.class, org.alfresco.repo.rule.RuleServiceCoverageTest.class, org.alfresco.repo.rule.RuleServiceImplTest.class, diff --git a/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java b/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java new file mode 100644 index 0000000000..76a2a0e0a0 --- /dev/null +++ b/src/test/java/org/alfresco/repo/action/executer/AsynchronousExtractorTest.java @@ -0,0 +1,559 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.repo.action.executer; + +import org.alfresco.model.ContentModel; +import org.alfresco.repo.action.ActionImpl; +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; +import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; +import org.alfresco.repo.content.transform.AbstractContentTransformerTest; +import org.alfresco.repo.content.transform.TransformerDebug; +import org.alfresco.repo.content.transform.UnsupportedTransformationException; +import org.alfresco.repo.rendition2.RenditionDefinition2; +import org.alfresco.repo.rendition2.RenditionService2Impl; +import org.alfresco.repo.rendition2.TransformClient; +import org.alfresco.repo.security.authentication.AuthenticationComponent; +import org.alfresco.repo.transaction.RetryingTransactionHelper; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import 
org.alfresco.service.cmr.repository.MimetypeService; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.repository.StoreRef; +import org.alfresco.service.cmr.tagging.TaggingService; +import org.alfresco.service.namespace.NamespacePrefixResolver; +import org.alfresco.service.namespace.QName; +import org.alfresco.service.transaction.TransactionService; +import org.alfresco.test_category.BaseSpringTestsCategory; +import org.alfresco.transform.client.registry.TransformServiceRegistry; +import org.alfresco.util.BaseSpringTest; +import org.alfresco.util.GUID; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import javax.transaction.HeuristicMixedException; +import javax.transaction.HeuristicRollbackException; +import javax.transaction.NotSupportedException; +import javax.transaction.RollbackException; +import javax.transaction.SystemException; +import javax.transaction.UserTransaction; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.StringJoiner; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static java.util.Arrays.asList; +import static org.alfresco.model.ContentModel.PROP_CONTENT; +import static org.alfresco.model.ContentModel.PROP_CREATED; +import static org.alfresco.model.ContentModel.PROP_CREATOR; +import static org.alfresco.model.ContentModel.PROP_MODIFIED; +import static org.alfresco.model.ContentModel.PROP_MODIFIER; +import static org.alfresco.repo.rendition2.RenditionService2Impl.SOURCE_HAS_NO_CONTENT; + +/** + * Tests the asynchronous extract and embed of metadata. 
This is normally performed in a T-Engine, but in this test + * class is mocked using a separate Thread that returns well known values. What make the AsynchronousExtractor + * different from other {@link AbstractMappingMetadataExtracter} sub classes is that the calling Thread does not + * do the work of updating properties or the content, as the T-Engine will reply at some later point. + * + * @author adavis + */ +@Category(BaseSpringTestsCategory.class) +public class AsynchronousExtractorTest extends BaseSpringTest +{ + private final static String ID = GUID.generate(); + private static final String AFTER_CALLING_EXECUTE = "after calling execute"; + private static final String AFTER_THE_TRANSFORM = "after the transform"; + private static final Integer UNCHANGED_HASHCODE = null; + private static final Integer CHANGED_HASHCODE = 1234; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy"); + private static final ExecutorService executorService = Executors.newCachedThreadPool(); + + private NodeService nodeService; + private ContentService contentService; + private DictionaryService dictionaryService; + private MimetypeService mimetypeService; + private MetadataExtracterRegistry metadataExtracterRegistry; + private StoreRef testStoreRef; + private NodeRef rootNodeRef; + private NodeRef nodeRef; + private AsynchronousExtractor asynchronousExtractor; + private NamespacePrefixResolver namespacePrefixResolver; + private TransformerDebug transformerDebug; + private TransactionService transactionService; + private TransformServiceRegistry transformServiceRegistry; + private TaggingService taggingService; + private ContentMetadataExtracter contentMetadataExtracter; + private ContentMetadataEmbedder contentMetadataEmbedder; + private RenditionService2Impl renditionService2; + private TransformClient transformClient; + + private long origSize; + private Map origProperties; + private Map expectedProperties; + private Map 
properties; + + private class TestAsynchronousExtractor extends AsynchronousExtractor + { + private final String mockResult; + private final Integer changedHashcode; + private final Random random = new Random(); + + private boolean finished; + + TransformClient mockTransformClient = new TransformClient() + { + @Override + public void checkSupported(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String sourceMimetype, long sourceSizeInBytes, String contentUrl) + { + } + + @Override + public void transform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String user, int sourceContentHashCode) + throws UnsupportedTransformationException, ContentIOException + { + mockTransform(sourceNodeRef, renditionDefinition, sourceContentHashCode); + } + }; + + /** + * Creates an AsynchronousExtractor that simulates a extract or embed. + * + * @param mockResult if specified indicates a value was returned. The result is read as a resource from + * the classpath. + * @param changedHashcode if specified indicates that the source node content changed or was deleted between + * the request to extract or embed and the response. 
+ */ + TestAsynchronousExtractor(String mockResult, Integer changedHashcode) + { + this.mockResult = mockResult; + this.changedHashcode = changedHashcode; + + setNodeService(nodeService); + setNamespacePrefixResolver(namespacePrefixResolver); + setTransformerDebug(transformerDebug); + setRenditionService2(renditionService2); + setContentService(contentService); + setTransactionService(transactionService); + setTransformServiceRegistry(transformServiceRegistry); + setTaggingService(taggingService); + setRegistry(metadataExtracterRegistry); + setMimetypeService(mimetypeService); + setDictionaryService(dictionaryService); + setExecutorService(executorService); + register(); + + renditionService2.setTransformClient(mockTransformClient); + } + + @Override + public boolean isSupported(String sourceMimetype, long sourceSizeInBytes) + { + return true; + } + + @Override + public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes) + { + return true; + } + + private void mockTransform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, int sourceContentHashCode) + { + try + { + transformerDebug.pushMisc(); + wait(50, 700); + } + finally + { + transformerDebug.popMisc(); + } + + int transformContentHashCode = changedHashcode == null ? sourceContentHashCode : changedHashcode; + if (mockResult != null) + { + try (InputStream transformInputStream = getClass().getClassLoader().getResourceAsStream(mockResult)) + { + renditionService2.consume(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode); + } + catch (IOException e) + { + throw new RuntimeException("Could not read '" + mockResult + "' from the classpath.", e); + } + } + else + { + renditionService2.failure(sourceNodeRef, renditionDefinition, transformContentHashCode); + } + + synchronized (this) + { + finished = true; + notifyAll(); + } + } + + /** + * Wait for a few milliseconds or until the finished flag is set. + * + * @param from inclusive lower bound. 
If negative, there is only an upper bound. + * @param to exclusive upper bound. + * @return the wait. + */ + public synchronized void wait(int from, int to) + { + long start = System.currentTimeMillis(); + long end = start + (from < 0 ? to : from + random.nextInt(to - from)); + + while (!finished && System.currentTimeMillis() < end) + { + try + { + long ms = end - System.currentTimeMillis(); + if (ms > 0) + { + wait(ms); + } + } + catch (InterruptedException ignore) + { + } + } + } + } + + @Before + public void before() throws Exception + { + nodeService = (NodeService) applicationContext.getBean("nodeService"); + contentService = (ContentService) applicationContext.getBean("contentService"); + dictionaryService = (DictionaryService) applicationContext.getBean("dictionaryService"); + mimetypeService = (MimetypeService) applicationContext.getBean("mimetypeService"); + namespacePrefixResolver = (NamespacePrefixResolver) applicationContext.getBean("namespaceService"); + transformerDebug = (TransformerDebug) applicationContext.getBean("transformerDebug"); + renditionService2 = (RenditionService2Impl) applicationContext.getBean("renditionService2"); + transactionService = (TransactionService) applicationContext.getBean("transactionService"); + transformServiceRegistry = (TransformServiceRegistry) applicationContext.getBean("transformServiceRegistry"); + taggingService = (TaggingService) applicationContext.getBean("taggingService"); + transformClient = (TransformClient) applicationContext.getBean("transformClient"); + + // Create an empty metadata extractor registry, so that if we add one it will be used + metadataExtracterRegistry = new MetadataExtracterRegistry(); + + contentMetadataExtracter = new ContentMetadataExtracter(); + contentMetadataExtracter.setNodeService(nodeService); + contentMetadataExtracter.setContentService(contentService); + contentMetadataExtracter.setDictionaryService(dictionaryService); + 
contentMetadataExtracter.setMetadataExtracterRegistry(metadataExtracterRegistry); + contentMetadataExtracter.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()}); + contentMetadataExtracter.setCarryAspectProperties(true); + + contentMetadataEmbedder = new ContentMetadataEmbedder(); + contentMetadataEmbedder.setNodeService(nodeService); + contentMetadataEmbedder.setContentService(contentService); + contentMetadataEmbedder.setMetadataExtracterRegistry(metadataExtracterRegistry); + contentMetadataEmbedder.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()}); + + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + @Override + public Void execute() throws Throwable + { + AuthenticationComponent authenticationComponent = (AuthenticationComponent) applicationContext.getBean("authenticationComponent"); + authenticationComponent.setSystemUserAsCurrentUser(); + + // Create the store and get the root node + testStoreRef = nodeService.createStore( + StoreRef.PROTOCOL_WORKSPACE, + "Test_" + System.currentTimeMillis()); + rootNodeRef = nodeService.getRootNode(testStoreRef); + + // Create the node used for tests + nodeRef = nodeService.createNode( + rootNodeRef, ContentModel.ASSOC_CHILDREN, + QName.createQName("{test}testnode"), + ContentModel.TYPE_CONTENT).getChildRef(); + + // Authenticate as the system user + authenticationComponent.setSystemUserAsCurrentUser(); + + ContentWriter cw = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + cw.setMimetype(MimetypeMap.MIMETYPE_PDF); + cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf")); + + origProperties = nodeService.getProperties(nodeRef); + nodeService.setProperties(nodeRef, origProperties); + origProperties = new HashMap<>(origProperties); // just in case the contents changed. + expectedProperties = new HashMap<>(origProperties); // ready to be modified. 
+ + origSize = getSize(nodeRef); + + return null; + } + }); + } + + @After + public void after() throws Exception + { + renditionService2.setTransformClient(transformClient); + } + + private void assertAsyncMetadataExecute(ActionExecuterAbstractBase executor, String mockResult, + Integer changedHashcode, long expectedSize, + Map expectedProperties, + QName... ignoreProperties) throws Exception + { + TestAsynchronousExtractor extractor = new TestAsynchronousExtractor(mockResult, changedHashcode); + + executeAction(executor, extractor); + assertContentSize(nodeRef, origSize, AFTER_CALLING_EXECUTE); + assertProperties(nodeRef, origProperties, AFTER_CALLING_EXECUTE, ignoreProperties); + + extractor.wait(-1, 10000); + assertContentSize(nodeRef, expectedSize, AFTER_THE_TRANSFORM); + assertProperties(nodeRef, expectedProperties, AFTER_THE_TRANSFORM, ignoreProperties); + } + + private void executeAction(ActionExecuterAbstractBase extractor, TestAsynchronousExtractor asynchronousExtractor) + throws SystemException, NotSupportedException, HeuristicRollbackException, HeuristicMixedException, RollbackException + { + UserTransaction txn = transactionService.getUserTransaction(); + txn.begin(); + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + extractor.execute(action, nodeRef); + txn.commit(); + } + + void assertContentSize(NodeRef nodeRef, long expectSize, String state) + { + long size = getSize(nodeRef); + if (expectSize == origSize) + { + assertEquals("The content should remain unchanged " + state, origSize, size); + } + else + { + assertEquals("The content should have changed " + state, expectSize, size); + } + } + + private long getSize(NodeRef nodeRef) + { + ContentReader reader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + return reader.getSize(); + } + + private void assertProperties(NodeRef nodeRef, Map expectProperties, String state, + QName[] ignoreProperties) + { + properties = 
nodeService.getProperties(nodeRef); + + // Work out the difference in a human readable form and ignore the 5 system set properties (as they always + // change) plus any the caller has requested. + StringJoiner sj = new StringJoiner("\n"); + List ignoreKeys = new ArrayList<>(asList(PROP_MODIFIED, PROP_MODIFIER, PROP_CONTENT, PROP_CREATED, PROP_CREATOR)); + ignoreKeys.addAll(asList(ignoreProperties)); + for (Map.Entry entry : expectProperties.entrySet()) + { + QName k = entry.getKey(); + Serializable v = entry.getValue(); + Serializable actual = properties.get(k); + if (!ignoreKeys.contains(k) && !v.equals(actual)) + { + sj.add(k + "\n Expected: " + v + "\n Was: " + actual); + } + } + for (QName k : properties.keySet()) + { + Serializable actual = properties.get(k); + if (!ignoreKeys.contains(k) && !expectProperties.containsKey(k)) + { + sj.add(k + "\n Expected: null\n Was: " + actual); + } + } + + if (sj.length() != 0) + { + if (expectProperties.equals(origProperties)) + { + fail("The properties should remain unchanged " + state + "\n" + sj); + } + else + { + fail("The properties should have changed " + state + "\n" + sj); + } + } + } + + @Test + public void testExtractHtml() throws Exception + { + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Nevin Nollop"); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "Gym class featuring a brown fox and lazy dog"); + expectedProperties.put(QName.createQName("cm:title", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + } + + @Test + public void testExtractNodeDeleted() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + SOURCE_HAS_NO_CONTENT, origSize, origProperties); + } + + @Test + public void testExtractContentChanged() 
throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + 1234, origSize, origProperties); + } + + @Test + public void testExtractTransformFailure() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, null, + UNCHANGED_HASHCODE, origSize, origProperties); + } + + @Test + public void testExtractTransformCorrupt() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick.html", // not json + UNCHANGED_HASHCODE, origSize, origProperties); + } + + @Test + public void testUnknownNamespaceInResponse() throws Exception + { + // "sys:overwritePolicy": "PRAGMATIC" - is used + // "{http://www.unknown}name": "ignored" - is reported in an ERROR log + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Used"); + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/unknown_namespace_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + } + + @Test + public void testExtractMsg() throws Exception // has dates as RFC822 + { + expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), "mark.rogers@alfresco.com"); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "This is a quick test"); + expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver), + new ArrayList<>(asList("mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com"))); + + expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jan 18 13:44:20 GMT 2013")); // 2013-01-18T13:44:20Z + expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), "This is a quick test"); + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Mark Rogers"); + expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), "Mark Rogers"); + + 
assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.msg_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + + Serializable sentDate = properties.get(QName.createQName("cm:sentdate", namespacePrefixResolver)); + } + + @Test + public void testExtractEml() throws Exception // has dates as longs since 1970 + { + expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver), + new ArrayList<>(asList("Nevin Nollop "))); + expectedProperties.put(QName.createQName("imap:dateSent", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jun 04 13:23:22 BST 2004")); + expectedProperties.put(QName.createQName("imap:messageTo", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("imap:messageId", namespacePrefixResolver), "<20040604122322.GV1905@phoenix.home>"); + expectedProperties.put(QName.createQName("cm:title", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageSubject", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageCc", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jun 04 13:23:22 BST 2004")); + expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageFrom", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), "Nevin Nollop "); + + // Note: As the metadata is for eml, an 
aspect gets added resulting in a second extract because of + // ImapContentPolicy.onAddAspect. I cannot see a good way to avoid this. + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.eml_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties, + // cm:author is not in the quick.eml_metadata.json but is being added by the second extract which thinks + // the source mimetype is MimetypeMap.MIMETYPE_PDF, because that is what the before() method sets the + // content to. As a result the PdfBox metadata extractor is called, which extracts cm:author. Given that + // we don't know when this will take place, we simply ignore this property. We could fix this up, but it + // does not add anything to the test. + QName.createQName("cm:author", namespacePrefixResolver)); + } + + + @Test + public void testEmbed() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", // just replace the pdf with html! + UNCHANGED_HASHCODE, 428, expectedProperties); + } + @Test + public void testEmbedNodeDeleted() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", + SOURCE_HAS_NO_CONTENT, origSize, origProperties); + } + + @Test + public void testEmbedContentChanged() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", + 1234, origSize, origProperties); + } + + @Test + public void testEmbedTransformFailure() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, null, + UNCHANGED_HASHCODE, origSize, origProperties); + } + + // TODO Write tests for: overwritePolicy, enableStringTagging and carryAspectProperties. + // Values are set in AsynchronousExtractor.setMetadata(...) but make use of original code within + // MetadataExtracter and AbstractMappingMetadataExtracter. + // As the tests for existing extractors are to be removed in ACS 7.0, it is possible that they were being used + // to test these values. 
+} \ No newline at end of file diff --git a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java index c4df229da6..5c41c6c663 100644 --- a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java +++ b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -25,22 +25,11 @@ */ package org.alfresco.repo.action.executer; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; -import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; import org.alfresco.service.cmr.dictionary.DictionaryService; @@ -55,15 +44,25 @@ import org.alfresco.service.namespace.QName; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; import org.apache.tika.embedder.Embedder; -import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; 
+import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.transaction.annotation.Transactional; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + /** * Test of the ActionExecuter for embedding metadata * @@ -94,7 +93,9 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService"); this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService"); this.metadataExtracterRegistry = (MetadataExtracterRegistry) this.applicationContext.getBean("metadataExtracterRegistry"); - + metadataExtracterRegistry.setAsyncExtractEnabled(false); + metadataExtracterRegistry.setAsyncEmbedEnabled(false); + AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -123,15 +124,21 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.executer.setApplicableTypes(new String[] { ContentModel.TYPE_CONTENT.toString() }); } + @After + public void after() + { + metadataExtracterRegistry.setAsyncExtractEnabled(true); + metadataExtracterRegistry.setAsyncEmbedEnabled(true); + } + /** * Test that a failing embedder does not destroy the original content */ @Test public void testFailingEmbedder() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); - FailingEmbedder embedder = new FailingEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); - embedder.setRegistry(registry); + AbstractMappingMetadataExtracter embedder = new FailingMappingMetadataEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); + 
embedder.setRegistry(metadataExtracterRegistry); embedder.setDictionaryService(this.dictionaryService); embedder.setMimetypeService(this.mimetypeService); embedder.register(); @@ -158,17 +165,16 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest } /** - * Tika-powered embedder which fails upon calling embed on its {@link FailingTikaEmbedder} + * Embedder which fails upon calling embed on its {@link FailingEmbedder} */ - private class FailingEmbedder extends TikaPoweredMetadataExtracter + private class FailingMappingMetadataEmbedder extends AbstractMappingMetadataExtracter { - /** * Constructor for setting supported extract and embed mimetypes * * @param mimetypes the supported extract and embed mimetypes */ - public FailingEmbedder(Collection mimetypes) + public FailingMappingMetadataEmbedder(Collection mimetypes) { super( new HashSet(mimetypes), @@ -176,15 +182,26 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest } @Override - protected Parser getParser() + protected void embedInternal(Map metadata, ContentReader reader, ContentWriter writer) throws Throwable { - return null; + Embedder embedder = getEmbedder(); + if (embedder == null) + { + return; + } + + Map metadataAsStrings = convertMetadataToStrings(metadata); + Metadata metadataToEmbed = new Metadata(); + metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v)); + + InputStream inputStream = reader.getContentInputStream(); + OutputStream outputStream = writer.getContentOutputStream(); + embedder.embed(metadataToEmbed, null, outputStream, null); } - @Override protected Embedder getEmbedder() { - return new FailingTikaEmbedder(); + return new FailingEmbedder(); } @Override @@ -202,12 +219,18 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest mapping.put("author", qnames); return mapping; } + + @Override + protected Map extractRaw(ContentReader reader) throws Throwable + { + return null; + } } /** - * Tika metadata embedder which fails on a call to embed. 
+ * Metadata embedder which fails on a call to embed. */ - private class FailingTikaEmbedder implements Embedder + private class FailingEmbedder implements Embedder { private static final long serialVersionUID = -4954679684941467571L; @@ -219,7 +242,7 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest @Override public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context) - throws IOException, TikaException + throws IOException { throw new IOException("Forced failure"); } diff --git a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java index 2c68e725cb..a7f46c3bc7 100644 --- a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java +++ b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -43,8 +43,8 @@ import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.action.ActionModel; import org.alfresco.repo.action.AsynchronousActionExecutionQueuePolicies; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; -import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.policy.Behaviour.NotificationFrequency; import org.alfresco.repo.policy.JavaBehaviour; @@ -73,8 +73,6 @@ import org.alfresco.util.GUID; import org.alfresco.util.testing.category.LuceneTests; import org.alfresco.util.testing.category.RedundantTests; import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.jpeg.JpegParser; import org.junit.experimental.categories.Category; import org.springframework.context.ConfigurableApplicationContext; @@ -111,6 +109,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase private TaggingService taggingService; private NodeService nodeService; private ContentService contentService; + private MetadataExtracterRegistry metadataExtracterRegistry; private AuditService auditService; private TransactionService transactionService; private AuthenticationComponent authenticationComponent; @@ -143,7 +142,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase this.taggingService = (TaggingService)ctx.getBean("TaggingService"); this.nodeService = (NodeService) ctx.getBean("NodeService"); this.contentService = (ContentService) ctx.getBean("ContentService"); - + this.metadataExtracterRegistry = (MetadataExtracterRegistry) ctx.getBean("metadataExtracterRegistry"); + metadataExtracterRegistry.setAsyncExtractEnabled(false); + 
metadataExtracterRegistry.setAsyncEmbedEnabled(false); + this.transactionService = (TransactionService)ctx.getBean("transactionComponent"); this.auditService = (AuditService)ctx.getBean("auditService"); this.authenticationComponent = (AuthenticationComponent)ctx.getBean("authenticationComponent"); @@ -207,6 +209,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase @Override protected void tearDown() throws Exception { + metadataExtracterRegistry.setAsyncExtractEnabled(true); + metadataExtracterRegistry.setAsyncEmbedEnabled(true); + if (AlfrescoTransactionSupport.getTransactionReadState() != TxnReadState.TXN_NONE) { fail("Test is not transaction-safe. Fix up transaction handling and re-test."); @@ -296,7 +301,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase }); } - private static class TagMappingMetadataExtracter extends TikaPoweredMetadataExtracter + private static class TagMappingMetadataExtracter extends AbstractMappingMetadataExtracter { private String existingTagNodeRef; @@ -328,16 +333,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase return sourceMimetype.equals(MimetypeMap.MIMETYPE_IMAGE_JPEG); } - @Override - protected Parser getParser() - { - return new JpegParser(); - } - @SuppressWarnings("unchecked") public Map extractRaw(ContentReader reader) throws Throwable { - Map rawMap = super.extractRaw(reader); + Map rawMap = newRawMap(); // Add some test keywords to those actually extracted from the file including a nodeRef List keywords = new ArrayList(Arrays.asList( diff --git a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java index d7ccf2a97d..1e62162c18 100644 --- a/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco 
Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -43,16 +43,11 @@ */ package org.alfresco.repo.action.executer; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; @@ -67,11 +62,18 @@ import org.alfresco.service.namespace.QName; import org.alfresco.test_category.BaseSpringTestsCategory; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.transaction.annotation.Transactional; +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + /** * Test of the ActionExecuter for extracting metadata. Note: This test makes * assumptions about the PDF test data for PdfBoxExtracter. 
@@ -88,6 +90,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest private NodeService nodeService; private ContentService contentService; + private MetadataExtracterRegistry registry; private StoreRef testStoreRef; private NodeRef rootNodeRef; private NodeRef nodeRef; @@ -101,7 +104,10 @@ public class ContentMetadataExtracterTest extends BaseSpringTest { this.nodeService = (NodeService) this.applicationContext.getBean("nodeService"); this.contentService = (ContentService) this.applicationContext.getBean("contentService"); - + registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); + registry.setAsyncExtractEnabled(false); + registry.setAsyncEmbedEnabled(false); + AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -126,6 +132,13 @@ public class ContentMetadataExtracterTest extends BaseSpringTest this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata"); } + @After + public void after() + { + registry.setAsyncExtractEnabled(true); + registry.setAsyncEmbedEnabled(true); + } + /** * Test execution of the extraction itself */ @@ -189,7 +202,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest @Test public void testUnknownProperties() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); TestUnknownMetadataExtracter extracterUnknown = new TestUnknownMetadataExtracter(); extracterUnknown.setRegistry(registry); extracterUnknown.register(); @@ -247,7 +259,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest @Test public void testNullExtractedValues_ALF1823() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); TestNullPropMetadataExtracter extractor = new 
TestNullPropMetadataExtracter(); extractor.setRegistry(registry); extractor.register(); diff --git a/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java b/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java index 0a1523d53d..99b8382314 100644 --- a/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java +++ b/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java @@ -63,11 +63,13 @@ import org.junit.Ignore; import org.springframework.context.ApplicationContext; /** - * + * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 and the transformer is also deprecated. + * * @author Neil McErlean * @since 3.3 */ @Ignore("This is an abstract class so don't instaniate it or run it in Junit") +@Deprecated public abstract class AbstractJodConverterBasedTest { private static Log log = LogFactory.getLog(AbstractJodConverterBasedTest.class); diff --git a/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java index 9e6bf3a557..c323e5d352 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyOfficeMetadataExtracterTest.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import static org.junit.Assert.assertEquals; @@ -39,8 +39,11 @@ import org.joda.time.format.DateTimeFormat; import org.junit.Test; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * * MNT-8978 */ +@Deprecated public class ConcurrencyOfficeMetadataExtracterTest { diff --git a/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java index c6d3eceaf2..ff9648fb0e 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/ConcurrencyPdfBoxMetadataExtracterTest.java @@ -35,11 +35,14 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; /** + * @deprecated extractor has been moved to a T-Engine. + * * The test designed for testing the concurrent limitations in * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)} * * @author amukha */ +@Deprecated public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest { private SlowPdfBoxMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java index a5d23378ba..3d46fdf603 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -42,10 +42,13 @@ import org.apache.tika.metadata.Metadata; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * * @see DWGMetadataExtracter * * @author Nick Burch */ +@Deprecated public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest { private DWGMetadataExtracter extracter; @@ -174,5 +177,5 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest assertEquals("Custom DWG property not found", "valueforcustomprop1", properties.get(TIKA_CUSTOM_TEST_PROPERTY)); } - + } diff --git a/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java index 41afd1f962..9b30dd123d 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java @@ -24,7 +24,7 @@ * #L% */ /* - * Copyright (C) 2005 Jesper Steen Møller + * Copyright (C) 2005 - 2020 Jesper Steen Møller * * This file is part of Alfresco * @@ -54,8 +54,11 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. + * * @author Jesper Steen Møller */ +@Deprecated public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest { private static final String QUICK_TITLE_JAPANESE = "確認した結果を添付しますので、確認してください"; diff --git a/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java b/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java index 506b3fac9c..47e8d676c5 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/JodMetadataExtractorOOoTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2017 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -50,10 +50,12 @@ import org.junit.Ignore; import org.junit.Test; /** - * + * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 + * * @author Neil McErlean * @since 3.2 SP1 */ +@Deprecated public class JodMetadataExtractorOOoTest extends AbstractJodConverterBasedTest { protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog"; diff --git a/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java index cd846e894c..368736432d 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/MP3MetadataExtracterTest.java @@ -1,30 +1,30 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . 
- * #L% - */ /* - * Copyright (C) 2005 Jesper Steen Møller + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +/* + * Copyright (C) 2005 - 2020 Jesper Steen Møller * * This file is part of Alfresco * @@ -52,8 +52,11 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. + * * Test for the MP3 metadata extraction from id3 tags. 
*/ +@Deprecated public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest { private MP3MetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java index ee5f68307d..02bafab78f 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/MailMetadataExtracterTest.java @@ -1,30 +1,30 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ /* - * Copyright (C) 2005 Jesper Steen Møller + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +/* + * Copyright (C) 2005 - 2020 Jesper Steen Møller * * This file is part of Alfresco * @@ -55,9 +55,12 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. + * * @author Derek Hulley * @since 3.2 */ +@Deprecated public class MailMetadataExtracterTest extends AbstractMetadataExtracterTest { private MailMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java index fa433629d8..4ecd551b57 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import java.io.Serializable; @@ -38,10 +38,13 @@ import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * * @see OfficeMetadataExtracter * * @author Jesper Steen Møller */ +@Deprecated public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest { private OfficeMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java index 6747da4ac2..50d752aa49 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -38,10 +38,13 @@ import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. + * * @see OpenDocumentMetadataExtracter * * @author Derek Hulley */ +@Deprecated public class OpenDocumentMetadataExtracterTest extends AbstractMetadataExtracterTest { private OpenDocumentMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java index 1679217862..55c41c7b57 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -40,10 +40,13 @@ import org.alfresco.service.namespace.QName; import org.apache.pdfbox.util.DateConverter; /** + * @deprecated extractor has been moved to a T-Engine. + * * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter * * @author Jesper Steen Møller */ +@Deprecated public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest { private PdfBoxMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java index 79af74be12..03e9d2fcb7 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/PoiMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -39,11 +39,14 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * * @see org.alfresco.repo.content.metadata.PoiMetadataExtracter * * @author Neil McErlean * @author Dmitry Velichkevich */ +@Deprecated public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest { private static final int MINIMAL_EXPECTED_PROPERTIES_AMOUNT = 3; diff --git a/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java index 180f4f5988..27f09dd6ca 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/RFC822MetadataExtracterTest.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import static org.junit.Assert.assertEquals; @@ -45,8 +45,11 @@ import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; /** + * @deprecated extractor has been moved to a T-Engine. + * * Test for the RFC822 (imap/mbox) extractor */ +@Deprecated public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest { private RFC822MetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java index f24ee3107d..62ad2ee9d4 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/TikaAudioMetadataExtracterTest.java @@ -1,30 +1,30 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ /* - * Copyright (C) 2005 Jesper Steen Møller + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ +/* + * Copyright (C) 2005 - 2020 Jesper Steen Møller * * This file is part of Alfresco * @@ -53,8 +53,11 @@ import org.alfresco.service.namespace.NamespaceService; import org.alfresco.service.namespace.QName; /** + * @deprecated extractor has been moved to a T-Engine. + * * Test for the audio metadata extraction. */ +@Deprecated public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest { private TikaAudioMetadataExtracter extracter; diff --git a/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java b/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java index 32a003cb4e..e8ff6e3c29 100644 --- a/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java +++ b/src/test/java/org/alfresco/repo/content/metadata/TikaAutoMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -58,10 +58,13 @@ import org.apache.tika.parser.odf.OpenDocumentParser; /** + * @deprecated extractor has been moved to a T-Engine. 
+ * @see TikaAutoMetadataExtracter * * @author Nick Burch */ +@Deprecated public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest { private static Log logger = LogFactory.getLog(TikaAutoMetadataExtracterTest.class); diff --git a/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java b/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java index ad8eb603c3..969e41bb26 100644 --- a/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java +++ b/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -100,6 +100,8 @@ import com.sun.mail.imap.protocol.RFC822DATA; import com.sun.mail.imap.protocol.UID; import com.sun.mail.util.ASCIIUtility; +import static org.alfresco.model.ContentModel.PROP_MODIFIED; + @Category({OwnJVMTestsCategory.class, LuceneTests.class}) public class ImapMessageTest extends TestCase { @@ -456,15 +458,30 @@ public class ImapMessageTest extends TestCase messageHelper.addCc(address); // Creating the message node in the repository + UserTransaction txn = transactionService.getUserTransaction(); + txn.begin(); String name = AlfrescoImapConst.MESSAGE_PREFIX + GUID.generate(); FileInfo messageFile = fileFolderService.create(testImapFolderNodeRef, name, ContentModel.TYPE_CONTENT); // Writing a content. + NodeRef nodeRef = messageFile.getNodeRef(); + Serializable origModified = getModified(nodeRef); new IncomingImapMessage(messageFile, serviceRegistry, message); - + txn.commit(); + + // Calls to new IncomingImapMessage(...) only take place when a new nodeRef is being created. + // No other code will be changing the nodeRef.
An ImapModel.ASPECT_IMAP_CONTENT is added, which + // triggers a metadata extract to take place in a post commit method. Previously this would have been a + // synchronous process. This is no longer true as it may now take place in a T-Engine. So, we need to wait + // for the extract to take place. There does not appear to be a completion notification to wait on, so the loop below polls PROP_MODIFIED for up to 10 seconds. + long end = System.currentTimeMillis()+10000; + while (System.currentTimeMillis() <= end && origModified.equals(getModified(nodeRef))) + { + Thread.currentThread().sleep(1000); + } + // Getting the transformed properties from the repository // cm:originator, cm:addressee, cm:addressees, imap:messageFrom, imap:messageTo, imap:messageCc - Map properties = nodeService.getProperties(messageFile.getNodeRef()); - + Map properties = nodeService.getProperties(nodeRef); String cmOriginator = (String) properties.get(ContentModel.PROP_ORIGINATOR); String cmAddressee = (String) properties.get(ContentModel.PROP_ADDRESSEE); @SuppressWarnings("unchecked") @@ -488,6 +505,12 @@ public class ImapMessageTest extends TestCase assertEquals(decodedAddress, imapMessageCc); } + private Serializable getModified(NodeRef nodeRef) + { + Map origProperties = nodeService.getProperties(nodeRef); + return origProperties.get(PROP_MODIFIED); + } + @Category(RedundantTests.class) public void testEightBitMessage() throws Exception { diff --git a/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java b/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java index ec2eda7cb6..9c163cd253 100644 --- a/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java +++ b/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of @@ -27,6 +27,7 @@ package org.alfresco.repo.rendition2; import com.fasterxml.jackson.databind.ObjectMapper; import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.policy.BehaviourFilter; import org.alfresco.repo.policy.PolicyComponent; import org.alfresco.repo.rendition.RenditionPreventionRegistry; @@ -46,11 +47,9 @@ import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.junit.MockitoJUnitRunner; -import org.quartz.CronExpression; import java.io.IOException; import java.util.Collections; -import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -61,7 +60,6 @@ import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.*; /** @@ -90,6 +88,7 @@ public class RenditionService2Test @Mock private RuleService ruleService; @Mock private TransformServiceRegistryImpl transformServiceRegistry; @Mock private TransformReplyProvider transformReplyProvider; + @Mock private AsynchronousExtractor asynchronousExtractor; private NodeRef nodeRef = new NodeRef("workspace://spacesStore/test-id"); private NodeRef nodeRefMissing = new NodeRef("workspace://spacesStore/bad-test-id"); @@ -154,6 +153,7 @@ public class RenditionService2Test renditionService2.setTransformReplyProvider(transformReplyProvider); renditionService2.setEnabled(true); renditionService2.setThumbnailsEnabled(true); + renditionService2.setAsynchronousExtractor(asynchronousExtractor); renditionDefinitionRegistry2.setRenditionConfigDir("alfresco/renditions/test"); renditionDefinitionRegistry2.afterPropertiesSet(); diff --git 
a/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java b/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java index 7f89ff7df5..f8907efa57 100644 --- a/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java +++ b/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -242,10 +242,13 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg targetMimetype.add("image/gif"); targetMimetype.add("image/tiff"); imagemagickSupportedTransformation.put("image/tiff", targetMimetype); + targetMimetype = new ArrayList<>(targetMimetype); targetMimetype.add("image/png"); targetMimetype.add("image/jpeg"); imagemagickSupportedTransformation.put("image/gif", targetMimetype); imagemagickSupportedTransformation.put("image/jpeg", targetMimetype); + targetMimetype = new ArrayList<>(targetMimetype); + targetMimetype.add("alfresco-metadata-extract"); // Metadata extract and embed types should be excluded from pipeline cartesian products imagemagickSupportedTransformation.put("image/png", targetMimetype); targetMimetype = new ArrayList<>(); targetMimetype.add("target1"); @@ -333,8 +336,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg @Override public void testJsonConfig() throws IOException { - // Not 60, 60 as we have added source->target1..3 to three transformers - internalTestJsonConfig(63, 69); + internalTestJsonConfig(64, 70); } @Test @@ -368,7 +370,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg switch 
(t.transformer.getTransformerName()) { case "imagemagick": - assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 17, t.transformer.getSupportedSourceAndTargetList().size()); + assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 18, t.transformer.getSupportedSourceAndTargetList().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 1, t.transformer.getTransformOptions().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 6, countTopLevelOptions(t.transformer.getTransformOptions())); assertEquals(t.transformer.getTransformerName() + " expected to not be a transformer pipeline", t.transformer.getTransformerPipeline().size(), 0); @@ -428,6 +430,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg break; case "officeToImageViaPdf": + // Note we will get 35 entries in getSupportedSourceAndTargetList() if the metadata transforms are not excluded assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 28, t.transformer.getSupportedSourceAndTargetList().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 2, t.transformer.getTransformOptions().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 11, countTopLevelOptions(t.transformer.getTransformOptions())); diff --git a/src/test/resources/alfresco/local-transform-service-config-test.json b/src/test/resources/alfresco/local-transform-service-config-test.json index d98a14c3be..337c468bfc 100644 --- a/src/test/resources/alfresco/local-transform-service-config-test.json +++ b/src/test/resources/alfresco/local-transform-service-config-test.json @@ -55,6 +55,7 @@ {"sourceMediaType": "image/png", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "image/png", 
"targetMediaType": "image/png" }, {"sourceMediaType": "image/png", "targetMediaType": "image/tiff"}, + {"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"}, {"sourceMediaType": "image/tiff", "targetMediaType": "image/gif" }, {"sourceMediaType": "image/tiff", "targetMediaType": "image/tiff"}, diff --git a/src/test/resources/quick/quick.eml_metadata.json b/src/test/resources/quick/quick.eml_metadata.json new file mode 100644 index 0000000000..873f2bb1de --- /dev/null +++ b/src/test/resources/quick/quick.eml_metadata.json @@ -0,0 +1,15 @@ +{ + "{http://www.alfresco.org/model/content/1.0}addressee" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/content/1.0}addressees" : "Nevin Nollop ", + "{http://www.alfresco.org/model/imap/1.0}dateSent" : 1086351802000, + "{http://www.alfresco.org/model/imap/1.0}messageTo" : "Nevin Nollop ", + "{http://www.alfresco.org/model/imap/1.0}messageId" : "<20040604122322.GV1905@phoenix.home>", + "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageCc" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}sentdate" : 1086351802000, + "{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageFrom" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}originator" : "Nevin Nollop " +} \ No newline at end of file diff --git a/src/test/resources/quick/quick.html_metadata.json b/src/test/resources/quick/quick.html_metadata.json new file mode 100644 index 0000000000..99b5abf9cd --- /dev/null +++ b/src/test/resources/quick/quick.html_metadata.json @@ -0,0 +1,5 @@ +{ + 
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop", + "{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog" +} \ No newline at end of file diff --git a/src/test/resources/quick/quick.msg_metadata.json b/src/test/resources/quick/quick.msg_metadata.json new file mode 100644 index 0000000000..f5c047860a --- /dev/null +++ b/src/test/resources/quick/quick.msg_metadata.json @@ -0,0 +1,9 @@ +{ + "{http://www.alfresco.org/model/content/1.0}addressee" : "mark.rogers@alfresco.com", + "{http://www.alfresco.org/model/content/1.0}description" : "This is a quick test", + "{http://www.alfresco.org/model/content/1.0}addressees" : [ "mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com" ], + "{http://www.alfresco.org/model/content/1.0}sentdate" : "2013-01-18T13:44:20Z", + "{http://www.alfresco.org/model/content/1.0}subjectline" : "This is a quick test", + "{http://www.alfresco.org/model/content/1.0}author" : "Mark Rogers", + "{http://www.alfresco.org/model/content/1.0}originator" : "Mark Rogers" +} \ No newline at end of file diff --git a/src/test/resources/quick/unknown_namespace_metadata.json b/src/test/resources/quick/unknown_namespace_metadata.json new file mode 100644 index 0000000000..719b134a07 --- /dev/null +++ b/src/test/resources/quick/unknown_namespace_metadata.json @@ -0,0 +1,5 @@ +{ + "sys:overwritePolicy": "PRAGMATIC", + "{http://www.unknown}name": "ignored", + "{http://www.alfresco.org/model/content/1.0}author": "Used" +} \ No newline at end of file