diff --git a/.travis.yml b/.travis.yml index 25d6a30e63..1af1449510 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,8 @@ branches: only: - master - /release\/.*/ + - /feature\/.*/ + - /fix\/.*/ env: global: diff --git a/repository/src/main/java/org/alfresco/opencmis/AlfrescoCmisServiceImpl.java b/repository/src/main/java/org/alfresco/opencmis/AlfrescoCmisServiceImpl.java index 99bdc935b4..64563d9d6f 100644 --- a/repository/src/main/java/org/alfresco/opencmis/AlfrescoCmisServiceImpl.java +++ b/repository/src/main/java/org/alfresco/opencmis/AlfrescoCmisServiceImpl.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -1306,9 +1306,8 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr writer.putContent(contentStream.getStream()); } + // extract metadata and generate thumbnail asynchronously connector.extractMetadata(nodeRef); - - // generate "doclib" thumbnail asynchronously connector.createThumbnails(nodeRef, Collections.singleton("doclib")); connector.applyVersioningState(nodeRef, versioningState); @@ -1384,7 +1383,8 @@ public class AlfrescoCmisServiceImpl extends AbstractCmisService implements Alfr PropertyIds.NAME, PropertyIds.OBJECT_TYPE_ID }); connector.applyPolicies(nodeRef, type, policies); connector.applyACL(nodeRef, type, addAces, removeAces); - + + // extract metadata and generate thumbnail asynchronously connector.extractMetadata(nodeRef); connector.createThumbnails(nodeRef, Collections.singleton("doclib")); diff --git a/repository/src/main/java/org/alfresco/opencmis/CMISConnector.java b/repository/src/main/java/org/alfresco/opencmis/CMISConnector.java index fa8a6e21d5..5526998986 100644 --- a/repository/src/main/java/org/alfresco/opencmis/CMISConnector.java +++ 
b/repository/src/main/java/org/alfresco/opencmis/CMISConnector.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -1040,7 +1040,8 @@ public class CMISConnector implements ApplicationContextAware, ApplicationListen public void extractMetadata(NodeRef nodeRef) { org.alfresco.service.cmr.action.Action action = actionService.createAction(ContentMetadataExtracter.EXECUTOR_NAME); - actionService.executeAction(action, nodeRef, true, false); + action.setExecuteAsynchronously(true); + actionService.executeAction(action, nodeRef, true, true); } public SiteInfo getSite(NodeRef nodeRef) diff --git a/repository/src/main/java/org/alfresco/repo/action/AsynchronousActionExecutionQueueImpl.java b/repository/src/main/java/org/alfresco/repo/action/AsynchronousActionExecutionQueueImpl.java index a359f4b253..3be9b8dc05 100644 --- a/repository/src/main/java/org/alfresco/repo/action/AsynchronousActionExecutionQueueImpl.java +++ b/repository/src/main/java/org/alfresco/repo/action/AsynchronousActionExecutionQueueImpl.java @@ -1,56 +1,56 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ package org.alfresco.repo.action; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.Vector; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ThreadPoolExecutor; - -import org.alfresco.error.AlfrescoRuntimeException; -import org.alfresco.error.StackTraceUtil; -import org.alfresco.repo.action.AsynchronousActionExecutionQueuePolicies.OnAsyncActionExecute; -import org.alfresco.repo.policy.ClassPolicyDelegate; -import org.alfresco.repo.policy.PolicyComponent; -import org.alfresco.repo.rule.RuleServiceImpl; -import org.alfresco.repo.security.authentication.AuthenticationContext; -import org.alfresco.repo.tenant.TenantUtil; -import org.alfresco.repo.tenant.TenantUtil.TenantRunAsWork; -import org.alfresco.repo.transaction.AlfrescoTransactionSupport; -import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback; -import org.alfresco.service.cmr.action.Action; -import org.alfresco.service.cmr.action.ActionServiceException; -import org.alfresco.service.cmr.repository.NodeRef; -import org.alfresco.service.transaction.TransactionService; -import org.alfresco.util.transaction.TransactionListenerAdapter; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Vector; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ThreadPoolExecutor; + +import org.alfresco.error.AlfrescoRuntimeException; +import org.alfresco.error.StackTraceUtil; +import org.alfresco.repo.action.AsynchronousActionExecutionQueuePolicies.OnAsyncActionExecute; +import org.alfresco.repo.policy.ClassPolicyDelegate; +import org.alfresco.repo.policy.PolicyComponent; +import org.alfresco.repo.rule.RuleServiceImpl; +import org.alfresco.repo.security.authentication.AuthenticationContext; +import 
org.alfresco.repo.tenant.TenantUtil; +import org.alfresco.repo.tenant.TenantUtil.TenantRunAsWork; +import org.alfresco.repo.transaction.AlfrescoTransactionSupport; +import org.alfresco.repo.transaction.RetryingTransactionHelper.RetryingTransactionCallback; +import org.alfresco.service.cmr.action.Action; +import org.alfresco.service.cmr.action.ActionServiceException; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.transaction.TransactionService; +import org.alfresco.util.transaction.TransactionListenerAdapter; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * The asynchronous action execution queue implementation @@ -404,6 +404,9 @@ public class AsynchronousActionExecutionQueueImpl implements AsynchronousActionE } // Get the tenant the action was submitted from final String tenantId = ((ActionImpl)ActionExecutionWrapper.this.action).getTenantId(); + + // Let the executor know it is async + ((ActionImpl)ActionExecutionWrapper.this.action).setExecuteAsynchronously(true); // import the content TenantRunAsWork actionRunAs = new TenantRunAsWork() diff --git a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java index d157927d09..c2550a09e3 100644 --- a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java +++ b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ package org.alfresco.repo.action.executer; import java.io.Serializable; @@ -30,7 +30,6 @@ import java.util.List; import java.util.Map; import org.alfresco.model.ContentModel; -import org.alfresco.repo.action.executer.ActionExecuterAbstractBase; import org.alfresco.repo.content.metadata.MetadataEmbedder; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.service.cmr.action.Action; @@ -108,7 +107,8 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase return; } String mimetype = reader.getMimetype(); - MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype); + long sourceSizeInBytes = reader.getSize(); + MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype, sourceSizeInBytes); if (embedder == null) { if(logger.isDebugEnabled()) @@ -136,7 +136,7 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase try { - embedder.embed(nodeProperties, reader, writer); + embedder.embed(actionedUponNodeRef, nodeProperties, reader, writer); } catch (Throwable e) { diff --git a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java index 1752fc9775..47cba6d035 100644 --- a/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/action/executer/ContentMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -86,22 +86,23 @@ import org.apache.commons.logging.LogFactory; public class ContentMetadataExtracter extends ActionExecuterAbstractBase { private static Log logger = LogFactory.getLog(ContentMetadataExtracter.class); - + public static final String EXECUTOR_NAME = "extract-metadata"; - + private NodeService nodeService; private ContentService contentService; private DictionaryService dictionaryService; private TaggingService taggingService; private MetadataExtracterRegistry metadataExtracterRegistry; private boolean carryAspectProperties = true; - - + + private boolean enableStringTagging = false; - + // Default list of separators (when enableStringTagging is enabled) - protected List stringTaggingSeparators = Arrays.asList(",", ";", "\\|"); - + public final static List DEFAULT_STRING_TAGGING_SEPARATORS = Arrays.asList(",", ";", "\\|"); + protected List stringTaggingSeparators = DEFAULT_STRING_TAGGING_SEPARATORS; + public ContentMetadataExtracter() { } @@ -121,7 +122,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { this.contentService = contentService; } - + /** * @param dictService The DictionaryService to set. */ @@ -148,7 +149,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase /** * Whether or not aspect-related properties must be carried to the new version of the node - * + * * @param carryAspectProperties true (default) to carry all aspect-linked * properties forward. false will clean the * aspect of any unextracted values. @@ -157,12 +158,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { this.carryAspectProperties = carryAspectProperties; } - + /** * Whether or not to enable mapping of simple strings to cm:taggable tags - * - * @param enableStringTagging true find or create tags for each string - * mapped to cm:taggable. 
false (default) + * + * @param enableStringTagging true find or create tags for each string + * mapped to cm:taggable. false (default) * ignore mapping strings to tags. */ public void setEnableStringTagging(boolean enableStringTagging) @@ -172,7 +173,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase /** * List of string separators - note: all will be applied to a given string - * + * * @param stringTaggingSeparators */ public void setStringTaggingSeparators(List stringTaggingSeparators) @@ -188,14 +189,21 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase * the taggable property should still contain raw string values. *

* Mixing of NodeRefs and string values is permitted so each raw value is - * checked for a valid NodeRef representation and if so, converts to a NodeRef, + * checked for a valid NodeRef representation and if so, converts to a NodeRef, * if not, adds as a tag via the {@link TaggingService}. - * + * * @param actionedUponNodeRef The NodeRef being actioned upon * @param propertyDef the PropertyDefinition of the taggable property * @param rawValue the raw value from the metadata extracter */ protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue) + { + addTags(actionedUponNodeRef, propertyDef, rawValue, nodeService, stringTaggingSeparators, taggingService); + } + + private static void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue, + NodeService nodeService, List stringTaggingSeparators, + TaggingService taggingService) { if (rawValue == null) { @@ -231,7 +239,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef); } - tags.addAll(splitTag(tagName)); + tags.addAll(splitTag(tagName, stringTaggingSeparators)); } catch (InvalidNodeRefException e) { @@ -250,7 +258,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef); } - tags.addAll(splitTag((String)singleValue)); + tags.addAll(splitTag((String)singleValue, stringTaggingSeparators)); } } else if (singleValue instanceof NodeRef) @@ -263,7 +271,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef); } - tags.addAll(splitTag(tagName)); + tags.addAll(splitTag(tagName, stringTaggingSeparators)); } } } @@ -273,8 +281,8 @@ public class ContentMetadataExtracter 
extends ActionExecuterAbstractBase { logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef); } - - tags.addAll(splitTag((String)rawValue)); + + tags.addAll(splitTag((String)rawValue, stringTaggingSeparators)); } if (logger.isDebugEnabled()) @@ -297,6 +305,11 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } protected List splitTag(String str) + { + return splitTag(str, stringTaggingSeparators); + } + + private static List splitTag(String str, List stringTaggingSeparators) { List result = new ArrayList<>(); if ((str != null) && (!str.equals(""))) @@ -323,7 +336,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase return result; } - + /** * @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action, * NodeRef) @@ -347,7 +360,8 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase return; } String mimetype = reader.getMimetype(); - MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype); + long sourceSizeInBytes = reader.getSize(); + MetadataExtracter extracter = metadataExtracterRegistry.getExtractor(mimetype, sourceSizeInBytes); if (extracter == null) { if(logger.isDebugEnabled()) @@ -372,6 +386,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase try { modifiedProperties = extracter.extract( + actionedUponNodeRef, reader, /*OverwritePolicy.PRAGMATIC,*/ nodeProperties); @@ -408,11 +423,22 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase { return; } - + + addExtractedMetadataToNode(actionedUponNodeRef, nodeProperties, modifiedProperties, + nodeService, dictionaryService, taggingService, enableStringTagging, carryAspectProperties, + stringTaggingSeparators); + } + + public static void addExtractedMetadataToNode(NodeRef actionedUponNodeRef, Map nodeProperties, + Map modifiedProperties, + NodeService nodeService, DictionaryService dictionaryService, + TaggingService 
taggingService, boolean enableStringTagging, + boolean carryAspectProperties, List stringTaggingSeparators) + { // Check that all properties have the appropriate aspect applied Set requiredAspectQNames = new HashSet(3); Set aspectPropertyQNames = new HashSet(17); - + /** * The modified properties contain null values as well. As we are only interested * in the keys, this will force aspect aspect properties to be removed even if there @@ -432,9 +458,10 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase if (enableStringTagging && propertyContainerDef.getName().equals(ContentModel.ASPECT_TAGGABLE)) { Serializable oldValue = nodeProperties.get(propertyQName); - addTags(actionedUponNodeRef, propertyDef, oldValue); + addTags(actionedUponNodeRef, propertyDef, oldValue, + nodeService, stringTaggingSeparators, taggingService); // Replace the raw value with the created tag NodeRefs - nodeProperties.put(ContentModel.PROP_TAGS, + nodeProperties.put(ContentModel.PROP_TAGS, nodeService.getProperty(actionedUponNodeRef, ContentModel.PROP_TAGS)); } else @@ -447,7 +474,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } - + if (!carryAspectProperties) { // Remove any node properties that are defined on the aspects but were not extracted @@ -465,10 +492,14 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase } } } - + + // The following code can result in a postCommit to extract the metadata again via JavaBehaviour + // (such as ImapContentPolicy.onAddAspect). Not very efficient, but I cannot think of a way to + // avoid it that does not risk memory leaks or disabling behaviour we want. 
+ // Add all the properties to the node BEFORE we add the aspects nodeService.setProperties(actionedUponNodeRef, nodeProperties); - + // Add each of the aspects, as required for (QName requiredAspectQName : requiredAspectQNames) { diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index 78865f8768..ece13df5b5 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -25,6 +25,33 @@ */ package org.alfresco.repo.content.metadata; +import org.alfresco.api.AlfrescoPublicApi; +import org.alfresco.error.AlfrescoRuntimeException; +import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.StreamAwareContentReaderProxy; +import org.alfresco.service.cmr.dictionary.DataTypeDefinition; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.cmr.dictionary.PropertyDefinition; +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.MalformedNodeRefException; +import org.alfresco.service.cmr.repository.MimetypeService; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; +import org.alfresco.service.cmr.repository.datatype.TypeConversionException; +import 
org.alfresco.service.namespace.InvalidQNameException; +import org.alfresco.service.namespace.QName; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.springframework.beans.factory.BeanNameAware; +import org.springframework.context.ApplicationContext; +import org.springframework.context.ApplicationContextAware; +import org.springframework.extensions.surf.util.ISO8601DateFormat; + import java.io.InputStream; import java.io.Serializable; import java.lang.reflect.Array; @@ -50,32 +77,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; -import org.alfresco.api.AlfrescoPublicApi; -import org.alfresco.error.AlfrescoRuntimeException; -import org.alfresco.model.ContentModel; -import org.alfresco.repo.content.StreamAwareContentReaderProxy; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; -import org.alfresco.service.cmr.dictionary.DictionaryService; -import org.alfresco.service.cmr.dictionary.PropertyDefinition; -import org.alfresco.service.cmr.repository.ContentIOException; -import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.cmr.repository.ContentWriter; -import org.alfresco.service.cmr.repository.MalformedNodeRefException; -import org.alfresco.service.cmr.repository.MimetypeService; -import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.cmr.repository.datatype.TypeConversionException; -import org.alfresco.service.namespace.InvalidQNameException; -import org.alfresco.service.namespace.QName; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.joda.time.DateTime; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import 
org.springframework.beans.factory.BeanNameAware; -import org.springframework.context.ApplicationContext; -import org.springframework.context.ApplicationContextAware; -import org.springframework.extensions.surf.util.ISO8601DateFormat; - /** * Support class for metadata extracters that support dynamic and config-driven * mapping between extracted values and model properties. Extraction is broken @@ -131,7 +132,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac private MetadataExtracterRegistry registry; private MimetypeService mimetypeService; - private DictionaryService dictionaryService; + protected DictionaryService dictionaryService; private boolean initialized; private Set supportedMimetypes; @@ -232,6 +233,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac this.dictionaryService = dictionaryService; } + public Set getSupportedMimetypes() + { + return supportedMimetypes; + } + /** * Set the mimetypes that are supported by the extracter. * @@ -278,7 +284,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac return supportedEmbedMimetypes.contains(sourceMimetype); } - private boolean isEnabled(String mimetype) + protected boolean isEnabled(String mimetype) { return properties == null || mimetypeService == null || (getBooleanProperty(beanName+".enabled", true) && @@ -714,10 +720,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { return readMappingProperties(mappingProperties.entrySet()); } - + /** * A utility method to convert mapping properties entries to the Map form. 
- * + * * @see #setMappingProperties(Properties) */ private Map> readMappingProperties(Set> mappingPropertiesEntries) @@ -765,8 +771,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { throw new AlfrescoRuntimeException( "No prefix mapping for extracter property mapping: \n" + - " Extracter: " + this + "\n" + - " Mapping: " + entry); + " Extracter: " + this + "\n" + + " Mapping: " + entry); } qnameStr = QName.NAMESPACE_BEGIN + uri + QName.NAMESPACE_END + suffix; } @@ -780,8 +786,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { throw new AlfrescoRuntimeException( "Can't create metadata extracter property mapping: \n" + - " Extracter: " + this + "\n" + - " Mapping: " + entry); + " Extracter: " + this + "\n" + + " Mapping: " + entry); } } if (logger.isTraceEnabled()) @@ -1132,7 +1138,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac @Override public final Map extract(ContentReader reader, Map destination) { - return extract(reader, this.overwritePolicy, destination, this.mapping); + return extract(null, reader, this.overwritePolicy, destination, this.mapping); } /** @@ -1144,7 +1150,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac OverwritePolicy overwritePolicy, Map destination) { - return extract(reader, overwritePolicy, destination, this.mapping); + return extract(null, reader, overwritePolicy, destination, this.mapping); } /** @@ -1156,6 +1162,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac OverwritePolicy overwritePolicy, Map destination, Map> mapping) + { + return extract(null, reader, overwritePolicy, destination, mapping); + } + + /** + * {@inheritDoc} + */ + @Override + public Map extract(NodeRef nodeRef, ContentReader reader, Map destination) + { + return extract(nodeRef, reader, overwritePolicy, destination, mapping); + } + + /** + * {@inheritDoc} + */ + @Override + public 
Map extract( + NodeRef nodeRef, + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination, + Map> mapping) { // Done if (logger.isDebugEnabled()) @@ -1182,12 +1211,13 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // Check that the content has some meat if (reader.getSize() > 0 && reader.exists()) { - rawMetadata = extractRaw(reader, getLimits(reader.getMimetype())); + rawMetadata = extractRaw(nodeRef, reader, getLimits(reader.getMimetype())); } else { rawMetadata = new HashMap(1); } + // Convert to system properties (standalone) Map systemProperties = mapRawToSystem(rawMetadata); // Convert the properties according to the dictionary types @@ -1215,7 +1245,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac // the current mime type is plausible String typeErrorMessage = null; String differentType = null; - if(mimetypeService != null) + if (mimetypeService != null) { differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader()); } @@ -1224,7 +1254,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac logger.info("Unable to verify mimetype of " + reader.getReader() + " as no MimetypeService available to " + getClass().getName()); } - if(differentType != null) + if (differentType != null) { typeErrorMessage = "\n" + " claimed mime type: " + reader.getMimetype() + "\n" + @@ -1285,6 +1315,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac Map properties, ContentReader reader, ContentWriter writer) + { + embed(null, properties, reader, writer); + } + + /** + * {@inheritDoc} + */ + @Override + public void embed( + NodeRef nodeRef, + Map properties, + ContentReader reader, + ContentWriter writer) { // Done if (logger.isDebugEnabled()) @@ -1307,7 +1350,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac try { - embedInternal(mapSystemToRaw(properties), reader, writer); + 
embedInternal(nodeRef, mapSystemToRaw(properties), reader, writer); if(logger.isDebugEnabled()) { logger.debug("Embedded Metadata into " + writer); @@ -1472,7 +1515,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac * @return Returns a modified map of properties that have been converted. */ @SuppressWarnings("unchecked") - private Map convertSystemPropertyValues(Map systemProperties) + protected Map convertSystemPropertyValues(Map systemProperties) { Map convertedProperties = new HashMap(systemProperties.size() + 7); for (Map.Entry entry : systemProperties.entrySet()) @@ -1500,6 +1543,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { convertedPropertyValue = propertyValue; } + else if (propertyValue instanceof Long) + { + convertedPropertyValue = new Date((Long)propertyValue); + } else if (propertyValue instanceof Collection) { convertedPropertyValue = (Serializable) makeDates((Collection) propertyValue); @@ -1518,7 +1565,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { StringBuilder mesg = new StringBuilder(); mesg.append("Unable to convert Date property: ").append(propertyQName) - .append(", value: ").append(propertyValue).append(", type: ").append(propertyTypeDef.getName()); + .append(", value: ").append(propertyValue).append(" (") + .append(propertyValue.getClass().getSimpleName()) + .append("), type: ").append(propertyTypeDef.getName()); logger.warn(mesg.toString()); } } @@ -1688,6 +1737,21 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } } + // Try milliseconds. This was introduced with T-Engine extractors. Previously Dates would have been + // created and then converted to a Alfresco Date property in a single operation. T-Engines do not know + // about Alfresco Date property formats. 
+ try + { + long ms = Long.parseLong(dateStr); + if (Long.toString(ms).equals(dateStr)) + { + date = new Date(ms); + } + } + catch (NumberFormatException ignore) + { + } + if (date == null) { // Still no luck @@ -1982,7 +2046,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac return limits; } - + /** * Callable wrapper for the * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method @@ -2026,7 +2090,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac /** * Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits} - * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)} + * {@link AbstractMappingMetadataExtracter#extractRaw(NodeRef, ContentReader, MetadataExtracterLimits)} */ private class LimitExceededException extends Exception { @@ -2047,19 +2111,17 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac *

* If no timeout limit is defined or is unlimited (-1), * the extractRaw method is called directly. - * + * + * @param nodeRef the node being acted on. * @param reader the document to extract the values from. This stream provided by * the reader must be closed if accessed directly. * @param limits the limits to impose on the extraction * @return Returns a map of document property values keyed by property name. * @throws Throwable All exception conditions can be handled. */ - private Map extractRaw( + private Map extractRaw(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) throws Throwable { - FutureTask> task = null; - StreamAwareContentReaderProxy proxiedReader = null; - if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE) { throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB"); @@ -2084,7 +2146,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount()); } } - + + return extractRawInThread(nodeRef, reader, limits); + } + + protected Map extractRawInThread(NodeRef nodeRef, ContentReader reader, + MetadataExtracterLimits limits) + throws Throwable + { + FutureTask> task = null; + StreamAwareContentReaderProxy proxiedReader = null; try { proxiedReader = new StreamAwareContentReaderProxy(reader); @@ -2119,14 +2190,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac } finally { - int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet(); - if (logger.isDebugEnabled()) - { - logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount); - } + extractRawThreadFinished(); } } - + + protected void extractRawThreadFinished() + { + int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet(); + if (logger.isDebugEnabled()) + { + logger.debug("Extraction finalized. 
Remaining concurrent extraction : " + totalDocCount); + } + } + /** * Override to provide the raw extracted metadata values. An extracter should extract * as many of the available properties as is realistically possible. Even if the @@ -2162,6 +2238,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac */ protected abstract Map extractRaw(ContentReader reader) throws Throwable; + protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) throws Throwable + { + embedInternal(metadata, reader, writer); + } + /** * Override to embed metadata values. An extracter should embed * as many of the available properties as is realistically possible. Even if the @@ -2182,4 +2263,46 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { // TODO make this an abstract method once more extracters support embedding } + + // Originally in TikaPoweredMetadataExtracter + public static Map convertMetadataToStrings(Map properties) + { + Map propertiesAsStrings = new HashMap<>(); + for (String metadataKey : properties.keySet()) + { + Serializable value = properties.get(metadataKey); + if (value == null) + { + continue; + } + if (value instanceof Collection) + { + for (Object singleValue : (Collection) value) + { + try + { + // Convert to a string value + propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); + } + catch (TypeConversionException e) + { + TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + } + } + } + else + { + try + { + // Convert to a string value + propertiesAsStrings.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); + } + catch (TypeConversionException e) + { + TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + } + } + } + return propertiesAsStrings; + } } diff --git 
a/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java new file mode 100644 index 0000000000..f2d9469bf6 --- /dev/null +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/AsynchronousExtractor.java @@ -0,0 +1,537 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . 
+ * #L% + */ +package org.alfresco.repo.content.metadata; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.alfresco.model.ContentModel; +import org.alfresco.repo.action.executer.ContentMetadataExtracter; +import org.alfresco.repo.content.transform.TransformerDebug; +import org.alfresco.repo.rendition2.RenditionService2; +import org.alfresco.repo.rendition2.TransformDefinition; +import org.alfresco.repo.security.authentication.AuthenticationUtil; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.tagging.TaggingService; +import org.alfresco.service.namespace.NamespaceException; +import org.alfresco.service.namespace.NamespacePrefixResolver; +import org.alfresco.service.namespace.QName; +import org.alfresco.service.transaction.TransactionService; +import org.alfresco.transform.client.registry.TransformServiceRegistry; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.springframework.dao.ConcurrencyFailureException; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.StringJoiner; +import java.util.concurrent.ExecutorService; + +import static org.alfresco.repo.rendition2.RenditionDefinition2.TIMEOUT; +import static org.alfresco.repo.rendition2.TransformDefinition.getTransformName; + +/** + * Requests an 
extract of metadata via a remote async transform using + * {@link RenditionService2#transform(NodeRef, TransformDefinition)}. The properties that will be extracted are defined + * by the transform. This allows out of process metadata extracts to be defined without the need to apply an AMP. + * The actual transform is a request to go from the source mimetype to {@code "alfresco-metadata-extract"}. The + * resulting transform is a Map in JSON of properties and values to be set on the source node. + *

+ * As with other sub-classes of {@link AbstractMappingMetadataExtracter} it also supports embedding of metadata in + * a source node. In this case the remote async transform states that it supports a transform from a source mimetype + * to {@code "alfresco-metadata-embed"}. The resulting transform is a replacement for the content of the node. + * + * @author adavis + */ +public class AsynchronousExtractor extends AbstractMappingMetadataExtracter +{ + private static final String EXTRACT = "extract"; + private static final String EMBED = "embed"; + private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract"; + private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed"; + private static final String METADATA = "metadata"; + private static final Map EMPTY_METADATA = Collections.emptyMap(); + + private final ObjectMapper jsonObjectMapper = new ObjectMapper(); + + private NodeService nodeService; + private NamespacePrefixResolver namespacePrefixResolver; + private TransformerDebug transformerDebug; + private RenditionService2 renditionService2; + private ContentService contentService; + private TransactionService transactionService; + private TransformServiceRegistry transformServiceRegistry; + private TaggingService taggingService; + + public void setNodeService(NodeService nodeService) + { + this.nodeService = nodeService; + } + + public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver) + { + this.namespacePrefixResolver = namespacePrefixResolver; + } + + public void setTransformerDebug(TransformerDebug transformerDebug) + { + this.transformerDebug = transformerDebug; + } + + public void setRenditionService2(RenditionService2 renditionService2) + { + this.renditionService2 = renditionService2; + } + + public void setContentService(ContentService contentService) + { + this.contentService = contentService; + } + + public void setTransactionService(TransactionService transactionService) + { + 
this.transactionService = transactionService; + } + + public void setTransformServiceRegistry(TransformServiceRegistry transformServiceRegistry) + { + this.transformServiceRegistry = transformServiceRegistry; + } + + public void setTaggingService(TaggingService taggingService) + { + this.taggingService = taggingService; + } + + @Override + protected Map> getDefaultMapping() + { + return Collections.emptyMap(); // Mappings are done by the transform, but a non null value must be returned. + } + + public boolean isSupported(String sourceMimetype, long sourceSizeInBytes) + { + return isEnabled(sourceMimetype) && isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EXTRACT); + } + + public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes) + { + return isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EMBED); + } + + private boolean isSupported(String sourceMimetype, long sourceSizeInBytes, String targetMimetype) + { + return transformServiceRegistry.isSupported(sourceMimetype, sourceSizeInBytes, targetMimetype, Collections.emptyMap(), targetMimetype); + } + + public static boolean isMetadataExtractMimetype(String targetMimetype) + { + return MIMETYPE_METADATA_EXTRACT.equals(targetMimetype); + } + + public static boolean isMetadataEmbedMimetype(String targetMimetype) + { + return MIMETYPE_METADATA_EMBED.equals(targetMimetype); + } + + /** + * Returns a file extension used as the target in a transform. The normal extension is changed if the + * {@code targetMimetype} is an extraction or embedding type. + * + * @param targetMimetype the target mimetype + * @param sourceExtension normal source extension + * @param targetExtension current target extension (normally {@code "bin" for embedding and extraction}) + * @return the extension to be used. + */ + public static String getExtension(String targetMimetype, String sourceExtension, String targetExtension) + { + return isMetadataExtractMimetype(targetMimetype) + ? 
"json" + : isMetadataEmbedMimetype(targetMimetype) + ? sourceExtension + : targetExtension; + } + + /** + * Returns a rendition name used in {@link TransformerDebug}. The normal name is changed if it is a metadata + * extract or embed. The name in this case is actually the {@code "alfresco-metadata-extract/"} + * {@code "alfresco-metadata-embed/"} followed by the source mimetype. + * + * @param renditionName the normal name, or a special one based on the source mimetype and a prefixed. + * @return the renditionName to be used. + */ + public static String getRenditionName(String renditionName) + { + String transformName = getTransformName(renditionName); + return transformName != null && transformName.startsWith(MIMETYPE_METADATA_EXTRACT) + ? "metadataExtract" + : transformName != null && transformName.startsWith(MIMETYPE_METADATA_EMBED) + ? "metadataEmbed" + : renditionName; + } + + @Override + protected void checkIsSupported(ContentReader reader) + { + // Just return, as we have already checked when this extractor was selected. + } + + @Override + protected void checkIsEmbedSupported(ContentWriter writer) + { + // Just return, as we have already checked when this embedder was selected. + } + + @Override + // Not called. Overloaded method with the NodeRef is called. 
+ protected Map extractRaw(ContentReader reader) + { + return null; + } + + @Override + protected Map extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits) + throws Throwable + { + long timeoutMs = limits.getTimeoutMs(); + Map options = Collections.singletonMap(TIMEOUT, Long.toString(timeoutMs)); + transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, options); + return EMPTY_METADATA; + } + + @Override + protected void embedInternal(NodeRef nodeRef, Map metadata, ContentReader reader, ContentWriter writer) + { + String metadataAsJson = metadataToString(metadata); + Map options = Collections.singletonMap(METADATA, metadataAsJson); + transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EMBED, EMBED, options); + } + + private void transformInBackground(NodeRef nodeRef, ContentReader reader, String targetMimetype, + String embedOrExtract, Map options) + { + ExecutorService executorService = getExecutorService(); + executorService.execute(() -> + { + try + { + transform(nodeRef, reader, targetMimetype, embedOrExtract, options); + } + finally + { + extractRawThreadFinished(); + } + }); + } + + private void transform(NodeRef nodeRef, ContentReader reader, String targetMimetype, + String embedOrExtract, Map options) + { + String sourceMimetype = reader.getMimetype(); + + // This needs to be specific to each source mimetype and the extract or embed as the name + // is used to cache the transform name that will be used. 
+ String transformName = targetMimetype + '/' + sourceMimetype; + + TransformDefinition transformDefinition = new TransformDefinition(transformName, targetMimetype, + options, null, null, null); + + if (logger.isTraceEnabled()) + { + StringJoiner sj = new StringJoiner("\n"); + sj.add("Request " + embedOrExtract + " transform on " + nodeRef); + options.forEach((k,v)->sj.add(" "+k+"="+v)); + logger.trace(sj); + } + + AuthenticationUtil.runAs( + (AuthenticationUtil.RunAsWork) () -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + try + { + renditionService2.transform(nodeRef, transformDefinition); + } + catch (IllegalArgumentException e) + { + if (e.getMessage().endsWith("The supplied sourceNodeRef "+nodeRef+" does not exist.")) + { + throw new ConcurrencyFailureException( + "The original transaction may not have finished. " + e.getMessage()); + } + } + return null; + }), AuthenticationUtil.getSystemUserName()); + } + + public void setMetadata(NodeRef nodeRef, InputStream transformInputStream) + { + if (logger.isTraceEnabled()) + { + logger.trace("Update metadata on " + nodeRef); + } + + Map metadata = readMetadata(transformInputStream); + if (metadata == null) + { + return; // Error state. + } + + // Remove well know entries from the map that drive how the real metadata is applied. + OverwritePolicy overwritePolicy = removeOverwritePolicy(metadata, "sys:overwritePolicy", OverwritePolicy.PRAGMATIC); + Boolean enableStringTagging = removeBoolean(metadata, "sys:enableStringTagging", false); + Boolean carryAspectProperties = removeBoolean(metadata, "sys:carryAspectProperties", true); + List stringTaggingSeparators = removeTaggingSeparators(metadata, "sys:stringTaggingSeparators", + ContentMetadataExtracter.DEFAULT_STRING_TAGGING_SEPARATORS); + if (overwritePolicy == null || + enableStringTagging == null || + carryAspectProperties == null || + stringTaggingSeparators == null) + { + return; // Error state. 
+ } + + AuthenticationUtil.runAsSystem((AuthenticationUtil.RunAsWork) () -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + // Based on: AbstractMappingMetadataExtracter.extract + Map nodeProperties = nodeService.getProperties(nodeRef); + // Convert to system properties (standalone) + Map systemProperties = convertKeysToQNames(metadata); + // Convert the properties according to the dictionary types + systemProperties = convertSystemPropertyValues(systemProperties); + // There is no last filter in the AsynchronousExtractor. + // Now use the proper overwrite policy + Map changedProperties = overwritePolicy.applyProperties(systemProperties, nodeProperties); + + // Based on: ContentMetadataExtracter.executeImpl + // If none of the properties where changed, then there is nothing more to do + if (changedProperties.size() == 0) + { + return null; + } + boolean transformerDebugEnabled = transformerDebug.isEnabled(); + boolean debugEnabled = logger.isDebugEnabled(); + if (transformerDebugEnabled || debugEnabled) + { + for (Map.Entry entry : changedProperties.entrySet()) + { + QName qname = entry.getKey(); + Serializable value = entry.getValue(); + String prefixString = qname.toPrefixString(namespacePrefixResolver); + String debugMessage = prefixString + "=" + (value == null ? 
"" : value); + if (transformerDebugEnabled) + { + transformerDebug.debugUsingPreviousReference(" "+debugMessage); + } + if (debugEnabled) + { + logger.debug(debugMessage); + } + } + } + ContentMetadataExtracter.addExtractedMetadataToNode(nodeRef, nodeProperties, changedProperties, + nodeService, dictionaryService, taggingService, + enableStringTagging, carryAspectProperties, stringTaggingSeparators); + + if (logger.isTraceEnabled()) + { + logger.trace("Extraction of Metadata from " + nodeRef + " complete " + changedProperties); + } + + return null; + }, false, true)); + } + + private Map readMetadata(InputStream transformInputStream) + { + try + { + TypeReference> typeRef = new TypeReference>() {}; + return jsonObjectMapper.readValue(transformInputStream, typeRef); + } + catch (IOException e) + { + logger.error("Failed to read metadata from transform result", e); + return null; + } + } + + private String metadataToString(Map metadata) + { + Map metadataAsStrings = AbstractMappingMetadataExtracter.convertMetadataToStrings(metadata); + try + { + return jsonObjectMapper.writeValueAsString(metadataAsStrings); + } + catch (JsonProcessingException e) + { + logger.error("Failed to save metadata as Json", e); + return null; + } + } + + private OverwritePolicy removeOverwritePolicy(Map map, String key, OverwritePolicy defaultValue) + { + Serializable value = map.remove(key); + if (value == null) + { + return defaultValue; + } + try + { + return OverwritePolicy.valueOf((String)value); + } + catch (IllegalArgumentException|ClassCastException e) + { + logger.error(key + "=" + value + " is invalid"); + return null; + } + } + + private Boolean removeBoolean(Map map, Serializable key, boolean defaultValue) + { + @SuppressWarnings("SuspiciousMethodCalls") Serializable value = map.remove(key); + if (value != null && + (!(value instanceof String) || + (!(Boolean.FALSE.toString().equals(value) || Boolean.TRUE.toString().equals(value))))) + { + logger.error(key + "=" + value + " is 
invalid. Must be " + Boolean.TRUE + " or " + Boolean.FALSE); + return null; // no flexibility of parseBoolean(...). It is just invalid + } + return value == null ? defaultValue : Boolean.parseBoolean((String)value); + } + + private List removeTaggingSeparators(Map map, String key, List defaultValue) + { + Serializable value = map.remove(key); + if (value == null) + { + return defaultValue; + } + if (!(value instanceof String)) + { + logger.error(key + "=" + value + " is invalid."); + return null; + } + + List list = new ArrayList<>(); + try (CSVParser parser = CSVParser.parse((String)value, CSVFormat.RFC4180)) + { + Iterator iterator = parser.iterator(); + CSVRecord record = iterator.next(); + if (iterator.hasNext()) + { + logger.error(key + "=" + value + " is invalid. Should only have one record"); + return null; + } + record.forEach(list::add); + } + catch (IOException|NoSuchElementException e) + { + logger.error(key + "=" + value + " is invalid. Must be a CSV using CSVFormat.RFC4180"); + return null; + } + return list; + } + + private Map convertKeysToQNames(Map documentMetadata) + { + Map properties = new HashMap<>(); + for (Map.Entry entry : documentMetadata.entrySet()) + { + String key = entry.getKey(); + Serializable value = entry.getValue(); + try + { + QName qName = QName.createQName(key); + try + { + qName.toPrefixString(namespacePrefixResolver); + properties.put(qName, value); + } + catch (NamespaceException e) + { + logger.error("Error unregistered namespace in " + qName); + } + } + catch (NamespaceException e) + { + logger.error("Error creating qName from "+key); + } + } + return properties; + } + + public void setEmbeddedMetadata(NodeRef nodeRef, InputStream transformInputStream) + { + if (logger.isDebugEnabled()) + { + logger.debug("Update of content to include metadata on " + nodeRef); + } + AuthenticationUtil.runAsSystem(() -> + transactionService.getRetryingTransactionHelper().doInTransaction(() -> + { + try + { + // Set or replace content + 
ContentReader reader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + String mimetype = reader.getMimetype(); + String encoding = reader.getEncoding(); + ContentWriter writer = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + writer.setMimetype(mimetype); + writer.setEncoding(encoding); + writer.putContent(transformInputStream); + + if (logger.isTraceEnabled()) + { + logger.trace("Embedded Metadata on " + nodeRef + " complete"); + } + } + catch (Exception e) + { + logger.error("Failed to copy embedded metadata transform InputStream into " + nodeRef); + throw e; + } + + return null; + }, false, true)); + } +} diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java index ff164ca105..691a5ac707 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java index 5cb216de90..61783333ad 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracter.java @@ -25,19 +25,14 @@ */ package org.alfresco.repo.content.metadata; -import java.io.Serializable; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.alfresco.repo.content.MimetypeMap; -import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; -import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.util.PropertyCheck; +import java.io.Serializable; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + /** * @deprecated OOTB extractors are being moved to T-Engines. 
* diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java index 3646e4864b..0a8a1e1ab0 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/JodConverterMetadataExtracterWorker.java @@ -35,7 +35,6 @@ import java.util.HashMap; import java.util.Map; import org.alfresco.repo.content.JodConverter; -import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.MimetypeService; import org.alfresco.util.TempFileProvider; @@ -60,7 +59,17 @@ import com.sun.star.util.XCloseable; import com.sun.star.util.XRefreshable; /** + * Extracts values from Open Office documents into the following: + *

+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   description:            --      cm:description
+ * 
+ * + * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 * @deprecated OOTB extractors are being moved to T-Engines. + * + * @author Neil McErlean */ @Deprecated public class JodConverterMetadataExtracterWorker implements diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java index 3fbba6cf74..79eed186a1 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataEmbedder.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. 
Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ package org.alfresco.repo.content.metadata; import java.io.Serializable; @@ -33,6 +33,7 @@ import org.alfresco.repo.content.ContentWorker; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.namespace.QName; /** @@ -69,5 +70,18 @@ public interface MetadataEmbedder extends ContentWorker { */ public void embed(Map properties, ContentReader reader, ContentWriter writer) throws ContentIOException; - + /** + * Identical to {@link #embed(Map, ContentReader, ContentWriter)} but with the addition of the + * {@code NodeRef} being acted on. By default, the method without the {@code NodeRef} is called. + * + * @param nodeRef the node being acted on. 
+ * @param properties the model properties to embed + * @param reader the reader for the original source content file + * @param writer the writer for the content after metadata has been embedded + * @throws ContentIOException + */ + public default void embed(NodeRef nodeRef, Map properties, ContentReader reader, ContentWriter writer) throws ContentIOException + { + embed(properties, reader, writer); + } } diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java index 051184fd10..054c0077df 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracter.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see . - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. 
+ * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. + * #L% + */ /* * Copyright (C) 2005 Jesper Steen Møller * @@ -52,6 +52,7 @@ import org.alfresco.api.AlfrescoPublicApi; import org.alfresco.repo.content.ContentWorker; import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.namespace.NamespaceService; import org.alfresco.service.namespace.QName; @@ -402,4 +403,46 @@ public interface MetadataExtracter extends ContentWorker OverwritePolicy overwritePolicy, Map destination, Map> mapping); + + /** + * Identical to {@link #extract(ContentReader, Map)} but with the addition of the {@code NodeRef} being acted on. + * By default, the method without the {@code NodeRef} is called. + * + * @param nodeRef the node being acted on. + * @param reader the source of the content + * @param destination the map of properties to populate (essentially a return value) + * @return Returns a map of all properties on the destination map that were + * added or modified. If the return map is empty, then no properties + * were modified.
+ * @throws ContentIOException if a detectable error occurs + */ + public default Map extract(NodeRef nodeRef, ContentReader reader, Map destination) + { + return extract(reader, destination); + } + + /** + * Identical to {@link #extract(ContentReader, OverwritePolicy, Map, Map)} but with the addition of the + * {@code NodeRef} being acted on. By default, the method without the {@code NodeRef} is called. + * + * @param nodeRef the node being acted on. + * @param reader the source of the content + * @param overwritePolicy the policy stipulating how the system properties must be + * overwritten if present + * @param destination the map of properties to populate (essentially a return value) + * @param mapping a mapping of document-specific properties to system properties. + * @return Returns a map of all properties on the destination map that were + * added or modified. If the return map is empty, then no properties + * were modified. + * @throws ContentIOException if a detectable error occurs + */ + public default Map extract( + NodeRef nodeRef, + ContentReader reader, + OverwritePolicy overwritePolicy, + Map destination, + Map> mapping) + { + return extract(reader, overwritePolicy, destination, mapping); + } } diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java index 945cd49781..0db88f7400 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. 
Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2016 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L% + */ /* * Copyright (C) 2005-2012 Jesper Steen Møller * @@ -73,18 +73,22 @@ public class MetadataExtracterRegistry private List extracters; private Map> extracterCache; private Map> embedderCache; + private AsynchronousExtractor asynchronousExtractor; /** Controls read access to the cache */ private Lock extracterCacheReadLock; /** controls write access to the cache */ private Lock extracterCacheWriteLock; + private boolean asyncExtractEnabled = true; + private boolean asyncEmbedEnabled = true; + public MetadataExtracterRegistry() { // initialise lists - extracters = new ArrayList(10); - extracterCache = new HashMap>(17); - embedderCache = new HashMap>(17); + extracters = new ArrayList<>(11); + extracterCache = new HashMap<>(18); + embedderCache = new HashMap<>(18); // create lock objects for access to the cache ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock(); @@ -125,7 +129,14 @@ public class MetadataExtracterRegistry extracterCacheWriteLock.lock(); try { - extracters.add(extracter); + if (extracter instanceof AsynchronousExtractor) + { + asynchronousExtractor = (AsynchronousExtractor)extracter; + } + else + { + extracters.add(extracter); + } extracterCache.clear(); embedderCache.clear(); } @@ -135,16 +146,42 @@ public class MetadataExtracterRegistry } } + public void setAsyncExtractEnabled(boolean asyncExtractEnabled) + { + this.asyncExtractEnabled = asyncExtractEnabled; + } + + public void setAsyncEmbedEnabled(boolean asyncEmbedEnabled) + { + this.asyncEmbedEnabled = asyncEmbedEnabled; + } + + /** + * Returns the {@link AsynchronousExtractor} if it is able to perform the extraction and is enabled. Failing that it + * calls {@link #getExtracter(String)}. + * + * @param sourceSizeInBytes size of the source content. + * @param sourceMimetype the source MIMETYPE of the extraction + * @return Returns a metadata extractor that can extract metadata from the chosen MIME type. 
+ */ + public MetadataExtracter getExtractor(String sourceMimetype, long sourceSizeInBytes) + { + return asyncExtractEnabled && asynchronousExtractor != null && + asynchronousExtractor.isSupported(sourceMimetype, sourceSizeInBytes) + ? asynchronousExtractor + : getExtracter(sourceMimetype); + } + /** * Gets the best metadata extracter. This is a combination of the most * reliable and the most performant extracter. - *

- * The result is cached for quicker access next time. - * - * @param sourceMimetype the source MIME of the extraction - * @return Returns a metadata extracter that can extract metadata from the - * chosen MIME type. - */ + *

+ * The result is cached for quicker access next time. + * + * @param sourceMimetype the source MIME of the extraction + * @return Returns a metadata extracter that can extract metadata from the + * chosen MIME type. + */ public MetadataExtracter getExtracter(String sourceMimetype) { logger.debug("Get extractors for " + sourceMimetype); @@ -202,18 +239,18 @@ public class MetadataExtracterRegistry } private String getName(MetadataExtracter extractor) - { - if (extractor == null) - { - return null; - } - else if (extractor instanceof AbstractMappingMetadataExtracter) - { - return ((AbstractMappingMetadataExtracter)extractor).getBeanName(); - } - else - { - return extractor.getClass().getSimpleName(); + { + if (extractor == null) + { + return null; + } + else if (extractor instanceof AbstractMappingMetadataExtracter) + { + return ((AbstractMappingMetadataExtracter)extractor).getBeanName(); + } + else + { + return extractor.getClass().getSimpleName(); } } @@ -222,48 +259,64 @@ public class MetadataExtracterRegistry * @return Returns a set of extractors that will work for the given mimetype */ private List findBestExtracters(String sourceMimetype) - { - if (logger.isDebugEnabled()) + { + if (logger.isDebugEnabled()) { - logger.debug("Finding extractors for " + sourceMimetype); + logger.debug("Finding extractors for " + sourceMimetype); } - List extractors = new ArrayList(1); + List extractors = new ArrayList<>(1); for (MetadataExtracter extractor : extracters) { if (!extractor.isSupported(sourceMimetype)) { - // extraction not achievable - if (logger.isDebugEnabled()) + // extraction not achievable + if (logger.isDebugEnabled()) { - logger.debug("Find unsupported: "+getName(extractor)); + logger.debug("Find unsupported: "+getName(extractor)); } continue; - } - if (logger.isDebugEnabled()) + } + if (logger.isDebugEnabled()) { - logger.debug("Find supported: "+getName(extractor)); + logger.debug("Find supported: "+getName(extractor)); } extractors.add(extractor); - } - if 
(logger.isDebugEnabled()) + } + if (logger.isDebugEnabled()) { - logger.debug("Find returning: "+extractors); + logger.debug("Find returning: "+extractors); } return extractors; } - + + /** + * Returns the {@link AsynchronousExtractor} if it is able to perform the embedding and is enabled. Failing that it + * calls {@link #getEmbedder(String)}. + * + * @param sourceSizeInBytes size of the source content. + * @param sourceMimetype the source MIMETYPE of the embedding + * @return Returns a metadata embedder that can embed metadata in the chosen MIME type. + */ + public MetadataEmbedder getEmbedder(String sourceMimetype, long sourceSizeInBytes) + { + return asyncEmbedEnabled && asynchronousExtractor != null && + asynchronousExtractor.isEmbedderSupported(sourceMimetype, sourceSizeInBytes) + ? asynchronousExtractor + : getEmbedder(sourceMimetype); + } + /** * Gets the best metadata embedder. This is a combination of the most * reliable and the most performant embedder. - *

- * The result is cached for quicker access next time. - * - * @param sourceMimetype the source MIME of the extraction - * @return Returns a metadata embedder that can embed metadata in the - * chosen MIME type. - */ + *

+ * The result is cached for quicker access next time. + * + * @param sourceMimetype the source MIME of the extraction + * @return Returns a metadata embedder that can embed metadata in the + * chosen MIME type. + */ public MetadataEmbedder getEmbedder(String sourceMimetype) { List embedders = null; diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java index 8d2da83e7c..25034754c6 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index 5f8e89cf7c..0c89f63cfe 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -31,7 +31,6 @@ import java.io.OutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -45,8 +44,6 @@ import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.filestore.FileContentReader; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentWriter; -import 
org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; -import org.alfresco.service.cmr.repository.datatype.TypeConversionException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.tika.embedder.Embedder; @@ -476,43 +473,11 @@ public abstract class TikaPoweredMetadataExtracter { return; } - + + Map metadataAsStrings = convertMetadataToStrings(properties); Metadata metadataToEmbed = new Metadata(); - for (String metadataKey : properties.keySet()) - { - Serializable value = properties.get(metadataKey); - if (value == null) - { - continue; - } - if (value instanceof Collection) - { - for (Object singleValue : (Collection) value) - { - try - { - // Convert to a string value for Tika - metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); - } - catch (TypeConversionException e) - { - logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); - } - } - } - else - { - try - { - // Convert to a string value for Tika - metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); - } - catch (TypeConversionException e) - { - logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); - } - } - } + metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v)); + InputStream inputStream = getInputStream(reader); OutputStream outputStream = writer.getContentOutputStream(); embedder.embed(metadataToEmbed, inputStream, outputStream, null); diff --git a/repository/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java b/repository/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java index 775e6ee9f6..bd8cc89d7f 100644 --- a/repository/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java +++ b/repository/src/main/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java @@ -60,6 +60,7 @@ import org.alfresco.util.PropertyCheck; 
* @since 2.1 * @author Derek Hulley */ +@Deprecated public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter { public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_XML }; diff --git a/repository/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java b/repository/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java index c4ed028550..8e0ffe5e5f 100644 --- a/repository/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java +++ b/repository/src/main/java/org/alfresco/repo/content/transform/AdminUiTransformerDebug.java @@ -249,7 +249,7 @@ public class AdminUiTransformerDebug extends TransformerDebug implements Applica boolean firstTransformer) { String mimetypes = firstTransformer - ? getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype) + ? getSourceAndTargetExt(sourceMimetype, targetMimetype) : spaces(10); char c = (char)('a'+transformerCount); log(mimetypes+ diff --git a/repository/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java b/repository/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java index db85c40e9b..7831333627 100644 --- a/repository/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java +++ b/repository/src/main/java/org/alfresco/repo/content/transform/LegacyTransformerDebug.java @@ -25,6 +25,7 @@ */ package org.alfresco.repo.content.transform; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.TransformationOptions; import org.alfresco.transform.client.registry.SupportedTransform; @@ -99,10 +100,9 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug public void blacklistTransform(ContentTransformer transformer, String sourceMimetype, String targetMimetype, TransformationOptions options) { - log("Blacklist 
"+getName(transformer)+" "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)); + log("Blacklist "+getName(transformer)+" "+ getSourceAndTargetExt(sourceMimetype, targetMimetype)); } - @Deprecated public void pushTransform(ContentTransformer transformer, String fromUrl, String sourceMimetype, String targetMimetype, long sourceSize, TransformationOptions options) @@ -265,7 +265,10 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug } String i = Integer.toString(mimetypePairCount); String priority = gePriority(transformer, sourceMimetype, targetMimetype); - log(spaces(5-i.length())+mimetypePairCount+") "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)+ + String sourceExt = getMimetypeExt(sourceMimetype); + String targetExt = getMimetypeExt(targetMimetype); + targetExt = AsynchronousExtractor.getExtension(targetMimetype, sourceExt, targetExt); + log(spaces(5-i.length())+mimetypePairCount+") "+ sourceExt + targetExt + priority + ' '+fileSize((maxSourceSizeKBytes > 0) ? maxSourceSizeKBytes*1024 : maxSourceSizeKBytes)+ (maxSourceSizeKBytes == 0 ? " disabled" : "")); diff --git a/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java b/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java index ffefc4ba15..ef0ab8b3d5 100644 --- a/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java +++ b/repository/src/main/java/org/alfresco/repo/content/transform/LocalTransformImpl.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -25,6 +25,7 @@ */ package org.alfresco.repo.content.transform; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.rendition2.RenditionDefinition2; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentWriter; @@ -188,6 +189,7 @@ public class LocalTransformImpl extends AbstractLocalTransform args[i++] = "targetMimetype"; args[i++] = targetMimetype; + targetExtension = AsynchronousExtractor.getExtension(targetMimetype, sourceExtension, targetExtension); remoteTransformerClient.request(reader, writer, sourceMimetype, sourceExtension, targetExtension, timeoutMs, log, args); } diff --git a/repository/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java b/repository/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java index c13e76f949..bd6d656f89 100644 --- a/repository/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java +++ b/repository/src/main/java/org/alfresco/repo/content/transform/TransformerDebug.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -26,6 +26,7 @@ package org.alfresco.repo.content.transform; import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.service.cmr.repository.MimetypeService; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeService; @@ -68,6 +69,7 @@ public class TransformerDebug protected Log logger; protected NodeService nodeService; protected MimetypeService mimetypeService; + private final ThreadLocal previousTransformId = ThreadLocal.withInitial(()->-1); protected enum Call { @@ -280,6 +282,16 @@ public class TransformerDebug this.mimetypeService = mimetypeService; } + public void setPreviousTransformId(int id) + { + previousTransformId.set(id); + } + + private int getPreviousTransformId() + { + return previousTransformId.get(); + } + public void afterPropertiesSet() throws Exception { PropertyCheck.mandatory(this, "nodeService", nodeService); @@ -351,7 +363,7 @@ public class TransformerDebug log(frame.sourceMimetype+' '+frame.targetMimetype, false); String fileName = getFileName(frame.sourceNodeRef, firstLevel, sourceSize); - log(getMimetypeExt(frame.sourceMimetype)+getMimetypeExt(frame.targetMimetype) + + log(getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype) + ((fileName != null) ? fileName+' ' : "")+ ((sourceSize >= 0) ? fileSize(sourceSize)+' ' : "") + (firstLevel ? getRenditionName(renditionName) : "") + message); @@ -370,7 +382,7 @@ public class TransformerDebug Map options, String renditionName, String message) { String fileName = getFileName(sourceNodeRef, true, -1); - log(" "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype) + + log(" "+ getSourceAndTargetExt(sourceMimetype, targetMimetype) + ((fileName != null) ? fileName+' ' : "")+ ((sourceSize >= 0) ? 
fileSize(sourceSize)+' ' : "") + (getRenditionName(renditionName)) + message); @@ -453,6 +465,7 @@ public class TransformerDebug ourStack.pop(); } } + setPreviousTransformId(id); return id; } @@ -462,8 +475,7 @@ public class TransformerDebug { String failureReason = frame.getFailureReason(); boolean firstLevel = size == 1; - String sourceExt = getMimetypeExt(frame.sourceMimetype); - String targetExt = getMimetypeExt(frame.targetMimetype); + String sourceAndTargetExt = getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype); String fileName = getFileName(frame.sourceNodeRef, firstLevel, frame.sourceSize); long sourceSize = frame.getSourceSize(); String transformerName = frame.getTransformerName(); @@ -506,19 +518,18 @@ public class TransformerDebug if (level != null) { - infoLog(getReference(debug, false), sourceExt, targetExt, level, fileName, sourceSize, + infoLog(getReference(debug, false, false), sourceAndTargetExt, level, fileName, sourceSize, transformerName, renditionName, failureReason, ms, debug); } } } - private void infoLog(String reference, String sourceExt, String targetExt, String level, String fileName, + private void infoLog(String reference, String sourceAndTargetExt, String level, String fileName, long sourceSize, String transformerName, String renditionName, String failureReason, String ms, boolean debug) { String message = reference + - sourceExt + - targetExt + + sourceAndTargetExt + (level == null ? "" : level+' ') + (fileName == null ? "" : fileName) + (sourceSize >= 0 ? ' '+fileSize(sourceSize) : "") + @@ -569,6 +580,18 @@ public class TransformerDebug } } + /** + * Log a message prefixed with the previous transformation reference, used by this Thread. 
+ * @param message + */ + public void debugUsingPreviousReference(String message) + { + if (isEnabled() && message != null) + { + log(message, null,true, true); + } + } + /** * Log a message prefixed with the current transformation reference * and include a exception, suppressing the stack trace if repeated @@ -631,16 +654,21 @@ public class TransformerDebug { log(message, null, debug); } - + private void log(String message, Throwable t, boolean debug) + { + log(message, t, debug, false); + } + + private void log(String message, Throwable t, boolean debug, boolean usePreviousRef) { if (debug && ThreadInfo.getDebugOutput() && logger.isDebugEnabled()) { - logger.debug(getReference(false, false)+message, t); + logger.debug(getReference(false, false, usePreviousRef)+message, t); } else if (logger.isTraceEnabled()) { - logger.trace(getReference(false, false)+message, t); + logger.trace(getReference(false, false, usePreviousRef)+message, t); } if (debug) @@ -648,7 +676,7 @@ public class TransformerDebug StringBuilder sb = ThreadInfo.getStringBuilder(); if (sb != null) { - sb.append(getReference(false, true)); + sb.append(getReference(false, true, usePreviousRef)); sb.append(message); if (t != null) { @@ -691,10 +719,21 @@ public class TransformerDebug * Returns a N.N.N style reference to the transformation. * @param firstLevelOnly indicates if only the top level should be included and no extra padding. * @param overrideFirstLevel if the first level id should just be set to 1 (used in test methods) + * @param usePreviousRef if the reference of the last transform performed by this Thread should be used. * @return a padded (fixed length) reference. 
*/ - private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel) + private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel, boolean usePreviousRef) { + if (usePreviousRef) + { + int id = getPreviousTransformId(); + String ref = ""; + if (id >= 0) + { + ref = Integer.toString(id)+spaces(13); + } + return ref; + } StringBuilder sb = new StringBuilder(""); Frame frame = null; Iterator iterator = ThreadInfo.getStack().descendingIterator(); @@ -737,7 +776,7 @@ public class TransformerDebug } else { - sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7 + sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7 } } return sb.toString(); @@ -783,6 +822,14 @@ public class TransformerDebug return result; } + protected String getSourceAndTargetExt(String sourceMimetype, String targetMimetype) + { + String sourceExt = getMimetypeExt(sourceMimetype); + String targetExt = getMimetypeExt(targetMimetype); + targetExt = AsynchronousExtractor.getExtension(targetMimetype, sourceExt, targetExt); + return sourceExt + targetExt + spaces(1+4-targetExt.length()); + } + protected String getMimetypeExt(String mimetype) { StringBuilder sb = new StringBuilder(""); @@ -867,16 +914,15 @@ public class TransformerDebug if (isEnabled()) { pushMisc(); - String sourceExt = getMimetypeExt(sourceMimetype); - String targetExt = getMimetypeExt(targetMimetype); - debug(sourceExt + targetExt + + String sourceAndTargetExt = getSourceAndTargetExt(sourceMimetype, targetMimetype); + debug(sourceAndTargetExt + ((fileName != null) ? fileName + ' ' : "") + ((sourceSize >= 0) ? 
fileSize(sourceSize) + ' ' : "") + getRenditionName(renditionName) + " "+ TRANSFORM_SERVICE_NAME); log(options); log(sourceNodeRef.toString() + ' ' + contentHashcode); - String reference = getReference(true, false); - infoLog(reference, sourceExt, targetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME, + String reference = getReference(true, false, false); + infoLog(reference, sourceAndTargetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME, renditionName, null, "", true); } return pop(Call.AVAILABLE, true, false); @@ -884,19 +930,21 @@ public class TransformerDebug private String getRenditionName(String renditionName) { - return renditionName != null ? "-- "+renditionName+" -- " : ""; + return renditionName != null + ? "-- "+ AsynchronousExtractor.getRenditionName(renditionName)+" -- " + : ""; } /** * Debugs a response to the Transform Service */ public void debugTransformServiceResponse(NodeRef sourceNodeRef, int contentHashcode, - long requested, int seq, String sourceExt, String targetExt, String msg) + long requested, int id, String sourceExt, String targetExt, String msg) { pushMisc(); Frame frame = ThreadInfo.getStack().getLast(); - frame.id = seq; - boolean suppressFinish = seq == -1 || requested == -1; + frame.id = id; + boolean suppressFinish = id == -1 || requested == -1; if (!suppressFinish) { frame.start = requested; diff --git a/repository/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java b/repository/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java index 298723c76d..350411d903 100644 --- a/repository/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java +++ b/repository/src/main/java/org/alfresco/repo/rendition2/RenditionService2Impl.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -29,6 +29,7 @@ import org.alfresco.model.ContentModel; import org.alfresco.model.RenditionModel; import org.alfresco.repo.content.ContentServicePolicies; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.policy.BehaviourFilter; import org.alfresco.repo.policy.PolicyComponent; import org.alfresco.repo.rendition.RenditionPreventionRegistry; @@ -112,6 +113,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea private RuleService ruleService; private PostTxnCallbackScheduler renditionRequestSheduler; private TransformReplyProvider transformReplyProvider; + private AsynchronousExtractor asynchronousExtractor; private boolean enabled; private boolean thumbnailsEnabled; @@ -176,6 +178,11 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea this.transformReplyProvider = transformReplyProvider; } + public void setAsynchronousExtractor(AsynchronousExtractor asynchronousExtractor) + { + this.asynchronousExtractor = asynchronousExtractor; + } + public void setEnabled(boolean enabled) { this.enabled = enabled; @@ -203,6 +210,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea PropertyCheck.mandatory(this, "policyComponent", policyComponent); PropertyCheck.mandatory(this, "behaviourFilter", behaviourFilter); PropertyCheck.mandatory(this, "ruleService", ruleService); + PropertyCheck.mandatory(this, "asynchronousExtractor", asynchronousExtractor); } @Override @@ -374,41 +382,115 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea public void consume(NodeRef sourceNodeRef, InputStream transformInputStream, RenditionDefinition2 renditionDefinition, int transformContentHashCode) { + int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef); + if (logger.isDebugEnabled()) + { + 
logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes"); + } + if (renditionDefinition instanceof TransformDefinition) { - if (logger.isDebugEnabled()) + TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition; + String targetMimetype = transformDefinition.getTargetMimetype(); + if (AsynchronousExtractor.isMetadataExtractMimetype(targetMimetype)) { - TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition; - String transformName = transformDefinition.getTransformName(); - String replyQueue = transformDefinition.getReplyQueue(); - String clientData = transformDefinition.getClientData(); - boolean success = transformInputStream != null; - logger.info("Reply to " + replyQueue + " that the transform " + transformName + - " with the client data " + clientData + " " + (success ? "was successful" : "failed.")); + consumeExtractedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode); + } + else if (AsynchronousExtractor.isMetadataEmbedMimetype(targetMimetype)) + { + consumeEmbeddedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode); + } + else + { + consumeTransformReply(sourceNodeRef, transformInputStream, transformDefinition, transformContentHashCode); } - transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream, - (TransformDefinition)renditionDefinition, transformContentHashCode); } else { - consumeRendition(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode); + consumeRendition(sourceNodeRef, sourceContentHashCode, transformInputStream, renditionDefinition, transformContentHashCode); } } + private void consumeExtractedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream, + TransformDefinition transformDefinition, int 
transformContentHashCode) + { + if (transformInputStream == null) + { + if (logger.isDebugEnabled()) + { + logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it failed"); + } + } + else if (transformContentHashCode != sourceContentHashCode) + { + if (logger.isDebugEnabled()) + { + logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it is no longer needed"); + } + } + else + { + if (logger.isDebugEnabled()) + { + logger.debug("Set the metadata extraction on " + nodeRef); + } + asynchronousExtractor.setMetadata(nodeRef, transformInputStream); + } + } + + private void consumeEmbeddedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream, + TransformDefinition transformDefinition, int transformContentHashCode) + { + if (transformInputStream == null) + { + if (logger.isDebugEnabled()) + { + logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it failed"); + } + } + else if (transformContentHashCode != sourceContentHashCode) + { + if (logger.isDebugEnabled()) + { + logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it is no longer needed"); + } + } + else + { + if (logger.isDebugEnabled()) + { + logger.debug("Set the content with embedded metadata on " + nodeRef); + } + + asynchronousExtractor.setEmbeddedMetadata(nodeRef, transformInputStream); + } + } + + private void consumeTransformReply(NodeRef sourceNodeRef, InputStream transformInputStream, + TransformDefinition transformDefinition, int transformContentHashCode) + { + if (logger.isDebugEnabled()) + { + String transformName = transformDefinition.getTransformName(); + String replyQueue = transformDefinition.getReplyQueue(); + String clientData = transformDefinition.getClientData(); + boolean success = transformInputStream != null; + /* logged at debug level to match the isDebugEnabled() guard */ logger.debug("Reply to " + replyQueue + " that the transform " + transformName + + " with the client data " + clientData + " " + (success ? 
"was successful" : "failed.")); + } + transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream, + transformDefinition, transformContentHashCode); + } + /** * Takes a transformation (InputStream) and attaches it as a rendition to the source node. * Does nothing if there is already a newer rendition. * If the transformInputStream is null, this is taken to be a transform failure. */ - private void consumeRendition(NodeRef sourceNodeRef, InputStream transformInputStream, + private void consumeRendition(NodeRef sourceNodeRef, int sourceContentHashCode, InputStream transformInputStream, RenditionDefinition2 renditionDefinition, int transformContentHashCode) { String renditionName = renditionDefinition.getRenditionName(); - int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef); - if (logger.isDebugEnabled()) - { - logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes"); - } if (transformContentHashCode != sourceContentHashCode) { if (logger.isDebugEnabled()) @@ -475,7 +557,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea } catch (Exception e) { - logger.error("Failed to read transform InputStream into rendition " + renditionName + " on " + sourceNodeRef); + logger.error("Failed to copy transform InputStream into rendition " + renditionName + " on " + sourceNodeRef); throw e; } } diff --git a/repository/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java b/repository/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java index 26df001b31..3c0b1522f6 100644 --- a/repository/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java +++ b/repository/src/main/java/org/alfresco/repo/rendition2/TransformDefinition.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is 
part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -80,7 +80,14 @@ public class TransformDefinition extends RenditionDefinition2Impl public String getTransformName() { String renditionName = getRenditionName(); - return renditionName == null ? null : renditionName.substring(TRANSFORM_NAMESPACE.length()); + return getTransformName(renditionName); + } + + public static String getTransformName(String renditionName) + { + return renditionName == null || !renditionName.startsWith(TRANSFORM_NAMESPACE) + ? null + : renditionName.substring(TRANSFORM_NAMESPACE.length()); } public String getClientData() diff --git a/repository/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java b/repository/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java index 478da899bc..e9f2fc17ae 100644 --- a/repository/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java +++ b/repository/src/main/java/org/alfresco/repo/tagging/TaggingServiceImpl.java @@ -1,28 +1,28 @@ -/* - * #%L - * Alfresco Repository - * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited - * %% - * This file is part of the Alfresco software. - * If the software was purchased under a paid Alfresco license, the terms of - * the paid license agreement will prevail. Otherwise, the software is - * provided under the following open source license terms: - * - * Alfresco is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Alfresco is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. - * #L% - */ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. 
+ * #L% + */ package org.alfresco.repo.tagging; import java.io.BufferedReader; @@ -74,9 +74,9 @@ import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.Path; import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.service.cmr.search.CategoryService; -import org.alfresco.service.cmr.search.ResultSet; -import org.alfresco.service.cmr.search.SearchParameters; -import org.alfresco.service.cmr.search.SearchParameters.FieldFacet; +import org.alfresco.service.cmr.search.ResultSet; +import org.alfresco.service.cmr.search.SearchParameters; +import org.alfresco.service.cmr.search.SearchParameters.FieldFacet; import org.alfresco.service.cmr.search.SearchService; import org.alfresco.service.cmr.tagging.TagDetails; import org.alfresco.service.cmr.tagging.TagScope; @@ -1544,35 +1544,35 @@ public class TaggingServiceImpl implements TaggingService, { updateAllScopeTags(workingCopy, Boolean.FALSE); } - } - - /** - * @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef) - */ - @Override - public List<Pair<String, Integer>> findTaggedNodesAndCountByTagName(StoreRef storeRef) - { - String queryTaggeble = "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" + "-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\""; - SearchParameters sp = new SearchParameters(); - sp.setQuery(queryTaggeble); - sp.setLanguage(SearchService.LANGUAGE_LUCENE); - sp.addStore(storeRef); - sp.addFieldFacet(new FieldFacet("TAG")); - - ResultSet resultSet = null; - try - { - // Do the search for nodes - resultSet = this.searchService.query(sp); - return resultSet.getFieldFacet("TAG"); - } - finally - { - if (resultSet != null) - { - resultSet.close(); - } - } + } + + /** + * @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef) + */ + @Override + public List<Pair<String, Integer>> findTaggedNodesAndCountByTagName(StoreRef storeRef) + { + String queryTaggeble = "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" + 
"-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\""; + SearchParameters sp = new SearchParameters(); + sp.setQuery(queryTaggeble); + sp.setLanguage(SearchService.LANGUAGE_LUCENE); + sp.addStore(storeRef); + sp.addFieldFacet(new FieldFacet("TAG")); + + ResultSet resultSet = null; + try + { + // Do the search for nodes + resultSet = this.searchService.query(sp); + return resultSet.getFieldFacet("TAG"); + } + finally + { + if (resultSet != null) + { + resultSet.close(); + } + } } } diff --git a/repository/src/main/java/org/alfresco/repo/thumbnail/CreateThumbnailActionExecuter.java b/repository/src/main/java/org/alfresco/repo/thumbnail/CreateThumbnailActionExecuter.java index 9bd8a65864..058b614d95 100644 --- a/repository/src/main/java/org/alfresco/repo/thumbnail/CreateThumbnailActionExecuter.java +++ b/repository/src/main/java/org/alfresco/repo/thumbnail/CreateThumbnailActionExecuter.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -33,6 +33,9 @@ import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ParameterDefinitionImpl; import org.alfresco.repo.action.executer.ActionExecuterAbstractBase; +import org.alfresco.repo.rendition2.RenditionDefinition2; +import org.alfresco.repo.rendition2.RenditionDefinitionRegistry2; +import org.alfresco.repo.rendition2.RenditionService2; import org.alfresco.service.cmr.action.Action; import org.alfresco.service.cmr.action.ActionServiceTransientException; import org.alfresco.service.cmr.action.ParameterDefinition; @@ -71,6 +74,8 @@ public class CreateThumbnailActionExecuter extends ActionExecuterAbstractBase // Size limitations (in KBytes) indexed by mimetype for thumbnail creation private HashMap mimetypeMaxSourceSizeKBytes; + private RenditionService2 renditionService2; + /** Action name and parameters */ public static final String NAME = "create-thumbnail"; public static final String PARAM_CONTENT_PROPERTY = "content-property"; @@ -104,7 +109,12 @@ public class CreateThumbnailActionExecuter extends ActionExecuterAbstractBase { this.mimetypeMaxSourceSizeKBytes = mimetypeMaxSourceSizeKBytes; } - + + public void setRenditionService2(RenditionService2 renditionService2) + { + this.renditionService2 = renditionService2; + } + /** * Enable thumbnail creation at all regardless of mimetype. * @param generateThumbnails a {@code false} value turns off all thumbnail creation. 
@@ -187,30 +197,44 @@ public class CreateThumbnailActionExecuter extends ActionExecuterAbstractBase } } } - + // Create the thumbnail try { - TransformationOptions options = details.getTransformationOptions(); - this.thumbnailService.createThumbnail(actionedUponNodeRef, contentProperty, details.getMimetype(), options, thumbnailName, null); + boolean async = action.getExecuteAsychronously(); + RenditionDefinition2 renditionDefinition = null; + if (async) + { + RenditionDefinitionRegistry2 renditionDefinitionRegistry2 = renditionService2.getRenditionDefinitionRegistry2(); + renditionDefinition = renditionDefinitionRegistry2.getRenditionDefinition(thumbnailName); + } + if (async && renditionDefinition != null) + { + renditionService2.render(actionedUponNodeRef, thumbnailName); + } + else + { + TransformationOptions options = details.getTransformationOptions(); + this.thumbnailService.createThumbnail(actionedUponNodeRef, contentProperty, details.getMimetype(), options, thumbnailName, null); + } } catch (ContentServiceTransientException cste) { // any transient failures in the thumbnail creation must be handled as transient failures of the action to execute. StringBuilder msg = new StringBuilder(); - msg.append("Creation of thumbnail '") .append(details.getName()) .append("' declined"); + msg.append("Creation of thumbnail '").append(details.getName()).append("' declined"); if (logger.isDebugEnabled()) { logger.debug(msg.toString()); } - + throw new ActionServiceTransientException(msg.toString(), cste); } catch (Exception exception) { final String msg = "Creation of thumbnail '" + details.getName() + "' failed"; logger.info(msg); - + // We need to rethrow in order to trigger the compensating action. 
// See AddFailedThumbnailActionExecuter throw new AlfrescoRuntimeException(msg, exception); diff --git a/repository/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java b/repository/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java index 6a5b192901..dcd0b8f9d0 100644 --- a/repository/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java +++ b/repository/src/main/java/org/alfresco/transform/client/registry/CombinedConfig.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -48,7 +48,6 @@ import org.apache.http.util.EntityUtils; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -56,6 +55,9 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataEmbedMimetype; +import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataExtractMimetype; + /** * This class reads multiple T-Engine config and local files and registers them all with a registry as if they were all * in one file. Transform options are shared between all sources.

@@ -398,7 +400,13 @@ public class CombinedConfig // the source matches the last intermediate. Set supportedSourceAndTargets = sourceMediaTypesAndMaxSizes.stream(). flatMap(s -> stepTransformer.getSupportedSourceAndTargetList().stream(). - filter(st -> st.getSourceMediaType().equals(src)). + filter(st -> + { + String targetMimetype = st.getTargetMediaType(); + return st.getSourceMediaType().equals(src) && + !(isMetadataExtractMimetype(targetMimetype) || + isMetadataEmbedMimetype(targetMimetype)); + }). map(t -> t.getTargetMediaType()). map(trg -> SupportedSourceAndTarget.builder(). withSourceMediaType(s.getSourceMediaType()). diff --git a/repository/src/main/resources/alfresco/content-services-context.xml b/repository/src/main/resources/alfresco/content-services-context.xml index 8f39d8aeea..95648c0748 100644 --- a/repository/src/main/resources/alfresco/content-services-context.xml +++ b/repository/src/main/resources/alfresco/content-services-context.xml @@ -253,7 +253,10 @@ - + + + + + + + + + + + + + + diff --git a/repository/src/main/resources/alfresco/rendition-services2-context.xml b/repository/src/main/resources/alfresco/rendition-services2-context.xml index 7a88f3b2f1..4280e29e6f 100644 --- a/repository/src/main/resources/alfresco/rendition-services2-context.xml +++ b/repository/src/main/resources/alfresco/rendition-services2-context.xml @@ -59,6 +59,7 @@ + diff --git a/repository/src/main/resources/alfresco/repository.properties b/repository/src/main/resources/alfresco/repository.properties index 9cc7900f66..5aaa00f2bb 100644 --- a/repository/src/main/resources/alfresco/repository.properties +++ b/repository/src/main/resources/alfresco/repository.properties @@ -516,6 +516,9 @@ system.thumbnail.quietPeriod=604800 system.thumbnail.quietPeriodRetriesEnabled=true system.thumbnail.redeployStaticDefsOnStartup=true +content.metadata.async.extract.enabled=true +content.metadata.async.embed.enabled=true + # The default timeout for metadata mapping extracters 
content.metadataExtracter.default.timeoutMs=20000 diff --git a/repository/src/main/resources/alfresco/thumbnail-service-context.xml b/repository/src/main/resources/alfresco/thumbnail-service-context.xml index 4f1020f6b2..a1dcdf173d 100644 --- a/repository/src/main/resources/alfresco/thumbnail-service-context.xml +++ b/repository/src/main/resources/alfresco/thumbnail-service-context.xml @@ -276,7 +276,8 @@ - + + diff --git a/repository/src/test/java/org/alfresco/AppContext01TestSuite.java b/repository/src/test/java/org/alfresco/AppContext01TestSuite.java index 3dc93230d4..8492621e79 100644 --- a/repository/src/test/java/org/alfresco/AppContext01TestSuite.java +++ b/repository/src/test/java/org/alfresco/AppContext01TestSuite.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2017 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -45,9 +45,12 @@ import org.junit.runners.Suite; org.alfresco.repo.action.evaluator.HasAspectEvaluatorTest.class, org.alfresco.repo.action.executer.SetPropertyValueActionExecuterTest.class, org.alfresco.repo.action.executer.AddFeaturesActionExecuterTest.class, + org.alfresco.repo.action.executer.ContentMetadataExtracterTest.class, org.alfresco.repo.action.executer.ContentMetadataExtracterTagMappingTest.class, org.alfresco.repo.action.executer.ContentMetadataEmbedderTest.class, + org.alfresco.repo.action.executer.AsynchronousExtractorTest.class, + org.alfresco.repo.rule.RuleLinkTest.class, org.alfresco.repo.rule.RuleServiceCoverageTest.class, org.alfresco.repo.rule.RuleServiceImplTest.class, diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java index c4df229da6..5c41c6c663 100644 --- 
a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -25,22 +25,11 @@ */ package org.alfresco.repo.action.executer; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; -import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; import org.alfresco.service.cmr.dictionary.DictionaryService; @@ -55,15 +44,25 @@ import org.alfresco.service.namespace.QName; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; import org.apache.tika.embedder.Embedder; -import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.transaction.annotation.Transactional; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; 
+import java.io.Serializable; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + /** * Test of the ActionExecuter for embedding metadata * @@ -94,7 +93,9 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService"); this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService"); this.metadataExtracterRegistry = (MetadataExtracterRegistry) this.applicationContext.getBean("metadataExtracterRegistry"); - + metadataExtracterRegistry.setAsyncExtractEnabled(false); + metadataExtracterRegistry.setAsyncEmbedEnabled(false); + AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -123,15 +124,21 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest this.executer.setApplicableTypes(new String[] { ContentModel.TYPE_CONTENT.toString() }); } + @After + public void after() + { + metadataExtracterRegistry.setAsyncExtractEnabled(true); + metadataExtracterRegistry.setAsyncEmbedEnabled(true); + } + /** * Test that a failing embedder does not destroy the original content */ @Test public void testFailingEmbedder() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); - FailingEmbedder embedder = new FailingEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); - embedder.setRegistry(registry); + AbstractMappingMetadataExtracter embedder = new FailingMappingMetadataEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); + embedder.setRegistry(metadataExtracterRegistry); embedder.setDictionaryService(this.dictionaryService); embedder.setMimetypeService(this.mimetypeService); embedder.register(); @@ -158,17 +165,16 @@ public class 
ContentMetadataEmbedderTest extends BaseSpringTest } /** - * Tika-powered embedder which fails upon calling embed on its {@link FailingTikaEmbedder} + * Embedder which fails upon calling embed on its {@link FailingEmbedder} */ - private class FailingEmbedder extends TikaPoweredMetadataExtracter + private class FailingMappingMetadataEmbedder extends AbstractMappingMetadataExtracter { - /** * Constructor for setting supported extract and embed mimetypes * * @param mimetypes the supported extract and embed mimetypes */ - public FailingEmbedder(Collection mimetypes) + public FailingMappingMetadataEmbedder(Collection mimetypes) { super( new HashSet(mimetypes), @@ -176,15 +182,26 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest } @Override - protected Parser getParser() + protected void embedInternal(Map metadata, ContentReader reader, ContentWriter writer) throws Throwable { - return null; + Embedder embedder = getEmbedder(); + if (embedder == null) + { + return; + } + + Map metadataAsStrings = convertMetadataToStrings(metadata); + Metadata metadataToEmbed = new Metadata(); + metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v)); + + InputStream inputStream = reader.getContentInputStream(); + OutputStream outputStream = writer.getContentOutputStream(); + embedder.embed(metadataToEmbed, null, outputStream, null); } - @Override protected Embedder getEmbedder() { - return new FailingTikaEmbedder(); + return new FailingEmbedder(); } @Override @@ -202,12 +219,18 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest mapping.put("author", qnames); return mapping; } + + @Override + protected Map extractRaw(ContentReader reader) throws Throwable + { + return null; + } } /** - * Tika metadata embedder which fails on a call to embed. + * Metadata embedder which fails on a call to embed. 
*/ - private class FailingTikaEmbedder implements Embedder + private class FailingEmbedder implements Embedder { private static final long serialVersionUID = -4954679684941467571L; @@ -219,7 +242,7 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest @Override public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context) - throws IOException, TikaException + throws IOException { throw new IOException("Forced failure"); } diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java index dd5bf8252e..a7f46c3bc7 100644 --- a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTagMappingTest.java @@ -45,7 +45,6 @@ import org.alfresco.repo.action.AsynchronousActionExecutionQueuePolicies; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; -import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.policy.Behaviour.NotificationFrequency; import org.alfresco.repo.policy.JavaBehaviour; @@ -74,8 +73,6 @@ import org.alfresco.util.GUID; import org.alfresco.util.testing.category.LuceneTests; import org.alfresco.util.testing.category.RedundantTests; import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.jpeg.JpegParser; import org.junit.experimental.categories.Category; import org.springframework.context.ConfigurableApplicationContext; @@ -112,6 +109,7 @@ public class ContentMetadataExtracterTagMappingTest extends 
TestCase private TaggingService taggingService; private NodeService nodeService; private ContentService contentService; + private MetadataExtracterRegistry metadataExtracterRegistry; private AuditService auditService; private TransactionService transactionService; private AuthenticationComponent authenticationComponent; @@ -144,7 +142,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase this.taggingService = (TaggingService)ctx.getBean("TaggingService"); this.nodeService = (NodeService) ctx.getBean("NodeService"); this.contentService = (ContentService) ctx.getBean("ContentService"); - + this.metadataExtracterRegistry = (MetadataExtracterRegistry) ctx.getBean("metadataExtracterRegistry"); + metadataExtracterRegistry.setAsyncExtractEnabled(false); + metadataExtracterRegistry.setAsyncEmbedEnabled(false); + this.transactionService = (TransactionService)ctx.getBean("transactionComponent"); this.auditService = (AuditService)ctx.getBean("auditService"); this.authenticationComponent = (AuthenticationComponent)ctx.getBean("authenticationComponent"); @@ -208,6 +209,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase @Override protected void tearDown() throws Exception { + metadataExtracterRegistry.setAsyncExtractEnabled(true); + metadataExtracterRegistry.setAsyncEmbedEnabled(true); + if (AlfrescoTransactionSupport.getTransactionReadState() != TxnReadState.TXN_NONE) { fail("Test is not transaction-safe. 
Fix up transaction handling and re-test."); @@ -297,7 +301,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase }); } - private static class TagMappingMetadataExtracter extends TikaPoweredMetadataExtracter + private static class TagMappingMetadataExtracter extends AbstractMappingMetadataExtracter { private String existingTagNodeRef; @@ -329,16 +333,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase return sourceMimetype.equals(MimetypeMap.MIMETYPE_IMAGE_JPEG); } - @Override - protected Parser getParser() - { - return new JpegParser(); - } - @SuppressWarnings("unchecked") public Map extractRaw(ContentReader reader) throws Throwable { - Map rawMap = super.extractRaw(reader); + Map rawMap = newRawMap(); // Add some test keywords to those actually extracted from the file including a nodeRef List keywords = new ArrayList(Arrays.asList( diff --git a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java index d7ccf2a97d..1e62162c18 100644 --- a/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java +++ b/repository/src/test/java/org/alfresco/repo/action/executer/ContentMetadataExtracterTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -43,16 +43,11 @@ */ package org.alfresco.repo.action.executer; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - import org.alfresco.model.ContentModel; import org.alfresco.repo.action.ActionImpl; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.repo.security.authentication.AuthenticationComponent; @@ -67,11 +62,18 @@ import org.alfresco.service.namespace.QName; import org.alfresco.test_category.BaseSpringTestsCategory; import org.alfresco.util.BaseSpringTest; import org.alfresco.util.GUID; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.transaction.annotation.Transactional; +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + /** * Test of the ActionExecuter for extracting metadata. Note: This test makes * assumptions about the PDF test data for PdfBoxExtracter. 
@@ -88,6 +90,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest private NodeService nodeService; private ContentService contentService; + private MetadataExtracterRegistry registry; private StoreRef testStoreRef; private NodeRef rootNodeRef; private NodeRef nodeRef; @@ -101,7 +104,10 @@ public class ContentMetadataExtracterTest extends BaseSpringTest { this.nodeService = (NodeService) this.applicationContext.getBean("nodeService"); this.contentService = (ContentService) this.applicationContext.getBean("contentService"); - + registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); + registry.setAsyncExtractEnabled(false); + registry.setAsyncEmbedEnabled(false); + AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); authenticationComponent.setSystemUserAsCurrentUser(); @@ -126,6 +132,13 @@ public class ContentMetadataExtracterTest extends BaseSpringTest this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata"); } + @After + public void after() + { + registry.setAsyncExtractEnabled(true); + registry.setAsyncEmbedEnabled(true); + } + /** * Test execution of the extraction itself */ @@ -189,7 +202,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest @Test public void testUnknownProperties() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); TestUnknownMetadataExtracter extracterUnknown = new TestUnknownMetadataExtracter(); extracterUnknown.setRegistry(registry); extracterUnknown.register(); @@ -247,7 +259,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest @Test public void testNullExtractedValues_ALF1823() { - MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); TestNullPropMetadataExtracter extractor = new 
TestNullPropMetadataExtracter(); extractor.setRegistry(registry); extractor.register(); diff --git a/repository/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java b/repository/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java index 0a1523d53d..99b8382314 100644 --- a/repository/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java +++ b/repository/src/test/java/org/alfresco/repo/content/AbstractJodConverterBasedTest.java @@ -63,11 +63,13 @@ import org.junit.Ignore; import org.springframework.context.ApplicationContext; /** - * + * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 and the transformer is also deprecated. + * * @author Neil McErlean * @since 3.3 */ @Ignore("This is an abstract class so don't instaniate it or run it in Junit") +@Deprecated public abstract class AbstractJodConverterBasedTest { private static Log log = LogFactory.getLog(AbstractJodConverterBasedTest.class); diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/AsynchronousExtractorTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/AsynchronousExtractorTest.java new file mode 100644 index 0000000000..862e0430cd --- /dev/null +++ b/repository/src/test/java/org/alfresco/repo/content/metadata/AsynchronousExtractorTest.java @@ -0,0 +1,559 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2020 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + * #L% + */ +package org.alfresco.repo.action.executer; + +import org.alfresco.model.ContentModel; +import org.alfresco.repo.action.ActionImpl; +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; +import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; +import org.alfresco.repo.content.transform.AbstractContentTransformerTest; +import org.alfresco.repo.content.transform.TransformerDebug; +import org.alfresco.repo.content.transform.UnsupportedTransformationException; +import org.alfresco.repo.rendition2.RenditionDefinition2; +import org.alfresco.repo.rendition2.RenditionService2Impl; +import org.alfresco.repo.rendition2.TransformClient; +import org.alfresco.repo.security.authentication.AuthenticationComponent; +import org.alfresco.repo.transaction.RetryingTransactionHelper; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.MimetypeService; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.repository.StoreRef; +import org.alfresco.service.cmr.tagging.TaggingService; +import org.alfresco.service.namespace.NamespacePrefixResolver; +import 
org.alfresco.service.namespace.QName; +import org.alfresco.service.transaction.TransactionService; +import org.alfresco.test_category.BaseSpringTestsCategory; +import org.alfresco.transform.client.registry.TransformServiceRegistry; +import org.alfresco.util.BaseSpringTest; +import org.alfresco.util.GUID; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import javax.transaction.HeuristicMixedException; +import javax.transaction.HeuristicRollbackException; +import javax.transaction.NotSupportedException; +import javax.transaction.RollbackException; +import javax.transaction.SystemException; +import javax.transaction.UserTransaction; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.StringJoiner; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static java.util.Arrays.asList; +import static org.alfresco.model.ContentModel.PROP_CONTENT; +import static org.alfresco.model.ContentModel.PROP_CREATED; +import static org.alfresco.model.ContentModel.PROP_CREATOR; +import static org.alfresco.model.ContentModel.PROP_MODIFIED; +import static org.alfresco.model.ContentModel.PROP_MODIFIER; +import static org.alfresco.repo.rendition2.RenditionService2Impl.SOURCE_HAS_NO_CONTENT; + +/** + * Tests the asynchronous extract and embed of metadata. This is normally performed in a T-Engine, but in this test + * class is mocked using a separate Thread that returns well known values. What makes the AsynchronousExtractor + * different from other {@link AbstractMappingMetadataExtracter} sub classes is that the calling Thread does not + * do the work of updating properties or the content, as the T-Engine will reply at some later point. 
+ * + * @author adavis + */ +@Category(BaseSpringTestsCategory.class) +public class AsynchronousExtractorTest extends BaseSpringTest +{ + private final static String ID = GUID.generate(); + private static final String AFTER_CALLING_EXECUTE = "after calling execute"; + private static final String AFTER_THE_TRANSFORM = "after the transform"; + private static final Integer UNCHANGED_HASHCODE = null; + private static final Integer CHANGED_HASHCODE = 1234; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy"); + private static final ExecutorService executorService = Executors.newCachedThreadPool(); + + private NodeService nodeService; + private ContentService contentService; + private DictionaryService dictionaryService; + private MimetypeService mimetypeService; + private MetadataExtracterRegistry metadataExtracterRegistry; + private StoreRef testStoreRef; + private NodeRef rootNodeRef; + private NodeRef nodeRef; + private AsynchronousExtractor asynchronousExtractor; + private NamespacePrefixResolver namespacePrefixResolver; + private TransformerDebug transformerDebug; + private TransactionService transactionService; + private TransformServiceRegistry transformServiceRegistry; + private TaggingService taggingService; + private ContentMetadataExtracter contentMetadataExtracter; + private ContentMetadataEmbedder contentMetadataEmbedder; + private RenditionService2Impl renditionService2; + private TransformClient transformClient; + + private long origSize; + private Map origProperties; + private Map expectedProperties; + private Map properties; + + private class TestAsynchronousExtractor extends AsynchronousExtractor + { + private final String mockResult; + private final Integer changedHashcode; + private final Random random = new Random(); + + private boolean finished; + + TransformClient mockTransformClient = new TransformClient() + { + @Override + public void checkSupported(NodeRef sourceNodeRef, 
RenditionDefinition2 renditionDefinition, String sourceMimetype, long sourceSizeInBytes, String contentUrl) + { + } + + @Override + public void transform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String user, int sourceContentHashCode) + throws UnsupportedTransformationException, ContentIOException + { + mockTransform(sourceNodeRef, renditionDefinition, sourceContentHashCode); + } + }; + + /** + * Creates an AsynchronousExtractor that simulates an extract or embed. + * + * @param mockResult if specified indicates a value was returned. The result is read as a resource from + * the classpath. + * @param changedHashcode if specified indicates that the source node content changed or was deleted between + * the request to extract or embed and the response. + */ + TestAsynchronousExtractor(String mockResult, Integer changedHashcode) + { + this.mockResult = mockResult; + this.changedHashcode = changedHashcode; + + setNodeService(nodeService); + setNamespacePrefixResolver(namespacePrefixResolver); + setTransformerDebug(transformerDebug); + setRenditionService2(renditionService2); + setContentService(contentService); + setTransactionService(transactionService); + setTransformServiceRegistry(transformServiceRegistry); + setTaggingService(taggingService); + setRegistry(metadataExtracterRegistry); + setMimetypeService(mimetypeService); + setDictionaryService(dictionaryService); + setExecutorService(executorService); + register(); + + renditionService2.setTransformClient(mockTransformClient); + } + + @Override + public boolean isSupported(String sourceMimetype, long sourceSizeInBytes) + { + return true; + } + + @Override + public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes) + { + return true; + } + + private void mockTransform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, int sourceContentHashCode) + { + try + { + transformerDebug.pushMisc(); + wait(50, 700); + } + finally + { + transformerDebug.popMisc(); + 
} + + int transformContentHashCode = changedHashcode == null ? sourceContentHashCode : changedHashcode; + if (mockResult != null) + { + try (InputStream transformInputStream = getClass().getClassLoader().getResourceAsStream(mockResult)) + { + renditionService2.consume(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode); + } + catch (IOException e) + { + throw new RuntimeException("Could not read '" + mockResult + "' from the classpath.", e); + } + } + else + { + renditionService2.failure(sourceNodeRef, renditionDefinition, transformContentHashCode); + } + + synchronized (this) + { + finished = true; + notifyAll(); + } + } + + /** + * Wait for a few milliseconds or until the finished flag is set. + * + * @param from inclusive lower bound. If negative, there is only an upper bound. + * @param to exclusive upper bound. + * @return the wait. + */ + public synchronized void wait(int from, int to) + { + long start = System.currentTimeMillis(); + long end = start + (from < 0 ? 
to : from + random.nextInt(to - from)); + + while (!finished && System.currentTimeMillis() < end) + { + try + { + long ms = end - System.currentTimeMillis(); + if (ms > 0) + { + wait(ms); + } + } + catch (InterruptedException ignore) + { + } + } + } + } + + @Before + public void before() throws Exception + { + nodeService = (NodeService) applicationContext.getBean("nodeService"); + contentService = (ContentService) applicationContext.getBean("contentService"); + dictionaryService = (DictionaryService) applicationContext.getBean("dictionaryService"); + mimetypeService = (MimetypeService) applicationContext.getBean("mimetypeService"); + namespacePrefixResolver = (NamespacePrefixResolver) applicationContext.getBean("namespaceService"); + transformerDebug = (TransformerDebug) applicationContext.getBean("transformerDebug"); + renditionService2 = (RenditionService2Impl) applicationContext.getBean("renditionService2"); + transactionService = (TransactionService) applicationContext.getBean("transactionService"); + transformServiceRegistry = (TransformServiceRegistry) applicationContext.getBean("transformServiceRegistry"); + taggingService = (TaggingService) applicationContext.getBean("taggingService"); + transformClient = (TransformClient) applicationContext.getBean("transformClient"); + + // Create an empty metadata extractor registry, so that if we add one it will be used + metadataExtracterRegistry = new MetadataExtracterRegistry(); + + contentMetadataExtracter = new ContentMetadataExtracter(); + contentMetadataExtracter.setNodeService(nodeService); + contentMetadataExtracter.setContentService(contentService); + contentMetadataExtracter.setDictionaryService(dictionaryService); + contentMetadataExtracter.setMetadataExtracterRegistry(metadataExtracterRegistry); + contentMetadataExtracter.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()}); + contentMetadataExtracter.setCarryAspectProperties(true); + + contentMetadataEmbedder = new 
ContentMetadataEmbedder(); + contentMetadataEmbedder.setNodeService(nodeService); + contentMetadataEmbedder.setContentService(contentService); + contentMetadataEmbedder.setMetadataExtracterRegistry(metadataExtracterRegistry); + contentMetadataEmbedder.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()}); + + transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() + { + @Override + public Void execute() throws Throwable + { + AuthenticationComponent authenticationComponent = (AuthenticationComponent) applicationContext.getBean("authenticationComponent"); + authenticationComponent.setSystemUserAsCurrentUser(); + + // Create the store and get the root node + testStoreRef = nodeService.createStore( + StoreRef.PROTOCOL_WORKSPACE, + "Test_" + System.currentTimeMillis()); + rootNodeRef = nodeService.getRootNode(testStoreRef); + + // Create the node used for tests + nodeRef = nodeService.createNode( + rootNodeRef, ContentModel.ASSOC_CHILDREN, + QName.createQName("{test}testnode"), + ContentModel.TYPE_CONTENT).getChildRef(); + + // Authenticate as the system user + authenticationComponent.setSystemUserAsCurrentUser(); + + ContentWriter cw = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + cw.setMimetype(MimetypeMap.MIMETYPE_PDF); + cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf")); + + origProperties = nodeService.getProperties(nodeRef); + nodeService.setProperties(nodeRef, origProperties); + origProperties = new HashMap<>(origProperties); // just in case the contents changed. + expectedProperties = new HashMap<>(origProperties); // ready to be modified. 
+ + origSize = getSize(nodeRef); + + return null; + } + }); + } + + @After + public void after() throws Exception + { + renditionService2.setTransformClient(transformClient); + } + + private void assertAsyncMetadataExecute(ActionExecuterAbstractBase executor, String mockResult, + Integer changedHashcode, long expectedSize, + Map expectedProperties, + QName... ignoreProperties) throws Exception + { + TestAsynchronousExtractor extractor = new TestAsynchronousExtractor(mockResult, changedHashcode); + + executeAction(executor, extractor); + assertContentSize(nodeRef, origSize, AFTER_CALLING_EXECUTE); + assertProperties(nodeRef, origProperties, AFTER_CALLING_EXECUTE, ignoreProperties); + + extractor.wait(-1, 10000); + assertContentSize(nodeRef, expectedSize, AFTER_THE_TRANSFORM); + assertProperties(nodeRef, expectedProperties, AFTER_THE_TRANSFORM, ignoreProperties); + } + + private void executeAction(ActionExecuterAbstractBase extractor, TestAsynchronousExtractor asynchronousExtractor) + throws SystemException, NotSupportedException, HeuristicRollbackException, HeuristicMixedException, RollbackException + { + UserTransaction txn = transactionService.getUserTransaction(); + txn.begin(); + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + extractor.execute(action, nodeRef); + txn.commit(); + } + + void assertContentSize(NodeRef nodeRef, long expectSize, String state) + { + long size = getSize(nodeRef); + if (expectSize == origSize) + { + assertEquals("The content should remain unchanged " + state, origSize, size); + } + else + { + assertEquals("The content should have changed " + state, expectSize, size); + } + } + + private long getSize(NodeRef nodeRef) + { + ContentReader reader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT); + return reader.getSize(); + } + + private void assertProperties(NodeRef nodeRef, Map expectProperties, String state, + QName[] ignoreProperties) + { + properties = 
nodeService.getProperties(nodeRef); + + // Work out the difference in a human readable form and ignore the 5 system set properties (as they always + // change) plus any the caller has requested. + StringJoiner sj = new StringJoiner("\n"); + List ignoreKeys = new ArrayList<>(asList(PROP_MODIFIED, PROP_MODIFIER, PROP_CONTENT, PROP_CREATED, PROP_CREATOR)); + ignoreKeys.addAll(asList(ignoreProperties)); + for (Map.Entry entry : expectProperties.entrySet()) + { + QName k = entry.getKey(); + Serializable v = entry.getValue(); + Serializable actual = properties.get(k); + if (!ignoreKeys.contains(k) && !v.equals(actual)) + { + sj.add(k + "\n Expected: " + v + "\n Was: " + actual); + } + } + for (QName k : properties.keySet()) + { + Serializable actual = properties.get(k); + if (!ignoreKeys.contains(k) && !expectProperties.containsKey(k)) + { + sj.add(k + "\n Expected: null\n Was: " + actual); + } + } + + if (sj.length() != 0) + { + if (expectProperties.equals(origProperties)) + { + fail("The properties should remain unchanged " + state + "\n" + sj); + } + else + { + fail("The properties should have changed " + state + "\n" + sj); + } + } + } + + @Test + public void testExtractHtml() throws Exception + { + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Nevin Nollop"); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "Gym class featuring a brown fox and lazy dog"); + expectedProperties.put(QName.createQName("cm:title", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + } + + @Test + public void testExtractNodeDeleted() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + SOURCE_HAS_NO_CONTENT, origSize, origProperties); + } + + @Test + public void testExtractContentChanged() 
throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json", + 1234, origSize, origProperties); + } + + @Test + public void testExtractTransformFailure() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, null, + UNCHANGED_HASHCODE, origSize, origProperties); + } + + @Test + public void testExtractTransformCorrupt() throws Exception + { + assertAsyncMetadataExecute(contentMetadataExtracter, "quick.html", // not json + UNCHANGED_HASHCODE, origSize, origProperties); + } + + @Test + public void testUnknownNamespaceInResponse() throws Exception + { + // "sys:overwritePolicy": "PRAGMATIC" - is used + // "{http://www.unknown}name": "ignored" - is reported in an ERROR log + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Used"); + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/unknown_namespace_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + } + + @Test + public void testExtractMsg() throws Exception // has dates as RFC822 + { + expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), "mark.rogers@alfresco.com"); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "This is a quick test"); + expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver), + new ArrayList<>(asList("mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com"))); + + expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jan 18 13:44:20 GMT 2013")); // 2013-01-18T13:44:20Z + expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), "This is a quick test"); + expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Mark Rogers"); + expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), "Mark Rogers"); + + 
assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.msg_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties); + + Serializable sentDate = properties.get(QName.createQName("cm:sentdate", namespacePrefixResolver)); + } + + @Test + public void testExtractEml() throws Exception // has dates as longs since 1970 + { + expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver), + new ArrayList<>(asList("Nevin Nollop "))); + expectedProperties.put(QName.createQName("imap:dateSent", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jun 04 13:23:22 BST 2004")); + expectedProperties.put(QName.createQName("imap:messageTo", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("imap:messageId", namespacePrefixResolver), "<20040604122322.GV1905@phoenix.home>"); + expectedProperties.put(QName.createQName("cm:title", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageSubject", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageCc", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jun 04 13:23:22 BST 2004")); + expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), "The quick brown fox jumps over the lazy dog"); + expectedProperties.put(QName.createQName("imap:messageFrom", namespacePrefixResolver), "Nevin Nollop "); + expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), "Nevin Nollop "); + + // Note: As the metadata is for eml, an 
aspect gets added resulting in a second extract because of + // ImapContentPolicy.onAddAspect. I cannot see a good way to avoid this. + assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.eml_metadata.json", + UNCHANGED_HASHCODE, origSize, expectedProperties, + // cm:author is not in the quick.eml_metadata.json but is being added by the second extract which thinks + // the source mimetype is MimetypeMap.MIMETYPE_PDF, because that is what the before() method sets the + // content to. As a result the PdfBox metadata extractor is called, which extracts cm:author. Given that + // we don't know when this will take place, we simply ignore this property. We could fix this up, but it + // does not add anything to the test. + QName.createQName("cm:author", namespacePrefixResolver)); + } + + + @Test + public void testEmbed() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", // just replace the pdf with html! + UNCHANGED_HASHCODE, 428, expectedProperties); + } + @Test + public void testEmbedNodeDeleted() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", + SOURCE_HAS_NO_CONTENT, origSize, origProperties); + } + + @Test + public void testEmbedContentChanged() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html", + 1234, origSize, origProperties); + } + + @Test + public void testEmbedTransformFailure() throws Exception + { + assertAsyncMetadataExecute(contentMetadataEmbedder, null, + UNCHANGED_HASHCODE, origSize, origProperties); + } + + // TODO Write tests for: overwritePolicy, enableStringTagging and carryAspectProperties. + // Values are set in AsynchronousExtractor.setMetadata(...) but make use of original code within + // MetadataExtracter and AbstractMappingMetadataExtracter. + // As the tests for existing extractors are to be removed in ACS 7.0, it is possible that they were being used + // to test these values. 
+} \ No newline at end of file diff --git a/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java b/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java index 44f6fada59..dea0b50a6f 100644 --- a/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java +++ b/repository/src/test/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java @@ -177,5 +177,5 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest assertEquals("Custom DWG property not found", "valueforcustomprop1", properties.get(TIKA_CUSTOM_TEST_PROPERTY)); } - + } diff --git a/repository/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java b/repository/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java index d3cccb2203..7d1b9de47c 100644 --- a/repository/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java +++ b/repository/src/test/java/org/alfresco/repo/imap/ImapMessageTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -102,6 +102,8 @@ import com.sun.mail.imap.protocol.RFC822DATA; import com.sun.mail.imap.protocol.UID; import com.sun.mail.util.ASCIIUtility; +import static org.alfresco.model.ContentModel.PROP_MODIFIED; + @Category({OwnJVMTestsCategory.class, LuceneTests.class}) public class ImapMessageTest extends TestCase { @@ -523,15 +525,30 @@ public class ImapMessageTest extends TestCase messageHelper.addCc(address); // Creating the message node in the repository + UserTransaction txn = transactionService.getUserTransaction(); + txn.begin(); String name = AlfrescoImapConst.MESSAGE_PREFIX + GUID.generate(); FileInfo messageFile = fileFolderService.create(testImapFolderNodeRef, name, ContentModel.TYPE_CONTENT); // Writing a content. + NodeRef nodeRef = messageFile.getNodeRef(); + Serializable origModified = getModified(nodeRef); new IncomingImapMessage(messageFile, serviceRegistry, message); - + txn.commit(); + + // Calls to new IncomingImapMessage(...) only takes place when a new nodeRef is being created. + // No other code will be changing the nodeRef. An ImapModel.ASPECT_IMAP_CONTENT is added, which + // triggers a metadata extract to take place in a post commit method. Previously this would have been a + // synchronous process. This is no longer true as it may now take place in a T-Engine. So, we need to wait + // for the extract to take place. 
There does not appear to be a notification to wait on, so poll (for up to 10 seconds) until the modified property changes. + long end = System.currentTimeMillis()+10000; + while (System.currentTimeMillis() <= end && origModified.equals(getModified(nodeRef))) + { + Thread.currentThread().sleep(1000); + } + // Getting the transformed properties from the repository // cm:originator, cm:addressee, cm:addressees, imap:messageFrom, imap:messageTo, imap:messageCc - Map properties = nodeService.getProperties(messageFile.getNodeRef()); - + Map properties = nodeService.getProperties(nodeRef); String cmOriginator = (String) properties.get(ContentModel.PROP_ORIGINATOR); String cmAddressee = (String) properties.get(ContentModel.PROP_ADDRESSEE); @SuppressWarnings("unchecked") @@ -555,6 +572,12 @@ public class ImapMessageTest extends TestCase assertEquals(decodedAddress, imapMessageCc); } + private Serializable getModified(NodeRef nodeRef) + { + Map origProperties = nodeService.getProperties(nodeRef); + return origProperties.get(PROP_MODIFIED); + } + @Category(RedundantTests.class) public void testEightBitMessage() throws Exception { diff --git a/repository/src/test/java/org/alfresco/repo/rendition/RenditionServicePermissionsTest.java b/repository/src/test/java/org/alfresco/repo/rendition/RenditionServicePermissionsTest.java index d26ae198da..4a3cd67b82 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition/RenditionServicePermissionsTest.java +++ b/repository/src/test/java/org/alfresco/repo/rendition/RenditionServicePermissionsTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2016 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -26,10 +26,6 @@ package org.alfresco.repo.rendition; -import java.io.Serializable; -import java.util.List; -import java.util.Map; - import org.alfresco.model.ContentModel; import org.alfresco.model.RenditionModel; import org.alfresco.repo.content.MimetypeMap; @@ -40,19 +36,18 @@ import org.alfresco.repo.security.authentication.AuthenticationUtil; import org.alfresco.repo.thumbnail.ThumbnailDefinition; import org.alfresco.repo.thumbnail.ThumbnailHelper; import org.alfresco.repo.thumbnail.ThumbnailRegistry; +import org.alfresco.repo.thumbnail.ThumbnailServiceImplTest; import org.alfresco.repo.transaction.RetryingTransactionHelper; import org.alfresco.service.ServiceRegistry; import org.alfresco.service.cmr.action.Action; import org.alfresco.service.cmr.rendition.RenditionDefinition; import org.alfresco.service.cmr.rendition.RenditionService; import org.alfresco.service.cmr.repository.ChildAssociationRef; -import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeService; -import org.alfresco.service.cmr.repository.TransformationOptions; import org.alfresco.service.cmr.security.PermissionService; import org.alfresco.service.cmr.site.SiteVisibility; import org.alfresco.service.namespace.NamespaceService; @@ -76,9 +71,16 @@ import org.junit.experimental.categories.Category; import org.junit.rules.RuleChain; import org.springframework.test.context.ContextConfiguration; +import java.io.Serializable; +import java.util.List; + import static org.alfresco.repo.rendition2.TestSynchronousTransformClient.EXPECTED_USER; import static org.alfresco.repo.rendition2.TestSynchronousTransformClient.TEST_USER_MIME_TYPE; -import static 
org.junit.Assert.*; +import static org.alfresco.repo.thumbnail.ThumbnailServiceImplTest.TEST_THUMBNAIL; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; /** * @author Neil McErlean @@ -99,7 +101,7 @@ public class RenditionServicePermissionsTest public static ApplicationContextInit APP_CONTEXT_INIT = ApplicationContextInit.createStandardContextWithOverrides("classpath:/test/alfresco/test-renditions-context.xml", "classpath:org/alfresco/repo/rendition2/test-transform-context.xml"); - + // JUnit Rules to create test users. public static AlfrescoPerson TEST_USER1 = new AlfrescoPerson(APP_CONTEXT_INIT, EXPECTED_USER); @@ -151,6 +153,7 @@ public class RenditionServicePermissionsTest transactionHelper = (RetryingTransactionHelper) APP_CONTEXT_INIT.getApplicationContext().getBean("retryingTransactionHelper"); services = (ServiceRegistry) APP_CONTEXT_INIT.getApplicationContext().getBean("ServiceRegistry"); thumbnailRegistry = (ThumbnailRegistry) APP_CONTEXT_INIT.getApplicationContext().getBean("thumbnailRegistry"); + ThumbnailServiceImplTest.createTestThumbnail(thumbnailRegistry); } @Before public void initNonStaticData() throws Exception @@ -319,7 +322,8 @@ public class RenditionServicePermissionsTest { final String siteConsumer = testSiteInfo.siteConsumer; - // Let's trigger the creation of a doclib thumbnail for the broken JPG node. + // Let's trigger the creation of a TEST_THUMBNAIL for the broken JPG node. Previously it was doclib, but the + // newer RenditionService2 knows how to create that and does not do failure recovery needed by this test. // We know this cannot succeed. We also know the user triggering it does not have write permissions for the node. 
AuthenticationUtil.setFullyAuthenticatedUser(siteConsumer); @@ -328,7 +332,7 @@ public class RenditionServicePermissionsTest public Void execute() throws Throwable { // This is what ScriptNode.createThumbnail does - ThumbnailDefinition details = thumbnailRegistry.getThumbnailDefinition("doclib"); + ThumbnailDefinition details = thumbnailRegistry.getThumbnailDefinition(TEST_THUMBNAIL); Action action = ThumbnailHelper.createCreateThumbnailAction(details, services); // Queue async creation of thumbnail diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java b/repository/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java index ec2eda7cb6..9c163cd253 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/RenditionService2Test.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -27,6 +27,7 @@ package org.alfresco.repo.rendition2; import com.fasterxml.jackson.databind.ObjectMapper; import org.alfresco.model.ContentModel; +import org.alfresco.repo.content.metadata.AsynchronousExtractor; import org.alfresco.repo.policy.BehaviourFilter; import org.alfresco.repo.policy.PolicyComponent; import org.alfresco.repo.rendition.RenditionPreventionRegistry; @@ -46,11 +47,9 @@ import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.junit.MockitoJUnitRunner; -import org.quartz.CronExpression; import java.io.IOException; import java.util.Collections; -import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -61,7 +60,6 @@ import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.*; /** @@ -90,6 +88,7 @@ public class RenditionService2Test @Mock private RuleService ruleService; @Mock private TransformServiceRegistryImpl transformServiceRegistry; @Mock private TransformReplyProvider transformReplyProvider; + @Mock private AsynchronousExtractor asynchronousExtractor; private NodeRef nodeRef = new NodeRef("workspace://spacesStore/test-id"); private NodeRef nodeRefMissing = new NodeRef("workspace://spacesStore/bad-test-id"); @@ -154,6 +153,7 @@ public class RenditionService2Test renditionService2.setTransformReplyProvider(transformReplyProvider); renditionService2.setEnabled(true); renditionService2.setThumbnailsEnabled(true); + renditionService2.setAsynchronousExtractor(asynchronousExtractor); renditionDefinitionRegistry2.setRenditionConfigDir("alfresco/renditions/test"); renditionDefinitionRegistry2.afterPropertiesSet(); diff --git 
a/repository/src/test/java/org/alfresco/repo/rendition2/TestAsynchronousTransformClient.java b/repository/src/test/java/org/alfresco/repo/rendition2/TestAsynchronousTransformClient.java new file mode 100644 index 0000000000..0119bdad59 --- /dev/null +++ b/repository/src/test/java/org/alfresco/repo/rendition2/TestAsynchronousTransformClient.java @@ -0,0 +1,98 @@ +/* + * #%L + * Alfresco Repository + * %% + * Copyright (C) 2005 - 2019 Alfresco Software Limited + * %% + * This file is part of the Alfresco software. + * If the software was purchased under a paid Alfresco license, the terms of + * the paid license agreement will prevail. Otherwise, the software is + * provided under the following open source license terms: + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L% + */ +package org.alfresco.repo.rendition2; + +import org.alfresco.repo.content.transform.UnsupportedTransformationException; +import org.alfresco.service.cmr.repository.ContentIOException; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.NodeRef; + +import java.io.ByteArrayInputStream; + +import static org.alfresco.model.ContentModel.PROP_CONTENT; +import static org.alfresco.repo.rendition2.TestSynchronousTransformClient.doTest; +import static org.alfresco.repo.rendition2.TestSynchronousTransformClient.isATest; + +/** + * @author adavis + */ +public class TestAsynchronousTransformClient implements TransformClient +{ + private ContentService contentService; + private TransformClient delegate; + private RenditionService2Impl renditionService2; + + public TestAsynchronousTransformClient(ContentService contentService, TransformClient delegate, + RenditionService2Impl renditionService2) + { + this.contentService = contentService; + this.delegate = delegate; + this.renditionService2 = renditionService2; + } + + @Override + public void checkSupported(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String sourceMimetype, + long sourceSizeInBytes, String contentUrl) + { + String targetMimetype = renditionDefinition.getTargetMimetype(); + if (!isATest(sourceMimetype, targetMimetype)) + { + delegate.checkSupported(sourceNodeRef, renditionDefinition, sourceMimetype, sourceSizeInBytes, contentUrl); + } + } + + @Override + public void transform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String user, + int sourceContentHashCode) + throws UnsupportedTransformationException, ContentIOException + { + ContentReader reader = contentService.getReader(sourceNodeRef, PROP_CONTENT); + String sourceMimetype = reader.getMimetype(); + String targetMimetype = 
renditionDefinition.getTargetMimetype(); + if (isATest(sourceMimetype, targetMimetype)) + { + ContentWriter writer = contentService.getTempWriter(); + writer.setMimetype(targetMimetype); + doTest(sourceMimetype, targetMimetype, writer, + new TestSynchronousTransformClient.TestTransformClientCallback() + { + @Override + public void successfulTransform(ContentWriter writer) + { + ByteArrayInputStream inputStream = new ByteArrayInputStream("SUCCESS".getBytes()); + renditionService2.consume(sourceNodeRef, inputStream, renditionDefinition, + sourceContentHashCode); + } + }); + } + else + { + delegate.transform(sourceNodeRef, renditionDefinition, user, sourceContentHashCode); + } + } +} diff --git a/repository/src/test/java/org/alfresco/repo/rendition2/TestSynchronousTransformClient.java b/repository/src/test/java/org/alfresco/repo/rendition2/TestSynchronousTransformClient.java index 4f10c6ad55..e4538efdf1 100644 --- a/repository/src/test/java/org/alfresco/repo/rendition2/TestSynchronousTransformClient.java +++ b/repository/src/test/java/org/alfresco/repo/rendition2/TestSynchronousTransformClient.java @@ -59,8 +59,7 @@ public class TestSynchronousTransformClient implements SynchronousTransformCl Map actualOptions, String transformName, NodeRef sourceNodeRef) { boolean supported = true; - if (!sourceMimetype.equals(TEST_FAILING_MIME_TYPE) && !sourceMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE) && - !targetMimetype.equals(TEST_FAILING_MIME_TYPE) && !targetMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE)) + if (!isATest(sourceMimetype, targetMimetype)) { supported = delegate.isSupported(sourceMimetype, sourceSizeInBytes, contentUrl, targetMimetype, actualOptions, transformName, sourceNodeRef); @@ -73,11 +72,46 @@ public class TestSynchronousTransformClient implements SynchronousTransformCl { String sourceMimetype = reader.getMimetype(); String targetMimetype = writer.getMimetype(); - if (sourceMimetype.equals(TEST_FAILING_MIME_TYPE) || 
targetMimetype.equals(TEST_FAILING_MIME_TYPE)) + if (isATest(sourceMimetype, targetMimetype)) + { + doTest(sourceMimetype, targetMimetype, writer, new TestTransformClientCallback()); + } + else + { + delegate.transform(reader, writer, actualOptions, transformName, sourceNodeRef); + } + } + + static boolean isATest(String sourceMimetype, String targetMimetype) + { + return isFailingTest(sourceMimetype, targetMimetype) || + isLongRunningTest(sourceMimetype, targetMimetype) || + isUserTest(sourceMimetype, targetMimetype); + } + + static boolean isFailingTest(String sourceMimetype, String targetMimetype) + { + return sourceMimetype.equals(TEST_FAILING_MIME_TYPE) || targetMimetype.equals(TEST_FAILING_MIME_TYPE); + } + + static boolean isLongRunningTest(String sourceMimetype, String targetMimetype) + { + return sourceMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE) || targetMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE); + } + + static boolean isUserTest(String sourceMimetype, String targetMimetype) + { + return sourceMimetype.equals(TEST_USER_MIME_TYPE) || targetMimetype.equals(TEST_USER_MIME_TYPE); + } + + static void doTest(String sourceMimetype, String targetMimetype, ContentWriter writer, + TestTransformClientCallback callback) + { + if (isFailingTest(sourceMimetype, targetMimetype)) { throw new ContentServiceTransientException("Transformation intentionally failed for test purposes."); } - else if (sourceMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE) || targetMimetype.equals(TEST_LONG_RUNNING_MIME_TYPE)) + else if (isLongRunningTest(sourceMimetype, targetMimetype)) { try { @@ -87,9 +121,9 @@ public class TestSynchronousTransformClient implements SynchronousTransformCl { e.printStackTrace(); } - writer.putContent("SUCCESS"); + callback.successfulTransform(writer); } - else if (sourceMimetype.equals(TEST_USER_MIME_TYPE) || targetMimetype.equals(TEST_USER_MIME_TYPE)) + else if (isUserTest(sourceMimetype, targetMimetype)) { String username = 
AuthenticationUtil.getFullyAuthenticatedUser(); if (!EXPECTED_USER.equals(username)) @@ -97,11 +131,7 @@ public class TestSynchronousTransformClient implements SynchronousTransformCl throw new ContentIOException( "Expected username '" + EXPECTED_USER + "' but found '" + username + "'"); } - writer.putContent("SUCCESS"); - } - else - { - delegate.transform(reader, writer, actualOptions, transformName, sourceNodeRef); + callback.successfulTransform(writer); } } @@ -110,4 +140,12 @@ public class TestSynchronousTransformClient implements SynchronousTransformCl { return delegate.getName(); } + + static class TestTransformClientCallback + { + public void successfulTransform(ContentWriter writer) + { + writer.putContent("SUCCESS"); + } + } } diff --git a/repository/src/test/java/org/alfresco/repo/thumbnail/ThumbnailServiceImplTest.java b/repository/src/test/java/org/alfresco/repo/thumbnail/ThumbnailServiceImplTest.java index 2e42dd7b84..eb40c6cf1c 100644 --- a/repository/src/test/java/org/alfresco/repo/thumbnail/ThumbnailServiceImplTest.java +++ b/repository/src/test/java/org/alfresco/repo/thumbnail/ThumbnailServiceImplTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2005 - 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * If the software was purchased under a paid Alfresco license, the terms of @@ -125,6 +125,11 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest { private static Log logger = LogFactory.getLog(ThumbnailServiceImplTest.class); + /** + * A test Thumbnail that is not known to the new RenditionService2, so is processed the very old way.
+ */ + public static final String TEST_THUMBNAIL = "testThumbnail"; + private NodeService secureNodeService; private RenditionService renditionService; private ThumbnailService thumbnailService; @@ -171,8 +176,26 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest this.folder = this.secureNodeService.createNode(this.rootNodeRef, ContentModel.ASSOC_CHILDREN, QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "testFolder"), ContentModel.TYPE_FOLDER) .getChildRef(); + + ThumbnailRegistry thumbnailRegistry = thumbnailService.getThumbnailRegistry(); + createTestThumbnail(thumbnailRegistry); } - + + public static void createTestThumbnail(ThumbnailRegistry thumbnailRegistry) + { + // Create a thumbnail that RenditionService2 knows nothing about so cannot process. + if (thumbnailRegistry.getThumbnailDefinition(TEST_THUMBNAIL) == null) + { + ThumbnailDefinition doclib = thumbnailRegistry.getThumbnailDefinition("doclib"); + ThumbnailDefinition testThumbnailDefinition = new ThumbnailDefinition(doclib.getMimetype(), doclib.getTransformationOptions(), TEST_THUMBNAIL); + testThumbnailDefinition.setFailureHandlingOptions(doclib.getFailureHandlingOptions()); + testThumbnailDefinition.setPlaceHolderResourcePath(doclib.getPlaceHolderResourcePath()); + testThumbnailDefinition.setMimeAwarePlaceHolderResourcePath(doclib.getMimeAwarePlaceHolderResourcePath()); + testThumbnailDefinition.setRunAs(doclib.getRunAs()); + thumbnailRegistry.addThumbnailDefinition(testThumbnailDefinition); + } + } + private void checkTransformer() { if (!synchronousTransformClient.isSupported(MimetypeMap.MIMETYPE_IMAGE_JPEG, -1, null, @@ -387,8 +410,8 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest { public Void execute() throws Throwable { - ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition("doclib"); - + ThumbnailDefinition thumbnailDef = 
thumbnailService.getThumbnailRegistry().getThumbnailDefinition(TEST_THUMBNAIL); + Action createThumbnailAction = ThumbnailHelper.createCreateThumbnailAction(thumbnailDef, services); actionService.executeAction(createThumbnailAction, corruptNode, true, true); return null; @@ -409,17 +432,17 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest Map failedThumbnails = thumbnailService.getFailedThumbnails(corruptNode); assertEquals("Wrong number of failed thumbnails", 1, failedThumbnails.size()); - assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey("doclib")); - final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get("doclib"); + assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey(TEST_THUMBNAIL)); + final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get(TEST_THUMBNAIL); assertNotNull("Failure info was null", doclibFailureInfo); assertEquals("Failure count was wrong.", 1, doclibFailureInfo.getFailureCount()); - assertEquals("thumbnail name was wrong.", "doclib", doclibFailureInfo.getThumbnailDefinitionName()); + assertEquals("thumbnail name was wrong.", TEST_THUMBNAIL, doclibFailureInfo.getThumbnailDefinitionName()); return null; } }); - // If you uncomment this line and set the timeout to a value greater than ${system.thumbnail.minimum.retry.period} * 1000. + // If you uncomment this line and set the timeout to a value greater than ${system.thumbnail.retryPeriod} * 1000. // Then the retry period will have passed, the below re-thumbnail attempt will be made and the test will fail with a // failureCount == 2. 
// @@ -431,7 +454,7 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest { public Void execute() throws Throwable { - ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition("doclib"); + ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition(TEST_THUMBNAIL); Action createThumbnailAction = ThumbnailHelper.createCreateThumbnailAction(thumbnailDef, services); actionService.executeAction(createThumbnailAction, corruptNode, true, true); @@ -448,11 +471,11 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest Map failedThumbnails = thumbnailService.getFailedThumbnails(corruptNode); assertEquals("Wrong number of failed thumbnails", 1, failedThumbnails.size()); - assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey("doclib")); - final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get("doclib"); + assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey(TEST_THUMBNAIL)); + final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get(TEST_THUMBNAIL); assertNotNull("Failure info was null", doclibFailureInfo); assertEquals("Failure count was wrong.", 1, doclibFailureInfo.getFailureCount()); - assertEquals("thumbnail name was wrong.", "doclib", doclibFailureInfo.getThumbnailDefinitionName()); + assertEquals("thumbnail name was wrong.", TEST_THUMBNAIL, doclibFailureInfo.getThumbnailDefinitionName()); return null; } @@ -498,7 +521,7 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest { public Void execute() throws Throwable { - ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition("doclib"); + ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition(TEST_THUMBNAIL); Action createThumbnailAction = ThumbnailHelper.createCreateThumbnailAction(thumbnailDef, services); 
actionService.executeAction(createThumbnailAction, corruptNode, true, true); return null; @@ -518,8 +541,8 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest Map failedThumbnails = thumbnailService.getFailedThumbnails(corruptNode); assertEquals("Wrong number of failed thumbnails", 1, failedThumbnails.size()); - assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey("doclib")); - final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get("doclib"); + assertTrue("Missing QName for failed thumbnail", failedThumbnails.containsKey(TEST_THUMBNAIL)); + final FailedThumbnailInfo doclibFailureInfo = failedThumbnails.get(TEST_THUMBNAIL); assertNotNull("Failure info was null", doclibFailureInfo); return doclibFailureInfo.getFailedThumbnailNode(); @@ -573,7 +596,7 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest { public Void execute() throws Throwable { - ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition("doclib"); + ThumbnailDefinition thumbnailDef = thumbnailService.getThumbnailRegistry().getThumbnailDefinition(TEST_THUMBNAIL); Action createThumbnailAction = ThumbnailHelper.createCreateThumbnailAction(thumbnailDef, services); actionService.executeAction(createThumbnailAction, testNode, true, true); @@ -1337,24 +1360,24 @@ public class ThumbnailServiceImplTest extends BaseAlfrescoSpringTest concurrentWork.verify(source); } - final int numIterations = 20; + final int multiples = 5; // Wait for thumbnail(s) to finish long endTime = (new Date()).getTime(); for (final ExpectedThumbnail expectedThumbnail : expectedThumbnails) { NodeRef thumbnail = null; - while ((endTime - startTime) < (TEST_LONG_RUNNING_TRANSFORM_TIME * numIterations)) { + while ((endTime - startTime) < (TEST_LONG_RUNNING_TRANSFORM_TIME * multiples)) { thumbnail = transactionService.getRetryingTransactionHelper() .doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback() { 
public NodeRef execute() throws Throwable { return thumbnailService.getThumbnailByName(source, ContentModel.PROP_CONTENT, expectedThumbnail.getThumbnailName()); } - }, false, true); + }, true, true); if (thumbnail == null) { Thread.sleep(200); logger.debug("Elapsed " + (endTime - startTime) + " ms of " - + TEST_LONG_RUNNING_TRANSFORM_TIME * numIterations + " ms waiting for " + + TEST_LONG_RUNNING_TRANSFORM_TIME * multiples + " ms waiting for " + expectedThumbnail.getThumbnailName()); endTime = (new Date()).getTime(); } else { diff --git a/repository/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java b/repository/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java index 7f89ff7df5..f8907efa57 100644 --- a/repository/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java +++ b/repository/src/test/java/org/alfresco/transform/client/registry/LocalTransformServiceRegistryConfigTest.java @@ -2,7 +2,7 @@ * #%L * Alfresco Repository * %% - * Copyright (C) 2019 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* If the software was purchased under a paid Alfresco license, the terms of @@ -242,10 +242,13 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg targetMimetype.add("image/gif"); targetMimetype.add("image/tiff"); imagemagickSupportedTransformation.put("image/tiff", targetMimetype); + targetMimetype = new ArrayList<>(targetMimetype); targetMimetype.add("image/png"); targetMimetype.add("image/jpeg"); imagemagickSupportedTransformation.put("image/gif", targetMimetype); imagemagickSupportedTransformation.put("image/jpeg", targetMimetype); + targetMimetype = new ArrayList<>(targetMimetype); + targetMimetype.add("alfresco-metadata-extract"); // Metadata extract and embed types should be excluded from pipeline cartesian products imagemagickSupportedTransformation.put("image/png", targetMimetype); targetMimetype = new ArrayList<>(); targetMimetype.add("target1"); @@ -333,8 +336,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg @Override public void testJsonConfig() throws IOException { - // Not 60, 60 as we have added source->target1..3 to three transformers - internalTestJsonConfig(63, 69); + internalTestJsonConfig(64, 70); } @Test @@ -368,7 +370,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg switch (t.transformer.getTransformerName()) { case "imagemagick": - assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 17, t.transformer.getSupportedSourceAndTargetList().size()); + assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 18, t.transformer.getSupportedSourceAndTargetList().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 1, t.transformer.getTransformOptions().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 6, countTopLevelOptions(t.transformer.getTransformOptions())); 
assertEquals(t.transformer.getTransformerName() + " expected to not be a transformer pipeline", t.transformer.getTransformerPipeline().size(), 0); @@ -428,6 +430,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg break; case "officeToImageViaPdf": + // Note we will get 35 entries in getSupportedSourceAndTargetList() if the metadata transforms are not excluded assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 28, t.transformer.getSupportedSourceAndTargetList().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 2, t.transformer.getTransformOptions().size()); assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 11, countTopLevelOptions(t.transformer.getTransformOptions())); diff --git a/repository/src/test/resources/alfresco/local-transform-service-config-test.json b/repository/src/test/resources/alfresco/local-transform-service-config-test.json index d98a14c3be..337c468bfc 100644 --- a/repository/src/test/resources/alfresco/local-transform-service-config-test.json +++ b/repository/src/test/resources/alfresco/local-transform-service-config-test.json @@ -55,6 +55,7 @@ {"sourceMediaType": "image/png", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "image/png", "targetMediaType": "image/png" }, {"sourceMediaType": "image/png", "targetMediaType": "image/tiff"}, + {"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"}, {"sourceMediaType": "image/tiff", "targetMediaType": "image/gif" }, {"sourceMediaType": "image/tiff", "targetMediaType": "image/tiff"}, diff --git a/repository/src/test/resources/log4j.properties b/repository/src/test/resources/log4j.properties index 1ada95c6bb..b9510ac529 100644 --- a/repository/src/test/resources/log4j.properties +++ b/repository/src/test/resources/log4j.properties @@ -253,11 +253,13 @@ log4j.logger.org.keycloak=debug # Renditions and 
Transforms log4j.logger.org.alfresco.repo.content.transform.TransformerDebug=debug -#log4j.logger.org.alfresco.repo.rendition2=debug +log4j.logger.org.alfresco.repo.rendition2=debug #log4j.logger.org.alfresco.repo.rendition2.LocalTransformClient=debug #log4j.logger.org.alfresco.repo.rendition2.LegacyTransformClient=debug #log4j.logger.org.alfresco.repo.rendition.RenditionServiceImpl=debug #log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformClient=debug +log4j.logger.org.alfresco.repo.thumbnail.ThumbnailServiceImplTest=DEBUG +log4j.logger.org.alfresco.repo.rendition2.RenditionService2Impl=DEBUG #log4j.logger.org.alfresco.repo.content.transform.LocalTransformServiceRegistry=debug #log4j.logger.org.alfresco.enterprise.repo.rendition2.RemoteTransformServiceRegistry=debug diff --git a/repository/src/test/resources/org/alfresco/repo/rendition2/test-transform-context.xml b/repository/src/test/resources/org/alfresco/repo/rendition2/test-transform-context.xml index 035a9313fb..866e77805e 100644 --- a/repository/src/test/resources/org/alfresco/repo/rendition2/test-transform-context.xml +++ b/repository/src/test/resources/org/alfresco/repo/rendition2/test-transform-context.xml @@ -9,6 +9,12 @@ + + + + + + diff --git a/repository/src/test/resources/quick/quick.eml_metadata.json b/repository/src/test/resources/quick/quick.eml_metadata.json new file mode 100644 index 0000000000..873f2bb1de --- /dev/null +++ b/repository/src/test/resources/quick/quick.eml_metadata.json @@ -0,0 +1,15 @@ +{ + "{http://www.alfresco.org/model/content/1.0}addressee" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/content/1.0}addressees" : "Nevin Nollop ", + "{http://www.alfresco.org/model/imap/1.0}dateSent" : 1086351802000, + "{http://www.alfresco.org/model/imap/1.0}messageTo" : "Nevin Nollop ", + "{http://www.alfresco.org/model/imap/1.0}messageId" : 
"<20040604122322.GV1905@phoenix.home>", + "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageCc" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}sentdate" : 1086351802000, + "{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog", + "{http://www.alfresco.org/model/imap/1.0}messageFrom" : "Nevin Nollop ", + "{http://www.alfresco.org/model/content/1.0}originator" : "Nevin Nollop " +} \ No newline at end of file diff --git a/repository/src/test/resources/quick/quick.html_metadata.json b/repository/src/test/resources/quick/quick.html_metadata.json new file mode 100644 index 0000000000..99b5abf9cd --- /dev/null +++ b/repository/src/test/resources/quick/quick.html_metadata.json @@ -0,0 +1,5 @@ +{ + "{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop", + "{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog" +} \ No newline at end of file diff --git a/repository/src/test/resources/quick/quick.msg_metadata.json b/repository/src/test/resources/quick/quick.msg_metadata.json new file mode 100644 index 0000000000..f5c047860a --- /dev/null +++ b/repository/src/test/resources/quick/quick.msg_metadata.json @@ -0,0 +1,9 @@ +{ + "{http://www.alfresco.org/model/content/1.0}addressee" : "mark.rogers@alfresco.com", + "{http://www.alfresco.org/model/content/1.0}description" : "This is a quick test", + "{http://www.alfresco.org/model/content/1.0}addressees" : [ "mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com" ], + "{http://www.alfresco.org/model/content/1.0}sentdate" : "2013-01-18T13:44:20Z", + 
"{http://www.alfresco.org/model/content/1.0}subjectline" : "This is a quick test", + "{http://www.alfresco.org/model/content/1.0}author" : "Mark Rogers", + "{http://www.alfresco.org/model/content/1.0}originator" : "Mark Rogers" +} \ No newline at end of file diff --git a/repository/src/test/resources/quick/unknown_namespace_metadata.json b/repository/src/test/resources/quick/unknown_namespace_metadata.json new file mode 100644 index 0000000000..719b134a07 --- /dev/null +++ b/repository/src/test/resources/quick/unknown_namespace_metadata.json @@ -0,0 +1,5 @@ +{ + "sys:overwritePolicy": "PRAGMATIC", + "{http://www.unknown}name": "ignored", + "{http://www.alfresco.org/model/content/1.0}author": "Used" +} \ No newline at end of file