REPO-4334 Move metadata extraction into T-Engines (#1015)

* ContentMetadataExtracter (the Action service Executer):
        * Code refactored so that post processing of extracted properties can be called after a
          transform has taken place in an async call. Previously done in the same Thread.
        * The source size is passed to the metadataExtracterRegistry as this is needed to select
          the transform.
        * Passes the source nodeRef to the metadataExtractor, so that it is possible to update
          the correct node after the async transform call.
    * AbstractMappingMetadataExtracter (the base class for all extractors):
        * Post extract processing extracted into separate methods.
        * NodeRef made available to sub classes.
    * AsynchronousExtractor (new metadata extractor and embedder) that will use
      a T-Engine.
    * Set targetExtension in transform for metadata extract and embed requests to T-Engines
      in both LocalTransforms (community) and RenditionEventProducer (enterprise).
    * RenditionService2Impl (used to request and respond to async transform requests).
        * Add consumeExtractedMetadata and consumeEmbeddedMetadata methods, which
          handle the response, normally by calling AsynchronousExtractor.
    * Discovered that the JodConverterMetadataExtracter was superseded by the
      TikaPoweredMetadataExtracter for all the types that it supports in 6.0.1 and could have been
      deprecated at that time as part of REPO-2910. It will now be deprecated so that it can be
      removed in 7.0.0.

Additional tests are being added to AsynchronousExtractorTest for overwritePolicy, enableStringTagging and carryAspectProperties in a separate PR, as these will replace tests that are likely to be removed when the local extractors are deleted.
This commit is contained in:
Alan Davis
2020-06-16 18:12:30 +01:00
committed by GitHub
parent a70eb5f045
commit 63ace3abd8
63 changed files with 2518 additions and 835 deletions

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.action.executer;
import java.io.Serializable;
@@ -30,7 +30,6 @@ import java.util.List;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.executer.ActionExecuterAbstractBase;
import org.alfresco.repo.content.metadata.MetadataEmbedder;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.service.cmr.action.Action;
@@ -108,7 +107,8 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase
return;
}
String mimetype = reader.getMimetype();
MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype);
long sourceSizeInBytes = reader.getSize();
MetadataEmbedder embedder = metadataExtracterRegistry.getEmbedder(mimetype, sourceSizeInBytes);
if (embedder == null)
{
if(logger.isDebugEnabled())
@@ -136,7 +136,7 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase
try
{
embedder.embed(nodeProperties, reader, writer);
embedder.embed(actionedUponNodeRef, nodeProperties, reader, writer);
}
catch (Throwable e)
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -86,22 +86,23 @@ import org.apache.commons.logging.LogFactory;
public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
private static Log logger = LogFactory.getLog(ContentMetadataExtracter.class);
public static final String EXECUTOR_NAME = "extract-metadata";
private NodeService nodeService;
private ContentService contentService;
private DictionaryService dictionaryService;
private TaggingService taggingService;
private MetadataExtracterRegistry metadataExtracterRegistry;
private boolean carryAspectProperties = true;
private boolean enableStringTagging = false;
// Default list of separators (when enableStringTagging is enabled)
protected List<String> stringTaggingSeparators = Arrays.asList(",", ";", "\\|");
public final static List<String> DEFAULT_STRING_TAGGING_SEPARATORS = Arrays.asList(",", ";", "\\|");
protected List<String> stringTaggingSeparators = DEFAULT_STRING_TAGGING_SEPARATORS;
public ContentMetadataExtracter()
{
}
@@ -121,7 +122,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
this.contentService = contentService;
}
/**
* @param dictService The DictionaryService to set.
*/
@@ -148,7 +149,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
/**
* Whether or not aspect-related properties must be carried to the new version of the node
*
*
* @param carryAspectProperties <tt>true</tt> (default) to carry all aspect-linked
* properties forward. <tt>false</tt> will clean the
* aspect of any unextracted values.
@@ -157,12 +158,12 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
this.carryAspectProperties = carryAspectProperties;
}
/**
* Whether or not to enable mapping of simple strings to cm:taggable tags
*
* @param enableStringTagging <tt>true</tt> find or create tags for each string
* mapped to cm:taggable. <tt>false</tt> (default)
*
* @param enableStringTagging <tt>true</tt> find or create tags for each string
* mapped to cm:taggable. <tt>false</tt> (default)
* ignore mapping strings to tags.
*/
public void setEnableStringTagging(boolean enableStringTagging)
@@ -172,7 +173,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
/**
* List of string separators - note: all will be applied to a given string
*
*
* @param stringTaggingSeparators
*/
public void setStringTaggingSeparators(List<String> stringTaggingSeparators)
@@ -188,14 +189,21 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
* the taggable property should still contain raw string values.
* <p>
* Mixing of NodeRefs and string values is permitted so each raw value is
* checked for a valid NodeRef representation and if so, converts to a NodeRef,
* checked for a valid NodeRef representation and if so, converts to a NodeRef,
* if not, adds as a tag via the {@link TaggingService}.
*
*
* @param actionedUponNodeRef The NodeRef being actioned upon
* @param propertyDef the PropertyDefinition of the taggable property
* @param rawValue the raw value from the metadata extracter
*/
protected void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue)
{
// Delegate to the static overload, passing this instance's node service, configured
// string tagging separators and tagging service.
addTags(actionedUponNodeRef, propertyDef, rawValue, nodeService, stringTaggingSeparators, taggingService);
}
private static void addTags(NodeRef actionedUponNodeRef, PropertyDefinition propertyDef, Serializable rawValue,
NodeService nodeService, List<String> stringTaggingSeparators,
TaggingService taggingService)
{
if (rawValue == null)
{
@@ -231,7 +239,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
logger.trace("adding string tag name'" + tagName + "' (from tag nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
tags.addAll(splitTag(tagName, stringTaggingSeparators));
}
catch (InvalidNodeRefException e)
{
@@ -250,7 +258,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
logger.trace("adding string tag name'" + singleValue + "' to " + actionedUponNodeRef);
}
tags.addAll(splitTag((String)singleValue));
tags.addAll(splitTag((String)singleValue, stringTaggingSeparators));
}
}
else if (singleValue instanceof NodeRef)
@@ -263,7 +271,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
logger.trace("adding string tag name'" + tagName + "' (for nodeRef "+nodeRef+") to " + actionedUponNodeRef);
}
tags.addAll(splitTag(tagName));
tags.addAll(splitTag(tagName, stringTaggingSeparators));
}
}
}
@@ -273,8 +281,8 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
logger.trace("adding string tag name'" + (String)rawValue + "' to " + actionedUponNodeRef);
}
tags.addAll(splitTag((String)rawValue));
tags.addAll(splitTag((String)rawValue, stringTaggingSeparators));
}
if (logger.isDebugEnabled())
@@ -297,6 +305,11 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
}
/**
 * Splits a raw tag string using the separators configured on this instance.
 *
 * @param str the raw string to split
 * @return the list produced by the static overload for {@code str} and
 *         this instance's {@code stringTaggingSeparators}
 */
protected List<String> splitTag(String str)
{
// Delegate to the static overload so the same logic can be used without an instance.
return splitTag(str, stringTaggingSeparators);
}
private static List<String> splitTag(String str, List<String> stringTaggingSeparators)
{
List<String> result = new ArrayList<>();
if ((str != null) && (!str.equals("")))
@@ -323,7 +336,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
return result;
}
/**
* @see org.alfresco.repo.action.executer.ActionExecuter#execute(Action,
* NodeRef)
@@ -347,7 +360,8 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
return;
}
String mimetype = reader.getMimetype();
MetadataExtracter extracter = metadataExtracterRegistry.getExtracter(mimetype);
long sourceSizeInBytes = reader.getSize();
MetadataExtracter extracter = metadataExtracterRegistry.getExtractor(mimetype, sourceSizeInBytes);
if (extracter == null)
{
if(logger.isDebugEnabled())
@@ -372,6 +386,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
try
{
modifiedProperties = extracter.extract(
actionedUponNodeRef,
reader,
/*OverwritePolicy.PRAGMATIC,*/
nodeProperties);
@@ -408,11 +423,22 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
{
return;
}
addExtractedMetadataToNode(actionedUponNodeRef, nodeProperties, modifiedProperties,
nodeService, dictionaryService, taggingService, enableStringTagging, carryAspectProperties,
stringTaggingSeparators);
}
public static void addExtractedMetadataToNode(NodeRef actionedUponNodeRef, Map<QName, Serializable> nodeProperties,
Map<QName, Serializable> modifiedProperties,
NodeService nodeService, DictionaryService dictionaryService,
TaggingService taggingService, boolean enableStringTagging,
boolean carryAspectProperties, List<String> stringTaggingSeparators)
{
// Check that all properties have the appropriate aspect applied
Set<QName> requiredAspectQNames = new HashSet<QName>(3);
Set<QName> aspectPropertyQNames = new HashSet<QName>(17);
/**
* The modified properties contain null values as well. As we are only interested
* in the keys, this will force aspect aspect properties to be removed even if there
@@ -432,9 +458,10 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
if (enableStringTagging && propertyContainerDef.getName().equals(ContentModel.ASPECT_TAGGABLE))
{
Serializable oldValue = nodeProperties.get(propertyQName);
addTags(actionedUponNodeRef, propertyDef, oldValue);
addTags(actionedUponNodeRef, propertyDef, oldValue,
nodeService, stringTaggingSeparators, taggingService);
// Replace the raw value with the created tag NodeRefs
nodeProperties.put(ContentModel.PROP_TAGS,
nodeProperties.put(ContentModel.PROP_TAGS,
nodeService.getProperty(actionedUponNodeRef, ContentModel.PROP_TAGS));
}
else
@@ -447,7 +474,7 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
}
}
}
if (!carryAspectProperties)
{
// Remove any node properties that are defined on the aspects but were not extracted
@@ -465,10 +492,14 @@ public class ContentMetadataExtracter extends ActionExecuterAbstractBase
}
}
}
// The following code can result in a postCommit to extract the metadata again via JavaBehaviour
// (such as ImapContentPolicy.onAddAspect). Not very efficient, but I cannot think of a way to
// avoid it that does not risk memory leaks or disabling behaviour we want.
// Add all the properties to the node BEFORE we add the aspects
nodeService.setProperties(actionedUponNodeRef, nodeProperties);
// Add each of the aspects, as required
for (QName requiredAspectQName : requiredAspectQNames)
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -25,6 +25,33 @@
*/
package org.alfresco.repo.content.metadata;
import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.namespace.InvalidQNameException;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.extensions.surf.util.ISO8601DateFormat;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
@@ -50,32 +77,6 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.namespace.InvalidQNameException;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.extensions.surf.util.ISO8601DateFormat;
/**
* Support class for metadata extracters that support dynamic and config-driven
* mapping between extracted values and model properties. Extraction is broken
@@ -131,7 +132,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
private MetadataExtracterRegistry registry;
private MimetypeService mimetypeService;
private DictionaryService dictionaryService;
protected DictionaryService dictionaryService;
private boolean initialized;
private Set<String> supportedMimetypes;
@@ -232,6 +233,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
this.dictionaryService = dictionaryService;
}
/**
 * @return the value of the {@code supportedMimetypes} field. The field itself is returned,
 *         not a copy. NOTE(review): whether it can still be <tt>null</tt> when this is
 *         called depends on initialisation code not visible here - confirm.
 */
public Set<String> getSupportedMimetypes()
{
return supportedMimetypes;
}
/**
* Set the mimetypes that are supported by the extracter.
*
@@ -278,7 +284,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
return supportedEmbedMimetypes.contains(sourceMimetype);
}
private boolean isEnabled(String mimetype)
protected boolean isEnabled(String mimetype)
{
return properties == null || mimetypeService == null ||
(getBooleanProperty(beanName+".enabled", true) &&
@@ -714,10 +720,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
return readMappingProperties(mappingProperties.entrySet());
}
/**
* A utility method to convert mapping properties entries to the Map form.
*
*
* @see #setMappingProperties(Properties)
*/
private Map<String, Set<QName>> readMappingProperties(Set<Entry<Object, Object>> mappingPropertiesEntries)
@@ -765,8 +771,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
throw new AlfrescoRuntimeException(
"No prefix mapping for extracter property mapping: \n" +
" Extracter: " + this + "\n" +
" Mapping: " + entry);
" Extracter: " + this + "\n" +
" Mapping: " + entry);
}
qnameStr = QName.NAMESPACE_BEGIN + uri + QName.NAMESPACE_END + suffix;
}
@@ -780,8 +786,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
throw new AlfrescoRuntimeException(
"Can't create metadata extracter property mapping: \n" +
" Extracter: " + this + "\n" +
" Mapping: " + entry);
" Extracter: " + this + "\n" +
" Mapping: " + entry);
}
}
if (logger.isTraceEnabled())
@@ -1132,7 +1138,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
@Override
public final Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination)
{
return extract(reader, this.overwritePolicy, destination, this.mapping);
return extract(null, reader, this.overwritePolicy, destination, this.mapping);
}
/**
@@ -1144,7 +1150,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination)
{
return extract(reader, overwritePolicy, destination, this.mapping);
return extract(null, reader, overwritePolicy, destination, this.mapping);
}
/**
@@ -1156,6 +1162,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,
Map<String, Set<QName>> mapping)
{
return extract(null, reader, overwritePolicy, destination, mapping);
}
/**
* {@inheritDoc}
*/
@Override
public Map<QName, Serializable> extract(NodeRef nodeRef, ContentReader reader, Map<QName, Serializable> destination)
{
// Delegate to the full overload using this extracter's own overwrite policy and mapping.
return extract(nodeRef, reader, overwritePolicy, destination, mapping);
}
/**
* {@inheritDoc}
*/
@Override
public Map<QName, Serializable> extract(
NodeRef nodeRef,
ContentReader reader,
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,
Map<String, Set<QName>> mapping)
{
// Done
if (logger.isDebugEnabled())
@@ -1182,12 +1211,13 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
// Check that the content has some meat
if (reader.getSize() > 0 && reader.exists())
{
rawMetadata = extractRaw(reader, getLimits(reader.getMimetype()));
rawMetadata = extractRaw(nodeRef, reader, getLimits(reader.getMimetype()));
}
else
{
rawMetadata = new HashMap<String, Serializable>(1);
}
// Convert to system properties (standalone)
Map<QName, Serializable> systemProperties = mapRawToSystem(rawMetadata);
// Convert the properties according to the dictionary types
@@ -1215,7 +1245,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
// the current mime type is plausible
String typeErrorMessage = null;
String differentType = null;
if(mimetypeService != null)
if (mimetypeService != null)
{
differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
}
@@ -1224,7 +1254,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
logger.info("Unable to verify mimetype of " + reader.getReader() +
" as no MimetypeService available to " + getClass().getName());
}
if(differentType != null)
if (differentType != null)
{
typeErrorMessage = "\n" +
" claimed mime type: " + reader.getMimetype() + "\n" +
@@ -1285,6 +1315,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
Map<QName, Serializable> properties,
ContentReader reader,
ContentWriter writer)
{
embed(null, properties, reader, writer);
}
/**
* {@inheritDoc}
*/
@Override
public void embed(
NodeRef nodeRef,
Map<QName, Serializable> properties,
ContentReader reader,
ContentWriter writer)
{
// Done
if (logger.isDebugEnabled())
@@ -1307,7 +1350,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
try
{
embedInternal(mapSystemToRaw(properties), reader, writer);
embedInternal(nodeRef, mapSystemToRaw(properties), reader, writer);
if(logger.isDebugEnabled())
{
logger.debug("Embedded Metadata into " + writer);
@@ -1472,7 +1515,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
* @return Returns a modified map of properties that have been converted.
*/
@SuppressWarnings("unchecked")
private Map<QName, Serializable> convertSystemPropertyValues(Map<QName, Serializable> systemProperties)
protected Map<QName, Serializable> convertSystemPropertyValues(Map<QName, Serializable> systemProperties)
{
Map<QName, Serializable> convertedProperties = new HashMap<QName, Serializable>(systemProperties.size() + 7);
for (Map.Entry<QName, Serializable> entry : systemProperties.entrySet())
@@ -1500,6 +1543,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
convertedPropertyValue = propertyValue;
}
else if (propertyValue instanceof Long)
{
convertedPropertyValue = new Date((Long)propertyValue);
}
else if (propertyValue instanceof Collection)
{
convertedPropertyValue = (Serializable) makeDates((Collection<String>) propertyValue);
@@ -1518,7 +1565,9 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
StringBuilder mesg = new StringBuilder();
mesg.append("Unable to convert Date property: ").append(propertyQName)
.append(", value: ").append(propertyValue).append(", type: ").append(propertyTypeDef.getName());
.append(", value: ").append(propertyValue).append(" (")
.append(propertyValue.getClass().getSimpleName())
.append("), type: ").append(propertyTypeDef.getName());
logger.warn(mesg.toString());
}
}
@@ -1688,6 +1737,21 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
}
}
// Try milliseconds. This was introduced with T-Engine extractors. Previously Dates would have been
// created and then converted to a Alfresco Date property in a single operation. T-Engines do not know
// about Alfresco Date property formats.
try
{
long ms = Long.parseLong(dateStr);
if (Long.toString(ms).equals(dateStr))
{
date = new Date(ms);
}
}
catch (NumberFormatException ignore)
{
}
if (date == null)
{
// Still no luck
@@ -1982,7 +2046,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
return limits;
}
/**
* <code>Callable</code> wrapper for the
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method
@@ -2026,7 +2090,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits}
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
* {@link AbstractMappingMetadataExtracter#extractRaw(NodeRef, ContentReader, MetadataExtracterLimits)}
*/
private class LimitExceededException extends Exception
{
@@ -2047,19 +2111,17 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
* <p>
* If no timeout limit is defined or is unlimited (-1),
* the <code>extractRaw</code> method is called directly.
*
*
* @param nodeRef the node being acted on.
* @param reader the document to extract the values from. This stream provided by
* the reader must be closed if accessed directly.
* @param limits the limits to impose on the extraction
* @return Returns a map of document property values keyed by property name.
* @throws Throwable All exception conditions can be handled.
*/
private Map<String, Serializable> extractRaw(
private Map<String, Serializable> extractRaw(NodeRef nodeRef,
ContentReader reader, MetadataExtracterLimits limits) throws Throwable
{
FutureTask<Map<String, Serializable>> task = null;
StreamAwareContentReaderProxy proxiedReader = null;
if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE)
{
throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
@@ -2084,7 +2146,16 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
}
}
return extractRawInThread(nodeRef, reader, limits);
}
protected Map<String, Serializable> extractRawInThread(NodeRef nodeRef, ContentReader reader,
MetadataExtracterLimits limits)
throws Throwable
{
FutureTask<Map<String, Serializable>> task = null;
StreamAwareContentReaderProxy proxiedReader = null;
try
{
proxiedReader = new StreamAwareContentReaderProxy(reader);
@@ -2119,14 +2190,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
}
finally
{
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
if (logger.isDebugEnabled())
{
logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
}
extractRawThreadFinished();
}
}
/**
 * Called when a raw extraction has finished. Decrements the shared
 * {@code CONCURRENT_EXTRACTIONS_COUNT} counter and, when debug logging is enabled,
 * logs the number of extractions still in progress.
 */
protected void extractRawThreadFinished()
{
    final int remainingExtractions = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
    if (!logger.isDebugEnabled())
    {
        return;
    }
    logger.debug("Extraction finalized. Remaining concurrent extraction : " + remainingExtractions);
}
/**
* Override to provide the raw extracted metadata values. An extracter should extract
* as many of the available properties as is realistically possible. Even if the
@@ -2162,6 +2238,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
*/
protected abstract Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable;
/**
 * Variant of {@code embedInternal} that also receives the node being acted on.
 * This implementation does not use {@code nodeRef}; it simply delegates to
 * {@code embedInternal(metadata, reader, writer)}.
 * NOTE(review): presumably exists so that sub classes needing the node can override it - confirm.
 *
 * @param nodeRef the node being acted on (ignored by this implementation)
 * @param metadata the raw metadata values to embed, keyed by raw property name
 * @param reader passed unchanged to the delegate
 * @param writer passed unchanged to the delegate
 * @throws Throwable anything thrown by the delegate
 */
protected void embedInternal(NodeRef nodeRef, Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer) throws Throwable
{
embedInternal(metadata, reader, writer);
}
/**
* Override to embed metadata values. An extracter should embed
* as many of the available properties as is realistically possible. Even if the
@@ -2182,4 +2263,46 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
// TODO make this an abstract method once more extracters support embedding
}
// Originally in TikaPoweredMetadataExtracter
/**
 * Builds a map of String values from a map of raw metadata values, converting each
 * value with {@link DefaultTypeConverter}.
 * <p>
 * Entries with a <tt>null</tt> value are skipped. Values that fail conversion are logged
 * at info level and skipped. When a value is a {@link Collection}, every element is
 * converted and stored under the same key, so only the last successfully converted
 * element is kept. NOTE(review): confirm that dropping the earlier elements is intended.
 *
 * @param properties the raw metadata values keyed by name
 * @return a new map holding the String form of each convertible value
 */
public static Map<String, String> convertMetadataToStrings(Map<String, Serializable> properties)
{
    Map<String, String> stringValues = new HashMap<>();
    for (Map.Entry<String, Serializable> property : properties.entrySet())
    {
        String name = property.getKey();
        Serializable rawValue = property.getValue();
        if (rawValue instanceof Collection<?>)
        {
            for (Object element : (Collection<?>) rawValue)
            {
                putAsString(stringValues, name, element);
            }
        }
        else if (rawValue != null)
        {
            putAsString(stringValues, name, rawValue);
        }
    }
    return stringValues;
}

/**
 * Converts a single value to a String and stores it under the given key, logging
 * (rather than propagating) a failed conversion.
 */
private static void putAsString(Map<String, String> target, String metadataKey, Object value)
{
    try
    {
        target.put(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value));
    }
    catch (TypeConversionException e)
    {
        TikaPoweredMetadataExtracter.logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
    }
}
}

View File

@@ -0,0 +1,537 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.executer.ContentMetadataExtracter;
import org.alfresco.repo.content.transform.TransformerDebug;
import org.alfresco.repo.rendition2.RenditionService2;
import org.alfresco.repo.rendition2.TransformDefinition;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.tagging.TaggingService;
import org.alfresco.service.namespace.NamespaceException;
import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName;
import org.alfresco.service.transaction.TransactionService;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.springframework.dao.ConcurrencyFailureException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.ExecutorService;
import static org.alfresco.repo.rendition2.RenditionDefinition2.TIMEOUT;
import static org.alfresco.repo.rendition2.TransformDefinition.getTransformName;
/**
* Requests an extract of metadata via a remote async transform using
* {@link RenditionService2#transform(NodeRef, TransformDefinition)}. The properties that will be extracted are defined
* by the transform. This allows out of process metadata extracts to be defined without the need to apply an AMP.
* The actual transform is a request to go from the source mimetype to {@code "alfresco-metadata-extract"}. The
* resulting transform is a Map in json of properties and values to be set on the source node.
* <p>
* As with other sub-classes of {@link AbstractMappingMetadataExtracter} it also supports embedding of metadata in
* a source node. In this case the remote async transform states that it supports a transform from a source mimetype
* to {@code "alfresco-metadata-embed"}. The resulting transform is a replacement for the content of the node.
*
* @author adavis
*/
public class AsynchronousExtractor extends AbstractMappingMetadataExtracter
{
// Labels naming the kind of request; passed to transformInBackground and used in trace logging.
private static final String EXTRACT = "extract";
private static final String EMBED = "embed";
// Pseudo target mimetypes used to request a metadata extract or a metadata embed transform.
private static final String MIMETYPE_METADATA_EXTRACT = "alfresco-metadata-extract";
private static final String MIMETYPE_METADATA_EMBED = "alfresco-metadata-embed";
// Transform option name under which the metadata to embed is passed as a JSON string.
private static final String METADATA = "metadata";
// Returned by extractRawInThread: the extract is only requested there, not completed.
private static final Map<String, Serializable> EMPTY_METADATA = Collections.emptyMap();
// Reads the JSON result of an extract and writes the JSON sent with an embed request.
private final ObjectMapper jsonObjectMapper = new ObjectMapper();
// Collaborators supplied through the setters of this class.
private NodeService nodeService;
private NamespacePrefixResolver namespacePrefixResolver;
private TransformerDebug transformerDebug;
private RenditionService2 renditionService2;
private ContentService contentService;
private TransactionService transactionService;
private TransformServiceRegistry transformServiceRegistry;
private TaggingService taggingService;
/**
 * @param nodeService used to read the current properties of the node being updated.
 */
public void setNodeService(NodeService nodeService)
{
this.nodeService = nodeService;
}
/**
 * @param namespacePrefixResolver used to turn property QNames into prefixed strings.
 */
public void setNamespacePrefixResolver(NamespacePrefixResolver namespacePrefixResolver)
{
this.namespacePrefixResolver = namespacePrefixResolver;
}
/**
 * @param transformerDebug used to report the changed properties when transformer debug is enabled.
 */
public void setTransformerDebug(TransformerDebug transformerDebug)
{
this.transformerDebug = transformerDebug;
}
/**
 * @param renditionService2 used to request the extract and embed transforms.
 */
public void setRenditionService2(RenditionService2 renditionService2)
{
this.renditionService2 = renditionService2;
}
/**
 * @param contentService used to obtain the reader and writer when replacing a node's content.
 */
public void setContentService(ContentService contentService)
{
this.contentService = contentService;
}
/**
 * @param transactionService supplies the retrying transaction helper that wraps repository work.
 */
public void setTransactionService(TransactionService transactionService)
{
this.transactionService = transactionService;
}
/**
 * @param transformServiceRegistry asked whether an extract or embed transform is supported.
 */
public void setTransformServiceRegistry(TransformServiceRegistry transformServiceRegistry)
{
this.transformServiceRegistry = transformServiceRegistry;
}
/**
 * @param taggingService passed on to {@code ContentMetadataExtracter.addExtractedMetadataToNode}.
 */
public void setTaggingService(TaggingService taggingService)
{
this.taggingService = taggingService;
}
/**
 * Returns an empty, non-null mapping; this extractor defines no mappings of its own.
 *
 * @return an empty map.
 */
@Override
protected Map<String, Set<QName>> getDefaultMapping()
{
return Collections.emptyMap(); // Mappings are done by the transform, but a non null value must be returned.
}
/**
 * Indicates whether a metadata extract can be requested for the source.
 *
 * @param sourceMimetype the mimetype of the source content.
 * @param sourceSizeInBytes the size of the source content.
 * @return {@code true} if {@code isEnabled(sourceMimetype)} holds and the registry supports a transform to
 *         {@code "alfresco-metadata-extract"}.
 */
public boolean isSupported(String sourceMimetype, long sourceSizeInBytes)
{
return isEnabled(sourceMimetype) && isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EXTRACT);
}
/**
 * Indicates whether a metadata embed can be requested for the source.
 * NOTE(review): unlike {@link #isSupported(String, long)} this does not consult {@code isEnabled} - confirm
 * that is intended.
 *
 * @param sourceMimetype the mimetype of the source content.
 * @param sourceSizeInBytes the size of the source content.
 * @return {@code true} if the registry supports a transform to {@code "alfresco-metadata-embed"}.
 */
public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes)
{
return isSupported(sourceMimetype, sourceSizeInBytes, MIMETYPE_METADATA_EMBED);
}
/**
 * Asks the transform service registry whether the source can be transformed to the target mimetype, with
 * no transform options. The target mimetype is also passed as the final argument of the registry call.
 */
private boolean isSupported(String sourceMimetype, long sourceSizeInBytes, String targetMimetype)
{
return transformServiceRegistry.isSupported(sourceMimetype, sourceSizeInBytes, targetMimetype, Collections.emptyMap(), targetMimetype);
}
/**
 * @param targetMimetype the target mimetype of a transform, may be {@code null}.
 * @return {@code true} if the target is exactly {@code "alfresco-metadata-extract"}.
 */
public static boolean isMetadataExtractMimetype(String targetMimetype)
{
return MIMETYPE_METADATA_EXTRACT.equals(targetMimetype);
}
/**
 * @param targetMimetype the target mimetype of a transform, may be {@code null}.
 * @return {@code true} if the target is exactly {@code "alfresco-metadata-embed"}.
 */
public static boolean isMetadataEmbedMimetype(String targetMimetype)
{
return MIMETYPE_METADATA_EMBED.equals(targetMimetype);
}
/**
 * Picks the file extension to use for the target of a transform. The usual target extension is replaced
 * when the {@code targetMimetype} is the metadata extract or metadata embed type.
 *
 * @param targetMimetype the target mimetype
 * @param sourceExtension normal source extension
 * @param targetExtension current target extension (normally {@code "bin"} for embedding and extraction)
 * @return {@code "json"} for a metadata extract, {@code sourceExtension} for a metadata embed, otherwise
 *         {@code targetExtension}.
 */
public static String getExtension(String targetMimetype, String sourceExtension, String targetExtension)
{
    if (isMetadataExtractMimetype(targetMimetype))
    {
        return "json";
    }
    if (isMetadataEmbedMimetype(targetMimetype))
    {
        return sourceExtension;
    }
    return targetExtension;
}
/**
 * Returns the rendition name to show in {@link TransformerDebug}. For a metadata extract or embed the
 * supplied name is really a transform name made up of {@code "alfresco-metadata-extract/"} or
 * {@code "alfresco-metadata-embed/"} followed by the source mimetype, so a short fixed name is used instead.
 *
 * @param renditionName the normal name, or a special one made of a prefix and the source mimetype.
 * @return {@code "metadataExtract"}, {@code "metadataEmbed"} or the unchanged {@code renditionName}.
 */
public static String getRenditionName(String renditionName)
{
    String transformName = getTransformName(renditionName);
    if (transformName != null)
    {
        if (transformName.startsWith(MIMETYPE_METADATA_EXTRACT))
        {
            return "metadataExtract";
        }
        if (transformName.startsWith(MIMETYPE_METADATA_EMBED))
        {
            return "metadataEmbed";
        }
    }
    return renditionName;
}
/**
 * Intentionally does nothing; no support check is repeated here.
 *
 * @param reader the source content (unused).
 */
@Override
protected void checkIsSupported(ContentReader reader)
{
// Just return, as we have already checked when this extractor was selected.
}
/**
 * Intentionally does nothing; no support check is repeated here.
 *
 * @param writer the target content (unused).
 */
@Override
protected void checkIsEmbedSupported(ContentWriter writer)
{
// Just return, as we have already checked when this embedder was selected.
}
/**
 * Placeholder implementation that always returns {@code null}.
 *
 * @param reader the source content (unused).
 * @return always {@code null}.
 */
@Override
// Not called. Overloaded method with the NodeRef is called.
protected Map<String, Serializable> extractRaw(ContentReader reader)
{
return null;
}
/**
 * Queues a metadata extract transform for the node and returns straight away with no metadata. The only
 * transform option sent is the timeout taken from {@code limits}.
 *
 * @param nodeRef the node the metadata is to be extracted from.
 * @param reader the source content.
 * @param limits supplies the timeout passed to the transform.
 * @return always an empty map.
 */
@Override
protected Map<String, Serializable> extractRawInThread(NodeRef nodeRef, ContentReader reader, MetadataExtracterLimits limits)
        throws Throwable
{
    String timeout = Long.toString(limits.getTimeoutMs());
    Map<String, String> transformOptions = Collections.singletonMap(TIMEOUT, timeout);
    transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EXTRACT, EXTRACT, transformOptions);
    return EMPTY_METADATA;
}
/**
 * Queues a metadata embed transform for the node. The metadata is serialised to JSON and sent as the
 * {@code "metadata"} transform option. The {@code writer} is not used.
 *
 * @param nodeRef the node whose content is to have metadata embedded.
 * @param metadata the values to embed.
 * @param reader the source content.
 * @param writer unused.
 */
@Override
protected void embedInternal(NodeRef nodeRef, Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer)
{
    Map<String, String> transformOptions = Collections.singletonMap(METADATA, metadataToString(metadata));
    transformInBackground(nodeRef, reader, MIMETYPE_METADATA_EMBED, EMBED, transformOptions);
}
/**
 * Hands the transform request to the executor service. {@code extractRawThreadFinished()} is always
 * called once the request has been attempted, whether or not it failed.
 */
private void transformInBackground(NodeRef nodeRef, ContentReader reader, String targetMimetype,
                                   String embedOrExtract, Map<String, String> options)
{
    ExecutorService backgroundExecutor = getExecutorService();
    Runnable request = () ->
    {
        try
        {
            transform(nodeRef, reader, targetMimetype, embedOrExtract, options);
        }
        finally
        {
            extractRawThreadFinished();
        }
    };
    backgroundExecutor.execute(request);
}
/**
 * Requests an extract or embed transform of the node from {@link RenditionService2}, as the system user
 * and inside a retrying transaction.
 * <p>
 * If the rendition service reports that the source node does not exist, a
 * {@link ConcurrencyFailureException} (with the original exception as its cause) is thrown, presumably so
 * that the retrying transaction helper tries again once the transaction that created the node has
 * finished. Any other {@link IllegalArgumentException} is logged and the request is abandoned.
 *
 * @param nodeRef the node to transform.
 * @param reader supplies the source mimetype.
 * @param targetMimetype {@code "alfresco-metadata-extract"} or {@code "alfresco-metadata-embed"}.
 * @param embedOrExtract label used in log messages.
 * @param options the transform options.
 */
private void transform(NodeRef nodeRef, ContentReader reader, String targetMimetype,
                       String embedOrExtract, Map<String, String> options)
{
    String sourceMimetype = reader.getMimetype();

    // This needs to be specific to each source mimetype and the extract or embed as the name
    // is used to cache the transform name that will be used.
    String transformName = targetMimetype + '/' + sourceMimetype;
    TransformDefinition transformDefinition = new TransformDefinition(transformName, targetMimetype,
            options, null, null, null);

    if (logger.isTraceEnabled())
    {
        StringJoiner sj = new StringJoiner("\n");
        sj.add("Request " + embedOrExtract + " transform on " + nodeRef);
        options.forEach((k,v)->sj.add(" "+k+"="+v));
        logger.trace(sj);
    }

    AuthenticationUtil.runAs(
        (AuthenticationUtil.RunAsWork<Void>) () ->
            transactionService.getRetryingTransactionHelper().doInTransaction(() ->
            {
                try
                {
                    renditionService2.transform(nodeRef, transformDefinition);
                }
                catch (IllegalArgumentException e)
                {
                    // The message may be null, so guard before matching on it.
                    String message = e.getMessage();
                    if (message != null &&
                        message.endsWith("The supplied sourceNodeRef "+nodeRef+" does not exist."))
                    {
                        throw new ConcurrencyFailureException(
                                "The original transaction may not have finished. " + message, e);
                    }
                    // Still best effort, but no longer silent.
                    logger.error("Failed to request " + embedOrExtract + " transform on " + nodeRef, e);
                }
                return null;
            }), AuthenticationUtil.getSystemUserName());
}
/**
 * Applies the result of a metadata extract transform to a node.
 * <p>
 * The stream is read as a JSON map. Four control entries are removed from it before the rest is treated as
 * properties: {@code sys:overwritePolicy} (default {@code PRAGMATIC}), {@code sys:enableStringTagging}
 * (default {@code false}), {@code sys:carryAspectProperties} (default {@code true}) and
 * {@code sys:stringTaggingSeparators} (default
 * {@code ContentMetadataExtracter.DEFAULT_STRING_TAGGING_SEPARATORS}). If the JSON cannot be read, or any
 * control entry is invalid, nothing is changed. Otherwise the properties are applied as the system user
 * inside a retrying transaction.
 *
 * @param nodeRef the node to update.
 * @param transformInputStream the JSON produced by the transform.
 */
public void setMetadata(NodeRef nodeRef, InputStream transformInputStream)
{
    if (logger.isTraceEnabled())
    {
        logger.trace("Update metadata on " + nodeRef);
    }

    Map<String, Serializable> metadata = readMetadata(transformInputStream);
    if (metadata == null)
    {
        return; // Error state.
    }

    // Remove the well-known entries from the map that drive how the real metadata is applied.
    OverwritePolicy overwritePolicy = removeOverwritePolicy(metadata, "sys:overwritePolicy", OverwritePolicy.PRAGMATIC);
    Boolean enableStringTagging = removeBoolean(metadata, "sys:enableStringTagging", false);
    Boolean carryAspectProperties = removeBoolean(metadata, "sys:carryAspectProperties", true);
    List<String> stringTaggingSeparators = removeTaggingSeparators(metadata, "sys:stringTaggingSeparators",
            ContentMetadataExtracter.DEFAULT_STRING_TAGGING_SEPARATORS);
    boolean controlValuesValid = overwritePolicy != null
            && enableStringTagging != null
            && carryAspectProperties != null
            && stringTaggingSeparators != null;
    if (!controlValuesValid)
    {
        return; // Error state.
    }

    AuthenticationUtil.runAsSystem((AuthenticationUtil.RunAsWork<Void>) () ->
        transactionService.getRetryingTransactionHelper().doInTransaction(() ->
        {
            // Based on: AbstractMappingMetadataExtracter.extract
            Map<QName, Serializable> nodeProperties = nodeService.getProperties(nodeRef);

            // Convert to system properties (standalone), then to the dictionary types.
            // There is no last filter in the AsynchronousExtractor.
            Map<QName, Serializable> systemProperties =
                    convertSystemPropertyValues(convertKeysToQNames(metadata));

            // Now use the proper overwrite policy
            Map<QName, Serializable> changedProperties =
                    overwritePolicy.applyProperties(systemProperties, nodeProperties);

            // Based on: ContentMetadataExtracter.executeImpl
            // If none of the properties were changed, then there is nothing more to do
            if (!changedProperties.isEmpty())
            {
                logChangedProperties(changedProperties);

                ContentMetadataExtracter.addExtractedMetadataToNode(nodeRef, nodeProperties, changedProperties,
                        nodeService, dictionaryService, taggingService,
                        enableStringTagging, carryAspectProperties, stringTaggingSeparators);

                if (logger.isTraceEnabled())
                {
                    logger.trace("Extraction of Metadata from " + nodeRef + " complete " + changedProperties);
                }
            }
            return null;
        }, false, true));
}

/**
 * Writes each changed property as {@code prefix:name=value} to the transformer debug and to the debug log,
 * whichever of the two is enabled.
 */
private void logChangedProperties(Map<QName, Serializable> changedProperties)
{
    boolean transformerDebugEnabled = transformerDebug.isEnabled();
    boolean debugEnabled = logger.isDebugEnabled();
    if (!transformerDebugEnabled && !debugEnabled)
    {
        return;
    }

    for (Map.Entry<QName, Serializable> changed : changedProperties.entrySet())
    {
        Serializable value = changed.getValue();
        String prefixString = changed.getKey().toPrefixString(namespacePrefixResolver);
        String debugMessage = prefixString + "=" + (value == null ? "" : value);
        if (transformerDebugEnabled)
        {
            transformerDebug.debugUsingPreviousReference(" "+debugMessage);
        }
        if (debugEnabled)
        {
            logger.debug(debugMessage);
        }
    }
}
/**
 * Parses the JSON transform result into a map of property name to value.
 *
 * @return the parsed map, or {@code null} (after logging an error) if the stream could not be read.
 */
private Map<String, Serializable> readMetadata(InputStream transformInputStream)
{
    Map<String, Serializable> parsed = null;
    try
    {
        parsed = jsonObjectMapper.readValue(transformInputStream,
                new TypeReference<HashMap<String, Serializable>>() {});
    }
    catch (IOException e)
    {
        logger.error("Failed to read metadata from transform result", e);
    }
    return parsed;
}
/**
 * Serialises the metadata to a JSON object of string values.
 *
 * @return the JSON text, or {@code null} (after logging an error) if it could not be produced.
 */
private String metadataToString(Map<String, Serializable> metadata)
{
    Map<String, String> stringValues = AbstractMappingMetadataExtracter.convertMetadataToStrings(metadata);
    String json = null;
    try
    {
        json = jsonObjectMapper.writeValueAsString(stringValues);
    }
    catch (JsonProcessingException e)
    {
        logger.error("Failed to save metadata as Json", e);
    }
    return json;
}
/**
 * Removes {@code key} from the map and interprets its value as the name of an {@code OverwritePolicy}.
 *
 * @return {@code defaultValue} if the key was absent, the named policy if valid, or {@code null} (after
 *         logging an error) if the value is not a string naming a policy.
 */
private OverwritePolicy removeOverwritePolicy(Map<String, Serializable> map, String key, OverwritePolicy defaultValue)
{
    Serializable value = map.remove(key);
    if (value == null)
    {
        return defaultValue;
    }

    if (value instanceof String)
    {
        try
        {
            return OverwritePolicy.valueOf((String)value);
        }
        catch (IllegalArgumentException ignored)
        {
            // Not the name of a policy: reported below.
        }
    }
    logger.error(key + "=" + value + " is invalid");
    return null;
}
/**
 * Removes {@code key} from the map and interprets its value as a strict boolean: only the exact strings
 * {@code "true"} and {@code "false"} are accepted.
 *
 * @return {@code defaultValue} if the key was absent, the parsed value if valid, or {@code null} (after
 *         logging an error) for anything else.
 */
private Boolean removeBoolean(Map<String, Serializable> map, Serializable key, boolean defaultValue)
{
    @SuppressWarnings("SuspiciousMethodCalls") Serializable value = map.remove(key);
    if (value == null)
    {
        return defaultValue;
    }
    if (Boolean.TRUE.toString().equals(value))
    {
        return true;
    }
    if (Boolean.FALSE.toString().equals(value))
    {
        return false;
    }

    // no flexibility of parseBoolean(...). It is just invalid
    logger.error(key + "=" + value + " is invalid. Must be " + Boolean.TRUE + " or " + Boolean.FALSE);
    return null;
}
/**
 * Removes {@code key} from the map and interprets its value as a single RFC 4180 CSV record, each field
 * of which is a tagging separator.
 *
 * @param map the metadata map; the entry for {@code key} is removed.
 * @param key the name of the entry holding the separators.
 * @param defaultValue returned when the key is absent.
 * @return {@code defaultValue} if the key was absent, the list of separators if valid, or {@code null}
 *         (after logging an error) if the value is not a string, is not parseable CSV, has no record or
 *         has more than one record.
 */
private List<String> removeTaggingSeparators(Map<String, Serializable> map, String key, List<String> defaultValue)
{
    Serializable value = map.remove(key);
    if (value == null)
    {
        return defaultValue;
    }
    if (!(value instanceof String))
    {
        logger.error(key + "=" + value + " is invalid.");
        return null;
    }

    List<String> list = new ArrayList<>();
    try (CSVParser parser = CSVParser.parse((String)value, CSVFormat.RFC4180))
    {
        Iterator<CSVRecord> iterator = parser.iterator();
        CSVRecord firstRecord = iterator.next();
        if (iterator.hasNext())
        {
            logger.error(key + "=" + value + " is invalid. Should only have one record");
            return null;
        }
        firstRecord.forEach(list::add);
    }
    // The parser's iterator reports malformed input as an unchecked exception (IllegalStateException or
    // UncheckedIOException, depending on the commons-csv version) rather than as an IOException, so
    // those are treated as invalid input too instead of escaping to the caller.
    catch (IOException | java.io.UncheckedIOException | IllegalStateException | NoSuchElementException e)
    {
        logger.error(key + "=" + value + " is invalid. Must be a CSV using CSVFormat.RFC4180", e);
        return null;
    }
    return list;
}
/**
 * Converts the string keys of the extracted metadata into {@code QName}s. An entry is dropped, and an
 * error logged, if its key cannot be turned into a {@code QName} or if
 * {@code toPrefixString(namespacePrefixResolver)} fails for it.
 *
 * @param documentMetadata the extracted values keyed by property name.
 * @return a new map holding the entries whose keys could be converted.
 */
private Map<QName, Serializable> convertKeysToQNames(Map<String, Serializable> documentMetadata)
{
    Map<QName, Serializable> converted = new HashMap<>();
    for (Map.Entry<String, Serializable> metadataEntry : documentMetadata.entrySet())
    {
        String key = metadataEntry.getKey();

        QName qName;
        try
        {
            qName = QName.createQName(key);
        }
        catch (NamespaceException e)
        {
            logger.error("Error creating qName from "+key);
            continue;
        }

        try
        {
            // Called only to check that the namespace resolves; the result is not needed.
            qName.toPrefixString(namespacePrefixResolver);
        }
        catch (NamespaceException e)
        {
            logger.error("Error unregistered namespace in " + qName);
            continue;
        }

        converted.put(qName, metadataEntry.getValue());
    }
    return converted;
}
/**
 * Replaces the content of a node with the result of a metadata embed transform. The work is done as the
 * system user inside a retrying transaction. The mimetype and encoding of the existing content are
 * carried over to the new content.
 *
 * @param nodeRef the node whose content is replaced.
 * @param transformInputStream the content produced by the transform.
 */
public void setEmbeddedMetadata(NodeRef nodeRef, InputStream transformInputStream)
{
if (logger.isDebugEnabled())
{
logger.debug("Update of content to include metadata on " + nodeRef);
}
AuthenticationUtil.runAsSystem(() ->
transactionService.getRetryingTransactionHelper().doInTransaction(() ->
{
try
{
// Set or replace content
// Read the current mimetype and encoding before the writer is requested.
// NOTE(review): reader is dereferenced without a null check - confirm getReader cannot return null here.
ContentReader reader = contentService.getReader(nodeRef, ContentModel.PROP_CONTENT);
String mimetype = reader.getMimetype();
String encoding = reader.getEncoding();
ContentWriter writer = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);
writer.setMimetype(mimetype);
writer.setEncoding(encoding);
writer.putContent(transformInputStream);
if (logger.isTraceEnabled())
{
logger.trace("Embedded Metadata on " + nodeRef + " complete");
}
}
catch (Exception e)
{
// Logged without the exception itself, which is rethrown to the transaction helper.
logger.error("Failed to copy embedded metadata transform InputStream into " + nodeRef);
throw e;
}
return null;
}, false, true));
}
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -36,6 +36,8 @@ import org.apache.tika.parser.dwg.DWGParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Metadata extractor for the
* {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_APP_DWG MIMETYPE_APP_DWG}
* and
@@ -55,6 +57,7 @@ import org.apache.tika.parser.dwg.DWGParser;
* @since 3.4
* @author Nick Burch
*/
@Deprecated
public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter
{
private static final String KEY_KEYWORD = "keyword";

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -24,7 +24,7 @@
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -61,6 +61,8 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Extracts the following values from HTML documents:
* <pre>
* <b>author:</b> -- cm:author
@@ -75,6 +77,7 @@ import org.alfresco.service.cmr.repository.ContentReader;
* @author Jesper Steen Møller
* @author Derek Hulley
*/
@Deprecated
public class HtmlMetadataExtracter extends AbstractMappingMetadataExtracter
{
private static final String KEY_AUTHOR = "author";

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2017 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -25,20 +25,17 @@
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.PropertyCheck;
import java.io.Serializable;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
/**
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
*
* Extracts values from Open Office documents into the following:
* <pre>
* <b>author:</b> -- cm:author
@@ -48,6 +45,7 @@ import org.alfresco.util.PropertyCheck;
*
* @author Neil McErlean
*/
@Deprecated
public class JodConverterMetadataExtracter extends AbstractMappingMetadataExtracter implements OpenOfficeMetadataWorker
{
private OpenOfficeMetadataWorker worker;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2017 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -35,7 +35,6 @@ import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.content.JodConverter;
import org.alfresco.repo.content.metadata.OpenOfficeMetadataWorker;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.util.TempFileProvider;
@@ -59,6 +58,19 @@ import com.sun.star.util.CloseVetoException;
import com.sun.star.util.XCloseable;
import com.sun.star.util.XRefreshable;
/**
* Extracts values from Open Office documents into the following:
* <pre>
* <b>author:</b> -- cm:author
* <b>title:</b> -- cm:title
* <b>description:</b> -- cm:description
* </pre>
*
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
*
* @author Neil McErlean
*/
@Deprecated
public class JodConverterMetadataExtracterWorker implements
OpenOfficeMetadataWorker
{

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
@@ -36,6 +36,8 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp3.Mp3Parser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Extracts the following values from MP3 files:
* <pre>
* <b>songTitle:</b> -- cm:title
@@ -57,6 +59,7 @@ import org.apache.tika.parser.mp3.Mp3Parser;
*
* @author Nick Burch
*/
@Deprecated
public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
{
private static final String KEY_SONG_TITLE = "songTitle";

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
@@ -35,6 +35,8 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Outlook MAPI format email meta-data extractor extracting the following values:
* <pre>
* <b>sentDate:</b> -- cm:sentdate
@@ -53,6 +55,7 @@ import org.apache.tika.parser.microsoft.OfficeParser;
* @since 2.1
* @author Kevin Roast
*/
@Deprecated
public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
{
private static final String KEY_SENT_DATE = "sentDate";

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
@@ -33,6 +33,7 @@ import org.alfresco.repo.content.ContentWorker;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.namespace.QName;
/**
@@ -69,5 +70,18 @@ public interface MetadataEmbedder extends ContentWorker {
*/
public void embed(Map<QName, Serializable> properties, ContentReader reader, ContentWriter writer) throws ContentIOException;
/**
* Identical to {@link #embed(Map, ContentReader, ContentWriter)} but with the addition of the
* {@code NodeRef} being acted on. By default, the method without the {@code NodeRef} is called.
*
* @param nodeRef the node being acted on.
* @param properties the model properties to embed
* @param reader the reader for the original source content file
* @param writer the writer for the content after metadata has been embedded
* @throws ContentIOException
*/
public default void embed(NodeRef nodeRef, Map<QName, Serializable> properties, ContentReader reader, ContentWriter writer) throws ContentIOException
{
// The default ignores the nodeRef and delegates to the method without it.
embed(properties, reader, writer);
}
}

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
*
@@ -52,6 +52,7 @@ import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.repo.content.ContentWorker;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
@@ -402,4 +403,46 @@ public interface MetadataExtracter extends ContentWorker
OverwritePolicy overwritePolicy,
Map<QName, Serializable> destination,
Map<String, Set<QName>> mapping);
/**
 * Node-aware variant of {@link #extract(ContentReader, Map)}: it takes the same arguments plus the
 * {@code NodeRef} of the node being acted on. This default implementation does not use the
 * {@code nodeRef}; it simply calls the method without it. Implementations that need the node
 * can override this method.
 *
 * @param nodeRef       the node being acted on.
 * @param reader        the source of the content
 * @param destination   the map of properties to populate (essentially a return value)
 * @return              Returns a map of all properties on the destination map that were
 *                      added or modified.  If the return map is empty, then no properties
 *                      were modified.
 * @throws ContentIOException if a detectable error occurs
 */
default Map<QName, Serializable> extract(NodeRef nodeRef, ContentReader reader, Map<QName, Serializable> destination)
{
    // nodeRef is deliberately ignored by the default implementation
    Map<QName, Serializable> changedProperties = extract(reader, destination);
    return changedProperties;
}
/**
 * Node-aware variant of {@link #extract(ContentReader, OverwritePolicy, Map, Map)}: it takes the same
 * arguments plus the {@code NodeRef} of the node being acted on. This default implementation does
 * not use the {@code nodeRef}; it simply calls the method without it. Implementations that need
 * the node can override this method.
 *
 * @param nodeRef           the node being acted on.
 * @param reader            the source of the content
 * @param overwritePolicy   the policy stipulating how the system properties must be
 *                          overwritten if present
 * @param destination       the map of properties to populate (essentially a return value)
 * @param mapping           a mapping of document-specific properties to system properties.
 * @return                  Returns a map of all properties on the destination map that were
 *                          added or modified.  If the return map is empty, then no properties
 *                          were modified.
 * @throws ContentIOException if a detectable error occurs
 */
default Map<QName, Serializable> extract(
        NodeRef nodeRef,
        ContentReader reader,
        OverwritePolicy overwritePolicy,
        Map<QName, Serializable> destination,
        Map<String, Set<QName>> mapping)
{
    // nodeRef is deliberately ignored by the default implementation
    Map<QName, Serializable> changedProperties = extract(reader, overwritePolicy, destination, mapping);
    return changedProperties;
}
}

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005-2012 Jesper Steen Møller
*
@@ -73,18 +73,22 @@ public class MetadataExtracterRegistry
private List<MetadataExtracter> extracters;
private Map<String, List<MetadataExtracter>> extracterCache;
private Map<String, List<MetadataEmbedder>> embedderCache;
private AsynchronousExtractor asynchronousExtractor;
/** Controls read access to the cache */
private Lock extracterCacheReadLock;
/** controls write access to the cache */
private Lock extracterCacheWriteLock;
private boolean asyncExtractEnabled = true;
private boolean asyncEmbedEnabled = true;
public MetadataExtracterRegistry()
{
// initialise lists
extracters = new ArrayList<MetadataExtracter>(10);
extracterCache = new HashMap<String, List<MetadataExtracter>>(17);
embedderCache = new HashMap<String, List<MetadataEmbedder>>(17);
extracters = new ArrayList<>(11);
extracterCache = new HashMap<>(18);
embedderCache = new HashMap<>(18);
// create lock objects for access to the cache
ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
@@ -125,7 +129,14 @@ public class MetadataExtracterRegistry
extracterCacheWriteLock.lock();
try
{
extracters.add(extracter);
if (extracter instanceof AsynchronousExtractor)
{
asynchronousExtractor = (AsynchronousExtractor)extracter;
}
else
{
extracters.add(extracter);
}
extracterCache.clear();
embedderCache.clear();
}
@@ -135,16 +146,42 @@ public class MetadataExtracterRegistry
}
}
/**
 * Sets the flag that {@link #getExtractor(String, long)} checks before it offers the
 * {@link AsynchronousExtractor}. When {@code false}, that method always falls back to
 * {@link #getExtracter(String)}. The field defaults to {@code true}.
 *
 * @param asyncExtractEnabled {@code true} to allow the asynchronous extractor to be selected for extraction
 */
public void setAsyncExtractEnabled(boolean asyncExtractEnabled)
{
this.asyncExtractEnabled = asyncExtractEnabled;
}
/**
 * Sets the flag that {@link #getEmbedder(String, long)} checks before it offers the
 * {@link AsynchronousExtractor} as an embedder. When {@code false}, that method always falls back to
 * {@link #getEmbedder(String)}. The field defaults to {@code true}.
 *
 * @param asyncEmbedEnabled {@code true} to allow the asynchronous extractor to be selected for embedding
 */
public void setAsyncEmbedEnabled(boolean asyncEmbedEnabled)
{
this.asyncEmbedEnabled = asyncEmbedEnabled;
}
/**
 * Chooses the extractor for a piece of content. The {@link AsynchronousExtractor} is returned when
 * asynchronous extraction is enabled, an asynchronous extractor has been registered, and it reports
 * that it supports the given mimetype and size. In every other case the result of
 * {@link #getExtracter(String)} is returned.
 *
 * @param sourceMimetype the source MIMETYPE of the extraction
 * @param sourceSizeInBytes size of the source content.
 * @return Returns a metadata extractor that can extract metadata from the chosen MIME type.
 */
public MetadataExtracter getExtractor(String sourceMimetype, long sourceSizeInBytes)
{
    // Asynchronous extraction switched off, or no asynchronous extractor registered
    if (!asyncExtractEnabled || asynchronousExtractor == null)
    {
        return getExtracter(sourceMimetype);
    }

    if (asynchronousExtractor.isSupported(sourceMimetype, sourceSizeInBytes))
    {
        return asynchronousExtractor;
    }

    // The asynchronous extractor cannot handle this mimetype/size
    return getExtracter(sourceMimetype);
}
/**
* Gets the best metadata extracter. This is a combination of the most
* reliable and the most performant extracter.
* <p>
* The result is cached for quicker access next time.
*
* @param sourceMimetype the source MIME of the extraction
* @return Returns a metadata extracter that can extract metadata from the
* chosen MIME type.
*/
* <p>
* The result is cached for quicker access next time.
*
* @param sourceMimetype the source MIME of the extraction
* @return Returns a metadata extracter that can extract metadata from the
* chosen MIME type.
*/
public MetadataExtracter getExtracter(String sourceMimetype)
{
logger.debug("Get extractors for " + sourceMimetype);
@@ -202,18 +239,18 @@ public class MetadataExtracterRegistry
}
private String getName(MetadataExtracter extractor)
{
if (extractor == null)
{
return null;
}
else if (extractor instanceof AbstractMappingMetadataExtracter)
{
return ((AbstractMappingMetadataExtracter)extractor).getBeanName();
}
else
{
return extractor.getClass().getSimpleName();
{
if (extractor == null)
{
return null;
}
else if (extractor instanceof AbstractMappingMetadataExtracter)
{
return ((AbstractMappingMetadataExtracter)extractor).getBeanName();
}
else
{
return extractor.getClass().getSimpleName();
}
}
@@ -222,48 +259,64 @@ public class MetadataExtracterRegistry
* @return Returns a set of extractors that will work for the given mimetype
*/
private List<MetadataExtracter> findBestExtracters(String sourceMimetype)
{
if (logger.isDebugEnabled())
{
if (logger.isDebugEnabled())
{
logger.debug("Finding extractors for " + sourceMimetype);
logger.debug("Finding extractors for " + sourceMimetype);
}
List<MetadataExtracter> extractors = new ArrayList<MetadataExtracter>(1);
List<MetadataExtracter> extractors = new ArrayList<>(1);
for (MetadataExtracter extractor : extracters)
{
if (!extractor.isSupported(sourceMimetype))
{
// extraction not achievable
if (logger.isDebugEnabled())
// extraction not achievable
if (logger.isDebugEnabled())
{
logger.debug("Find unsupported: "+getName(extractor));
logger.debug("Find unsupported: "+getName(extractor));
}
continue;
}
if (logger.isDebugEnabled())
}
if (logger.isDebugEnabled())
{
logger.debug("Find supported: "+getName(extractor));
logger.debug("Find supported: "+getName(extractor));
}
extractors.add(extractor);
}
if (logger.isDebugEnabled())
}
if (logger.isDebugEnabled())
{
logger.debug("Find returning: "+extractors);
logger.debug("Find returning: "+extractors);
}
return extractors;
}
/**
 * Chooses the embedder for a piece of content. The {@link AsynchronousExtractor} is returned when
 * asynchronous embedding is enabled, an asynchronous extractor has been registered, and it reports
 * that it supports embedding for the given mimetype and size. In every other case the result of
 * {@link #getEmbedder(String)} is returned.
 *
 * @param sourceMimetype the MIMETYPE of the content the metadata is to be embedded in
 * @param sourceSizeInBytes size of the source content.
 * @return Returns a metadata embedder that can embed metadata in the chosen MIME type.
 */
public MetadataEmbedder getEmbedder(String sourceMimetype, long sourceSizeInBytes)
{
    // Asynchronous embedding switched off, or no asynchronous extractor registered
    if (!asyncEmbedEnabled || asynchronousExtractor == null)
    {
        return getEmbedder(sourceMimetype);
    }

    if (asynchronousExtractor.isEmbedderSupported(sourceMimetype, sourceSizeInBytes))
    {
        return asynchronousExtractor;
    }

    // The asynchronous extractor cannot embed for this mimetype/size
    return getEmbedder(sourceMimetype);
}
/**
* Gets the best metadata embedder. This is a combination of the most
* reliable and the most performant embedder.
* <p>
* The result is cached for quicker access next time.
*
* @param sourceMimetype the source MIME of the extraction
* @return Returns a metadata embedder that can embed metadata in the
* chosen MIME type.
*/
* <p>
* The result is cached for quicker access next time.
*
* @param sourceMimetype the source MIME of the extraction
* @return Returns a metadata embedder that can embed metadata in the
* chosen MIME type.
*/
public MetadataEmbedder getEmbedder(String sourceMimetype)
{
List<MetadataEmbedder> embedders = null;

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
*
@@ -53,6 +53,8 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Office file format Metadata Extracter. This extracter uses the POI library to extract
* the following:
* <pre>
@@ -78,6 +80,7 @@ import org.apache.tika.parser.microsoft.OfficeParser;
* @author Derek Hulley
* @author Nick Burch
*/
@Deprecated
public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
{
public static final String KEY_CREATE_DATETIME = "createDateTime";

View File

@@ -1,30 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Antti Jokipii
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 - 2020 Antti Jokipii
*
* This file is part of Alfresco
*
@@ -59,6 +59,8 @@ import org.joda.time.format.DateTimeFormatter;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Metadata extractor for the
* {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_OPENDOCUMENT_TEXT MIMETYPE_OPENDOCUMENT_XXX}
* mimetypes.
@@ -86,6 +88,7 @@ import org.joda.time.format.DateTimeFormatter;
* @author Antti Jokipii
* @author Derek Hulley
*/
@Deprecated
public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter
{
private static final String KEY_CREATION_DATE = "creationDate";

View File

@@ -23,28 +23,31 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentReader;
/**
* An interface that allows separation between the metadata extractor registry and the third party subsystem owning the
* open office connection.
*
* @author dward
*/
public interface OpenOfficeMetadataWorker
{
/**
* @return Returns true if a connection to the Uno server could be established
*/
public boolean isConnected();
/**
* @see AbstractMappingMetadataExtracter#extractRaw(ContentReader)
*/
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable;
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentReader;
/**
 * An interface that allows separation between the metadata extractor registry and the third party subsystem owning
 * the open office connection.
 *
 * @author dward
 * @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
 */
@Deprecated
public interface OpenOfficeMetadataWorker
{
    /**
     * Reports whether the Uno server is reachable.
     *
     * @return Returns true if a connection to the Uno server could be established
     */
    boolean isConnected();

    /**
     * @param reader the reader passed through from the extractor
     * @see AbstractMappingMetadataExtracter#extractRaw(ContentReader)
     */
    Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable;
}

View File

@@ -1,30 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -52,6 +52,8 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Metadata extractor for the PDF documents.
* <pre>
* <b>author:</b> -- cm:author
@@ -66,6 +68,7 @@ import org.apache.tika.parser.pdf.PDFParser;
* @author Jesper Steen Møller
* @author Derek Hulley
*/
@Deprecated
public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter
{
protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);

View File

@@ -36,6 +36,8 @@ import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.springframework.beans.factory.InitializingBean;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* POI-based metadata extractor for Office 07 documents.
* See http://poi.apache.org/ for information on POI.
* <pre>
@@ -52,6 +54,7 @@ import org.springframework.beans.factory.InitializingBean;
* @author Neil McErlean
* @author Dmitry Velichkevich
*/
@Deprecated
public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter
{
protected static Log logger = LogFactory.getLog(PoiMetadataExtracter.class);

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.IOException;
@@ -66,6 +66,7 @@ import org.alfresco.service.namespace.QName;
* @author Derek Hulley
* @since 3.2
*/
@Deprecated
public class RFC822MetadataExtracter extends AbstractMappingMetadataExtracter
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -42,6 +42,8 @@ import org.gagravarr.tika.FlacParser;
import org.gagravarr.tika.VorbisParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* A Metadata Extractor which makes use of the Apache
* Tika Audio Parsers to extract metadata from your
* media files.
@@ -64,6 +66,7 @@ import org.gagravarr.tika.VorbisParser;
* @since 4.0
* @author Nick Burch
*/
@Deprecated
public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
{
protected static final String KEY_LYRICS = "lyrics";

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -40,6 +40,8 @@ import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* A Metadata Extractor which makes use of the Apache
* Tika auto-detection to select the best parser
* to extract the metadata from your document.
@@ -60,6 +62,7 @@ import org.apache.tika.parser.Parser;
* @since 3.4
* @author Nick Burch
*/
@Deprecated
public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
{
protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -31,7 +31,6 @@ import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -45,8 +44,6 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.embedder.Embedder;
@@ -74,6 +71,8 @@ import org.xml.sax.SAXException;
/**
* @deprecated extractors have been moved to a T-Engine.
*
* The parent of all Metadata Extractors which use
* Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common
@@ -92,6 +91,7 @@ import org.xml.sax.SAXException;
* @author Nick Burch
*/
@AlfrescoPublicApi
@Deprecated
public abstract class TikaPoweredMetadataExtracter
extends AbstractMappingMetadataExtracter
implements MetadataEmbedder
@@ -473,43 +473,11 @@ public abstract class TikaPoweredMetadataExtracter
{
return;
}
Map<String, String> metadataAsStrings = convertMetadataToStrings(properties);
Metadata metadataToEmbed = new Metadata();
for (String metadataKey : properties.keySet())
{
Serializable value = properties.get(metadataKey);
if (value == null)
{
continue;
}
if (value instanceof Collection<?>)
{
for (Object singleValue : (Collection<?>) value)
{
try
{
// Convert to a string value for Tika
metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue));
}
catch (TypeConversionException e)
{
logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
}
}
}
else
{
try
{
// Convert to a string value for Tika
metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value));
}
catch (TypeConversionException e)
{
logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
}
}
}
metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v));
InputStream inputStream = getInputStream(reader);
OutputStream outputStream = writer.getContentOutputStream();
embedder.embed(metadataToEmbed, inputStream, outputStream, null);

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.util.ArrayList;
@@ -37,6 +37,8 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
/**
* @deprecated Tika extractors have been moved to a T-Engine.
*
* A Metadata Extractor which makes use of Apache Tika,
* and allows the selection of the Tika parser to be
* sprung-in to extract the metadata from your document.
@@ -56,6 +58,7 @@ import org.apache.tika.parser.Parser;
* @author Nick Burch
*/
@AlfrescoPublicApi
@Deprecated
public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter
{
protected static Log logger = LogFactory.getLog(TikaSpringConfiguredMetadataExtracter.class);

View File

@@ -60,6 +60,7 @@ import org.alfresco.util.PropertyCheck;
* @since 2.1
* @author Derek Hulley
*/
@Deprecated
public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] { MimetypeMap.MIMETYPE_XML };

View File

@@ -249,7 +249,7 @@ public class AdminUiTransformerDebug extends TransformerDebug implements Applica
boolean firstTransformer)
{
String mimetypes = firstTransformer
? getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)
? getSourceAndTargetExt(sourceMimetype, targetMimetype)
: spaces(10);
char c = (char)('a'+transformerCount);
log(mimetypes+

View File

@@ -25,6 +25,7 @@
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.TransformationOptions;
import org.alfresco.transform.client.registry.SupportedTransform;
@@ -99,10 +100,9 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug
public void blacklistTransform(ContentTransformer transformer, String sourceMimetype,
String targetMimetype, TransformationOptions options)
{
log("Blacklist "+getName(transformer)+" "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype));
log("Blacklist "+getName(transformer)+" "+ getSourceAndTargetExt(sourceMimetype, targetMimetype));
}
@Deprecated
public void pushTransform(ContentTransformer transformer, String fromUrl, String sourceMimetype,
String targetMimetype, long sourceSize, TransformationOptions options)
@@ -265,7 +265,10 @@ public class LegacyTransformerDebug extends AdminUiTransformerDebug
}
String i = Integer.toString(mimetypePairCount);
String priority = gePriority(transformer, sourceMimetype, targetMimetype);
log(spaces(5-i.length())+mimetypePairCount+") "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype)+
String sourceExt = getMimetypeExt(sourceMimetype);
String targetExt = getMimetypeExt(targetMimetype);
targetExt = AsynchronousExtractor.getExtension(targetMimetype, sourceExt, targetExt);
log(spaces(5-i.length())+mimetypePairCount+") "+ sourceExt + targetExt +
priority +
' '+fileSize((maxSourceSizeKBytes > 0) ? maxSourceSizeKBytes*1024 : maxSourceSizeKBytes)+
(maxSourceSizeKBytes == 0 ? " disabled" : ""));

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -25,6 +25,7 @@
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.repo.rendition2.RenditionDefinition2;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
@@ -188,6 +189,7 @@ public class LocalTransformImpl extends AbstractLocalTransform
args[i++] = "targetMimetype";
args[i++] = targetMimetype;
targetExtension = AsynchronousExtractor.getExtension(targetMimetype, sourceExtension, targetExtension);
remoteTransformerClient.request(reader, writer, sourceMimetype, sourceExtension, targetExtension,
timeoutMs, log, args);
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -26,6 +26,7 @@
package org.alfresco.repo.content.transform;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
@@ -68,6 +69,7 @@ public class TransformerDebug
protected Log logger;
protected NodeService nodeService;
protected MimetypeService mimetypeService;
private final ThreadLocal<Integer> previousTransformId = ThreadLocal.withInitial(()->-1);
protected enum Call
{
@@ -280,6 +282,16 @@ public class TransformerDebug
this.mimetypeService = mimetypeService;
}
/**
 * Remembers, for the calling Thread only, the id of a transform so that it can be used
 * later as a log reference.
 * @param id the transform id to store in the {@code previousTransformId} ThreadLocal.
 */
public void setPreviousTransformId(int id)
{
    previousTransformId.set(Integer.valueOf(id));
}
/**
 * @return the transform id last stored for the calling Thread in the
 *         {@code previousTransformId} ThreadLocal.
 */
private int getPreviousTransformId()
{
    return previousTransformId.get().intValue();
}
public void afterPropertiesSet() throws Exception
{
PropertyCheck.mandatory(this, "nodeService", nodeService);
@@ -351,7 +363,7 @@ public class TransformerDebug
log(frame.sourceMimetype+' '+frame.targetMimetype, false);
String fileName = getFileName(frame.sourceNodeRef, firstLevel, sourceSize);
log(getMimetypeExt(frame.sourceMimetype)+getMimetypeExt(frame.targetMimetype) +
log(getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype) +
((fileName != null) ? fileName+' ' : "")+
((sourceSize >= 0) ? fileSize(sourceSize)+' ' : "") +
(firstLevel ? getRenditionName(renditionName) : "") + message);
@@ -370,7 +382,7 @@ public class TransformerDebug
Map<String, String> options, String renditionName, String message)
{
String fileName = getFileName(sourceNodeRef, true, -1);
log(" "+getMimetypeExt(sourceMimetype)+getMimetypeExt(targetMimetype) +
log(" "+ getSourceAndTargetExt(sourceMimetype, targetMimetype) +
((fileName != null) ? fileName+' ' : "")+
((sourceSize >= 0) ? fileSize(sourceSize)+' ' : "") +
(getRenditionName(renditionName)) + message);
@@ -453,6 +465,7 @@ public class TransformerDebug
ourStack.pop();
}
}
setPreviousTransformId(id);
return id;
}
@@ -462,8 +475,7 @@ public class TransformerDebug
{
String failureReason = frame.getFailureReason();
boolean firstLevel = size == 1;
String sourceExt = getMimetypeExt(frame.sourceMimetype);
String targetExt = getMimetypeExt(frame.targetMimetype);
String sourceAndTargetExt = getSourceAndTargetExt(frame.sourceMimetype, frame.targetMimetype);
String fileName = getFileName(frame.sourceNodeRef, firstLevel, frame.sourceSize);
long sourceSize = frame.getSourceSize();
String transformerName = frame.getTransformerName();
@@ -506,19 +518,18 @@ public class TransformerDebug
if (level != null)
{
infoLog(getReference(debug, false), sourceExt, targetExt, level, fileName, sourceSize,
infoLog(getReference(debug, false, false), sourceAndTargetExt, level, fileName, sourceSize,
transformerName, renditionName, failureReason, ms, debug);
}
}
}
private void infoLog(String reference, String sourceExt, String targetExt, String level, String fileName,
private void infoLog(String reference, String sourceAndTargetExt, String level, String fileName,
long sourceSize, String transformerName, String renditionName, String failureReason, String ms, boolean debug)
{
String message =
reference +
sourceExt +
targetExt +
sourceAndTargetExt +
(level == null ? "" : level+' ') +
(fileName == null ? "" : fileName) +
(sourceSize >= 0 ? ' '+fileSize(sourceSize) : "") +
@@ -569,6 +580,18 @@ public class TransformerDebug
}
}
/**
 * Logs a debug message using the reference of the previous transform recorded for this
 * Thread, rather than the reference of the current transform.
 * Nothing is logged if debug is not enabled or the message is {@code null}.
 * @param message the text to log.
 */
public void debugUsingPreviousReference(String message)
{
    if (!isEnabled() || message == null)
    {
        return;
    }

    // No Throwable, debug level, and use the previous transform's reference.
    log(message, null, true, true);
}
/**
* Log a message prefixed with the current transformation reference
* and include an exception, suppressing the stack trace if repeated
@@ -631,16 +654,21 @@ public class TransformerDebug
{
log(message, null, debug);
}
private void log(String message, Throwable t, boolean debug)
{
log(message, t, debug, false);
}
private void log(String message, Throwable t, boolean debug, boolean usePreviousRef)
{
if (debug && ThreadInfo.getDebugOutput() && logger.isDebugEnabled())
{
logger.debug(getReference(false, false)+message, t);
logger.debug(getReference(false, false, usePreviousRef)+message, t);
}
else if (logger.isTraceEnabled())
{
logger.trace(getReference(false, false)+message, t);
logger.trace(getReference(false, false, usePreviousRef)+message, t);
}
if (debug)
@@ -648,7 +676,7 @@ public class TransformerDebug
StringBuilder sb = ThreadInfo.getStringBuilder();
if (sb != null)
{
sb.append(getReference(false, true));
sb.append(getReference(false, true, usePreviousRef));
sb.append(message);
if (t != null)
{
@@ -691,10 +719,21 @@ public class TransformerDebug
* Returns a N.N.N style reference to the transformation.
* @param firstLevelOnly indicates if only the top level should be included and no extra padding.
* @param overrideFirstLevel if the first level id should just be set to 1 (used in test methods)
* @param usePreviousRef if the reference of the last transform performed by this Thread should be used.
* @return a padded (fixed length) reference.
*/
private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel)
private String getReference(boolean firstLevelOnly, boolean overrideFirstLevel, boolean usePreviousRef)
{
if (usePreviousRef)
{
int id = getPreviousTransformId();
String ref = "";
if (id >= 0)
{
ref = Integer.toString(id)+spaces(13);
}
return ref;
}
StringBuilder sb = new StringBuilder("");
Frame frame = null;
Iterator<Frame> iterator = ThreadInfo.getStack().descendingIterator();
@@ -737,7 +776,7 @@ public class TransformerDebug
}
else
{
sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7
sb.append(spaces(13-sb.length()+lengthOfFirstId)); // Try to pad to level 7
}
}
return sb.toString();
@@ -783,6 +822,14 @@ public class TransformerDebug
return result;
}
/**
 * Builds the combined source and target extension text used at the start of log messages.
 * The target extension obtained from the mimetype is passed through
 * {@link AsynchronousExtractor#getExtension(String, String, String)}, which may return a
 * different value, and the result is then followed by padding.
 * @param sourceMimetype mimetype of the source content.
 * @param targetMimetype mimetype of the target content.
 * @return the source extension, the target extension and trailing padding.
 */
protected String getSourceAndTargetExt(String sourceMimetype, String targetMimetype)
{
    final String sourceText = getMimetypeExt(sourceMimetype);
    final String mimetypeTargetText = getMimetypeExt(targetMimetype);
    final String targetText = AsynchronousExtractor.getExtension(targetMimetype, sourceText, mimetypeTargetText);

    // NOTE(review): presumably pads the target text to 5 characters - depends on spaces(int).
    final String padding = spaces(1+4-targetText.length());
    return sourceText + targetText + padding;
}
protected String getMimetypeExt(String mimetype)
{
StringBuilder sb = new StringBuilder("");
@@ -867,16 +914,15 @@ public class TransformerDebug
if (isEnabled())
{
pushMisc();
String sourceExt = getMimetypeExt(sourceMimetype);
String targetExt = getMimetypeExt(targetMimetype);
debug(sourceExt + targetExt +
String sourceAndTargetExt = getSourceAndTargetExt(sourceMimetype, targetMimetype);
debug(sourceAndTargetExt +
((fileName != null) ? fileName + ' ' : "") +
((sourceSize >= 0) ? fileSize(sourceSize) + ' ' : "") +
getRenditionName(renditionName) + " "+ TRANSFORM_SERVICE_NAME);
log(options);
log(sourceNodeRef.toString() + ' ' + contentHashcode);
String reference = getReference(true, false);
infoLog(reference, sourceExt, targetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME,
String reference = getReference(true, false, false);
infoLog(reference, sourceAndTargetExt, null, fileName, sourceSize, TRANSFORM_SERVICE_NAME,
renditionName, null, "", true);
}
return pop(Call.AVAILABLE, true, false);
@@ -884,19 +930,21 @@ public class TransformerDebug
private String getRenditionName(String renditionName)
{
return renditionName != null ? "-- "+renditionName+" -- " : "";
return renditionName != null
? "-- "+ AsynchronousExtractor.getRenditionName(renditionName)+" -- "
: "";
}
/**
* Debugs a response to the Transform Service
*/
public void debugTransformServiceResponse(NodeRef sourceNodeRef, int contentHashcode,
long requested, int seq, String sourceExt, String targetExt, String msg)
long requested, int id, String sourceExt, String targetExt, String msg)
{
pushMisc();
Frame frame = ThreadInfo.getStack().getLast();
frame.id = seq;
boolean suppressFinish = seq == -1 || requested == -1;
frame.id = id;
boolean suppressFinish = id == -1 || requested == -1;
if (!suppressFinish)
{
frame.start = requested;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -29,6 +29,7 @@ import org.alfresco.model.ContentModel;
import org.alfresco.model.RenditionModel;
import org.alfresco.repo.content.ContentServicePolicies;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.repo.policy.BehaviourFilter;
import org.alfresco.repo.policy.PolicyComponent;
import org.alfresco.repo.rendition.RenditionPreventionRegistry;
@@ -112,6 +113,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
private RuleService ruleService;
private PostTxnCallbackScheduler renditionRequestSheduler;
private TransformReplyProvider transformReplyProvider;
private AsynchronousExtractor asynchronousExtractor;
private boolean enabled;
private boolean thumbnailsEnabled;
@@ -176,6 +178,11 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
this.transformReplyProvider = transformReplyProvider;
}
/**
 * Injects the extractor that is called when the reply to a metadata extract or embed
 * transform is consumed.
 * @param asynchronousExtractor the extractor to use; checked as mandatory in afterPropertiesSet.
 */
public void setAsynchronousExtractor(AsynchronousExtractor asynchronousExtractor)
{
    this.asynchronousExtractor = asynchronousExtractor;
}
public void setEnabled(boolean enabled)
{
this.enabled = enabled;
@@ -203,6 +210,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
PropertyCheck.mandatory(this, "policyComponent", policyComponent);
PropertyCheck.mandatory(this, "behaviourFilter", behaviourFilter);
PropertyCheck.mandatory(this, "ruleService", ruleService);
PropertyCheck.mandatory(this, "asynchronousExtractor", asynchronousExtractor);
}
@Override
@@ -374,41 +382,115 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
public void consume(NodeRef sourceNodeRef, InputStream transformInputStream, RenditionDefinition2 renditionDefinition,
int transformContentHashCode)
{
int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef);
if (logger.isDebugEnabled())
{
logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes");
}
if (renditionDefinition instanceof TransformDefinition)
{
if (logger.isDebugEnabled())
TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition;
String targetMimetype = transformDefinition.getTargetMimetype();
if (AsynchronousExtractor.isMetadataExtractMimetype(targetMimetype))
{
TransformDefinition transformDefinition = (TransformDefinition)renditionDefinition;
String transformName = transformDefinition.getTransformName();
String replyQueue = transformDefinition.getReplyQueue();
String clientData = transformDefinition.getClientData();
boolean success = transformInputStream != null;
logger.info("Reply to " + replyQueue + " that the transform " + transformName +
" with the client data " + clientData + " " + (success ? "was successful" : "failed."));
consumeExtractedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode);
}
else if (AsynchronousExtractor.isMetadataEmbedMimetype(targetMimetype))
{
consumeEmbeddedMetadata(sourceNodeRef, sourceContentHashCode, transformInputStream, transformDefinition, transformContentHashCode);
}
else
{
consumeTransformReply(sourceNodeRef, transformInputStream, transformDefinition, transformContentHashCode);
}
transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream,
(TransformDefinition)renditionDefinition, transformContentHashCode);
}
else
{
consumeRendition(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode);
consumeRendition(sourceNodeRef, sourceContentHashCode, transformInputStream, renditionDefinition, transformContentHashCode);
}
}
/**
 * Consumes the reply to a metadata extract transform.
 * The reply is ignored if the transform failed (a {@code null} InputStream) or if the hash
 * code of the content that was transformed no longer matches that of the source node.
 * Otherwise the stream is handed to the {@link AsynchronousExtractor} to set the metadata.
 *
 * @param nodeRef the node the metadata was extracted from.
 * @param sourceContentHashCode hash code of the node's current content.
 * @param transformInputStream the transform result, or {@code null} on failure.
 * @param transformDefinition the definition of the transform (not used here).
 * @param transformContentHashCode hash code of the content that was transformed.
 */
private void consumeExtractedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream,
        TransformDefinition transformDefinition, int transformContentHashCode)
{
    // The transform failed.
    if (transformInputStream == null)
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it failed");
        }
        return;
    }

    // The hash codes differ, so the result is out of date.
    if (transformContentHashCode != sourceContentHashCode)
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Ignore transform for metadata extraction on " + nodeRef + " as it is no longer needed");
        }
        return;
    }

    if (logger.isDebugEnabled())
    {
        logger.debug("Set the metadata extraction on " + nodeRef);
    }
    asynchronousExtractor.setMetadata(nodeRef, transformInputStream);
}
/**
 * Consumes the reply to a metadata embed transform.
 * The reply is ignored if the transform failed (a {@code null} InputStream) or if the hash
 * code of the content that was transformed no longer matches that of the source node.
 * Otherwise the stream is handed to the {@link AsynchronousExtractor} to set the content
 * with the embedded metadata.
 *
 * @param nodeRef the node whose content had metadata embedded.
 * @param sourceContentHashCode hash code of the node's current content.
 * @param transformInputStream the transform result, or {@code null} on failure.
 * @param transformDefinition the definition of the transform (not used here).
 * @param transformContentHashCode hash code of the content that was transformed.
 */
private void consumeEmbeddedMetadata(NodeRef nodeRef, int sourceContentHashCode, InputStream transformInputStream,
        TransformDefinition transformDefinition, int transformContentHashCode)
{
    // The transform failed.
    if (transformInputStream == null)
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it failed");
        }
        return;
    }

    // The hash codes differ, so the result is out of date.
    if (transformContentHashCode != sourceContentHashCode)
    {
        if (logger.isDebugEnabled())
        {
            logger.debug("Ignore transform for metadata embed on " + nodeRef + " as it is no longer needed");
        }
        return;
    }

    if (logger.isDebugEnabled())
    {
        logger.debug("Set the content with embedded metadata on " + nodeRef);
    }
    asynchronousExtractor.setEmbeddedMetadata(nodeRef, transformInputStream);
}
/**
 * Consumes the reply to a transform that is neither a metadata extract nor a metadata embed,
 * by passing it on to the {@code transformReplyProvider}.
 * A {@code null} InputStream is reported in the debug message as a failed transform.
 *
 * @param sourceNodeRef the node that was transformed.
 * @param transformInputStream the transform result, or {@code null} on failure.
 * @param transformDefinition supplies the transform name, reply queue and client data.
 * @param transformContentHashCode hash code of the content that was transformed.
 */
private void consumeTransformReply(NodeRef sourceNodeRef, InputStream transformInputStream,
        TransformDefinition transformDefinition, int transformContentHashCode)
{
    if (logger.isDebugEnabled())
    {
        String transformName = transformDefinition.getTransformName();
        String replyQueue = transformDefinition.getReplyQueue();
        String clientData = transformDefinition.getClientData();
        boolean success = transformInputStream != null;
        // Logged at debug to match the isDebugEnabled() guard; it was previously written at
        // info level, so only appeared as an INFO message when DEBUG happened to be enabled.
        logger.debug("Reply to " + replyQueue + " that the transform " + transformName +
                " with the client data " + clientData + " " + (success ? "was successful" : "failed."));
    }
    transformReplyProvider.produceTransformEvent(sourceNodeRef, transformInputStream,
            transformDefinition, transformContentHashCode);
}
/**
* Takes a transformation (InputStream) and attaches it as a rendition to the source node.
* Does nothing if there is already a newer rendition.
* If the transformInputStream is null, this is taken to be a transform failure.
*/
private void consumeRendition(NodeRef sourceNodeRef, InputStream transformInputStream,
private void consumeRendition(NodeRef sourceNodeRef, int sourceContentHashCode, InputStream transformInputStream,
RenditionDefinition2 renditionDefinition, int transformContentHashCode)
{
String renditionName = renditionDefinition.getRenditionName();
int sourceContentHashCode = getSourceContentHashCode(sourceNodeRef);
if (logger.isDebugEnabled())
{
logger.debug("Consume: Source " + sourceContentHashCode + " and transform's source " + transformContentHashCode+" hashcodes");
}
if (transformContentHashCode != sourceContentHashCode)
{
if (logger.isDebugEnabled())
@@ -475,7 +557,7 @@ public class RenditionService2Impl implements RenditionService2, InitializingBea
}
catch (Exception e)
{
logger.error("Failed to read transform InputStream into rendition " + renditionName + " on " + sourceNodeRef);
logger.error("Failed to copy transform InputStream into rendition " + renditionName + " on " + sourceNodeRef);
throw e;
}
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -80,7 +80,14 @@ public class TransformDefinition extends RenditionDefinition2Impl
public String getTransformName()
{
String renditionName = getRenditionName();
return renditionName == null ? null : renditionName.substring(TRANSFORM_NAMESPACE.length());
return getTransformName(renditionName);
}
/**
 * Extracts the transform name from a rendition name.
 * @param renditionName a rendition name, may be {@code null}.
 * @return the text that follows the {@code TRANSFORM_NAMESPACE} prefix, or {@code null} if
 *         the rendition name is {@code null} or does not start with that prefix.
 */
public static String getTransformName(String renditionName)
{
    if (renditionName == null)
    {
        return null;
    }
    if (!renditionName.startsWith(TRANSFORM_NAMESPACE))
    {
        return null;
    }
    return renditionName.substring(TRANSFORM_NAMESPACE.length());
}
public String getClientData()

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.tagging;
import java.io.BufferedReader;
@@ -74,9 +74,9 @@ import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.cmr.search.CategoryService;
import org.alfresco.service.cmr.search.ResultSet;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.cmr.search.SearchParameters.FieldFacet;
import org.alfresco.service.cmr.search.ResultSet;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.cmr.search.SearchParameters.FieldFacet;
import org.alfresco.service.cmr.search.SearchService;
import org.alfresco.service.cmr.tagging.TagDetails;
import org.alfresco.service.cmr.tagging.TagScope;
@@ -1544,35 +1544,35 @@ public class TaggingServiceImpl implements TaggingService,
{
updateAllScopeTags(workingCopy, Boolean.FALSE);
}
}
/**
* @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef)
*/
@Override
public List<Pair<String, Integer>> findTaggedNodesAndCountByTagName(StoreRef storeRef)
{
String queryTaggeble = "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" + "-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\"";
SearchParameters sp = new SearchParameters();
sp.setQuery(queryTaggeble);
sp.setLanguage(SearchService.LANGUAGE_LUCENE);
sp.addStore(storeRef);
sp.addFieldFacet(new FieldFacet("TAG"));
ResultSet resultSet = null;
try
{
// Do the search for nodes
resultSet = this.searchService.query(sp);
return resultSet.getFieldFacet("TAG");
}
finally
{
if (resultSet != null)
{
resultSet.close();
}
}
}
/**
 * Runs a Lucene query, faceted on the "TAG" field, for nodes in the given store that have
 * the taggable aspect, and returns the "TAG" field facet of the result.
 * The result set is always closed before returning.
 *
 * @see org.alfresco.service.cmr.tagging.TaggingService#findTaggedNodesAndCountByTagName(StoreRef)
 */
@Override
public List<Pair<String, Integer>> findTaggedNodesAndCountByTagName(StoreRef storeRef)
{
    final String taggableQuery =
            "ASPECT:\"" + ContentModel.ASPECT_TAGGABLE + "\"" +
            "-ASPECT:\"" + ContentModel.ASPECT_WORKING_COPY + "\"";

    final SearchParameters searchParameters = new SearchParameters();
    searchParameters.setQuery(taggableQuery);
    searchParameters.setLanguage(SearchService.LANGUAGE_LUCENE);
    searchParameters.addStore(storeRef);
    searchParameters.addFieldFacet(new FieldFacet("TAG"));

    // Do the search for nodes
    final ResultSet results = this.searchService.query(searchParameters);
    try
    {
        return results.getFieldFacet("TAG");
    }
    finally
    {
        if (results != null)
        {
            results.close();
        }
    }
}
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -48,7 +48,6 @@ import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -56,6 +55,9 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataEmbedMimetype;
import static org.alfresco.repo.content.metadata.AsynchronousExtractor.isMetadataExtractMimetype;
/**
* This class reads multiple T-Engine config and local files and registers them all with a registry as if they were all
* in one file. Transform options are shared between all sources.<p>
@@ -398,7 +400,13 @@ public class CombinedConfig
// the source matches the last intermediate.
Set<SupportedSourceAndTarget> supportedSourceAndTargets = sourceMediaTypesAndMaxSizes.stream().
flatMap(s -> stepTransformer.getSupportedSourceAndTargetList().stream().
filter(st -> st.getSourceMediaType().equals(src)).
filter(st ->
{
String targetMimetype = st.getTargetMediaType();
return st.getSourceMediaType().equals(src) &&
!(isMetadataExtractMimetype(targetMimetype) ||
isMetadataEmbedMimetype(targetMimetype));
}).
map(t -> t.getTargetMediaType()).
map(trg -> SupportedSourceAndTarget.builder().
withSourceMediaType(s.getSourceMediaType()).

View File

@@ -255,7 +255,10 @@
</bean>
<!-- Metadata Extraction Registry -->
<bean id="metadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry" />
<bean id="metadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry">
<property name="asyncExtractEnabled" value="${content.metadata.async.extract.enabled}" />
<property name="asyncEmbedEnabled" value="${content.metadata.async.embed.enabled}" />
</bean>
<!-- Abstract bean definition defining base definition for all metadata extracters -->
<bean id="baseMetadataExtracter"
@@ -330,6 +333,16 @@
<bean id="extracter.Audio" class="org.alfresco.repo.content.metadata.TikaAudioMetadataExtracter" parent="baseMetadataExtracter">
<property name="tikaConfig" ref="tikaConfig"/>
</bean>
<bean id="extractor.Asynchronous" class="org.alfresco.repo.content.metadata.AsynchronousExtractor" parent="baseMetadataExtracter">
<property name="nodeService" ref="nodeService" />
<property name="namespacePrefixResolver" ref="namespaceService" />
<property name="transformerDebug" ref="transformerDebug" />
<property name="renditionService2" ref="renditionService2" />
<property name="contentService" ref="ContentService" />
<property name="transactionService" ref="transactionService" />
<property name="transformServiceRegistry" ref="transformServiceRegistry" />
<property name="taggingService" ref="taggingService" />
</bean>
<!-- Content Transformation Regisitry -->
<bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" >

View File

@@ -59,6 +59,7 @@
<property name="policyComponent" ref="policyComponent" />
<property name="behaviourFilter" ref="policyBehaviourFilter" />
<property name="ruleService" ref="ruleService" />
<property name="asynchronousExtractor" ref="extractor.Asynchronous" />
<property name="renditionRequestSheduler" ref="renditionRequestSheduler" />
<property name="transformReplyProvider" ref="transformReplyProvider" />
<property name="enabled" value="${renditionService2.enabled}" />

View File

@@ -608,6 +608,9 @@ system.thumbnail.quietPeriod=604800
system.thumbnail.quietPeriodRetriesEnabled=true
system.thumbnail.redeployStaticDefsOnStartup=true
content.metadata.async.extract.enabled=true
content.metadata.async.embed.enabled=true
# The default timeout for metadata mapping extracters
content.metadataExtracter.default.timeoutMs=20000

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2017 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -45,9 +45,12 @@ import org.junit.runners.Suite;
org.alfresco.repo.action.evaluator.HasAspectEvaluatorTest.class,
org.alfresco.repo.action.executer.SetPropertyValueActionExecuterTest.class,
org.alfresco.repo.action.executer.AddFeaturesActionExecuterTest.class,
org.alfresco.repo.action.executer.ContentMetadataExtracterTest.class,
org.alfresco.repo.action.executer.ContentMetadataExtracterTagMappingTest.class,
org.alfresco.repo.action.executer.ContentMetadataEmbedderTest.class,
org.alfresco.repo.action.executer.AsynchronousExtractorTest.class,
org.alfresco.repo.rule.RuleLinkTest.class,
org.alfresco.repo.rule.RuleServiceCoverageTest.class,
org.alfresco.repo.rule.RuleServiceImplTest.class,

View File

@@ -0,0 +1,559 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.action.executer;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ActionImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.content.transform.TransformerDebug;
import org.alfresco.repo.content.transform.UnsupportedTransformationException;
import org.alfresco.repo.rendition2.RenditionDefinition2;
import org.alfresco.repo.rendition2.RenditionService2Impl;
import org.alfresco.repo.rendition2.TransformClient;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.repo.transaction.RetryingTransactionHelper;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.cmr.tagging.TaggingService;
import org.alfresco.service.namespace.NamespacePrefixResolver;
import org.alfresco.service.namespace.QName;
import org.alfresco.service.transaction.TransactionService;
import org.alfresco.test_category.BaseSpringTestsCategory;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.GUID;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import javax.transaction.HeuristicMixedException;
import javax.transaction.HeuristicRollbackException;
import javax.transaction.NotSupportedException;
import javax.transaction.RollbackException;
import javax.transaction.SystemException;
import javax.transaction.UserTransaction;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.StringJoiner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static java.util.Arrays.asList;
import static org.alfresco.model.ContentModel.PROP_CONTENT;
import static org.alfresco.model.ContentModel.PROP_CREATED;
import static org.alfresco.model.ContentModel.PROP_CREATOR;
import static org.alfresco.model.ContentModel.PROP_MODIFIED;
import static org.alfresco.model.ContentModel.PROP_MODIFIER;
import static org.alfresco.repo.rendition2.RenditionService2Impl.SOURCE_HAS_NO_CONTENT;
/**
* Tests the asynchronous extract and embed of metadata. This is normally performed in a T-Engine, but in this test
* class is mocked using a separate Thread that returns well known values. What makes the AsynchronousExtractor
* different from other {@link AbstractMappingMetadataExtracter} sub classes is that the calling Thread does not
* do the work of updating properties or the content, as the T-Engine will reply at some later point.
*
* @author adavis
*/
@Category(BaseSpringTestsCategory.class)
public class AsynchronousExtractorTest extends BaseSpringTest
{
private final static String ID = GUID.generate();
private static final String AFTER_CALLING_EXECUTE = "after calling execute";
private static final String AFTER_THE_TRANSFORM = "after the transform";
private static final Integer UNCHANGED_HASHCODE = null;
private static final Integer CHANGED_HASHCODE = 1234;
private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy");
private static final ExecutorService executorService = Executors.newCachedThreadPool();
private NodeService nodeService;
private ContentService contentService;
private DictionaryService dictionaryService;
private MimetypeService mimetypeService;
private MetadataExtracterRegistry metadataExtracterRegistry;
private StoreRef testStoreRef;
private NodeRef rootNodeRef;
private NodeRef nodeRef;
private AsynchronousExtractor asynchronousExtractor;
private NamespacePrefixResolver namespacePrefixResolver;
private TransformerDebug transformerDebug;
private TransactionService transactionService;
private TransformServiceRegistry transformServiceRegistry;
private TaggingService taggingService;
private ContentMetadataExtracter contentMetadataExtracter;
private ContentMetadataEmbedder contentMetadataEmbedder;
private RenditionService2Impl renditionService2;
private TransformClient transformClient;
private long origSize;
private Map<QName, Serializable> origProperties;
private Map<QName, Serializable> expectedProperties;
private Map<QName, Serializable> properties;
private class TestAsynchronousExtractor extends AsynchronousExtractor
{
private final String mockResult;
private final Integer changedHashcode;
private final Random random = new Random();
private boolean finished;
// Stand-in TransformClient used in place of the real one: the constructor below installs it on renditionService2.
TransformClient mockTransformClient = new TransformClient()
{
// Intentionally empty: the mock never rejects a request, so every transform is treated as supported.
@Override
public void checkSupported(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String sourceMimetype, long sourceSizeInBytes, String contentUrl)
{
}
// Hands the request to mockTransform(...) rather than to a T-Engine. The user argument is not used.
@Override
public void transform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, String user, int sourceContentHashCode)
throws UnsupportedTransformationException, ContentIOException
{
mockTransform(sourceNodeRef, renditionDefinition, sourceContentHashCode);
}
};
/**
* Creates an AsynchronousExtractor that simulates an extract or embed.
* <p>
* The constructor wires the extractor with the services held by the enclosing test, registers it with the
* test's metadata extracter registry and, as a side effect, replaces the transform client of the shared
* {@code renditionService2} with {@link #mockTransformClient}. The enclosing test's {@code after()} method
* puts the original client back.
*
* @param mockResult if specified indicates a value was returned. The result is read as a resource from
* the classpath. If {@code null}, the simulated transform reports a failure instead.
* @param changedHashcode if specified indicates that the source node content changed or was deleted between
* the request to extract or embed and the response. If {@code null}, the hash code
* supplied with the transform request is echoed back unchanged.
*/
TestAsynchronousExtractor(String mockResult, Integer changedHashcode)
{
this.mockResult = mockResult;
this.changedHashcode = changedHashcode;
// Inject the collaborators before register() is called.
setNodeService(nodeService);
setNamespacePrefixResolver(namespacePrefixResolver);
setTransformerDebug(transformerDebug);
setRenditionService2(renditionService2);
setContentService(contentService);
setTransactionService(transactionService);
setTransformServiceRegistry(transformServiceRegistry);
setTaggingService(taggingService);
setRegistry(metadataExtracterRegistry);
setMimetypeService(mimetypeService);
setDictionaryService(dictionaryService);
setExecutorService(executorService);
register();
// Route transform requests made through renditionService2 to the mock rather than to a real T-Engine.
renditionService2.setTransformClient(mockTransformClient);
}
/**
* Always reports that extraction is supported, whatever the mimetype or size, so that this extractor
* accepts every request made by the tests.
*/
@Override
public boolean isSupported(String sourceMimetype, long sourceSizeInBytes)
{
return true;
}
/**
* Always reports that embedding is supported, whatever the mimetype or size, so that this extractor
* accepts every embed request made by the tests.
*/
@Override
public boolean isEmbedderSupported(String sourceMimetype, long sourceSizeInBytes)
{
return true;
}
/**
 * Simulates the work of a T-Engine and its reply to the repository.
 * <p>
 * After a short random pause the method either passes the classpath resource named by {@code mockResult} to
 * {@code renditionService2.consume(...)} or, when {@code mockResult} is {@code null}, calls
 * {@code renditionService2.failure(...)}. The hash code handed back is {@code changedHashcode} when one was
 * configured, otherwise the hash code supplied with the request.
 * <p>
 * The {@code finished} flag is always set and waiters are always notified, even when consume or failure
 * throws, so that a test thread blocked in {@link #wait(int, int)} is released promptly rather than sitting
 * out its full timeout.
 *
 * @param sourceNodeRef the node the transform was requested for.
 * @param renditionDefinition the definition supplied with the transform request.
 * @param sourceContentHashCode the content hash code supplied with the transform request.
 */
private void mockTransform(NodeRef sourceNodeRef, RenditionDefinition2 renditionDefinition, int sourceContentHashCode)
{
    try
    {
        try
        {
            transformerDebug.pushMisc();
            // Simulated processing time of the T-Engine.
            wait(50, 700);
        }
        finally
        {
            transformerDebug.popMisc();
        }

        int transformContentHashCode = changedHashcode == null ? sourceContentHashCode : changedHashcode;
        if (mockResult != null)
        {
            try (InputStream transformInputStream = getClass().getClassLoader().getResourceAsStream(mockResult))
            {
                renditionService2.consume(sourceNodeRef, transformInputStream, renditionDefinition, transformContentHashCode);
            }
            catch (IOException e)
            {
                throw new RuntimeException("Could not read '" + mockResult + "' from the classpath.", e);
            }
        }
        else
        {
            renditionService2.failure(sourceNodeRef, renditionDefinition, transformContentHashCode);
        }
    }
    finally
    {
        // Release any thread blocked in wait(int, int), whether or not the reply was processed successfully.
        synchronized (this)
        {
            finished = true;
            notifyAll();
        }
    }
}
/**
 * Waits for a number of milliseconds or until the finished flag is set, whichever comes first.
 * <p>
 * If the calling thread is interrupted the wait continues, but the thread's interrupt status is restored
 * before the method returns so that callers can still observe it.
 *
 * @param from inclusive lower bound of a randomly chosen wait. If negative, no random value is used and
 *             the wait lasts at most {@code to} milliseconds.
 * @param to exclusive upper bound of the randomly chosen wait, or the maximum wait if {@code from} is
 *           negative. Must be greater than {@code from} when {@code from} is not negative.
 */
public synchronized void wait(int from, int to)
{
    long start = System.currentTimeMillis();
    long end = start + (from < 0 ? to : from + random.nextInt(to - from));
    boolean interrupted = false;
    while (!finished && System.currentTimeMillis() < end)
    {
        long ms = end - System.currentTimeMillis();
        if (ms > 0)
        {
            try
            {
                wait(ms);
            }
            catch (InterruptedException e)
            {
                // Keep waiting for the deadline or the finished flag, but remember the interrupt.
                interrupted = true;
            }
        }
    }
    if (interrupted)
    {
        Thread.currentThread().interrupt();
    }
}
}
/**
* Prepares each test: looks up the repository beans, builds the extract and embed action executers around
* an initially empty metadata extracter registry, and then, inside a retrying transaction, creates a new
* store containing a single PDF content node. The node's properties and content size are recorded so that
* tests can check what did or did not change.
*/
@Before
public void before() throws Exception
{
nodeService = (NodeService) applicationContext.getBean("nodeService");
contentService = (ContentService) applicationContext.getBean("contentService");
dictionaryService = (DictionaryService) applicationContext.getBean("dictionaryService");
mimetypeService = (MimetypeService) applicationContext.getBean("mimetypeService");
namespacePrefixResolver = (NamespacePrefixResolver) applicationContext.getBean("namespaceService");
transformerDebug = (TransformerDebug) applicationContext.getBean("transformerDebug");
renditionService2 = (RenditionService2Impl) applicationContext.getBean("renditionService2");
transactionService = (TransactionService) applicationContext.getBean("transactionService");
transformServiceRegistry = (TransformServiceRegistry) applicationContext.getBean("transformServiceRegistry");
taggingService = (TaggingService) applicationContext.getBean("taggingService");
// Remember the real transform client so that after() can put it back.
transformClient = (TransformClient) applicationContext.getBean("transformClient");
// Create an empty metadata extractor registry, so that if we add one it will be used
metadataExtracterRegistry = new MetadataExtracterRegistry();
// Action executer used by the extract tests.
contentMetadataExtracter = new ContentMetadataExtracter();
contentMetadataExtracter.setNodeService(nodeService);
contentMetadataExtracter.setContentService(contentService);
contentMetadataExtracter.setDictionaryService(dictionaryService);
contentMetadataExtracter.setMetadataExtracterRegistry(metadataExtracterRegistry);
contentMetadataExtracter.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()});
contentMetadataExtracter.setCarryAspectProperties(true);
// Action executer used by the embed tests.
contentMetadataEmbedder = new ContentMetadataEmbedder();
contentMetadataEmbedder.setNodeService(nodeService);
contentMetadataEmbedder.setContentService(contentService);
contentMetadataEmbedder.setMetadataExtracterRegistry(metadataExtracterRegistry);
contentMetadataEmbedder.setApplicableTypes(new String[]{ContentModel.TYPE_CONTENT.toString()});
transactionService.getRetryingTransactionHelper().doInTransaction(new RetryingTransactionHelper.RetryingTransactionCallback<Void>()
{
@Override
public Void execute() throws Throwable
{
AuthenticationComponent authenticationComponent = (AuthenticationComponent) applicationContext.getBean("authenticationComponent");
authenticationComponent.setSystemUserAsCurrentUser();
// Create the store and get the root node
testStoreRef = nodeService.createStore(
StoreRef.PROTOCOL_WORKSPACE,
"Test_" + System.currentTimeMillis());
rootNodeRef = nodeService.getRootNode(testStoreRef);
// Create the node used for tests
nodeRef = nodeService.createNode(
rootNodeRef, ContentModel.ASSOC_CHILDREN,
QName.createQName("{test}testnode"),
ContentModel.TYPE_CONTENT).getChildRef();
// Authenticate as the system user
// NOTE(review): the system user was already set at the top of this callback; this second call looks redundant.
authenticationComponent.setSystemUserAsCurrentUser();
// Give the node PDF content taken from the quick test file.
ContentWriter cw = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);
cw.setMimetype(MimetypeMap.MIMETYPE_PDF);
cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf"));
origProperties = nodeService.getProperties(nodeRef);
// NOTE(review): writes back the properties just read; the reason is not evident from this code - confirm.
nodeService.setProperties(nodeRef, origProperties);
origProperties = new HashMap<>(origProperties); // just in case the contents changed.
expectedProperties = new HashMap<>(origProperties); // ready to be modified.
origSize = getSize(nodeRef);
return null;
}
});
}
/**
* Puts back the transform client that was looked up in before(), undoing the replacement made on the shared
* renditionService2 when a TestAsynchronousExtractor was constructed.
*/
@After
public void after() throws Exception
{
renditionService2.setTransformClient(transformClient);
}
/**
 * Runs an extract or embed action against the test node and checks the node twice: straight after the
 * action has been executed, when nothing should have changed yet, and again once the simulated transform
 * has replied (or a timeout has passed).
 *
 * @param executor the action executer (extracter or embedder) under test.
 * @param mockResult classpath resource returned by the simulated transform, or {@code null} for a failure.
 * @param changedHashcode content hash code the simulated transform replies with, or {@code null} to echo
 *                        the one in the request.
 * @param expectedSize content size expected after the transform.
 * @param expectedProperties node properties expected after the transform.
 * @param ignoreProperties properties to leave out of the comparisons.
 */
private void assertAsyncMetadataExecute(ActionExecuterAbstractBase executor, String mockResult,
        Integer changedHashcode, long expectedSize,
        Map<QName, Serializable> expectedProperties,
        QName... ignoreProperties) throws Exception
{
    final int maxWaitMs = 10000;
    TestAsynchronousExtractor asyncExtractor = new TestAsynchronousExtractor(mockResult, changedHashcode);

    // The request has been made, but the reply has not arrived yet: nothing may have changed.
    executeAction(executor, asyncExtractor);
    assertContentSize(nodeRef, origSize, AFTER_CALLING_EXECUTE);
    assertProperties(nodeRef, origProperties, AFTER_CALLING_EXECUTE, ignoreProperties);

    // Block until the simulated transform has finished, then check the outcome.
    asyncExtractor.wait(-1, maxWaitMs);
    assertContentSize(nodeRef, expectedSize, AFTER_THE_TRANSFORM);
    assertProperties(nodeRef, expectedProperties, AFTER_THE_TRANSFORM, ignoreProperties);
}
/**
 * Executes the given action executer against the test node inside its own user transaction.
 * <p>
 * If the executer throws, the transaction is rolled back before the failure is rethrown, so that a failing
 * test does not leave a transaction open on the thread. A failure of the rollback itself is attached to the
 * original failure as a suppressed exception.
 *
 * @param extractor the action executer (extracter or embedder) to run.
 * @param asynchronousExtractor not used by this method. NOTE(review): presumably passed only to show that
 *                              the extractor has to be constructed before the action runs - confirm.
 */
private void executeAction(ActionExecuterAbstractBase extractor, TestAsynchronousExtractor asynchronousExtractor)
        throws SystemException, NotSupportedException, HeuristicRollbackException, HeuristicMixedException, RollbackException
{
    UserTransaction txn = transactionService.getUserTransaction();
    txn.begin();
    try
    {
        ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null);
        extractor.execute(action, nodeRef);
    }
    catch (RuntimeException | Error e)
    {
        try
        {
            txn.rollback();
        }
        catch (Exception rollbackFailure)
        {
            e.addSuppressed(rollbackFailure);
        }
        throw e;
    }
    txn.commit();
}
/**
 * Asserts that the content of the node has the expected size. The failure message says whether the content
 * was expected to remain unchanged (expected size equals the original size) or to have changed.
 *
 * @param nodeRef the node whose content size is checked.
 * @param expectSize the expected content size in bytes.
 * @param state text appended to the failure message describing when the check took place.
 */
void assertContentSize(NodeRef nodeRef, long expectSize, String state)
{
    long actualSize = getSize(nodeRef);
    String expectation = expectSize == origSize
            ? "The content should remain unchanged "
            : "The content should have changed ";
    assertEquals(expectation + state, expectSize, actualSize);
}
/**
 * Returns the size reported by a content reader for the node's {@code cm:content} property.
 *
 * @param nodeRef the node to read.
 * @return the content size.
 */
private long getSize(NodeRef nodeRef)
{
    return contentService.getReader(nodeRef, ContentModel.PROP_CONTENT).getSize();
}
/**
 * Asserts that the node's current properties match the expected ones, and fails with a readable list of
 * the differences if they do not. The properties that were read are also stored in the {@code properties}
 * field.
 * <p>
 * Values are compared null-safely, so an expected property whose value is {@code null} is reported as a
 * difference (or accepted, if the actual value is also {@code null}) rather than causing a
 * {@link NullPointerException}.
 *
 * @param nodeRef the node whose properties are checked.
 * @param expectProperties the expected properties.
 * @param state text appended to the failure message describing when the check took place.
 * @param ignoreProperties properties to leave out of the comparison, in addition to the five system set
 *                         ones that are always ignored.
 */
private void assertProperties(NodeRef nodeRef, Map<QName, Serializable> expectProperties, String state,
        QName[] ignoreProperties)
{
    properties = nodeService.getProperties(nodeRef);

    // Work out the difference in a human readable form and ignore the 5 system set properties (as they always
    // change) plus any the caller has requested.
    StringJoiner sj = new StringJoiner("\n");
    List<QName> ignoreKeys = new ArrayList<>(asList(PROP_MODIFIED, PROP_MODIFIER, PROP_CONTENT, PROP_CREATED, PROP_CREATOR));
    ignoreKeys.addAll(asList(ignoreProperties));

    // Expected properties that are missing or have a different value.
    for (Map.Entry<QName, Serializable> entry : expectProperties.entrySet())
    {
        QName k = entry.getKey();
        Serializable v = entry.getValue();
        Serializable actual = properties.get(k);
        if (!ignoreKeys.contains(k) && !java.util.Objects.equals(v, actual))
        {
            sj.add(k + "\n Expected: " + v + "\n Was: " + actual);
        }
    }

    // Properties on the node that were not expected at all.
    for (Map.Entry<QName, Serializable> entry : properties.entrySet())
    {
        QName k = entry.getKey();
        if (!ignoreKeys.contains(k) && !expectProperties.containsKey(k))
        {
            sj.add(k + "\n Expected: null\n Was: " + entry.getValue());
        }
    }

    if (sj.length() != 0)
    {
        if (expectProperties.equals(origProperties))
        {
            fail("The properties should remain unchanged " + state + "\n" + sj);
        }
        else
        {
            fail("The properties should have changed " + state + "\n" + sj);
        }
    }
}
/**
 * A successful extract: the author, description and title are expected to be set on the node once the
 * simulated transform has replied, and the content is expected to stay the same size.
 */
@Test
public void testExtractHtml() throws Exception
{
    QName author = QName.createQName("cm:author", namespacePrefixResolver);
    QName description = QName.createQName("cm:description", namespacePrefixResolver);
    QName title = QName.createQName("cm:title", namespacePrefixResolver);

    expectedProperties.put(author, "Nevin Nollop");
    expectedProperties.put(description, "Gym class featuring a brown fox and lazy dog");
    expectedProperties.put(title, "The quick brown fox jumps over the lazy dog");

    String mockResult = "quick/quick.html_metadata.json";
    assertAsyncMetadataExecute(contentMetadataExtracter, mockResult,
            UNCHANGED_HASHCODE, origSize, expectedProperties);
}
/**
 * The extract reply carries the SOURCE_HAS_NO_CONTENT hash code: the size and properties of the node are
 * expected to remain as they were.
 */
@Test
public void testExtractNodeDeleted() throws Exception
{
    String mockResult = "quick/quick.html_metadata.json";
    assertAsyncMetadataExecute(contentMetadataExtracter, mockResult,
            SOURCE_HAS_NO_CONTENT, origSize, origProperties);
}
/**
 * The extract reply carries a hash code that differs from the one in the request, as if the source content
 * had changed in the meantime: the size and properties of the node are expected to remain as they were.
 */
@Test
public void testExtractContentChanged() throws Exception
{
    // Use the named constant rather than repeating its value as a magic number.
    assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.html_metadata.json",
            CHANGED_HASHCODE, origSize, origProperties);
}
/**
 * The simulated transform reports a failure (there is no mock result): the size and properties of the node
 * are expected to remain as they were.
 */
@Test
public void testExtractTransformFailure() throws Exception
{
    String noMockResult = null;
    assertAsyncMetadataExecute(contentMetadataExtracter, noMockResult,
            UNCHANGED_HASHCODE, origSize, origProperties);
}
/**
* The simulated transform replies with something that is not the expected JSON: the size and properties of
* the node are expected to remain as they were.
*/
@Test
public void testExtractTransformCorrupt() throws Exception
{
// NOTE(review): other tests in this class load "quick/quick.html"; confirm that "quick.html" resolves on the
// classpath, otherwise the mock passes a null stream and this may not exercise the corrupt-response path.
assertAsyncMetadataExecute(contentMetadataExtracter, "quick.html", // not json
UNCHANGED_HASHCODE, origSize, origProperties);
}
/**
 * The reply contains a property in an unknown namespace alongside usable values: only cm:author is expected
 * to be set on the node.
 */
@Test
public void testUnknownNamespaceInResponse() throws Exception
{
    // "sys:overwritePolicy": "PRAGMATIC" - is used
    // "{http://www.unknown}name": "ignored" - is reported in an ERROR log
    QName author = QName.createQName("cm:author", namespacePrefixResolver);
    expectedProperties.put(author, "Used");

    String mockResult = "quick/unknown_namespace_metadata.json";
    assertAsyncMetadataExecute(contentMetadataExtracter, mockResult,
            UNCHANGED_HASHCODE, origSize, expectedProperties);
}
/**
 * A successful extract of msg metadata, in which dates are supplied as RFC822 strings: the addressee(s),
 * description, sent date, subject line, author and originator are expected to be set on the node.
 */
@Test
public void testExtractMsg() throws Exception // has dates as RFC822
{
    expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), "mark.rogers@alfresco.com");
    expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), "This is a quick test");
    expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver),
            new ArrayList<>(asList("mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com")));
    expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), SIMPLE_DATE_FORMAT.parse("Fri Jan 18 13:44:20 GMT 2013")); // 2013-01-18T13:44:20Z
    expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), "This is a quick test");
    expectedProperties.put(QName.createQName("cm:author", namespacePrefixResolver), "Mark Rogers");
    expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), "Mark Rogers");

    // The sent date is verified as part of the property comparison; no separate lookup is needed.
    assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.msg_metadata.json",
            UNCHANGED_HASHCODE, origSize, expectedProperties);
}
/**
 * A successful extract of eml metadata, in which dates are supplied as longs since 1970: the cm: and imap:
 * properties listed below are expected to be set on the node. cm:author is excluded from the comparison for
 * the reason given in the comments.
 */
@Test
public void testExtractEml() throws Exception // has dates as longs since 1970
{
    String subject = "The quick brown fox jumps over the lazy dog";
    String to = "Nevin Nollop <nevin.nollop@gmail.com>";
    String cc = "Nevin Nollop <nevinn@alfresco.com>";
    String from = "Nevin Nollop <nevin.nollop@alfresco.com>";
    Serializable sent = SIMPLE_DATE_FORMAT.parse("Fri Jun 04 13:23:22 BST 2004");

    // cm: properties
    expectedProperties.put(QName.createQName("cm:addressee", namespacePrefixResolver), to);
    expectedProperties.put(QName.createQName("cm:addressees", namespacePrefixResolver), new ArrayList<>(asList(cc)));
    expectedProperties.put(QName.createQName("cm:description", namespacePrefixResolver), subject);
    expectedProperties.put(QName.createQName("cm:title", namespacePrefixResolver), subject);
    expectedProperties.put(QName.createQName("cm:subjectline", namespacePrefixResolver), subject);
    expectedProperties.put(QName.createQName("cm:sentdate", namespacePrefixResolver), sent);
    expectedProperties.put(QName.createQName("cm:originator", namespacePrefixResolver), from);

    // imap: properties
    expectedProperties.put(QName.createQName("imap:dateSent", namespacePrefixResolver), sent);
    expectedProperties.put(QName.createQName("imap:messageTo", namespacePrefixResolver), to);
    expectedProperties.put(QName.createQName("imap:messageCc", namespacePrefixResolver), cc);
    expectedProperties.put(QName.createQName("imap:messageFrom", namespacePrefixResolver), from);
    expectedProperties.put(QName.createQName("imap:messageSubject", namespacePrefixResolver), subject);
    expectedProperties.put(QName.createQName("imap:messageId", namespacePrefixResolver), "<20040604122322.GV1905@phoenix.home>");

    // Note: As the metadata is for eml, an aspect gets added resulting in a second extract because of
    //       ImapContentPolicy.onAddAspect. I cannot see a good way to avoid this.
    // cm:author is not in the quick.eml_metadata.json but is being added by the second extract which thinks
    // the source mimetype is MimetypeMap.MIMETYPE_PDF, because that is what the before() method sets the
    // content to. As a result the PdfBox metadata extractor is called, which extracts cm:author. Given that
    // we don't know when this will take place, we simply ignore this property. We could fix this up, but it
    // does not add anything to the test.
    QName ignoredAuthor = QName.createQName("cm:author", namespacePrefixResolver);
    assertAsyncMetadataExecute(contentMetadataExtracter, "quick/quick.eml_metadata.json",
            UNCHANGED_HASHCODE, origSize, expectedProperties,
            ignoredAuthor);
}
/**
 * A successful embed: the content of the node is expected to be replaced by the mock result, so that its
 * size changes, while the properties stay as they were.
 */
@Test
public void testEmbed() throws Exception
{
    String replacementContent = "quick/quick.html"; // just replace the pdf with html!
    long sizeAfterEmbed = 428;
    assertAsyncMetadataExecute(contentMetadataEmbedder, replacementContent,
            UNCHANGED_HASHCODE, sizeAfterEmbed, expectedProperties);
}
/**
 * The embed reply carries the SOURCE_HAS_NO_CONTENT hash code: the size and properties of the node are
 * expected to remain as they were.
 */
@Test
public void testEmbedNodeDeleted() throws Exception
{
    String mockResult = "quick/quick.html";
    assertAsyncMetadataExecute(contentMetadataEmbedder, mockResult,
            SOURCE_HAS_NO_CONTENT, origSize, origProperties);
}
/**
 * The embed reply carries a hash code that differs from the one in the request, as if the source content
 * had changed in the meantime: the size and properties of the node are expected to remain as they were.
 */
@Test
public void testEmbedContentChanged() throws Exception
{
    // Use the named constant rather than repeating its value as a magic number.
    assertAsyncMetadataExecute(contentMetadataEmbedder, "quick/quick.html",
            CHANGED_HASHCODE, origSize, origProperties);
}
/**
 * The simulated embed transform reports a failure (there is no mock result): the size and properties of the
 * node are expected to remain as they were.
 */
@Test
public void testEmbedTransformFailure() throws Exception
{
    String noMockResult = null;
    assertAsyncMetadataExecute(contentMetadataEmbedder, noMockResult,
            UNCHANGED_HASHCODE, origSize, origProperties);
}
// TODO Write tests for: overwritePolicy, enableStringTagging and carryAspectProperties.
// Values are set in AsynchronousExtractor.setMetadata(...) but make use of original code within
// MetadataExtracter and AbstractMappingMetadataExtracter.
// As the tests for existing extractors are to be removed in ACS 7.0, it is possible that they were being used
// to test these values.
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -25,22 +25,11 @@
*/
package org.alfresco.repo.action.executer;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ActionImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
import org.alfresco.service.cmr.dictionary.DictionaryService;
@@ -55,15 +44,25 @@ import org.alfresco.service.namespace.QName;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.GUID;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* Test of the ActionExecuter for embedding metadata
*
@@ -94,7 +93,9 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService");
this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService");
this.metadataExtracterRegistry = (MetadataExtracterRegistry) this.applicationContext.getBean("metadataExtracterRegistry");
metadataExtracterRegistry.setAsyncExtractEnabled(false);
metadataExtracterRegistry.setAsyncEmbedEnabled(false);
AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent");
authenticationComponent.setSystemUserAsCurrentUser();
@@ -123,15 +124,21 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
this.executer.setApplicableTypes(new String[] { ContentModel.TYPE_CONTENT.toString() });
}
@After
public void after()
{
metadataExtracterRegistry.setAsyncExtractEnabled(true);
metadataExtracterRegistry.setAsyncEmbedEnabled(true);
}
/**
* Test that a failing embedder does not destroy the original content
*/
@Test
public void testFailingEmbedder()
{
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
FailingEmbedder embedder = new FailingEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF));
embedder.setRegistry(registry);
AbstractMappingMetadataExtracter embedder = new FailingMappingMetadataEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF));
embedder.setRegistry(metadataExtracterRegistry);
embedder.setDictionaryService(this.dictionaryService);
embedder.setMimetypeService(this.mimetypeService);
embedder.register();
@@ -158,17 +165,16 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
}
/**
* Tika-powered embedder which fails upon calling embed on its {@link FailingTikaEmbedder}
* Embedder which fails upon calling embed on its {@link FailingEmbedder}
*/
private class FailingEmbedder extends TikaPoweredMetadataExtracter
private class FailingMappingMetadataEmbedder extends AbstractMappingMetadataExtracter
{
/**
* Constructor for setting supported extract and embed mimetypes
*
* @param mimetypes the supported extract and embed mimetypes
*/
public FailingEmbedder(Collection<String> mimetypes)
public FailingMappingMetadataEmbedder(Collection<String> mimetypes)
{
super(
new HashSet<String>(mimetypes),
@@ -176,15 +182,26 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
}
@Override
protected Parser getParser()
protected void embedInternal(Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer) throws Throwable
{
return null;
Embedder embedder = getEmbedder();
if (embedder == null)
{
return;
}
Map<String, String> metadataAsStrings = convertMetadataToStrings(metadata);
Metadata metadataToEmbed = new Metadata();
metadataAsStrings.forEach((k,v)->metadataToEmbed.add(k, v));
InputStream inputStream = reader.getContentInputStream();
OutputStream outputStream = writer.getContentOutputStream();
embedder.embed(metadataToEmbed, null, outputStream, null);
}
@Override
protected Embedder getEmbedder()
{
return new FailingTikaEmbedder();
return new FailingEmbedder();
}
@Override
@@ -202,12 +219,18 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
mapping.put("author", qnames);
return mapping;
}
@Override
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
return null;
}
}
/**
* Tika metadata embedder which fails on a call to embed.
* Metadata embedder which fails on a call to embed.
*/
private class FailingTikaEmbedder implements Embedder
private class FailingEmbedder implements Embedder
{
private static final long serialVersionUID = -4954679684941467571L;
@@ -219,7 +242,7 @@ public class ContentMetadataEmbedderTest extends BaseSpringTest
@Override
public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context)
throws IOException, TikaException
throws IOException
{
throw new IOException("Forced failure");
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -43,8 +43,8 @@ import org.alfresco.repo.action.ActionImpl;
import org.alfresco.repo.action.ActionModel;
import org.alfresco.repo.action.AsynchronousActionExecutionQueuePolicies;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.policy.Behaviour.NotificationFrequency;
import org.alfresco.repo.policy.JavaBehaviour;
@@ -73,8 +73,6 @@ import org.alfresco.util.GUID;
import org.alfresco.util.testing.category.LuceneTests;
import org.alfresco.util.testing.category.RedundantTests;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.junit.experimental.categories.Category;
import org.springframework.context.ConfigurableApplicationContext;
@@ -111,6 +109,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
private TaggingService taggingService;
private NodeService nodeService;
private ContentService contentService;
private MetadataExtracterRegistry metadataExtracterRegistry;
private AuditService auditService;
private TransactionService transactionService;
private AuthenticationComponent authenticationComponent;
@@ -143,7 +142,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
this.taggingService = (TaggingService)ctx.getBean("TaggingService");
this.nodeService = (NodeService) ctx.getBean("NodeService");
this.contentService = (ContentService) ctx.getBean("ContentService");
this.metadataExtracterRegistry = (MetadataExtracterRegistry) ctx.getBean("metadataExtracterRegistry");
metadataExtracterRegistry.setAsyncExtractEnabled(false);
metadataExtracterRegistry.setAsyncEmbedEnabled(false);
this.transactionService = (TransactionService)ctx.getBean("transactionComponent");
this.auditService = (AuditService)ctx.getBean("auditService");
this.authenticationComponent = (AuthenticationComponent)ctx.getBean("authenticationComponent");
@@ -207,6 +209,9 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
@Override
protected void tearDown() throws Exception
{
metadataExtracterRegistry.setAsyncExtractEnabled(true);
metadataExtracterRegistry.setAsyncEmbedEnabled(true);
if (AlfrescoTransactionSupport.getTransactionReadState() != TxnReadState.TXN_NONE)
{
fail("Test is not transaction-safe. Fix up transaction handling and re-test.");
@@ -296,7 +301,7 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
});
}
private static class TagMappingMetadataExtracter extends TikaPoweredMetadataExtracter
private static class TagMappingMetadataExtracter extends AbstractMappingMetadataExtracter
{
private String existingTagNodeRef;
@@ -328,16 +333,10 @@ public class ContentMetadataExtracterTagMappingTest extends TestCase
return sourceMimetype.equals(MimetypeMap.MIMETYPE_IMAGE_JPEG);
}
@Override
protected Parser getParser()
{
return new JpegParser();
}
@SuppressWarnings("unchecked")
public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
Map<String, Serializable> rawMap = super.extractRaw(reader);
Map<String, Serializable> rawMap = newRawMap();
// Add some test keywords to those actually extracted from the file including a nodeRef
List<String> keywords = new ArrayList<String>(Arrays.asList(

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -43,16 +43,11 @@
*/
package org.alfresco.repo.action.executer;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ActionImpl;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.security.authentication.AuthenticationComponent;
@@ -67,11 +62,18 @@ import org.alfresco.service.namespace.QName;
import org.alfresco.test_category.BaseSpringTestsCategory;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.GUID;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.springframework.transaction.annotation.Transactional;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
/**
* Test of the ActionExecuter for extracting metadata. Note: This test makes
* assumptions about the PDF test data for PdfBoxExtracter.
@@ -88,6 +90,7 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
private NodeService nodeService;
private ContentService contentService;
private MetadataExtracterRegistry registry;
private StoreRef testStoreRef;
private NodeRef rootNodeRef;
private NodeRef nodeRef;
@@ -101,7 +104,10 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
{
this.nodeService = (NodeService) this.applicationContext.getBean("nodeService");
this.contentService = (ContentService) this.applicationContext.getBean("contentService");
registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
registry.setAsyncExtractEnabled(false);
registry.setAsyncEmbedEnabled(false);
AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent");
authenticationComponent.setSystemUserAsCurrentUser();
@@ -126,6 +132,13 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
this.executer = (ContentMetadataExtracter) this.applicationContext.getBean("extract-metadata");
}
@After
public void after()
{
registry.setAsyncExtractEnabled(true);
registry.setAsyncEmbedEnabled(true);
}
/**
* Test execution of the extraction itself
*/
@@ -189,7 +202,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
@Test
public void testUnknownProperties()
{
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
TestUnknownMetadataExtracter extracterUnknown = new TestUnknownMetadataExtracter();
extracterUnknown.setRegistry(registry);
extracterUnknown.register();
@@ -247,7 +259,6 @@ public class ContentMetadataExtracterTest extends BaseSpringTest
@Test
public void testNullExtractedValues_ALF1823()
{
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
TestNullPropMetadataExtracter extractor = new TestNullPropMetadataExtracter();
extractor.setRegistry(registry);
extractor.register();

View File

@@ -63,11 +63,13 @@ import org.junit.Ignore;
import org.springframework.context.ApplicationContext;
/**
*
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1 and the transformer is also deprecated.
*
* @author Neil McErlean
* @since 3.3
*/
@Ignore("This is an abstract class so don't instaniate it or run it in Junit")
@Deprecated
public abstract class AbstractJodConverterBasedTest
{
private static Log log = LogFactory.getLog(AbstractJodConverterBasedTest.class);

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import static org.junit.Assert.assertEquals;
@@ -39,8 +39,11 @@ import org.joda.time.format.DateTimeFormat;
import org.junit.Test;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* MNT-8978
*/
@Deprecated
public class ConcurrencyOfficeMetadataExtracterTest
{

View File

@@ -35,11 +35,14 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* The test designed for testing the concurrent limitations in
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
*
* @author amukha
*/
@Deprecated
public class ConcurrencyPdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private SlowPdfBoxMetadataExtracter extracter;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -42,10 +42,13 @@ import org.apache.tika.metadata.Metadata;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see DWGMetadataExtracter
*
* @author Nick Burch
*/
@Deprecated
public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private DWGMetadataExtracter extracter;
@@ -174,5 +177,5 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest
assertEquals("Custom DWG property not found", "valueforcustomprop1", properties.get(TIKA_CUSTOM_TEST_PROPERTY));
}
}

View File

@@ -24,7 +24,7 @@
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -54,8 +54,11 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @author Jesper Steen Møller
*/
@Deprecated
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private static final String QUICK_TITLE_JAPANESE = "確認した結果を添付しますので、確認してください";

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2017 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -50,10 +50,12 @@ import org.junit.Ignore;
import org.junit.Test;
/**
*
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
*
* @author Neil McErlean
* @since 3.2 SP1
*/
@Deprecated
public class JodMetadataExtractorOOoTest extends AbstractJodConverterBasedTest
{
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";

View File

@@ -1,30 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -52,8 +52,11 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Test for the MP3 metadata extraction from id3 tags.
*/
@Deprecated
public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
{
private MP3MetadataExtracter extracter;

View File

@@ -1,30 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -55,9 +55,12 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @author Derek Hulley
* @since 3.2
*/
@Deprecated
public class MailMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private MailMetadataExtracter extracter;

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.Serializable;
@@ -38,10 +38,13 @@ import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see OfficeMetadataExtracter
*
* @author Jesper Steen Møller
*/
@Deprecated
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private OfficeMetadataExtracter extracter;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -38,10 +38,13 @@ import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see OpenDocumentMetadataExtracter
*
* @author Derek Hulley
*/
@Deprecated
public class OpenDocumentMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private OpenDocumentMetadataExtracter extracter;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -40,10 +40,13 @@ import org.alfresco.service.namespace.QName;
import org.apache.pdfbox.util.DateConverter;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
*
* @author Jesper Steen Møller
*/
@Deprecated
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private PdfBoxMetadataExtracter extracter;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -39,11 +39,14 @@ import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see org.alfresco.repo.content.metadata.PoiMetadataExtracter
*
* @author Neil McErlean
* @author Dmitry Velichkevich
*/
@Deprecated
public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private static final int MINIMAL_EXPECTED_PROPERTIES_AMOUNT = 3;

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import static org.junit.Assert.assertEquals;
@@ -45,8 +45,11 @@ import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Test for the RFC822 (imap/mbox) extractor
*/
@Deprecated
public class RFC822MetadataExtracterTest extends AbstractMetadataExtracterTest
{
private RFC822MetadataExtracter extracter;

View File

@@ -1,30 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 Jesper Steen Møller
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* Copyright (C) 2005 - 2020 Jesper Steen Møller
*
* This file is part of Alfresco
*
@@ -53,8 +53,11 @@ import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* Test for the audio metadata extraction.
*/
@Deprecated
public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private TikaAudioMetadataExtracter extracter;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -58,10 +58,13 @@ import org.apache.tika.parser.odf.OpenDocumentParser;
/**
* @deprecated extractor has been moved to a T-Engine.
*
* @see TikaAutoMetadataExtracter
*
* @author Nick Burch
*/
@Deprecated
public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private static Log logger = LogFactory.getLog(TikaAutoMetadataExtracterTest.class);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -100,6 +100,8 @@ import com.sun.mail.imap.protocol.RFC822DATA;
import com.sun.mail.imap.protocol.UID;
import com.sun.mail.util.ASCIIUtility;
import static org.alfresco.model.ContentModel.PROP_MODIFIED;
@Category({OwnJVMTestsCategory.class, LuceneTests.class})
public class ImapMessageTest extends TestCase
{
@@ -456,15 +458,30 @@ public class ImapMessageTest extends TestCase
messageHelper.addCc(address);
// Creating the message node in the repository
UserTransaction txn = transactionService.getUserTransaction();
txn.begin();
String name = AlfrescoImapConst.MESSAGE_PREFIX + GUID.generate();
FileInfo messageFile = fileFolderService.create(testImapFolderNodeRef, name, ContentModel.TYPE_CONTENT);
// Writing a content.
NodeRef nodeRef = messageFile.getNodeRef();
Serializable origModified = getModified(nodeRef);
new IncomingImapMessage(messageFile, serviceRegistry, message);
txn.commit();
// Calls to new IncomingImapMessage(...) only take place when a new nodeRef is being created.
// No other code will be changing the nodeRef. An ImapModel.ASPECT_IMAP_CONTENT is added, which
// triggers a metadata extract to take place in a post commit method. Previously this would have been a
// synchronous process. This is no longer true as it may now take place in a T-Engine. So, we need to wait
// for the extract to take place. There does not appear to be a direct way to detect completion here, so
// poll for up to 10 seconds until the node's modified date changes.
long end = System.currentTimeMillis()+10000;
while (System.currentTimeMillis() <= end && origModified.equals(getModified(nodeRef)))
{
Thread.currentThread().sleep(1000);
}
// Getting the transformed properties from the repository
// cm:originator, cm:addressee, cm:addressees, imap:messageFrom, imap:messageTo, imap:messageCc
Map<QName, Serializable> properties = nodeService.getProperties(messageFile.getNodeRef());
Map<QName, Serializable> properties = nodeService.getProperties(nodeRef);
String cmOriginator = (String) properties.get(ContentModel.PROP_ORIGINATOR);
String cmAddressee = (String) properties.get(ContentModel.PROP_ADDRESSEE);
@SuppressWarnings("unchecked")
@@ -488,6 +505,12 @@ public class ImapMessageTest extends TestCase
assertEquals(decodedAddress, imapMessageCc);
}
/**
 * Reads the current value of the {@code PROP_MODIFIED} property of a node.
 *
 * @param nodeRef the node whose properties are looked up through {@code nodeService}
 * @return the value stored under {@code PROP_MODIFIED} in the node's property map,
 *         or {@code null} if the map holds no value for that key
 */
private Serializable getModified(NodeRef nodeRef)
{
    // Fetch the property map fresh on every call so callers can poll for a change.
    return nodeService.getProperties(nodeRef).get(PROP_MODIFIED);
}
@Category(RedundantTests.class)
public void testEightBitMessage() throws Exception
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -27,6 +27,7 @@ package org.alfresco.repo.rendition2;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.metadata.AsynchronousExtractor;
import org.alfresco.repo.policy.BehaviourFilter;
import org.alfresco.repo.policy.PolicyComponent;
import org.alfresco.repo.rendition.RenditionPreventionRegistry;
@@ -46,11 +47,9 @@ import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import org.quartz.CronExpression;
import java.io.IOException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -61,7 +60,6 @@ import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.*;
/**
@@ -90,6 +88,7 @@ public class RenditionService2Test
@Mock private RuleService ruleService;
@Mock private TransformServiceRegistryImpl transformServiceRegistry;
@Mock private TransformReplyProvider transformReplyProvider;
@Mock private AsynchronousExtractor asynchronousExtractor;
private NodeRef nodeRef = new NodeRef("workspace://spacesStore/test-id");
private NodeRef nodeRefMissing = new NodeRef("workspace://spacesStore/bad-test-id");
@@ -154,6 +153,7 @@ public class RenditionService2Test
renditionService2.setTransformReplyProvider(transformReplyProvider);
renditionService2.setEnabled(true);
renditionService2.setThumbnailsEnabled(true);
renditionService2.setAsynchronousExtractor(asynchronousExtractor);
renditionDefinitionRegistry2.setRenditionConfigDir("alfresco/renditions/test");
renditionDefinitionRegistry2.afterPropertiesSet();

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -242,10 +242,13 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg
targetMimetype.add("image/gif");
targetMimetype.add("image/tiff");
imagemagickSupportedTransformation.put("image/tiff", targetMimetype);
targetMimetype = new ArrayList<>(targetMimetype);
targetMimetype.add("image/png");
targetMimetype.add("image/jpeg");
imagemagickSupportedTransformation.put("image/gif", targetMimetype);
imagemagickSupportedTransformation.put("image/jpeg", targetMimetype);
targetMimetype = new ArrayList<>(targetMimetype);
targetMimetype.add("alfresco-metadata-extract"); // Metadata extract and embed types should be excluded from pipeline cartesian products
imagemagickSupportedTransformation.put("image/png", targetMimetype);
targetMimetype = new ArrayList<>();
targetMimetype.add("target1");
@@ -333,8 +336,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg
@Override
public void testJsonConfig() throws IOException
{
// Not 60, 60 as we have added source->target1..3 to three transformers
internalTestJsonConfig(63, 69);
internalTestJsonConfig(64, 70);
}
@Test
@@ -368,7 +370,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg
switch (t.transformer.getTransformerName())
{
case "imagemagick":
assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 17, t.transformer.getSupportedSourceAndTargetList().size());
assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 18, t.transformer.getSupportedSourceAndTargetList().size());
assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 1, t.transformer.getTransformOptions().size());
assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 6, countTopLevelOptions(t.transformer.getTransformOptions()));
assertEquals(t.transformer.getTransformerName() + " expected to not be a transformer pipeline", t.transformer.getTransformerPipeline().size(), 0);
@@ -428,6 +430,7 @@ public class LocalTransformServiceRegistryConfigTest extends TransformServiceReg
break;
case "officeToImageViaPdf":
// Note we will get 35 entries in getSupportedSourceAndTargetList() if the metadata transforms are not excluded
assertEquals(t.transformer.getTransformerName() + " incorrect number of supported transform", 28, t.transformer.getSupportedSourceAndTargetList().size());
assertEquals( t.transformer.getTransformerName() + "incorrect number of transform option names", 2, t.transformer.getTransformOptions().size());
assertEquals( t.transformer.getTransformerName() + "incorrect number of transform options", 11, countTopLevelOptions(t.transformer.getTransformOptions()));

View File

@@ -55,6 +55,7 @@
{"sourceMediaType": "image/png", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "image/png", "targetMediaType": "image/png" },
{"sourceMediaType": "image/png", "targetMediaType": "image/tiff"},
{"sourceMediaType": "image/png", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "image/tiff", "targetMediaType": "image/gif" },
{"sourceMediaType": "image/tiff", "targetMediaType": "image/tiff"},

View File

@@ -0,0 +1,15 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/content/1.0}addressees" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/imap/1.0}dateSent" : 1086351802000,
"{http://www.alfresco.org/model/imap/1.0}messageTo" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/imap/1.0}messageId" : "<20040604122322.GV1905@phoenix.home>",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageCc" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}sentdate" : 1086351802000,
"{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageFrom" : "Nevin Nollop <nevin.nollop@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}originator" : "Nevin Nollop <nevin.nollop@alfresco.com>"
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,9 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "mark.rogers@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}description" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}addressees" : [ "mark.rogers@alfresco.com", "speedy@quick.com", "mrquick@nowhere.com" ],
"{http://www.alfresco.org/model/content/1.0}sentdate" : "2013-01-18T13:44:20Z",
"{http://www.alfresco.org/model/content/1.0}subjectline" : "This is a quick test",
"{http://www.alfresco.org/model/content/1.0}author" : "Mark Rogers",
"{http://www.alfresco.org/model/content/1.0}originator" : "Mark Rogers"
}

View File

@@ -0,0 +1,5 @@
{
"sys:overwritePolicy": "PRAGMATIC",
"{http://www.unknown}name": "ignored",
"{http://www.alfresco.org/model/content/1.0}author": "Used"
}