MNT-15024. Implemented the content.metadataExtracter.pdf.overwritePolicy property and removed the redundant String setter for overwritePolicy, which caused an ambiguous-setter warning.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@131900 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Martin Muller
2016-10-31 15:41:32 +00:00
parent 308da0f956
commit 94cebbefbc
4 changed files with 168 additions and 174 deletions

View File

@@ -341,6 +341,9 @@
</entry>
</map>
</property>
<property name="overwritePolicy">
<value>${content.metadataExtracter.pdf.overwritePolicy}</value>
</property>
</bean>
<bean id="extracter.Poi" class="org.alfresco.repo.content.metadata.PoiMetadataExtracter" parent="baseMetadataExtracter">
<property name="poiFootnotesLimit" value="${content.transformer.Poi.poiFootnotesLimit}" />

View File

@@ -644,6 +644,9 @@ content.metadataExtracter.parseShapes=false
content.metadataExtracter.pdf.maxDocumentSizeMB=10
content.metadataExtracter.pdf.maxConcurrentExtractionsCount=5
# The default overwrite policy for PdfBoxMetadataExtracter
content.metadataExtracter.pdf.overwritePolicy=PRAGMATIC
# Property to enable upgrade from 2.1-A
V2.1-A.fixes.to.schema=0
#V2.1-A.fixes.to.schema=82

View File

@@ -1,82 +1,82 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import javax.activation.MimeType;
import java.util.concurrent.atomic.AtomicInteger;
import javax.activation.MimeType;
import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.namespace.InvalidQNameException;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.extensions.surf.util.ISO8601DateFormat;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.namespace.InvalidQNameException;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.extensions.surf.util.ISO8601DateFormat;
/**
* Support class for metadata extracters that support dynamic and config-driven
@@ -126,7 +126,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
private static final String PROP_DEFAULT_TIMEOUT = "content.metadataExtracter.default.timeoutMs";
public static final String PROPERTY_PREFIX_METADATA = "metadata.";
public static final String PROPERTY_COMPONENT_EXTRACT = ".extract.";
public static final String PROPERTY_COMPONENT_EMBED = ".embed.";
public static final String PROPERTY_COMPONENT_EMBED = ".embed.";
public static final int MEGABYTE_SIZE = 1048576;
protected static Log logger = LogFactory.getLog(AbstractMappingMetadataExtracter.class);
@@ -151,8 +151,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
private Properties properties;
private Map<String, MetadataExtracterLimits> mimetypeLimits;
private ExecutorService executorService;
protected MetadataExtracterConfig metadataExtracterConfig;
protected MetadataExtracterConfig metadataExtracterConfig;
private static final AtomicInteger CONCURRENT_EXTRACTIONS_COUNT = new AtomicInteger(0);
/**
@@ -259,7 +259,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
* {@inheritDoc}
*
* @see #setSupportedMimetypes(Collection)
*/
*/
@Override
public boolean isSupported(String sourceMimetype)
{
@@ -271,7 +271,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
*
* @see #setSupportedEmbedMimetypes(Collection)
*/
@Override
@Override
public boolean isEmbeddingSupported(String sourceMimetype)
{
if (supportedEmbedMimetypes == null)
@@ -314,18 +314,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
this.overwritePolicy = overwritePolicy;
}
/**
* Set the policy to use when existing values are encountered. Depending on how the extractor
* is called, this may not be relevant, i.e an empty map of existing properties may be passed
* in by the client code, which may follow its own overwrite strategy.
*
* @param overwritePolicyStr the policy to apply when there are existing system properties
*/
public void setOverwritePolicy(String overwritePolicyStr)
{
this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
}
/**
* Set whether the extractor should discard metadata that fails to convert to the target type
* defined in the data dictionary model. This is <tt>true</tt> by default i.e. if the data
@@ -1152,7 +1140,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
@Override
@Override
public final Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination)
{
return extract(reader, this.overwritePolicy, destination, this.mapping);
@@ -1161,7 +1149,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
@Override
@Override
public final Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
@@ -1173,7 +1161,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
@Override
@Override
public Map<QName, Serializable> extract(
ContentReader reader,
OverwritePolicy overwritePolicy,
@@ -1225,12 +1213,12 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
logger.debug("Extracted Metadata from " + reader + "\n Found: " +
rawMetadata + "\n Mapped and Accepted: " + changedProperties);
}
}
catch (LimitExceededException e)
{
logger.warn("Metadata extraction rejected: \n" +
" Extracter: " + this + "\n" +
" Reason: " + e.getMessage());
}
catch (LimitExceededException e)
{
logger.warn("Metadata extraction rejected: \n" +
" Extracter: " + this + "\n" +
" Reason: " + e.getMessage());
}
catch (Throwable e)
{
@@ -1303,7 +1291,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
/**
* {@inheritDoc}
*/
@Override
@Override
public final void embed(
Map<QName, Serializable> properties,
ContentReader reader,
@@ -1980,7 +1968,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
* Gets the metadata extracter limits for the given mimetype.
* <p>
* A specific match for the given mimetype is tried first and
* if none is found a wildcard of "*" is tried, if still not found
* if none is found a wildcard of "*" is tried, if still not found
* defaults value will be used
*
* @param mimetype String
@@ -1997,11 +1985,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
if (limits == null)
{
limits = mimetypeLimits.get("*");
}
if (limits == null)
{
limits = new MetadataExtracterLimits();
}
}
if (limits == null)
{
limits = new MetadataExtracterLimits();
}
return limits;
}
@@ -2045,19 +2033,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
{
super(cause);
}
}
}
/**
* Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits}
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
*/
private class LimitExceededException extends Exception
{
private static final long serialVersionUID = 702554119174770130L;
public LimitExceededException(String message)
{
super(message);
}
* Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits}
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
*/
private class LimitExceededException extends Exception
{
private static final long serialVersionUID = 702554119174770130L;
public LimitExceededException(String message)
{
super(message);
}
}
/**
@@ -2081,32 +2069,32 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
ContentReader reader, MetadataExtracterLimits limits) throws Throwable
{
FutureTask<Map<String, Serializable>> task = null;
StreamAwareContentReaderProxy proxiedReader = null;
if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE)
{
throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
}
synchronized (CONCURRENT_EXTRACTIONS_COUNT)
{
if (logger.isDebugEnabled())
{
logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
}
if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount())
{
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
if (logger.isDebugEnabled())
{
logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
}
}
else
{
throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
}
}
StreamAwareContentReaderProxy proxiedReader = null;
if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE)
{
throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
}
synchronized (CONCURRENT_EXTRACTIONS_COUNT)
{
if (logger.isDebugEnabled())
{
logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
}
if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount())
{
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
if (logger.isDebugEnabled())
{
logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
}
}
else
{
throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
}
}
try
{
@@ -2140,13 +2128,13 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
}
throw cause;
}
finally
{
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
if (logger.isDebugEnabled())
{
logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
}
finally
{
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
if (logger.isDebugEnabled())
{
logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
}
}
}

View File

@@ -1,28 +1,28 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.content.metadata.xml;
@@ -51,7 +51,7 @@ import org.alfresco.util.PropertyCheck;
* most appropriate of a given XML document. The chosen extracter is then asked
* to extract the values, passing through the
* {@code MetadataExtracter.OverwritePolicy} as
* {@linkplain #setOverwritePolicy(String)} on this instance. The overwrite
* {@linkplain #setOverwritePolicy(org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy)} on this instance. The overwrite
* policy of the embedded extracters is not relevant unless they are used
* separately in another context.
*