mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
MNT-15024. Implemented the content.metadataExtracter.pdf.overwritePolicy property and removed the redundant String setter for overwritePolicy, which caused an ambiguity warning.
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@131900 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -341,6 +341,9 @@
|
||||
</entry>
|
||||
</map>
|
||||
</property>
|
||||
<property name="overwritePolicy">
|
||||
<value>${content.metadataExtracter.pdf.overwritePolicy}</value>
|
||||
</property>
|
||||
</bean>
|
||||
<bean id="extracter.Poi" class="org.alfresco.repo.content.metadata.PoiMetadataExtracter" parent="baseMetadataExtracter">
|
||||
<property name="poiFootnotesLimit" value="${content.transformer.Poi.poiFootnotesLimit}" />
|
||||
|
@@ -644,6 +644,9 @@ content.metadataExtracter.parseShapes=false
|
||||
content.metadataExtracter.pdf.maxDocumentSizeMB=10
|
||||
content.metadataExtracter.pdf.maxConcurrentExtractionsCount=5
|
||||
|
||||
# The default overwrite policy for PdfBoxMetadataExtracter
|
||||
content.metadataExtracter.pdf.overwritePolicy=PRAGMATIC
|
||||
|
||||
# Property to enable upgrade from 2.1-A
|
||||
V2.1-A.fixes.to.schema=0
|
||||
#V2.1-A.fixes.to.schema=82
|
||||
|
@@ -1,82 +1,82 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.FutureTask;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import javax.activation.MimeType;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import javax.activation.MimeType;
|
||||
|
||||
import org.alfresco.api.AlfrescoPublicApi;
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
|
||||
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
|
||||
import org.alfresco.service.namespace.InvalidQNameException;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import org.springframework.beans.factory.BeanNameAware;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.ApplicationContextAware;
|
||||
import org.springframework.extensions.surf.util.ISO8601DateFormat;
|
||||
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
|
||||
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
|
||||
import org.alfresco.service.namespace.InvalidQNameException;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import org.springframework.beans.factory.BeanNameAware;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.ApplicationContextAware;
|
||||
import org.springframework.extensions.surf.util.ISO8601DateFormat;
|
||||
|
||||
/**
|
||||
* Support class for metadata extracters that support dynamic and config-driven
|
||||
@@ -126,7 +126,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
private static final String PROP_DEFAULT_TIMEOUT = "content.metadataExtracter.default.timeoutMs";
|
||||
public static final String PROPERTY_PREFIX_METADATA = "metadata.";
|
||||
public static final String PROPERTY_COMPONENT_EXTRACT = ".extract.";
|
||||
public static final String PROPERTY_COMPONENT_EMBED = ".embed.";
|
||||
public static final String PROPERTY_COMPONENT_EMBED = ".embed.";
|
||||
public static final int MEGABYTE_SIZE = 1048576;
|
||||
|
||||
protected static Log logger = LogFactory.getLog(AbstractMappingMetadataExtracter.class);
|
||||
@@ -151,8 +151,8 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
private Properties properties;
|
||||
private Map<String, MetadataExtracterLimits> mimetypeLimits;
|
||||
private ExecutorService executorService;
|
||||
protected MetadataExtracterConfig metadataExtracterConfig;
|
||||
|
||||
protected MetadataExtracterConfig metadataExtracterConfig;
|
||||
|
||||
private static final AtomicInteger CONCURRENT_EXTRACTIONS_COUNT = new AtomicInteger(0);
|
||||
|
||||
/**
|
||||
@@ -259,7 +259,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @see #setSupportedMimetypes(Collection)
|
||||
*/
|
||||
*/
|
||||
@Override
|
||||
public boolean isSupported(String sourceMimetype)
|
||||
{
|
||||
@@ -271,7 +271,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
*
|
||||
* @see #setSupportedEmbedMimetypes(Collection)
|
||||
*/
|
||||
@Override
|
||||
@Override
|
||||
public boolean isEmbeddingSupported(String sourceMimetype)
|
||||
{
|
||||
if (supportedEmbedMimetypes == null)
|
||||
@@ -314,18 +314,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
this.overwritePolicy = overwritePolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the policy to use when existing values are encountered. Depending on how the extractor
|
||||
* is called, this may not be relevant, i.e an empty map of existing properties may be passed
|
||||
* in by the client code, which may follow its own overwrite strategy.
|
||||
*
|
||||
* @param overwritePolicyStr the policy to apply when there are existing system properties
|
||||
*/
|
||||
public void setOverwritePolicy(String overwritePolicyStr)
|
||||
{
|
||||
this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whether the extractor should discard metadata that fails to convert to the target type
|
||||
* defined in the data dictionary model. This is <tt>true</tt> by default i.e. if the data
|
||||
@@ -1152,7 +1140,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
@Override
|
||||
public final Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination)
|
||||
{
|
||||
return extract(reader, this.overwritePolicy, destination, this.mapping);
|
||||
@@ -1161,7 +1149,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
@Override
|
||||
public final Map<QName, Serializable> extract(
|
||||
ContentReader reader,
|
||||
OverwritePolicy overwritePolicy,
|
||||
@@ -1173,7 +1161,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
@Override
|
||||
public Map<QName, Serializable> extract(
|
||||
ContentReader reader,
|
||||
OverwritePolicy overwritePolicy,
|
||||
@@ -1225,12 +1213,12 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
logger.debug("Extracted Metadata from " + reader + "\n Found: " +
|
||||
rawMetadata + "\n Mapped and Accepted: " + changedProperties);
|
||||
}
|
||||
}
|
||||
catch (LimitExceededException e)
|
||||
{
|
||||
logger.warn("Metadata extraction rejected: \n" +
|
||||
" Extracter: " + this + "\n" +
|
||||
" Reason: " + e.getMessage());
|
||||
}
|
||||
catch (LimitExceededException e)
|
||||
{
|
||||
logger.warn("Metadata extraction rejected: \n" +
|
||||
" Extracter: " + this + "\n" +
|
||||
" Reason: " + e.getMessage());
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
@@ -1303,7 +1291,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
@Override
|
||||
public final void embed(
|
||||
Map<QName, Serializable> properties,
|
||||
ContentReader reader,
|
||||
@@ -1980,7 +1968,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
* Gets the metadata extracter limits for the given mimetype.
|
||||
* <p>
|
||||
* A specific match for the given mimetype is tried first and
|
||||
* if none is found a wildcard of "*" is tried, if still not found
|
||||
* if none is found a wildcard of "*" is tried, if still not found
|
||||
* defaults value will be used
|
||||
*
|
||||
* @param mimetype String
|
||||
@@ -1997,11 +1985,11 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
if (limits == null)
|
||||
{
|
||||
limits = mimetypeLimits.get("*");
|
||||
}
|
||||
if (limits == null)
|
||||
{
|
||||
limits = new MetadataExtracterLimits();
|
||||
}
|
||||
}
|
||||
if (limits == null)
|
||||
{
|
||||
limits = new MetadataExtracterLimits();
|
||||
}
|
||||
|
||||
return limits;
|
||||
}
|
||||
@@ -2045,19 +2033,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
{
|
||||
super(cause);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits}
|
||||
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
|
||||
*/
|
||||
private class LimitExceededException extends Exception
|
||||
{
|
||||
private static final long serialVersionUID = 702554119174770130L;
|
||||
public LimitExceededException(String message)
|
||||
{
|
||||
super(message);
|
||||
}
|
||||
* Exception wrapper to handle exceeded limits imposed by {@link MetadataExtracterLimits}
|
||||
* {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader, MetadataExtracterLimits)}
|
||||
*/
|
||||
private class LimitExceededException extends Exception
|
||||
{
|
||||
private static final long serialVersionUID = 702554119174770130L;
|
||||
public LimitExceededException(String message)
|
||||
{
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2081,32 +2069,32 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
ContentReader reader, MetadataExtracterLimits limits) throws Throwable
|
||||
{
|
||||
FutureTask<Map<String, Serializable>> task = null;
|
||||
StreamAwareContentReaderProxy proxiedReader = null;
|
||||
|
||||
if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE)
|
||||
{
|
||||
throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
|
||||
}
|
||||
|
||||
synchronized (CONCURRENT_EXTRACTIONS_COUNT)
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
|
||||
}
|
||||
if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount())
|
||||
{
|
||||
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
|
||||
}
|
||||
}
|
||||
StreamAwareContentReaderProxy proxiedReader = null;
|
||||
|
||||
if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE)
|
||||
{
|
||||
throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
|
||||
}
|
||||
|
||||
synchronized (CONCURRENT_EXTRACTIONS_COUNT)
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
|
||||
}
|
||||
if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount())
|
||||
{
|
||||
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
@@ -2140,13 +2128,13 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
}
|
||||
throw cause;
|
||||
}
|
||||
finally
|
||||
{
|
||||
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
|
||||
}
|
||||
finally
|
||||
{
|
||||
int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,28 +1,28 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Repository
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2016 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
|
||||
package org.alfresco.repo.content.metadata.xml;
|
||||
|
||||
@@ -51,7 +51,7 @@ import org.alfresco.util.PropertyCheck;
|
||||
* most appropriate of a given XML document. The chosen extracter is then asked
|
||||
* to extract the values, passing through the
|
||||
* {@code MetadataExtracter.OverwritePolicy} as
|
||||
* {@linkplain #setOverwritePolicy(String)} on this instance. The overwrite
|
||||
* {@linkplain #setOverwritePolicy(org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy)} on this instance. The overwrite
|
||||
* policy of the embedded extracters is not relevant unless they are used
|
||||
* separately in another context.
|
||||
*
|
||||
|
Reference in New Issue
Block a user