Files
alfresco-community-repo/source/java/org/alfresco/repo/content/metadata/MetadataExtracterConfigImpl.java
Alexandra Leahu fc20674988 Merged 5.1.N (5.1.2) to 5.2.N (5.2.1)
125892 adragoi: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2)
      125842 rmunteanu: Merged V4.2-BUG-FIX (4.2.7) to 5.0.N (5.0.4) (PARTIAL MERGE)
         125700 adavis: Merged V4.2.5 (4.2.5.7) to V4.2-BUG-FIX (4.2.7)
            125698: Merged DEV to V4.2.5 (4.2.5.7)
               125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika
                  - Should not have updated version.properties as the original commit needs to be merged forwards.
            125696: Merged DEV to V4.2.5 (4.2.5.7)
               125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika
                  - Modified tika parser and tika core jars to allow some configuration parameters to be sent from Alfresco side using the metadata map parameter
                  - Excluded by default the parsing of drawings/shapes xmls because there was little valuable data that could be extracted from those xmls


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@126004 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2016-04-29 11:36:11 +00:00

79 lines
2.5 KiB
Java

/*
* Copyright (C) 2005-2016 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.content.metadata;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
/**
 * Default {@link MetadataExtracterConfig} implementation.
 * <p>
 * Reads the <code>content.metadataExtracter.parseShapes</code> setting from the injected
 * {@link Properties} and records it on the Tika {@link Metadata} object under
 * {@link TikaMetadataKeys#TIKA_PARSER_PARSE_SHAPES_KEY}.
 *
 * @author Andrei Rebegea
 */
public class MetadataExtracterConfigImpl implements MetadataExtracterConfig
{
    protected static Log logger = LogFactory.getLog(MetadataExtracterConfigImpl.class);

    /** Name of the property that switches shape parsing on or off. */
    private static final String PARSE_SHAPE_PROP_STRING = "content.metadataExtracter.parseShapes";

    /** Property source consulted by {@link #getBooleanProperty(String, boolean)}; may be <code>null</code>. */
    private Properties properties;

    /**
     * Adds the configured "parse shapes" flag to the supplied metadata object.
     * <p>
     * Nothing happens when <code>metadata</code> is <code>null</code>. Otherwise the flag is
     * resolved from the properties, falling back to
     * <code>TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE</code> (declared outside this class), and is
     * added as the string <code>"true"</code> or <code>"false"</code>.
     *
     * @param metadata the Tika metadata to populate; <code>null</code> is ignored
     */
    @Override
    public void prepareMetadataWithConfigParams(Metadata metadata)
    {
        if (metadata != null)
        {
            String parseShapesFlag = String.valueOf(
                    getBooleanProperty(PARSE_SHAPE_PROP_STRING, TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE));
            metadata.add(TikaMetadataKeys.TIKA_PARSER_PARSE_SHAPES_KEY, parseShapesFlag);

            if (logger.isDebugEnabled())
            {
                logger.debug("Tika metadata options passed to tika parser: " + metadata);
            }
        }
    }

    /**
     * Supplies the properties (described by the original author as the Alfresco global
     * properties) from which the configuration values are read.
     *
     * @param properties the property source; may be <code>null</code>, in which case defaults apply
     */
    public void setProperties(Properties properties)
    {
        this.properties = properties;
    }

    /**
     * Looks up a boolean-valued property.
     *
     * @param name         the property key
     * @param defaultValue the value returned when no properties were injected or the key is absent
     * @return <code>true</code> only if the trimmed property text equals "true" ignoring case;
     *         <code>false</code> for any other present value; <code>defaultValue</code> otherwise
     */
    private boolean getBooleanProperty(String name, boolean defaultValue)
    {
        if (properties == null)
        {
            return defaultValue;
        }

        String rawText = properties.getProperty(name);
        if (rawText == null)
        {
            return defaultValue;
        }

        // parseBoolean is a case-insensitive comparison against "true"
        return Boolean.parseBoolean(rawText.trim());
    }
}