mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-10-08 14:51:49 +00:00
126004 aleahu: Merged 5.1.N (5.1.2) to 5.2.N (5.2.1) 125892 adragoi: Merged 5.0.N (5.0.4) to 5.1.N (5.1.2) 125842 rmunteanu: Merged V4.2-BUG-FIX (4.2.7) to 5.0.N (5.0.4) (PARTIAL MERGE) 125700 adavis: Merged V4.2.5 (4.2.5.7) to V4.2-BUG-FIX (4.2.7) 125698: Merged DEV to V4.2.5 (4.2.5.7) 125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika - Should not have updated version.properties as the original commit needs to be merged forwards., 125696: Merged DEV to V4.2.5 (4.2.5.7) 125677 arebegea: MNT-15219 : Excel (.xlsx) containing xmls (shapes/drawings) with multi byte characters may cause OutOfMemory in Tika - Modified tika parser and tika core jars to allow some configuration parameters to be sent from Alfresco side using the metadata map parameter - Excluded by default the parsing of drawings/shapes xmls because there was little valuable data that could be extracted from those xmls git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@127835 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
79 lines
2.5 KiB
Java
79 lines
2.5 KiB
Java
/*
 * Copyright (C) 2005-2016 Alfresco Software Limited.
 *
 * This file is part of Alfresco
 *
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
package org.alfresco.repo.content.metadata;
|
|
|
|
import java.util.Properties;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.tika.metadata.Metadata;
|
|
import org.apache.tika.metadata.TikaMetadataKeys;
|
|
|
|
/**
|
|
* Default implementation for the MetadataExtracterConfig;
|
|
*
|
|
* @author Andrei Rebegea
|
|
*/
|
|
public class MetadataExtracterConfigImpl implements MetadataExtracterConfig
|
|
{
|
|
protected static Log logger = LogFactory.getLog(MetadataExtracterConfigImpl.class);
|
|
|
|
private static final String PARSE_SHAPE_PROP_STRING = "content.metadataExtracter.parseShapes";
|
|
|
|
private Properties properties;
|
|
|
|
@Override
|
|
public void prepareMetadataWithConfigParams(Metadata metadata)
|
|
{
|
|
if (metadata == null)
|
|
{
|
|
return;
|
|
}
|
|
boolean shouldParseShapes = getBooleanProperty(PARSE_SHAPE_PROP_STRING, TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE);
|
|
metadata.add(TikaMetadataKeys.TIKA_PARSER_PARSE_SHAPES_KEY, Boolean.toString(shouldParseShapes));
|
|
|
|
if (logger.isDebugEnabled())
|
|
{
|
|
logger.debug("Tika metadata options passed to tika parser: " + metadata);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* The Alfresco global properties.
|
|
*/
|
|
public void setProperties(Properties properties)
|
|
{
|
|
this.properties = properties;
|
|
}
|
|
|
|
private boolean getBooleanProperty(String name, boolean defaultValue)
|
|
{
|
|
boolean value = defaultValue;
|
|
if (properties != null)
|
|
{
|
|
String property = properties.getProperty(name);
|
|
if (property != null)
|
|
{
|
|
value = property.trim().equalsIgnoreCase(Boolean.TRUE.toString());
|
|
}
|
|
}
|
|
return value;
|
|
}
|
|
}
|