/*
 * Copyright (C) 2005-2014 Alfresco Software Limited.
 *
 * This file is part of Alfresco
 *
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 */
package org.alfresco.repo.content.metadata;
import java.util.ArrayList;
import java.util.Set;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.patch.AlfrescoPoiPatchUtils;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.springframework.beans.factory.InitializingBean;
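/**
 * POI-based metadata extractor for Office 07 documents.
 * See <a href="http://poi.apache.org/">http://poi.apache.org/</a> for information on POI.
 * <pre>
 *   author:                 --      cm:author
 *   title:                  --      cm:title
 *   subject:                --      cm:description
 *   created:                --      cm:created
 *   Any custom property:    --      [not mapped]
 * </pre>
 * 
 * Uses Apache Tika
 * <p>
 * Configures {@link AlfrescoPoiPatchUtils} to resolve the following issues:
 * <ul>
 * <li>MNT-577: Alfresco is running 100% CPU for over 10 minutes while extracting metadata for Word office document</li>
 * <li>MNT-11823: Upload of PPTX causes very high memory usage leading to system instability</li>
 * </ul>
 * 
 * @author Nick Burch
 * @author Neil McErlean
 * @author Dmitry Velichkevich
 */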
public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter implements InitializingBean
{
    protected static Log logger = LogFactory.getLog(PoiMetadataExtracter.class);

    /**
     * Mimetypes this extracter is registered for: the OpenXML word-processing, spreadsheet and
     * presentation types, passed together with an {@link OOXMLParser} to the inherited
     * {@code buildSupportedMimetypes} helper.
     */
    public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
        new String[] {
            MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING,
            MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET,
            MimetypeMap.MIMETYPE_OPENXML_PRESENTATION
        },
        new OOXMLParser()
    );

    /** Optional footnotes limit; only pushed to {@link AlfrescoPoiPatchUtils} when non-null. */
    private Integer poiFootnotesLimit;

    /** "Properties only" flag; {@code null} is treated as {@code false}. */
    private Boolean poiExtractPropertiesOnly = false;

    /** Allowable XSLF relationship types; may be {@code null} if never configured. */
    private Set<String> poiAllowableXslfRelationshipTypes;

    /**
     * Creates the extracter, using this class' name as the extractor context and
     * {@link #SUPPORTED_MIMETYPES} as the supported mimetypes.
     */
    public PoiMetadataExtracter()
    {
        super(PoiMetadataExtracter.class.getName(), SUPPORTED_MIMETYPES);
    }

    /**
     * @return a new {@link OOXMLParser} instance on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OOXMLParser();
    }

    /**
     * MNT-577: Alfresco is running 100% CPU for over 10 minutes while extracting metadata for Word office document
     * 
     * @param poiFootnotesLimit - {@link Integer} value which specifies limit of amount of footnotes of XWPF documents
     */
    public void setPoiFootnotesLimit(Integer poiFootnotesLimit)
    {
        this.poiFootnotesLimit = poiFootnotesLimit;
    }

    /**
     * MNT-11823: Upload of PPTX causes very high memory usage leading to system instability
     * 
     * @param poiExtractPropertiesOnly - {@link Boolean} value which indicates that POI extractor must avoid building of the full document parts hierarchy and reading content of
     *        the parts. A {@code null} value is treated as {@code false}
     */
    public void setPoiExtractPropertiesOnly(Boolean poiExtractPropertiesOnly)
    {
        this.poiExtractPropertiesOnly = poiExtractPropertiesOnly;
    }

    /**
     * @return {@code true} only when the flag has been explicitly set to {@code true}; an unset
     *         ({@code null}) flag is reported as {@code false}
     */
    public Boolean isPoiExtractPropertiesOnly()
    {
        // Null-safe comparison: null and FALSE both yield false.
        return Boolean.TRUE.equals(poiExtractPropertiesOnly);
    }

    /**
     * MNT-11823: Upload of PPTX causes very high memory usage leading to system instability
     * 
     * @param poiAllowableXslfRelationshipTypes - {@link Set}&lt;{@link String}&gt; instance which determines the list of allowable relationship types for traversing during
     *        analyzing of XSLF document
     */
    public void setPoiAllowableXslfRelationshipTypes(Set<String> poiAllowableXslfRelationshipTypes)
    {
        this.poiAllowableXslfRelationshipTypes = poiAllowableXslfRelationshipTypes;
    }

    /**
     * @return the configured set of allowable XSLF relationship types (the same instance that was
     *         passed to the setter), or {@code null} if none was configured
     */
    public Set<String> getPoiAllowableXslfRelationshipTypes()
    {
        return poiAllowableXslfRelationshipTypes;
    }

    /**
     * MNT-11823: Upload of PPTX causes very high memory usage leading to system instability
     * 
     * Initialization of {@link AlfrescoPoiPatchUtils} properties for {@link PoiMetadataExtracter#getExtractorContext()} context
     */
    @Override
    public void afterPropertiesSet() throws Exception
    {
        // Normalise an explicitly injected null so the value can be handed on safely below.
        if (null == poiExtractPropertiesOnly)
        {
            poiExtractPropertiesOnly = false;
        }

        String context = getExtractorContext();

        // The footnotes limit is optional: leave the patch utilities untouched when it is not configured.
        if (null != poiFootnotesLimit)
        {
            AlfrescoPoiPatchUtils.setPoiFootnotesLimit(context, poiFootnotesLimit);
        }

        AlfrescoPoiPatchUtils.setPoiExtractPropertiesOnly(context, poiExtractPropertiesOnly);
        // The relationship types are passed through as-is (possibly null).
        AlfrescoPoiPatchUtils.setPoiAllowableXslfRelationshipTypes(context, poiAllowableXslfRelationshipTypes);
    }
}