/* * Copyright (C) 2005-2010 Alfresco Software Limited. * * This file is part of Alfresco * * Alfresco is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Alfresco is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. */ package org.alfresco.repo.content.metadata; import java.util.ArrayList; import java.util.HashSet; import org.alfresco.api.AlfrescoPublicApi; import org.alfresco.error.AlfrescoRuntimeException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; /** * A Metadata Extractor which makes use of Apache Tika, * and allows the selection of the Tika parser to be * sprung-in to extract the metadata from your document. * This is typically used with custom Tika Parsers. *
 * <pre>
 *   author:                 --      cm:author
 *   title:                  --      cm:title
 *   subject:                --      cm:description
 *   created:                --      cm:created
 *   comments:
 *   geo:lat:                --      cm:latitude
 *   geo:long:               --      cm:longitude
 * </pre>
*
 * @since 3.4
 * @author Nick Burch
 */
@AlfrescoPublicApi
public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter
{
    protected static Log logger = LogFactory.getLog(TikaSpringConfiguredMetadataExtracter.class);

    /** The parser handed out by {@link #getParser()}; injected directly or created from the configured class. */
    private Parser tikaParser;

    /** Fully qualified class name last passed to {@link #setTikaParserName(String)}. */
    private String tikaParserClassName;

    /** The class loaded from {@link #tikaParserClassName}; instantiated on demand by {@link #getParser()}. */
    private Class<? extends Parser> tikaParserClass;

    /**
     * Injects the name of the Tika parser to use.
     * <p>
     * The named class is loaded, verified to be a {@link Parser}, and then the
     * result of {@link #getParser()} is passed to {@link #setTikaParser(Parser)}.
     * Note that {@link #getParser()} returns an already-injected parser if there
     * is one, in which case no instance of the named class is created.
     *
     * @param className the fully qualified name of the Tika {@link Parser} class
     * @throws AlfrescoRuntimeException if the class cannot be found or cannot be instantiated
     * @throws ClassCastException if the named class is not a {@link Parser}
     */
    public void setTikaParserName(String className)
    {
        tikaParserClassName = className;

        // Load the class
        try
        {
            // asSubclass checks the type up front, so a non-Parser class is
            // rejected before any instance of it is created
            tikaParserClass = Class.forName(tikaParserClassName).asSubclass(Parser.class);
            setTikaParser(getParser());
        }
        catch (ClassNotFoundException e)
        {
            // Keep the original exception as the cause so the stack trace is not lost
            throw new AlfrescoRuntimeException("Specified Tika Parser '" + tikaParserClassName + "' not found", e);
        }
    }

    /**
     * Injects the Tika parser to use, and registers the media types it
     * reports as the supported mimetypes of this extracter.
     *
     * @param tikaParser the parser to use
     */
    public void setTikaParser(Parser tikaParser)
    {
        this.tikaParser = tikaParser;

        // Build the mime types, updating the copy our parent
        //  holds for us as we go along
        ArrayList<String> mimetypes = new ArrayList<String>();
        for (MediaType mt : tikaParser.getSupportedTypes(new ParseContext()))
        {
            mimetypes.add(mt.toString());
        }
        super.setSupportedMimetypes(mimetypes);
    }

    /**
     * Creates the extracter with an empty set of supported mimetypes; the set
     * is replaced when a parser is supplied through one of the setters.
     */
    public TikaSpringConfiguredMetadataExtracter()
    {
        super(new HashSet<String>());
    }

    /**
     * Returns the Tika parser.
     * <p>
     * If a parser instance has been set it is returned as-is; otherwise a new
     * instance of the configured parser class is created.
     *
     * @return the parser to use
     * @throws AlfrescoRuntimeException if the configured parser class cannot be instantiated
     */
    protected Parser getParser()
    {
        // If we were given a whole parser, return it
        if (tikaParser != null)
        {
            return tikaParser;
        }

        // Otherwise create a new one
        try
        {
            return tikaParserClass.newInstance();
        }
        catch (InstantiationException e)
        {
            throw new AlfrescoRuntimeException("Unable to create specified Parser", e);
        }
        catch (IllegalAccessException e)
        {
            throw new AlfrescoRuntimeException("Unable to create specified Parser", e);
        }
    }
}