package org.alfresco.repo.content.metadata; import java.util.ArrayList; import java.util.HashSet; import org.alfresco.api.AlfrescoPublicApi; import org.alfresco.error.AlfrescoRuntimeException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; /** * A Metadata Extractor which makes use of Apache Tika, * and allows the selection of the Tika parser to be * sprung-in to extract the metadata from your document. * This is typically used with custom Tika Parsers. *
* author: -- cm:author * title: -- cm:title * subject: -- cm:description * created: -- cm:created * comments: ** * @since 3.4 * @author Nick Burch */ @AlfrescoPublicApi public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter { protected static Log logger = LogFactory.getLog(TikaSpringConfiguredMetadataExtracter.class); private Parser tikaParser; private String tikaParserClassName; private Class extends Parser> tikaParserClass; /** * Injects the name of the Tika parser to use * @param className */ @SuppressWarnings("unchecked") public void setTikaParserName(String className) { tikaParserClassName = className; // Load the class try { tikaParserClass = (Class extends Parser>)Class.forName(tikaParserClassName); setTikaParser(getParser()); } catch(ClassNotFoundException e) { throw new AlfrescoRuntimeException("Specified Tika Parser '" + tikaParserClassName + "' not found"); } } /** * Injects the Tika parser to use * @param tikaParser */ public void setTikaParser(Parser tikaParser) { this.tikaParser = tikaParser; // Build the mime types, updating the copy our parent // holds for us as we go along ArrayListgeo:lat: -- cm:latitude *
geo:long: -- cm:longitude *