mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Tika for metadata extraction
Convert some more metadata extractors to use Tika, and enable the Tika auto-detection parser for any document without an explicitly defined extractor.
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20667 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -35,6 +35,7 @@ import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
@@ -136,14 +137,13 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
|
||||
}
|
||||
|
||||
/**
|
||||
* Does auto-detection to select the best Tika
|
||||
* Parser.
|
||||
* Implementations can override this if they
|
||||
* know their specific implementations.
|
||||
* Returns the correct Tika Parser to process
|
||||
* the document.
|
||||
* If you don't know which you want, use
|
||||
* {@link TikaAutoMetadataExtracter} which
|
||||
* makes use of the Tika auto-detection.
|
||||
*/
|
||||
protected Parser getParser() {
|
||||
return null;
|
||||
}
|
||||
protected abstract Parser getParser();
|
||||
|
||||
/**
|
||||
* Allows implementation specific mappings
|
||||
|
Reference in New Issue
Block a user