mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Merged DEV/SWIFT to HEAD (Tika and Poi)
26013: (RECORD ONLY) Upgrade POI to get initial TNEF support
26037: (RECORD ONLY) Bump the POI version for ALF-5900, so we get almost correct RTF body decoding in TNEF files
26193: (RECORD ONLY) Upgrade POI and Tika for ALF-5900
26415: (RECORD ONLY) Upgrade Tika to the latest nightly version, to get a BMP fix
27609: (RECORD ONLY) Upgrade Tika and POI for ALF-7874
27611: (RECORD ONLY) Upgrade Tika for ALF-7978
27612: (RECORD ONLY) Another outlook related tika update
27865: (RECORD ONLY FOR JARS) Update Tika, and change the auto detect extractor to register aliases of the mime types along with the canonical one
Notes:
- There is no way to verify which of the SWIFT or HEAD jars is the 'latest snapshot'
- HEAD jars were all preserved; re-apply latest snapshots to HEAD, if required
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@28223 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -22,6 +22,7 @@ import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.Parser;
|
||||
@@ -49,19 +50,35 @@ import org.apache.tika.parser.Parser;
|
||||
public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
{
|
||||
protected static Log logger = LogFactory.getLog(TikaAutoMetadataExtracter.class);
|
||||
private static AutoDetectParser parser;
|
||||
private static TikaConfig config;
|
||||
|
||||
public static ArrayList<String> SUPPORTED_MIMETYPES;
|
||||
static {
|
||||
private static ArrayList<String> buildMimeTypes(TikaConfig tikaConfig)
|
||||
{
|
||||
config = tikaConfig;
|
||||
parser = new AutoDetectParser(config);
|
||||
|
||||
SUPPORTED_MIMETYPES = new ArrayList<String>();
|
||||
AutoDetectParser p = new AutoDetectParser();
|
||||
for(MediaType mt : p.getParsers().keySet()) {
|
||||
parser = new AutoDetectParser();
|
||||
for(MediaType mt : parser.getParsers().keySet())
|
||||
{
|
||||
// Add the canonical mime type
|
||||
SUPPORTED_MIMETYPES.add( mt.toString() );
|
||||
|
||||
// And add any aliases of the mime type too - Alfresco uses some
|
||||
// non canonical forms of various mimetypes, so we need all of them
|
||||
for(MediaType alias : config.getMediaTypeRegistry().getAliases(mt))
|
||||
{
|
||||
SUPPORTED_MIMETYPES.add( alias.toString() );
|
||||
}
|
||||
}
|
||||
return SUPPORTED_MIMETYPES;
|
||||
}
|
||||
|
||||
public TikaAutoMetadataExtracter()
|
||||
public TikaAutoMetadataExtracter(TikaConfig tikaConfig)
|
||||
{
|
||||
super(SUPPORTED_MIMETYPES);
|
||||
super( buildMimeTypes(tikaConfig) );
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -70,6 +87,6 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
|
||||
*/
|
||||
@Override
|
||||
protected Parser getParser() {
|
||||
return new AutoDetectParser();
|
||||
return parser;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user