mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
Merged DEV/SWIFT to HEAD (Tika and Poi)
26013: (RECORD ONLY) Upgrade POI to get initial TNEF support
26037: (RECORD ONLY) Bump the POI version for ALF-5900, so we get almost correct RTF body decoding in TNEF files
26193: (RECORD ONLY) Upgrade POI and Tika for ALF-5900
26415: (RECORD ONLY) Upgrade Tika to the latest nightly version, to get a BMP fix
27609: (RECORD ONLY) Upgrade Tika and POI for ALF-7874
27611: (RECORD ONLY) Upgrade Tika for ALF-7978
27612: (RECORD ONLY) Another outlook related tika update
27865: (RECORD ONLY FOR JARS) Update Tika, and change the auto detect extractor to register aliases of the mime types along with the canonical one
Notes:
- There is no way to verify which of the SWIFT or HEAD jars is the 'latest snapshot'
- HEAD jars were all preserved; re-apply latest snapshots to HEAD, if required
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@28223 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -63,7 +63,9 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new TikaAutoMetadataExtracter();
|
||||
|
||||
TikaConfig config = (TikaConfig)ctx.getBean("tikaConfig");
|
||||
extracter = new TikaAutoMetadataExtracter(config);
|
||||
extracter.setDictionaryService(dictionaryService);
|
||||
extracter.register();
|
||||
|
||||
@@ -91,37 +93,17 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
|
||||
public void testSupports() throws Exception
|
||||
{
|
||||
TikaConfig config = TikaConfig.getDefaultConfig();
|
||||
|
||||
ArrayList<String> mimeTypes = new ArrayList<String>();
|
||||
for (Parser p : new Parser[] {
|
||||
new OfficeParser(), new OpenDocumentParser(),
|
||||
new Mp3Parser(), new OOXMLParser()
|
||||
}) {
|
||||
Set<MediaType> mts = p.getSupportedTypes(new ParseContext());
|
||||
for (MediaType mt : mts)
|
||||
{
|
||||
MediaType canonical = config.getMediaTypeRegistry().normalize(mt);
|
||||
mimeTypes.add( canonical.toString() );
|
||||
for (MediaType mt : mts) {
|
||||
mimeTypes.add(mt.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Check Tika handles it properly
|
||||
AutoDetectParser p = new AutoDetectParser();
|
||||
Set<String> amts = new HashSet<String>();
|
||||
for (MediaType mt : p.getSupportedTypes(new ParseContext()))
|
||||
{
|
||||
amts.add(mt.toString());
|
||||
}
|
||||
for (String mimetype : mimeTypes)
|
||||
{
|
||||
assertTrue(
|
||||
"Tika doesn't support expected mimetype: " + mimetype,
|
||||
amts.contains(mimetype)
|
||||
);
|
||||
}
|
||||
|
||||
// Now check the extractor does too
|
||||
for (String mimetype : mimeTypes)
|
||||
{
|
||||
boolean supports = extracter.isSupported(mimetype);
|
||||
@@ -228,8 +210,7 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||
assertEquals("none", p.get("Transparency Alpha"));
|
||||
|
||||
//p = openAndCheck(".bmp", "image/bmp"); // TODO Fixed in Swift,
|
||||
p = openAndCheck(".bmp", "image/x-ms-bmp"); // TODO Pre-swift workaround
|
||||
p = openAndCheck(".bmp", "image/bmp");
|
||||
assertEquals("409", p.get("width"));
|
||||
assertEquals("92", p.get("height"));
|
||||
assertEquals("8 8 8", p.get("Data BitsPerSample"));
|
||||
@@ -284,19 +265,22 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
return file;
|
||||
}
|
||||
private Map<String, Serializable> openAndCheck(String fileBase, String expMimeType) throws Throwable {
|
||||
// Cheat and ask Tika for the mime type!
|
||||
// Get the mimetype via the MimeTypeMap
|
||||
// (Uses Tika internally for the detection)
|
||||
File file = open(fileBase);
|
||||
AutoDetectParser ap = new AutoDetectParser();
|
||||
Metadata metadata = new Metadata();
|
||||
metadata.set(Metadata.RESOURCE_NAME_KEY, "quick"+fileBase);
|
||||
MediaType mt = ap.getDetector().detect(
|
||||
new BufferedInputStream(new FileInputStream(file)), metadata);
|
||||
String mimetype = mt.toString();
|
||||
ContentReader detectReader = new FileContentReader(file);
|
||||
String mimetype = mimetypeMap.guessMimetype(fileBase, detectReader);
|
||||
|
||||
assertEquals("Wrong mimetype for " + fileBase, mimetype, expMimeType);
|
||||
|
||||
// Ensure the Tika Auto parser actually handles this
|
||||
assertTrue("Mimetype should be supported but isn't: " + mimetype, extracter.isSupported(mimetype));
|
||||
|
||||
// Now create our proper reader
|
||||
ContentReader sourceReader = new FileContentReader(file);
|
||||
sourceReader.setMimetype(mimetype);
|
||||
|
||||
// And finally do the properties extraction
|
||||
return extracter.extractRaw(sourceReader);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user