Tika for metadata extraction

First pass at converting a few extractors to use Tika rather than calling third-party libraries directly, or to use the new-style Tika structure


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20640 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2010-06-14 19:02:37 +00:00
parent 7aeff72605
commit 63b2f5983a
9 changed files with 329 additions and 226 deletions

View File

@@ -132,16 +132,17 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
}
// Now check the non-standard ones we added in at test time
assertTrue(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype,
properties.containsKey(WORD_COUNT_TEST_PROPERTY)
);
assertTrue(
"Test Property " + LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype,
properties.containsKey(LAST_AUTHOR_TEST_PROPERTY)
);
if(mimetype.equals(MimetypeMap.MIMETYPE_WORD)) {
assertTrue(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype,
properties.containsKey(WORD_COUNT_TEST_PROPERTY)
);
assertEquals(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " incorrect for mimetype " + mimetype,
"9",
@@ -151,15 +152,16 @@ public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
AbstractMetadataExtracterTest.QUICK_PREVIOUS_AUTHOR,
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(LAST_AUTHOR_TEST_PROPERTY)));
} else if(mimetype.equals(MimetypeMap.MIMETYPE_EXCEL)) {
assertEquals(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype,
"0",
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(WORD_COUNT_TEST_PROPERTY)));
assertEquals(
"Test Property " + LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype,
AbstractMetadataExtracterTest.QUICK_PREVIOUS_AUTHOR,
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(LAST_AUTHOR_TEST_PROPERTY)));
} else if(mimetype.equals(MimetypeMap.MIMETYPE_PPT)) {
assertTrue(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype,
properties.containsKey(WORD_COUNT_TEST_PROPERTY)
);
assertEquals(
"Test Property " + WORD_COUNT_TEST_PROPERTY + " not found for mimetype " + mimetype,
"9",