Mirror of https://github.com/Alfresco/alfresco-community-repo.git
Add @since tags where known, and do a quick coding standards sweep
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@31023 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
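For orientation before the diff below: the sweep mostly moves opening braces onto their own line (the Allman style used in the rest of the repository) and adds missing @since Javadoc tags to the Tika-based metadata extracters. A minimal sketch of the target style, using a hypothetical class name rather than one of the classes actually touched by this commit:

/**
 * Hypothetical example illustrating the post-sweep conventions only.
 *
 * @since 3.4
 */
public class ExampleMetadataExtracter
{
    // Opening brace sits on its own line instead of trailing the declaration
    protected boolean skipAuthorCheck(String mimetype)
    {
        return false;
    }
}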
@@ -154,18 +154,24 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
     protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
     {
         // One of Creator or Author
-        if(!skipAuthorCheck(mimetype)) {
-            if(properties.containsKey(ContentModel.PROP_CREATOR)) {
+        if(!skipAuthorCheck(mimetype))
+        {
+            if(properties.containsKey(ContentModel.PROP_CREATOR))
+            {
                 assertEquals(
                         "Property " + ContentModel.PROP_CREATOR + " not found for mimetype " + mimetype,
                         QUICK_CREATOR,
                         DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATOR)));
-            } else if(properties.containsKey(ContentModel.PROP_AUTHOR)) {
+            }
+            else if(properties.containsKey(ContentModel.PROP_AUTHOR))
+            {
                 assertEquals(
                         "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
                         QUICK_CREATOR,
                         DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR)));
-            } else {
+            }
+            else
+            {
                 fail("Expected one property out of " + ContentModel.PROP_CREATOR + " and " +
                      ContentModel.PROP_AUTHOR + " but found neither of them for " + mimetype);
             }
@@ -176,7 +182,8 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
                 "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
                 QUICK_TITLE,
                 DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE)));
-        if (!skipDescriptionCheck(mimetype)) {
+        if (!skipDescriptionCheck(mimetype))
+        {
             assertEquals(
                     "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
                     QUICK_DESCRIPTION,
@@ -236,13 +243,16 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
     }


-    protected static void assertContains(String message, String needle, String haystack) {
-        if(haystack.indexOf(needle) > -1) {
+    protected static void assertContains(String message, String needle, String haystack)
+    {
+        if(haystack.indexOf(needle) > -1)
+        {
             return;
         }
         fail(message);
     }
-    protected static void assertContains(String needle, String haystack) {
+    protected static void assertContains(String needle, String haystack)
+    {
         assertContains("'" + needle + "' wasn't found in '" + haystack + "'", needle, haystack);
     }
 }
@@ -43,6 +43,7 @@ import org.apache.tika.parser.dwg.DWGParser;
  *
  * Uses Apache Tika
  *
+ * @since 3.4
  * @author Nick Burch
  */
 public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -66,14 +67,16 @@ public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
         putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
         return properties;
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new DWGParser();
     }
 }
@@ -118,8 +118,8 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest
      * We also provide the creation date - check that
      */
     protected void testFileSpecificMetadata(String mimetype,
-            Map<QName, Serializable> properties) {
+            Map<QName, Serializable> properties)
+    {
         // Check for extra fields
         assertEquals(
                 "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
@@ -72,13 +72,15 @@ public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new Mp3Parser();
     }

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         // Do the normal Audio mappings
         super.extractSpecific(metadata, properties, headers);

@@ -71,7 +71,8 @@ public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
      * We don't have quite the usual metadata. Tests the descriptions one.
      * Other tests in {@link #testFileSpecificMetadata(String, Map)}
      */
-    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) {
+    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
         // Title is as normal
         assertEquals(
                 "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
@@ -94,7 +95,8 @@ public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
     /**
      * Tests for various MP3 specific bits of metadata
      */
-    public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) {
+    public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
         super.testFileSpecificMetadata(mimetype, properties);
     }
 }
@@ -68,14 +68,16 @@ public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         // The office parser does Outlook as well as Word, Excel etc
         return new OfficeParser();
     }

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties);
         putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties);
         putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties);
@@ -86,13 +86,15 @@ public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new OfficeParser();
     }

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
         putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
         putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
@@ -105,13 +105,15 @@ public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new OpenDocumentParser();
     }

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String, String> headers) {
+            Map<String, Serializable> properties, Map<String, String> headers)
+    {
         putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
         putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
         putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
@@ -79,14 +79,18 @@ public class OpenDocumentMetadataExtracterTest extends AbstractMetadataExtracter
      * We also provide the creation date - check that
      */
     protected void testFileSpecificMetadata(String mimetype,
-            Map<QName, Serializable> properties) {
+            Map<QName, Serializable> properties)
+    {
         // Check for two cases
-        if(mimetype.equals("application/vnd.oasis.opendocument.text")) {
+        if(mimetype.equals("application/vnd.oasis.opendocument.text"))
+        {
             assertEquals(
                     "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype,
                     "2005-09-06T23:34:00.000+01:00",
                     DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED)));
-        } else if(mimetype.equals("application/vnd.oasis.opendocument.graphics")) {
+        }
+        else if(mimetype.equals("application/vnd.oasis.opendocument.graphics"))
+        {
             assertEquals(
                     "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype,
                     "2006-01-27T11:46:11.000Z",
@@ -56,7 +56,8 @@ public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new PDFParser();
     }
 }
@@ -59,7 +59,8 @@ public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new OOXMLParser();
     }
 }
@@ -68,7 +68,8 @@ public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest
     }

     @Override
-    protected boolean skipDescriptionCheck(String mimetype) {
+    protected boolean skipDescriptionCheck(String mimetype)
+    {
         // Our 3 OpenOffice 07 quick files have no description properties.
         return true;
     }
@@ -76,7 +77,8 @@ public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest

     @Override
     protected void testFileSpecificMetadata(String mimetype,
-            Map<QName, Serializable> properties) {
+            Map<QName, Serializable> properties)
+    {
         // This test class is testing 3 files: quick.docx, quick.xlsx & quick.pptx.
         // Their created times are hard-coded here for checking.
         // Of course this means that if the files are updated, the test will break
@@ -53,6 +53,7 @@ import org.gagravarr.tika.VorbisParser;
  * <b>xmpDM:releaseDate</b> -- audio:releaseDate
  * </pre>
  *
+ * @since 4.0
  * @author Nick Burch
  */
 public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -84,7 +85,8 @@ public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
     }

     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return new CompositeParser(
                 tikaConfig.getMediaTypeRegistry(), parsers
         );
@@ -92,7 +94,8 @@ public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter

     @Override
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         // Most things can go with the default Tika -> Alfresco Mapping
         // Handle the few special cases here

@@ -75,7 +75,8 @@ public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTes
      * We don't have quite the usual metadata. Tests the descriptions one.
      * Other tests in {@link #testFileSpecificMetadata(String, Map)}
      */
-    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) {
+    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
         // Title is as normal
         assertEquals(
                 "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
@@ -45,6 +45,7 @@ import org.apache.tika.parser.Parser;
  * <p>geo:long:</b> -- cm:longitude
  * </pre>
  *
+ * @since 3.4
  * @author Nick Burch
  */
 public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -86,7 +87,8 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
      * Parser.
      */
     @Override
-    protected Parser getParser() {
+    protected Parser getParser()
+    {
         return parser;
     }
 }
@@ -99,7 +99,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
                 new Mp3Parser(), new OOXMLParser()
         }) {
             Set<MediaType> mts = p.getSupportedTypes(new ParseContext());
-            for (MediaType mt : mts) {
+            for (MediaType mt : mts)
+            {
                 mimeTypes.add(mt.toString());
             }
         }
@@ -159,7 +160,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
     protected boolean skipAuthorCheck(String mimetype) { return true; }

     @Override
-    protected boolean skipDescriptionCheck(String mimetype) {
+    protected boolean skipDescriptionCheck(String mimetype)
+    {
         if(mimetype.endsWith("/ogg"))
         {
             return true;
@@ -171,7 +173,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
      * We also provide the creation date - check that
      */
     protected void testFileSpecificMetadata(String mimetype,
-            Map<QName, Serializable> properties) {
+            Map<QName, Serializable> properties)
+    {

         // Check for extra fields
         // Author isn't there for the OpenDocument ones
@@ -69,6 +69,7 @@ import org.xml.sax.SAXException;
  * <b>comments:</b>
  * </pre>
  *
+ * @since 3.4
  * @author Nick Burch
  */
 public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetadataExtracter
@@ -88,19 +89,25 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * Builds up a list of supported mime types by merging an explicit
      * list with any that Tika also claims to support
      */
-    protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers) {
+    protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers)
+    {
         ArrayList<String> types = new ArrayList<String>();
-        for(String type : explicitTypes) {
-            if(!types.contains(type)) {
+        for(String type : explicitTypes)
+        {
+            if(!types.contains(type))
+            {
                 types.add(type);
             }
         }
-        if(tikaParsers != null) {
+        if(tikaParsers != null)
+        {
             for(Parser tikaParser : tikaParsers)
             {
-                for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) {
+                for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext()))
+                {
                     String type = mt.toString();
-                    if(!types.contains(type)) {
+                    if(!types.contains(type))
+                    {
                         types.add(type);
                     }
                 }
@@ -153,9 +160,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * and similar formats, which Tika makes use of
      */
     @Override
-    protected Date makeDate(String dateStr) {
+    protected Date makeDate(String dateStr)
+    {
         // Try our formats first, in order
-        for(DateFormat df : this.tikaDateFormats) {
+        for(DateFormat df : this.tikaDateFormats)
+        {
             try
             {
                 return df.parse(dateStr);
@@ -183,7 +192,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * Do we care about the contents of the
      * extracted header, or nothing at all?
      */
-    protected boolean needHeaderContents() {
+    protected boolean needHeaderContents()
+    {
         return false;
     }

@@ -192,7 +202,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * to be done.
      */
     protected Map<String, Serializable> extractSpecific(Metadata metadata,
-            Map<String, Serializable> properties, Map<String,String> headers) {
+            Map<String, Serializable> properties, Map<String,String> headers)
+    {
         return properties;
     }

@@ -206,10 +217,14 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      */
     private InputStream getInputStream(ContentReader reader) throws IOException {
         if("image/jpeg".equals(reader.getMimetype()) ||
-           "image/tiff".equals(reader.getMimetype())) {
-            if(reader instanceof FileContentReader) {
+           "image/tiff".equals(reader.getMimetype()))
+        {
+            if(reader instanceof FileContentReader)
+            {
                 return TikaInputStream.get( ((FileContentReader)reader).getFile() );
-            } else {
+            }
+            else
+            {
                 File tmpFile = TempFileProvider.createTempFile("tika", "tmp");
                 reader.getContent(tmpFile);
                 return TikaInputStream.get(tmpFile);
@@ -235,12 +250,15 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada

         ContentHandler handler;
         Map<String,String> headers = null;
-        if(needHeaderContents()) {
+        if(needHeaderContents())
+        {
             MapCaptureContentHandler headerCapture =
                 new MapCaptureContentHandler();
             headers = headerCapture.tags;
             handler = new HeadContentHandler(headerCapture);
-        } else {
+        }
+        else
+        {
             handler = new NullContentHandler();
         }

@@ -249,7 +267,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
         // First up, copy all the Tika metadata over
         // This allows people to map any of the Tika
         // keys onto their own content model
-        for(String tikaKey : metadata.names()) {
+        for(String tikaKey : metadata.names())
+        {
             putRawValue(tikaKey, metadata.get(tikaKey), rawProperties);
         }

@@ -267,21 +286,29 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
         // being nearly as consistent as one might hope
         String subject = metadata.get(Metadata.SUBJECT);
         String description = metadata.get(Metadata.DESCRIPTION);
-        if(subject != null && description != null) {
+        if(subject != null && description != null)
+        {
             putRawValue(KEY_DESCRIPTION, description, rawProperties);
             putRawValue(KEY_SUBJECT, subject, rawProperties);
-        } else if(subject != null) {
+        }
+        else if(subject != null)
+        {
             putRawValue(KEY_DESCRIPTION, subject, rawProperties);
             putRawValue(KEY_SUBJECT, subject, rawProperties);
-        } else if(description != null) {
+        }
+        else if(description != null)
+        {
             putRawValue(KEY_DESCRIPTION, description, rawProperties);
             putRawValue(KEY_SUBJECT, description, rawProperties);
         }

         // Try for the dates two different ways too
-        if(metadata.get(Metadata.CREATION_DATE) != null) {
+        if(metadata.get(Metadata.CREATION_DATE) != null)
+        {
             putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties);
-        } else if(metadata.get(Metadata.DATE) != null) {
+        }
+        else if(metadata.get(Metadata.DATE) != null)
+        {
             putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties);
         }

@@ -308,7 +335,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * the header of the Tika content XHTML, but ignore the
      * rest.
      */
-    protected static class HeadContentHandler extends ContentHandlerDecorator {
+    protected static class HeadContentHandler extends ContentHandlerDecorator
+    {
         /**
          * XHTML XPath parser.
          */
@@ -327,7 +355,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
          *
          * @param handler content handler
          */
-        protected HeadContentHandler(ContentHandler handler) {
+        protected HeadContentHandler(ContentHandler handler)
+        {
             super(new MatchingContentHandler(handler, MATCHER));
         }
     }
@@ -337,26 +366,31 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * of them.
      * Normally only used with {@link HeadContentHandler}
      */
-    protected static class MapCaptureContentHandler implements ContentHandler {
+    protected static class MapCaptureContentHandler implements ContentHandler
+    {
         protected Map<String,String> tags =
             new HashMap<String, String>();
         private StringBuffer text;

-        public void characters(char[] ch, int start, int len) {
-            if(text != null) {
+        public void characters(char[] ch, int start, int len)
+        {
+            if(text != null)
+            {
                 text.append(ch, start, len);
             }
         }
-        public void endElement(String namespace, String localname,
-                String qname) {
-            if(text != null && text.length() > 0) {
+        public void endElement(String namespace, String localname, String qname)
+        {
+            if(text != null && text.length() > 0)
+            {
                 tags.put(qname, text.toString());
             }
             text = null;
         }
-        public void startElement(String namespace, String localname,
-                String qname, Attributes attrs) {
-            for(int i=0; i<attrs.getLength(); i++) {
+        public void startElement(String namespace, String localname, String qname, Attributes attrs)
+        {
+            for(int i=0; i<attrs.getLength(); i++)
+            {
                 tags.put(attrs.getQName(i), attrs.getValue(i));
             }
             text = new StringBuffer();
@@ -379,7 +413,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
      * Normally used when we only want the metadata, and don't
      * care about the file contents.
      */
-    protected static class NullContentHandler implements ContentHandler {
+    protected static class NullContentHandler implements ContentHandler
+    {
         public void characters(char[] paramArrayOfChar, int paramInt1,
                 int paramInt2) throws SAXException {}
         public void endDocument() throws SAXException {}
@@ -44,6 +44,7 @@ import org.apache.tika.parser.Parser;
  * <p>geo:long:</b> -- cm:longitude
  * </pre>
  *
+ * @since 3.4
  * @author Nick Burch
  */
 public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter