Add @since tags where known, and do a quick coding standards sweep

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@31023 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Nick Burch
2011-10-06 21:29:50 +00:00
parent e0483f9b36
commit 1520fbe1d9
18 changed files with 146 additions and 70 deletions

View File

@@ -154,18 +154,24 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
{ {
// One of Creator or Author // One of Creator or Author
if(!skipAuthorCheck(mimetype)) { if(!skipAuthorCheck(mimetype))
if(properties.containsKey(ContentModel.PROP_CREATOR)) { {
if(properties.containsKey(ContentModel.PROP_CREATOR))
{
assertEquals( assertEquals(
"Property " + ContentModel.PROP_CREATOR + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_CREATOR + " not found for mimetype " + mimetype,
QUICK_CREATOR, QUICK_CREATOR,
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATOR))); DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATOR)));
} else if(properties.containsKey(ContentModel.PROP_AUTHOR)) { }
else if(properties.containsKey(ContentModel.PROP_AUTHOR))
{
assertEquals( assertEquals(
"Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,
QUICK_CREATOR, QUICK_CREATOR,
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR)));
} else { }
else
{
fail("Expected one property out of " + ContentModel.PROP_CREATOR + " and " + fail("Expected one property out of " + ContentModel.PROP_CREATOR + " and " +
ContentModel.PROP_AUTHOR + " but found neither of them for " + mimetype); ContentModel.PROP_AUTHOR + " but found neither of them for " + mimetype);
} }
@@ -176,7 +182,8 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
QUICK_TITLE, QUICK_TITLE,
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE))); DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_TITLE)));
if (!skipDescriptionCheck(mimetype)) { if (!skipDescriptionCheck(mimetype))
{
assertEquals( assertEquals(
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
QUICK_DESCRIPTION, QUICK_DESCRIPTION,
@@ -236,13 +243,16 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
} }
protected static void assertContains(String message, String needle, String haystack) { protected static void assertContains(String message, String needle, String haystack)
if(haystack.indexOf(needle) > -1) { {
if(haystack.indexOf(needle) > -1)
{
return; return;
} }
fail(message); fail(message);
} }
protected static void assertContains(String needle, String haystack) { protected static void assertContains(String needle, String haystack)
{
assertContains("'" + needle + "' wasn't found in '" + haystack + "'", needle, haystack); assertContains("'" + needle + "' wasn't found in '" + haystack + "'", needle, haystack);
} }
} }

View File

@@ -43,6 +43,7 @@ import org.apache.tika.parser.dwg.DWGParser;
* *
* Uses Apache Tika * Uses Apache Tika
* *
* @since 3.4
* @author Nick Burch * @author Nick Burch
*/ */
public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -66,14 +67,16 @@ public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
return properties; return properties;
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new DWGParser(); return new DWGParser();
} }
} }

View File

@@ -118,8 +118,8 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest
* We also provide the creation date - check that * We also provide the creation date - check that
*/ */
protected void testFileSpecificMetadata(String mimetype, protected void testFileSpecificMetadata(String mimetype,
Map<QName, Serializable> properties) { Map<QName, Serializable> properties)
{
// Check for extra fields // Check for extra fields
assertEquals( assertEquals(
"Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype,

View File

@@ -72,13 +72,15 @@ public class MP3MetadataExtracter extends TikaAudioMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new Mp3Parser(); return new Mp3Parser();
} }
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
// Do the normal Audio mappings // Do the normal Audio mappings
super.extractSpecific(metadata, properties, headers); super.extractSpecific(metadata, properties, headers);

View File

@@ -71,7 +71,8 @@ public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
* We don't have quite the usual metadata. Tests the descriptions one. * We don't have quite the usual metadata. Tests the descriptions one.
* Other tests in {@link #testFileSpecificMetadata(String, Map)} * Other tests in {@link #testFileSpecificMetadata(String, Map)}
*/ */
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) { protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
{
// Title is as normal // Title is as normal
assertEquals( assertEquals(
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
@@ -94,7 +95,8 @@ public class MP3MetadataExtracterTest extends TikaAudioMetadataExtracterTest
/** /**
* Tests for various MP3 specific bits of metadata * Tests for various MP3 specific bits of metadata
*/ */
public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties) { public void testFileSpecificMetadata(String mimetype, Map<QName, Serializable> properties)
{
super.testFileSpecificMetadata(mimetype, properties); super.testFileSpecificMetadata(mimetype, properties);
} }
} }

View File

@@ -68,14 +68,16 @@ public class MailMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
// The office parser does Outlook as well as Word, Excel etc // The office parser does Outlook as well as Word, Excel etc
return new OfficeParser(); return new OfficeParser();
} }
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties); putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties);
putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties); putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties);
putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties); putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties);

View File

@@ -86,13 +86,15 @@ public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new OfficeParser(); return new OfficeParser();
} }
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties); putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties); putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties); putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
@@ -107,4 +109,4 @@ public class OfficeMetadataExtracter extends TikaPoweredMetadataExtracter
putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties); putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties);
return properties; return properties;
} }
} }

View File

@@ -105,13 +105,15 @@ public class OpenDocumentMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new OpenDocumentParser(); return new OpenDocumentParser();
} }
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String, String> headers) { Map<String, Serializable> properties, Map<String, String> headers)
{
putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties); putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties); putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties); putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);

View File

@@ -79,14 +79,18 @@ public class OpenDocumentMetadataExtracterTest extends AbstractMetadataExtracter
* We also provide the creation date - check that * We also provide the creation date - check that
*/ */
protected void testFileSpecificMetadata(String mimetype, protected void testFileSpecificMetadata(String mimetype,
Map<QName, Serializable> properties) { Map<QName, Serializable> properties)
{
// Check for two cases // Check for two cases
if(mimetype.equals("application/vnd.oasis.opendocument.text")) { if(mimetype.equals("application/vnd.oasis.opendocument.text"))
{
assertEquals( assertEquals(
"Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype,
"2005-09-06T23:34:00.000+01:00", "2005-09-06T23:34:00.000+01:00",
DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED))); DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_CREATED)));
} else if(mimetype.equals("application/vnd.oasis.opendocument.graphics")) { }
else if(mimetype.equals("application/vnd.oasis.opendocument.graphics"))
{
assertEquals( assertEquals(
"Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_CREATED + " not found for mimetype " + mimetype,
"2006-01-27T11:46:11.000Z", "2006-01-27T11:46:11.000Z",

View File

@@ -56,7 +56,8 @@ public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new PDFParser(); return new PDFParser();
} }
} }

View File

@@ -59,7 +59,8 @@ public class PoiMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new OOXMLParser(); return new OOXMLParser();
} }
} }

View File

@@ -68,7 +68,8 @@ public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest
} }
@Override @Override
protected boolean skipDescriptionCheck(String mimetype) { protected boolean skipDescriptionCheck(String mimetype)
{
// Our 3 OpenOffice 07 quick files have no description properties. // Our 3 OpenOffice 07 quick files have no description properties.
return true; return true;
} }
@@ -76,7 +77,8 @@ public class PoiMetadataExtracterTest extends AbstractMetadataExtracterTest
@Override @Override
protected void testFileSpecificMetadata(String mimetype, protected void testFileSpecificMetadata(String mimetype,
Map<QName, Serializable> properties) { Map<QName, Serializable> properties)
{
// This test class is testing 3 files: quick.docx, quick.xlsx & quick.pptx. // This test class is testing 3 files: quick.docx, quick.xlsx & quick.pptx.
// Their created times are hard-coded here for checking. // Their created times are hard-coded here for checking.
// Of course this means that if the files are updated, the test will break // Of course this means that if the files are updated, the test will break

View File

@@ -53,6 +53,7 @@ import org.gagravarr.tika.VorbisParser;
* <b>xmpDM:releaseDate</b> -- audio:releaseDate * <b>xmpDM:releaseDate</b> -- audio:releaseDate
* </pre> * </pre>
* *
* @since 4.0
* @author Nick Burch * @author Nick Burch
*/ */
public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -84,7 +85,8 @@ public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
} }
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return new CompositeParser( return new CompositeParser(
tikaConfig.getMediaTypeRegistry(), parsers tikaConfig.getMediaTypeRegistry(), parsers
); );
@@ -92,7 +94,8 @@ public class TikaAudioMetadataExtracter extends TikaPoweredMetadataExtracter
@Override @Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
// Most things can go with the default Tika -> Alfresco Mapping // Most things can go with the default Tika -> Alfresco Mapping
// Handle the few special cases here // Handle the few special cases here

View File

@@ -75,7 +75,8 @@ public class TikaAudioMetadataExtracterTest extends AbstractMetadataExtracterTes
* We don't have quite the usual metadata. Tests the descriptions one. * We don't have quite the usual metadata. Tests the descriptions one.
* Other tests in {@link #testFileSpecificMetadata(String, Map)} * Other tests in {@link #testFileSpecificMetadata(String, Map)}
*/ */
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties) { protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
{
// Title is as normal // Title is as normal
assertEquals( assertEquals(
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype, "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,

View File

@@ -45,6 +45,7 @@ import org.apache.tika.parser.Parser;
* <b>geo:long:</b> -- cm:longitude * <b>geo:long:</b> -- cm:longitude
* </pre> * </pre>
* *
* @since 3.4
* @author Nick Burch * @author Nick Burch
*/ */
public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
@@ -86,7 +87,8 @@ public class TikaAutoMetadataExtracter extends TikaPoweredMetadataExtracter
* Parser. * Parser.
*/ */
@Override @Override
protected Parser getParser() { protected Parser getParser()
{
return parser; return parser;
} }
} }

View File

@@ -99,7 +99,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
new Mp3Parser(), new OOXMLParser() new Mp3Parser(), new OOXMLParser()
}) { }) {
Set<MediaType> mts = p.getSupportedTypes(new ParseContext()); Set<MediaType> mts = p.getSupportedTypes(new ParseContext());
for (MediaType mt : mts) { for (MediaType mt : mts)
{
mimeTypes.add(mt.toString()); mimeTypes.add(mt.toString());
} }
} }
@@ -159,7 +160,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
protected boolean skipAuthorCheck(String mimetype) { return true; } protected boolean skipAuthorCheck(String mimetype) { return true; }
@Override @Override
protected boolean skipDescriptionCheck(String mimetype) { protected boolean skipDescriptionCheck(String mimetype)
{
if(mimetype.endsWith("/ogg")) if(mimetype.endsWith("/ogg"))
{ {
return true; return true;
@@ -171,7 +173,8 @@ public class TikaAutoMetadataExtracterTest extends AbstractMetadataExtracterTest
* We also provide the creation date - check that * We also provide the creation date - check that
*/ */
protected void testFileSpecificMetadata(String mimetype, protected void testFileSpecificMetadata(String mimetype,
Map<QName, Serializable> properties) { Map<QName, Serializable> properties)
{
// Check for extra fields // Check for extra fields
// Author isn't there for the OpenDocument ones // Author isn't there for the OpenDocument ones

View File

@@ -69,6 +69,7 @@ import org.xml.sax.SAXException;
* <b>comments:</b> * <b>comments:</b>
* </pre> * </pre>
* *
* @since 3.4
* @author Nick Burch * @author Nick Burch
*/ */
public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetadataExtracter public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetadataExtracter
@@ -88,19 +89,25 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* Builds up a list of supported mime types by merging an explicit * Builds up a list of supported mime types by merging an explicit
* list with any that Tika also claims to support * list with any that Tika also claims to support
*/ */
protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers) { protected static ArrayList<String> buildSupportedMimetypes(String[] explicitTypes, Parser... tikaParsers)
{
ArrayList<String> types = new ArrayList<String>(); ArrayList<String> types = new ArrayList<String>();
for(String type : explicitTypes) { for(String type : explicitTypes)
if(!types.contains(type)) { {
if(!types.contains(type))
{
types.add(type); types.add(type);
} }
} }
if(tikaParsers != null) { if(tikaParsers != null)
{
for(Parser tikaParser : tikaParsers) for(Parser tikaParser : tikaParsers)
{ {
for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext())) { for(MediaType mt : tikaParser.getSupportedTypes(new ParseContext()))
{
String type = mt.toString(); String type = mt.toString();
if(!types.contains(type)) { if(!types.contains(type))
{
types.add(type); types.add(type);
} }
} }
@@ -153,9 +160,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* and similar formats, which Tika makes use of * and similar formats, which Tika makes use of
*/ */
@Override @Override
protected Date makeDate(String dateStr) { protected Date makeDate(String dateStr)
{
// Try our formats first, in order // Try our formats first, in order
for(DateFormat df : this.tikaDateFormats) { for(DateFormat df : this.tikaDateFormats)
{
try try
{ {
return df.parse(dateStr); return df.parse(dateStr);
@@ -183,7 +192,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* Do we care about the contents of the * Do we care about the contents of the
* extracted header, or nothing at all? * extracted header, or nothing at all?
*/ */
protected boolean needHeaderContents() { protected boolean needHeaderContents()
{
return false; return false;
} }
@@ -192,7 +202,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* to be done. * to be done.
*/ */
protected Map<String, Serializable> extractSpecific(Metadata metadata, protected Map<String, Serializable> extractSpecific(Metadata metadata,
Map<String, Serializable> properties, Map<String,String> headers) { Map<String, Serializable> properties, Map<String,String> headers)
{
return properties; return properties;
} }
@@ -206,10 +217,14 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
*/ */
private InputStream getInputStream(ContentReader reader) throws IOException { private InputStream getInputStream(ContentReader reader) throws IOException {
if("image/jpeg".equals(reader.getMimetype()) || if("image/jpeg".equals(reader.getMimetype()) ||
"image/tiff".equals(reader.getMimetype())) { "image/tiff".equals(reader.getMimetype()))
if(reader instanceof FileContentReader) { {
if(reader instanceof FileContentReader)
{
return TikaInputStream.get( ((FileContentReader)reader).getFile() ); return TikaInputStream.get( ((FileContentReader)reader).getFile() );
} else { }
else
{
File tmpFile = TempFileProvider.createTempFile("tika", "tmp"); File tmpFile = TempFileProvider.createTempFile("tika", "tmp");
reader.getContent(tmpFile); reader.getContent(tmpFile);
return TikaInputStream.get(tmpFile); return TikaInputStream.get(tmpFile);
@@ -235,12 +250,15 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
ContentHandler handler; ContentHandler handler;
Map<String,String> headers = null; Map<String,String> headers = null;
if(needHeaderContents()) { if(needHeaderContents())
{
MapCaptureContentHandler headerCapture = MapCaptureContentHandler headerCapture =
new MapCaptureContentHandler(); new MapCaptureContentHandler();
headers = headerCapture.tags; headers = headerCapture.tags;
handler = new HeadContentHandler(headerCapture); handler = new HeadContentHandler(headerCapture);
} else { }
else
{
handler = new NullContentHandler(); handler = new NullContentHandler();
} }
@@ -249,7 +267,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
// First up, copy all the Tika metadata over // First up, copy all the Tika metadata over
// This allows people to map any of the Tika // This allows people to map any of the Tika
// keys onto their own content model // keys onto their own content model
for(String tikaKey : metadata.names()) { for(String tikaKey : metadata.names())
{
putRawValue(tikaKey, metadata.get(tikaKey), rawProperties); putRawValue(tikaKey, metadata.get(tikaKey), rawProperties);
} }
@@ -267,21 +286,29 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
// being nearly as consistent as one might hope // being nearly as consistent as one might hope
String subject = metadata.get(Metadata.SUBJECT); String subject = metadata.get(Metadata.SUBJECT);
String description = metadata.get(Metadata.DESCRIPTION); String description = metadata.get(Metadata.DESCRIPTION);
if(subject != null && description != null) { if(subject != null && description != null)
{
putRawValue(KEY_DESCRIPTION, description, rawProperties); putRawValue(KEY_DESCRIPTION, description, rawProperties);
putRawValue(KEY_SUBJECT, subject, rawProperties); putRawValue(KEY_SUBJECT, subject, rawProperties);
} else if(subject != null) { }
else if(subject != null)
{
putRawValue(KEY_DESCRIPTION, subject, rawProperties); putRawValue(KEY_DESCRIPTION, subject, rawProperties);
putRawValue(KEY_SUBJECT, subject, rawProperties); putRawValue(KEY_SUBJECT, subject, rawProperties);
} else if(description != null) { }
else if(description != null)
{
putRawValue(KEY_DESCRIPTION, description, rawProperties); putRawValue(KEY_DESCRIPTION, description, rawProperties);
putRawValue(KEY_SUBJECT, description, rawProperties); putRawValue(KEY_SUBJECT, description, rawProperties);
} }
// Try for the dates two different ways too // Try for the dates two different ways too
if(metadata.get(Metadata.CREATION_DATE) != null) { if(metadata.get(Metadata.CREATION_DATE) != null)
{
putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties); putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties);
} else if(metadata.get(Metadata.DATE) != null) { }
else if(metadata.get(Metadata.DATE) != null)
{
putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties); putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties);
} }
@@ -308,7 +335,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* the header of the Tika content XHTML, but ignore the * the header of the Tika content XHTML, but ignore the
* rest. * rest.
*/ */
protected static class HeadContentHandler extends ContentHandlerDecorator { protected static class HeadContentHandler extends ContentHandlerDecorator
{
/** /**
* XHTML XPath parser. * XHTML XPath parser.
*/ */
@@ -327,7 +355,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* *
* @param handler content handler * @param handler content handler
*/ */
protected HeadContentHandler(ContentHandler handler) { protected HeadContentHandler(ContentHandler handler)
{
super(new MatchingContentHandler(handler, MATCHER)); super(new MatchingContentHandler(handler, MATCHER));
} }
} }
@@ -337,26 +366,31 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* of them. * of them.
* Normally only used with {@link HeadContentHandler} * Normally only used with {@link HeadContentHandler}
*/ */
protected static class MapCaptureContentHandler implements ContentHandler { protected static class MapCaptureContentHandler implements ContentHandler
{
protected Map<String,String> tags = protected Map<String,String> tags =
new HashMap<String, String>(); new HashMap<String, String>();
private StringBuffer text; private StringBuffer text;
public void characters(char[] ch, int start, int len) { public void characters(char[] ch, int start, int len)
if(text != null) { {
if(text != null)
{
text.append(ch, start, len); text.append(ch, start, len);
} }
} }
public void endElement(String namespace, String localname, public void endElement(String namespace, String localname, String qname)
String qname) { {
if(text != null && text.length() > 0) { if(text != null && text.length() > 0)
{
tags.put(qname, text.toString()); tags.put(qname, text.toString());
} }
text = null; text = null;
} }
public void startElement(String namespace, String localname, public void startElement(String namespace, String localname, String qname, Attributes attrs)
String qname, Attributes attrs) { {
for(int i=0; i<attrs.getLength(); i++) { for(int i=0; i<attrs.getLength(); i++)
{
tags.put(attrs.getQName(i), attrs.getValue(i)); tags.put(attrs.getQName(i), attrs.getValue(i));
} }
text = new StringBuffer(); text = new StringBuffer();
@@ -379,7 +413,8 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada
* Normally used when we only want the metadata, and don't * Normally used when we only want the metadata, and don't
* care about the file contents. * care about the file contents.
*/ */
protected static class NullContentHandler implements ContentHandler { protected static class NullContentHandler implements ContentHandler
{
public void characters(char[] paramArrayOfChar, int paramInt1, public void characters(char[] paramArrayOfChar, int paramInt1,
int paramInt2) throws SAXException {} int paramInt2) throws SAXException {}
public void endDocument() throws SAXException {} public void endDocument() throws SAXException {}

View File

@@ -44,6 +44,7 @@ import org.apache.tika.parser.Parser;
* <b>geo:long:</b> -- cm:longitude * <b>geo:long:</b> -- cm:longitude
* </pre> * </pre>
* *
* @since 3.4
* @author Nick Burch * @author Nick Burch
*/ */
public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter public class TikaSpringConfiguredMetadataExtracter extends TikaPoweredMetadataExtracter