diff --git a/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java index 77cabf33a0..82d3205129 100644 --- a/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracter.java @@ -69,7 +69,6 @@ public class DWGMetadataExtracter extends TikaPoweredMetadataExtracter Map properties) { putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); - System.err.println(properties); return properties; } diff --git a/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java index e505a1fd4b..28bf031cd6 100644 --- a/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/DWGMetadataExtracterTest.java @@ -21,13 +21,17 @@ package org.alfresco.repo.content.metadata; import java.io.File; import java.io.Serializable; import java.net.URL; +import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.transform.AbstractContentTransformerTest; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.namespace.QName; +import org.apache.tika.metadata.Metadata; /** @@ -38,6 +42,8 @@ import org.alfresco.service.namespace.QName; public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest { private DWGMetadataExtracter extracter; + private static final QName TIKA_LAST_AUTHOR_TEST_PROPERTY = + QName.createQName("TikaLastAuthorTestProp"); @Override public void setUp() throws Exception @@ -46,6 +52,19 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest extracter = new DWGMetadataExtracter(); extracter.setDictionaryService(dictionaryService); extracter.register(); + + // Attach some extra mappings, using the Tika + // metadata keys namespace + // These will be tested later + HashMap> newMap = new HashMap>( + extracter.getMapping() + ); + + Set tlaSet = new HashSet(); + tlaSet.add(TIKA_LAST_AUTHOR_TEST_PROPERTY); + newMap.put( Metadata.LAST_AUTHOR, tlaSet ); + + extracter.setMapping(newMap); } /** @@ -100,11 +119,23 @@ public class DWGMetadataExtracterTest extends AbstractMetadataExtracterTest */ protected void testFileSpecificMetadata(String mimetype, Map properties) { + // Check for extra fields assertEquals( "Property " + ContentModel.PROP_AUTHOR + " not found for mimetype " + mimetype, "Nevin Nollop", DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(ContentModel.PROP_AUTHOR))); + + // Ensure that we can also get things which are standard + // Tika metadata properties, if we so choose to + assertTrue( + "Test Property " + TIKA_LAST_AUTHOR_TEST_PROPERTY + " not found for mimetype " + mimetype, + properties.containsKey(TIKA_LAST_AUTHOR_TEST_PROPERTY) + ); + assertEquals( + "Test Property " + TIKA_LAST_AUTHOR_TEST_PROPERTY + " incorrect for mimetype " + mimetype, + "paolon", + DefaultTypeConverter.INSTANCE.convert(String.class, properties.get(TIKA_LAST_AUTHOR_TEST_PROPERTY))); } } diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index d6d411611e..04b805ebc6 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -117,6 +117,7 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada * Version which also tries the ISO-8601 formats (in order..), * and similar formats, which Tika makes use of */ + @Override protected Date makeDate(String dateStr) { // Try our formats first, in order for(DateFormat df : this.tikaDateFormats) { @@ -168,11 +169,25 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada parser.parse(is, handler, metadata, context); + // First up, copy all the Tika metadata over + // This allows people to map any of the Tika + // keys onto their own content model + for(String tikaKey : metadata.names()) { + putRawValue(tikaKey, metadata.get(tikaKey), rawProperties); + } + + // Now, map the common Tika metadata keys onto + // the common Alfresco metadata keys. This allows + // existing mapping properties files to continue + // to work without needing any changes + + // The simple ones putRawValue(KEY_AUTHOR, metadata.get(Metadata.AUTHOR), rawProperties); putRawValue(KEY_TITLE, metadata.get(Metadata.TITLE), rawProperties); putRawValue(KEY_COMMENTS, metadata.get(Metadata.COMMENTS), rawProperties); - // Not everything is as consisent about these two as you might hope + // Get the subject and description, despite things not + // being nearly as consistent as one might hope String subject = metadata.get(Metadata.SUBJECT); String description = metadata.get(Metadata.DESCRIPTION); if(subject != null && description != null) { @@ -193,6 +208,11 @@ public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetada putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties); } + // If people created a specific instance + // (eg OfficeMetadataExtractor), then allow that + // instance to map the Tika keys onto its + // existing namespace so that older properties + // files continue to map correctly rawProperties = extractSpecific(metadata, rawProperties); } finally