diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java index 8f89b5b5f8..c7cd8a8a34 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java @@ -27,29 +27,47 @@ package org.alfresco.repo.content.metadata; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; +import java.util.Arrays; import java.util.Calendar; +import java.util.HashSet; import java.util.Map; -import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import org.alfresco.service.cmr.repository.ContentReader; -import org.alfresco.service.namespace.QName; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation; /** + * Metadata extractor for the PDF documents. + *
+ *   author:                 --      cm:author
+ *   title:                  --      cm:title
+ *   subject:                --      cm:description
+ *   created:                --      cm:created
+ * 
* * @author Jesper Steen Møller + * @author Derek Hulley */ -public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter +public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter { + private static final String KEY_AUTHOR = "author"; + private static final String KEY_TITLE = "title"; + private static final String KEY_SUBJECT = "subject"; + private static final String KEY_CREATED = "created"; + + public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_PDF }; + public PdfBoxMetadataExtracter() { - super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000); + super(new HashSet(Arrays.asList(SUPPORTED_MIMETYPES))); } - - public void extractInternal(ContentReader reader, Map destination) throws Throwable + + @Override + public Map extractRaw(ContentReader reader) throws Throwable { + Map rawProperties = newRawMap(); + PDDocument pdf = null; InputStream is = null; try @@ -62,13 +80,15 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter // Scoop out the metadata PDDocumentInformation docInfo = pdf.getDocumentInformation(); - trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination); - trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination); - trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination); + putRawValue(KEY_AUTHOR, docInfo.getAuthor(), rawProperties); + putRawValue(KEY_TITLE, docInfo.getTitle(), rawProperties); + putRawValue(KEY_SUBJECT, docInfo.getSubject(), rawProperties); Calendar created = docInfo.getCreationDate(); if (created != null) - destination.put(ContentModel.PROP_CREATED, created.getTime()); + { + putRawValue(KEY_CREATED, created.getTime(), rawProperties); + } } } finally @@ -82,5 +102,7 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); } } } + // Done + return rawProperties; } } diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties new file mode 100644 index 0000000000..c5a92bd177 --- /dev/null +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties @@ -0,0 +1,13 @@ +# +# PdfBoxMetadataExtracter - default mapping +# +# author: Derek Hulley + +# Namespaces +namespace.prefix.cm=http://www.alfresco.org/model/content/1.0 + +# Mappings +author=cm:author +title=cm:title +subject=cm:description +created=cm:created diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java index 83cd43f7a5..295c283e92 100644 --- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java +++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java @@ -9,13 +9,14 @@ import org.alfresco.repo.content.MimetypeMap; */ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest { - private MetadataExtracter extracter; + private PdfBoxMetadataExtracter extracter; @Override public void setUp() throws Exception { super.setUp(); extracter = new PdfBoxMetadataExtracter(); + extracter.register(); } /** @@ -26,14 +27,13 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest return extracter; } - public void testReliability() throws Exception + public void testSupports() throws Exception { - double reliability = 0.0; - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN); - assertEquals("Mimetype should not be supported", 0.0, reliability); - - reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF); - assertEquals("Mimetype should be supported", 1.0, reliability); + for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES) + { + boolean supports = extracter.isSupported(mimetype); + assertTrue("Mimetype should be supported: " + mimetype, supports); + } } public void testPdfExtraction() throws Exception