diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
index 8f89b5b5f8..c7cd8a8a34 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -27,29 +27,47 @@ package org.alfresco.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.Calendar;
+import java.util.HashSet;
import java.util.Map;
-import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
/**
+ * Extracts metadata from PDF documents. The raw keys and their default property mappings are:
+ * <pre>
+ *   <b>author:</b>   --  cm:author
+ *   <b>title:</b>    --  cm:title
+ *   <b>subject:</b>  --  cm:description
+ *   <b>created:</b>  --  cm:created
+ * </pre>
*
* @author Jesper Steen Møller
+ * @author Derek Hulley
*/
-public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
+public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
{
+ private static final String KEY_AUTHOR = "author";
+ private static final String KEY_TITLE = "title";
+ private static final String KEY_SUBJECT = "subject";
+ private static final String KEY_CREATED = "created";
+
+ public static final String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_PDF }; // final: shared constant, the reference must not be reassigned
+
public PdfBoxMetadataExtracter()
{
- super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
+ super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES))); // typed, not raw: Arrays.asList over a String[] yields List<String>
}
-
- public void extractInternal(ContentReader reader, Map destination) throws Throwable
+
+ @Override
+ public Map extractRaw(ContentReader reader) throws Throwable
{
+ Map rawProperties = newRawMap();
+
PDDocument pdf = null;
InputStream is = null;
try
@@ -62,13 +80,15 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
// Scoop out the metadata
PDDocumentInformation docInfo = pdf.getDocumentInformation();
- trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
- trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
- trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
+ putRawValue(KEY_AUTHOR, docInfo.getAuthor(), rawProperties);
+ putRawValue(KEY_TITLE, docInfo.getTitle(), rawProperties);
+ putRawValue(KEY_SUBJECT, docInfo.getSubject(), rawProperties);
Calendar created = docInfo.getCreationDate();
if (created != null)
- destination.put(ContentModel.PROP_CREATED, created.getTime());
+ {
+ putRawValue(KEY_CREATED, created.getTime(), rawProperties);
+ }
}
}
finally
@@ -82,5 +102,7 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
}
}
+ // Done
+ return rawProperties;
}
}
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties
new file mode 100644
index 0000000000..c5a92bd177
--- /dev/null
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties
@@ -0,0 +1,13 @@
+#
+# PdfBoxMetadataExtracter - default mapping
+#
+# author: Derek Hulley
+
+# Namespaces
+namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
+
+# Mappings: raw key produced by the extracter = target property (prefix:localName)
+author=cm:author
+title=cm:title
+subject=cm:description
+created=cm:created
diff --git a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
index 83cd43f7a5..295c283e92 100644
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -9,13 +9,14 @@ import org.alfresco.repo.content.MimetypeMap;
*/
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{
- private MetadataExtracter extracter;
+ private PdfBoxMetadataExtracter extracter;
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new PdfBoxMetadataExtracter();
+ extracter.register(); // the test builds the extracter by hand, so it registers it explicitly; NOTE(review): confirm what register() initialises
}
/**
@@ -26,14 +27,13 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
return extracter;
}
- public void testReliability() throws Exception
+ public void testSupports() throws Exception
{
- double reliability = 0.0;
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
- assertEquals("Mimetype should not be supported", 0.0, reliability);
-
- reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
- assertEquals("Mimetype should be supported", 1.0, reliability);
+ for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES)
+ {
+ assertTrue("Mimetype should be supported: " + mimetype, extracter.isSupported(mimetype));
+ }
+ assertFalse("Mimetype should not be supported", extracter.isSupported(MimetypeMap.MIMETYPE_TEXT_PLAIN)); // negative case carried over from testReliability
}
public void testPdfExtraction() throws Exception