Updated PdfBoxMetadataExtracter to new mappable format

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5928 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-09-17 14:21:39 +00:00 · 2007-06-13 02:01:45 +00:00
parent 9e836f04f8
commit f1f2d4c035
3 changed files with 53 additions and 18 deletions
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -27,29 +27,47 @@ package org.alfresco.repo.content.metadata;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.HashSet;
 import java.util.Map;
 import org.alfresco.model.ContentModel;
 import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.namespace.QName;
 import org.pdfbox.pdmodel.PDDocument;
 import org.pdfbox.pdmodel.PDDocumentInformation;
 /**
 * Metadata extractor for the PDF documents.
 * <pre>
 *   <b>author:</b>                 --      cm:author
 *   <b>title:</b>                  --      cm:title
 *   <b>subject:</b>                --      cm:description
 *   <b>created:</b>                --      cm:created
 * </pre>
 * 
 * @author Jesper Steen Møller
 * @author Derek Hulley
 */
-public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
+public class PdfBoxMetadataExtracter extends AbstractMappingMetadataExtracter
 {
    private static final String KEY_AUTHOR = "author";
    private static final String KEY_TITLE = "title";
    private static final String KEY_SUBJECT = "subject";
    private static final String KEY_CREATED = "created";
    public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_PDF };
    public PdfBoxMetadataExtracter()
    {
-        super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
+        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
    }
-    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
+    @Override
    public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
    {
        Map<String, Serializable> rawProperties = newRawMap();
        PDDocument pdf = null;
        InputStream is = null;
        try
@@ -62,13 +80,15 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
                // Scoop out the metadata
                PDDocumentInformation docInfo = pdf.getDocumentInformation();
-                trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
+                putRawValue(KEY_AUTHOR, docInfo.getAuthor(), rawProperties);
-                trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
+                putRawValue(KEY_TITLE, docInfo.getTitle(), rawProperties);
-                trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
+                putRawValue(KEY_SUBJECT, docInfo.getSubject(), rawProperties);
                Calendar created = docInfo.getCreationDate();
                if (created != null)
-                    destination.put(ContentModel.PROP_CREATED, created.getTime());
+                {
+                    putRawValue(KEY_CREATED, created.getTime(), rawProperties);
+                }
            }
        }
        finally
@@ -82,5 +102,7 @@ public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
                try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
            }
        }
+        // Done
+        return rawProperties;
    }
 }
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.properties
@@ -0,0 +1,13 @@
+#
+# PdfBoxMetadataExtracter - default mapping
+#
+# author: Derek Hulley
+# Namespaces
+namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
+# Mappings
+author=cm:author
+title=cm:title
+subject=cm:description
+created=cm:created
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -9,13 +9,14 @@ import org.alfresco.repo.content.MimetypeMap;
 */
 public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
 {
-    private MetadataExtracter extracter;
+    private PdfBoxMetadataExtracter extracter;
    @Override
    public void setUp() throws Exception
    {
        super.setUp();
        extracter = new PdfBoxMetadataExtracter();
        extracter.register();
    }
    /**
@@ -26,14 +27,13 @@ public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
        return extracter;
    }
-    public void testReliability() throws Exception
+    public void testSupports() throws Exception
    {
-        double reliability = 0.0;
+        for (String mimetype : PdfBoxMetadataExtracter.SUPPORTED_MIMETYPES)
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        {
-        assertEquals("Mimetype should not be supported", 0.0, reliability);
+            boolean supports = extracter.isSupported(mimetype);
-
+            assertTrue("Mimetype should be supported: " + mimetype, supports);
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
+        }
-        assertEquals("Mimetype should be supported", 1.0, reliability);
    }
    public void testPdfExtraction() throws Exception