Big honkin' merge from head. Sheesh!

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/WCM-DEV2/root@3617 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-07-24 17:32:48 +00:00 · 2006-08-27 01:01:30 +00:00
parent e2c66899cc
commit 8031cc6574
322 changed files with 20776 additions and 6550 deletions
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
@@ -1,220 +1,220 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.Serializable;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-
-import org.alfresco.error.AlfrescoRuntimeException;
-import org.alfresco.service.cmr.repository.ContentIOException;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.cmr.repository.MimetypeService;
-import org.alfresco.service.namespace.QName;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-/**
- * 
- * @author Jesper Steen Møller
- */
-abstract public class AbstractMetadataExtracter implements MetadataExtracter
-{
-    protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
-    
-    private MimetypeService mimetypeService;
-    private MetadataExtracterRegistry registry;
-    private Set<String> supportedMimetypes;
-    private double reliability;
-    private long extractionTime;
-
-    protected AbstractMetadataExtracter(String supportedMimetype, double reliability, long extractionTime)
-    {
-        this.supportedMimetypes = Collections.singleton(supportedMimetype);
-        this.reliability = reliability;
-        this.extractionTime = extractionTime;
-    }
-
-    protected AbstractMetadataExtracter(Set<String> supportedMimetypes, double reliability, long extractionTime)
-    {
-        this.supportedMimetypes = supportedMimetypes;
-        this.reliability = reliability;
-        this.extractionTime = extractionTime;
-    }
-
-    /**
-     * Set the registry to register with
-     * 
-     * @param registry a metadata extracter registry
-     */
-    public void setRegistry(MetadataExtracterRegistry registry)
-    {
-        this.registry = registry;
-    }
-
-    /**
-     * Helper setter of the mimetype service.  This is not always required.
-     * 
-     * @param mimetypeService
-     */
-    public void setMimetypeService(MimetypeService mimetypeService)
-    {
-        this.mimetypeService = mimetypeService;
-    }
-
-    /**
-     * @return Returns the mimetype helper
-     */
-    protected MimetypeService getMimetypeService()
-    {
-        return mimetypeService;
-    }
-    
-    /**
-     * Registers this instance of the extracter with the registry.
-     * 
-     * @see #setRegistry(MetadataExtracterRegistry)
-     */
-    public void register()
-    {
-        if (registry == null)
-        {
-            logger.warn("Property 'registry' has not been set.  Ignoring auto-registration: \n" +
-                    "   extracter: " + this);
-            return;
-        }
-        registry.register(this);
-    }
-    
-    /**
-     * Default reliability check that returns the reliability as configured by the contstructor
-     * if the mimetype is in the list of supported mimetypes.
-     * 
-     * @param mimetype the mimetype to check
-     */
-    public double getReliability(String mimetype)
-    {
-        if (supportedMimetypes.contains(mimetype))
-            return reliability;
-        else
-            return 0.0;
-    }
-
-    public long getExtractionTime()
-    {
-        return extractionTime;
-    }
-    
-    /**
-     * Checks if the mimetype is supported.
-     * 
-     * @param reader the reader to check
-     * @throws AlfrescoRuntimeException if the mimetype is not supported
-     */
-    protected void checkReliability(ContentReader reader)
-    {
-        String mimetype = reader.getMimetype();
-        if (getReliability(mimetype) <= 0.0)
-        {
-            throw new AlfrescoRuntimeException(
-                    "Metadata extracter does not support mimetype: \n" +
-                    "   reader: " + reader + "\n" +
-                    "   supported: " + supportedMimetypes + "\n" +
-                    "   extracter: " + this);
-        }
-    }
-
-    public final void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
-    {
-        // check the reliability
-        checkReliability(reader);
-        
-        try
-        {
-            extractInternal(reader, destination);
-        }
-        catch (Throwable e)
-        {
-            throw new ContentIOException("Metadata extraction failed: \n" +
-                    "   reader: " + reader,
-                    e);
-        }
-        finally
-        {
-            // check that the reader was closed
-            if (!reader.isClosed())
-            {
-                logger.error("Content reader not closed by metadata extracter: \n" +
-                        "   reader: " + reader + "\n" +
-                        "   extracter: " + this);
-            }
-        }
-        
-        // done
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Completed metadata extraction: \n" +
-                    "   reader: " + reader + "\n" +
-                    "   extracter: " + this);
-        }
-    }
-
-    /**
-     * Override to provide the necessary extraction logic.  Implementations must ensure that the reader
-     * is closed before the method exits.
-     * 
-     * @param reader the source of the content
-     * @param destination the property map to fill
-     * @throws Throwable an exception
-     */
-    protected abstract void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable;
-    
-    /**
-     * Examines a value or string for nulls and adds it to the map (if
-     * non-empty)
-     * 
-     * @param prop Alfresco's <code>ContentModel.PROP_</code> to set.
-     * @param value Value to set it to
-     * @param destination Map into which to set it
-     * @return true, if set, false otherwise
-     */
-    protected boolean trimPut(QName prop, Object value, Map<QName, Serializable> destination)
-    {
-        if (value == null)
-            return false;
-        if (value instanceof String)
-        {
-            String svalue = ((String) value).trim();
-            if (svalue.length() > 0)
-            {
-                destination.put(prop, svalue);
-                return true;
-            }
-            return false;
-        }
-        else if (value instanceof Serializable)
-        {
-            destination.put(prop, (Serializable) value);
-        }
-        else
-        {
-            destination.put(prop, value.toString());
-        }
-        return true;
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.alfresco.error.AlfrescoRuntimeException;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.MimetypeService;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * 
+ * @author Jesper Steen Møller
+ */
+abstract public class AbstractMetadataExtracter implements MetadataExtracter
+{
+    protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
+    
+    private MimetypeService mimetypeService;
+    private MetadataExtracterRegistry registry;
+    private Set<String> supportedMimetypes;
+    private double reliability;
+    private long extractionTime;
+
+    protected AbstractMetadataExtracter(String supportedMimetype, double reliability, long extractionTime)
+    {
+        this.supportedMimetypes = Collections.singleton(supportedMimetype);
+        this.reliability = reliability;
+        this.extractionTime = extractionTime;
+    }
+
+    protected AbstractMetadataExtracter(Set<String> supportedMimetypes, double reliability, long extractionTime)
+    {
+        this.supportedMimetypes = supportedMimetypes;
+        this.reliability = reliability;
+        this.extractionTime = extractionTime;
+    }
+
+    /**
+     * Set the registry to register with
+     * 
+     * @param registry a metadata extracter registry
+     */
+    public void setRegistry(MetadataExtracterRegistry registry)
+    {
+        this.registry = registry;
+    }
+
+    /**
+     * Helper setter of the mimetype service.  This is not always required.
+     * 
+     * @param mimetypeService the mimetype service returned by {@link #getMimetypeService()}
+     */
+    public void setMimetypeService(MimetypeService mimetypeService)
+    {
+        this.mimetypeService = mimetypeService;
+    }
+
+    /**
+     * @return Returns the mimetype helper
+     */
+    protected MimetypeService getMimetypeService()
+    {
+        return mimetypeService;
+    }
+    
+    /**
+     * Registers this instance of the extracter with the registry.
+     * 
+     * @see #setRegistry(MetadataExtracterRegistry)
+     */
+    public void register()
+    {
+        if (registry == null)
+        {
+            logger.warn("Property 'registry' has not been set.  Ignoring auto-registration: \n" +
+                    "   extracter: " + this);
+            return;
+        }
+        registry.register(this);
+    }
+    
+    /**
+     * Default reliability check that returns the reliability as configured by the constructor
+     * if the mimetype is in the list of supported mimetypes.
+     * 
+     * @param mimetype the mimetype to check
+     */
+    public double getReliability(String mimetype)
+    {
+        if (supportedMimetypes.contains(mimetype))
+            return reliability;
+        else
+            return 0.0;
+    }
+
+    public long getExtractionTime()
+    {
+        return extractionTime;
+    }
+    
+    /**
+     * Checks if the mimetype is supported.
+     * 
+     * @param reader the reader to check
+     * @throws AlfrescoRuntimeException if the mimetype is not supported
+     */
+    protected void checkReliability(ContentReader reader)
+    {
+        String mimetype = reader.getMimetype();
+        if (getReliability(mimetype) <= 0.0)
+        {
+            throw new AlfrescoRuntimeException(
+                    "Metadata extracter does not support mimetype: \n" +
+                    "   reader: " + reader + "\n" +
+                    "   supported: " + supportedMimetypes + "\n" +
+                    "   extracter: " + this);
+        }
+    }
+
+    public final void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
+    {
+        // check the reliability
+        checkReliability(reader);
+        
+        try
+        {
+            extractInternal(reader, destination);
+        }
+        catch (Throwable e)
+        {
+            throw new ContentIOException("Metadata extraction failed: \n" +
+                    "   reader: " + reader,
+                    e);
+        }
+        finally
+        {
+            // check that the reader was closed
+            if (!reader.isClosed())
+            {
+                logger.error("Content reader not closed by metadata extracter: \n" +
+                        "   reader: " + reader + "\n" +
+                        "   extracter: " + this);
+            }
+        }
+        
+        // done
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Completed metadata extraction: \n" +
+                    "   reader: " + reader + "\n" +
+                    "   extracter: " + this);
+        }
+    }
+
+    /**
+     * Override to provide the necessary extraction logic.  Implementations must ensure that the reader
+     * is closed before the method exits.
+     * 
+     * @param reader the source of the content
+     * @param destination the property map to fill
+     * @throws Throwable any failure; {@link #extract(ContentReader, Map)} wraps it in a ContentIOException
+     */
+    protected abstract void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable;
+    
+    /**
+     * Examines a value or string for nulls and adds it to the map (if
+     * non-empty)
+     * 
+     * @param prop Alfresco's <code>ContentModel.PROP_</code> to set.
+     * @param value Value to set it to
+     * @param destination Map into which to set it
+     * @return true, if set, false otherwise
+     */
+    protected boolean trimPut(QName prop, Object value, Map<QName, Serializable> destination)
+    {
+        if (value == null)
+            return false;
+        if (value instanceof String)
+        {
+            String svalue = ((String) value).trim();
+            if (svalue.length() > 0)
+            {
+                destination.put(prop, svalue);
+                return true;
+            }
+            return false;
+        }
+        else if (value instanceof Serializable)
+        {
+            destination.put(prop, (Serializable) value);
+        }
+        else
+        {
+            destination.put(prop, value.toString());
+        }
+        return true;
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
@@ -1,116 +1,116 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.Map;
-
-import junit.framework.TestCase;
-
-import org.alfresco.model.ContentModel;
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.repo.content.filestore.FileContentReader;
-import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-import org.alfresco.util.ApplicationContextHelper;
-import org.alfresco.util.TempFileProvider;
-import org.springframework.context.ApplicationContext;
-
-/**
- * @see org.alfresco.repo.content.metadata.MetadataExtracter
- * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
- * 
- * @author Jesper Steen Møller
- */
-public abstract class AbstractMetadataExtracterTest extends TestCase
-{
-    private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
-    
-    protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
-    protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
-    protected static final String QUICK_CREATOR = "Nevin Nollop";
-
-    protected MimetypeMap mimetypeMap;
-
-    protected abstract MetadataExtracter getExtracter();
-
-    /**
-     * Ensures that the temp locations are cleaned out before the tests start
-     */
-    @Override
-    public void setUp() throws Exception
-    {
-        this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
-        
-        // perform a little cleaning up
-        long now = System.currentTimeMillis();
-        TempFileProvider.TempFileCleanerJob.removeFiles(now);
-    }
-
-    /**
-     * Check that all objects are present
-     */
-    public void testSetUp() throws Exception
-    {
-        assertNotNull("MimetypeMap not present", mimetypeMap);
-        // check that the quick resources are available
-        File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
-        assertNotNull("quick.* files should be available from Tests", sourceFile);
-    }
-    
-    protected void testExtractFromMimetype(String mimetype) throws Exception
-    {
-        Map<QName, Serializable> properties = extractFromMimetype(mimetype);
-        // check
-        testCommonMetadata(mimetype, properties);
-    }
-
-    protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
-    {
-        Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
-        
-        // get the extension for the mimetype
-        String ext = mimetypeMap.getExtension(mimetype);
-
-        // attempt to get a source file for each mimetype
-        File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
-        if (sourceFile == null)
-        {
-            throw new FileNotFoundException("No quick." + ext + " file found for test");
-        }
-
-        // construct a reader onto the source file
-        ContentReader sourceReader = new FileContentReader(sourceFile);
-        sourceReader.setMimetype(mimetype);
-        getExtracter().extract(sourceReader, properties);
-        return properties;
-    }
-
-    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
-    {
-        assertEquals(
-                "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
-                QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
-        assertEquals(
-                "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
-                QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.filestore.FileContentReader;
+import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.alfresco.util.ApplicationContextHelper;
+import org.alfresco.util.TempFileProvider;
+import org.springframework.context.ApplicationContext;
+
+/**
+ * @see org.alfresco.repo.content.metadata.MetadataExtracter
+ * @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
+ * 
+ * @author Jesper Steen Møller
+ */
+public abstract class AbstractMetadataExtracterTest extends TestCase
+{
+    private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
+    
+    protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
+    protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
+    protected static final String QUICK_CREATOR = "Nevin Nollop";
+
+    protected MimetypeMap mimetypeMap;
+
+    protected abstract MetadataExtracter getExtracter();
+
+    /**
+     * Ensures that the temp locations are cleaned out before the tests start
+     */
+    @Override
+    public void setUp() throws Exception
+    {
+        this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
+        
+        // perform a little cleaning up
+        long now = System.currentTimeMillis();
+        TempFileProvider.TempFileCleanerJob.removeFiles(now);
+    }
+
+    /**
+     * Check that all objects are present
+     */
+    public void testSetUp() throws Exception
+    {
+        assertNotNull("MimetypeMap not present", mimetypeMap);
+        // check that the quick resources are available
+        File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
+        assertNotNull("quick.* files should be available from Tests", sourceFile);
+    }
+    
+    protected void testExtractFromMimetype(String mimetype) throws Exception
+    {
+        Map<QName, Serializable> properties = extractFromMimetype(mimetype);
+        // check
+        testCommonMetadata(mimetype, properties);
+    }
+
+    protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
+    {
+        Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
+        
+        // get the extension for the mimetype
+        String ext = mimetypeMap.getExtension(mimetype);
+
+        // attempt to get a source file for each mimetype
+        File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
+        if (sourceFile == null)
+        {
+            throw new FileNotFoundException("No quick." + ext + " file found for test");
+        }
+
+        // construct a reader onto the source file
+        ContentReader sourceReader = new FileContentReader(sourceFile);
+        sourceReader.setMimetype(mimetype);
+        getExtracter().extract(sourceReader, properties);
+        return properties;
+    }
+
+    protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
+    {
+        assertEquals(
+                "Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
+                QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
+        assertEquals(
+                "Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
+                QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
@@ -1,169 +1,169 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-import javax.swing.text.ChangedCharSetException;
-import javax.swing.text.MutableAttributeSet;
-import javax.swing.text.html.HTML;
-import javax.swing.text.html.HTMLEditorKit;
-import javax.swing.text.html.parser.ParserDelegator;
-
-import org.alfresco.model.ContentModel;
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-
-/**
- * 
- * @author Jesper Steen Møller
- */
-public class HtmlMetadataExtracter extends AbstractMetadataExtracter
-{
-    private static final Set<String> MIMETYPES = new HashSet<String>(5);
-    static
-    {
-        MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
-        MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
-    }
-
-    public HtmlMetadataExtracter()
-    {
-        super(MIMETYPES, 1.0, 1000);
-    }
-
-    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
-    {
-        final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
-        
-        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
-        {
-            StringBuffer title = null;
-            boolean inHead = false;
-
-            public void handleText(char[] data, int pos)
-            {
-                if (title != null)
-                {
-                    title.append(data);
-                }
-            }
-
-            public void handleComment(char[] data, int pos)
-            {
-                // Perhaps sniff for Office 9+ metadata in here?
-            }
-
-            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
-            {
-                if (HTML.Tag.HEAD.equals(t))
-                {
-                    inHead = true;
-                }
-                else if (HTML.Tag.TITLE.equals(t) && inHead)
-                {
-                    title = new StringBuffer();
-                }
-                else
-                    handleSimpleTag(t, a, pos);
-            }
-
-            public void handleEndTag(HTML.Tag t, int pos)
-            {
-                if (HTML.Tag.HEAD.equals(t))
-                {
-                    inHead = false;
-                }
-                else if (HTML.Tag.TITLE.equals(t) && title != null)
-                {
-                    trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
-                    title = null;
-                }
-            }
-
-            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
-            {
-                if (HTML.Tag.META.equals(t))
-                {
-                    Object nameO = a.getAttribute(HTML.Attribute.NAME);
-                    Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
-                    if (nameO == null || valueO == null)
-                        return;
-
-                    String name = nameO.toString();
-
-                    if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
-                            || name.equalsIgnoreCase("dc.creator"))
-                    {
-                        trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
-                    }
-                    if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
-                    {
-                        trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
-                    }
-                }
-            }
-
-            public void handleError(String errorMsg, int pos)
-            {
-            }
-        };
-
-        String charsetGuess = "UTF-8";
-        int tries = 0;
-        while (tries < 3)
-        {
-            tempDestination.clear();
-            Reader r = null;
-            InputStream cis = null;
-            try
-            {
-                cis = reader.getContentInputStream();
-                // TODO: for now, use default charset; we should attempt to map from html meta-data
-                r = new InputStreamReader(cis);
-                HTMLEditorKit.Parser parser = new ParserDelegator();
-                parser.parse(r, callback, tries > 0);
-                destination.putAll(tempDestination);
-                break;
-            }
-            catch (ChangedCharSetException ccse)
-            {
-                tries++;
-                charsetGuess = ccse.getCharSetSpec();
-                int begin = charsetGuess.indexOf("charset=");
-                if (begin > 0)
-                    charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
-                reader = reader.getReader();
-            }
-            finally
-            {
-                if (r != null)
-                    r.close();
-                if (cis != null)
-                    cis.close();
-            }
-        }
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import javax.swing.text.ChangedCharSetException;
+import javax.swing.text.MutableAttributeSet;
+import javax.swing.text.html.HTML;
+import javax.swing.text.html.HTMLEditorKit;
+import javax.swing.text.html.parser.ParserDelegator;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+
+/**
+ * 
+ * @author Jesper Steen Møller
+ */
+public class HtmlMetadataExtracter extends AbstractMetadataExtracter
+{
+    private static final Set<String> MIMETYPES = new HashSet<String>(5);
+    static
+    {
+        MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
+        MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
+    }
+
+    public HtmlMetadataExtracter()
+    {
+        super(MIMETYPES, 1.0, 1000);
+    }
+
+    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
+    {
+        final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
+        
+        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
+        {
+            StringBuffer title = null;
+            boolean inHead = false;
+
+            public void handleText(char[] data, int pos)
+            {
+                if (title != null)
+                {
+                    title.append(data);
+                }
+            }
+
+            public void handleComment(char[] data, int pos)
+            {
+                // Perhaps sniff for Office 9+ metadata in here?
+            }
+
+            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
+            {
+                if (HTML.Tag.HEAD.equals(t))
+                {
+                    inHead = true;
+                }
+                else if (HTML.Tag.TITLE.equals(t) && inHead)
+                {
+                    title = new StringBuffer();
+                }
+                else
+                    handleSimpleTag(t, a, pos);
+            }
+
+            public void handleEndTag(HTML.Tag t, int pos)
+            {
+                if (HTML.Tag.HEAD.equals(t))
+                {
+                    inHead = false;
+                }
+                else if (HTML.Tag.TITLE.equals(t) && title != null)
+                {
+                    trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
+                    title = null;
+                }
+            }
+
+            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
+            {
+                if (HTML.Tag.META.equals(t))
+                {
+                    Object nameO = a.getAttribute(HTML.Attribute.NAME);
+                    Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
+                    if (nameO == null || valueO == null)
+                        return;
+
+                    String name = nameO.toString();
+
+                    if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
+                            || name.equalsIgnoreCase("dc.creator"))
+                    {
+                        trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
+                    }
+                    if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
+                    {
+                        trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
+                    }
+                }
+            }
+
+            public void handleError(String errorMsg, int pos)
+            {
+            }
+        };
+
+        String charsetGuess = "UTF-8";
+        int tries = 0;
+        while (tries < 3)
+        {
+            tempDestination.clear();
+            Reader r = null;
+            InputStream cis = null;
+            try
+            {
+                cis = reader.getContentInputStream();
+                // TODO: for now, use default charset; we should attempt to map from html meta-data
+                r = new InputStreamReader(cis);
+                HTMLEditorKit.Parser parser = new ParserDelegator();
+                parser.parse(r, callback, tries > 0);
+                destination.putAll(tempDestination);
+                break;
+            }
+            catch (ChangedCharSetException ccse)
+            {
+                tries++;
+                charsetGuess = ccse.getCharSetSpec();
+                int begin = charsetGuess.indexOf("charset=");
+                if (begin > 0)
+                    charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
+                reader = reader.getReader();
+            }
+            finally
+            {
+                if (r != null)
+                    r.close();
+                if (cis != null)
+                    cis.close();
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
@@ -1,57 +1,57 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import org.alfresco.repo.content.MimetypeMap;
-
-/**
- * @author Jesper Steen Møller
- */
-public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
-{
-    private MetadataExtracter extracter;
-
-    @Override
-    public void setUp() throws Exception
-    {
-        super.setUp();
-        extracter = new HtmlMetadataExtracter();
-    }
-
-    /**
-     * @return Returns the same transformer regardless - it is allowed
-     */
-    protected MetadataExtracter getExtracter()
-    {
-        return extracter;
-    }
-
-    public void testReliability() throws Exception
-    {
-        double reliability = 0.0;
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
-        assertEquals("Mimetype text should not be supported", 0.0, reliability);
-
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_HTML);
-        assertEquals("HTML should be supported", 1.0, reliability);
-    }
-
-    public void testHtmlExtraction() throws Exception
-    {
-        testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+
+/**
+ * @author Jesper Steen Møller
+ */
+public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private MetadataExtracter extracter;
+
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+        extracter = new HtmlMetadataExtracter();
+    }
+
+    /**
+     * @return Returns the single extracter instance created in {@link #setUp()}
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return extracter;
+    }
+
+    public void testReliability() throws Exception
+    {
+        double reliability = 0.0;
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertEquals("Mimetype text should not be supported", 0.0, reliability);
+
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_HTML);
+        assertEquals("HTML should be supported", 1.0, reliability);
+    }
+
+    public void testHtmlExtraction() throws Exception
+    {
+        testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MailMetadataExtracter.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2005 Jesper Steen M<>ller
+ * Copyright (C) 2005 Alfresco, Inc.
 *
 * Licensed under the Mozilla Public License version 1.1 
 * with a permitted attribution clause. You may obtain a
@@ -26,6 +26,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;

+import org.alfresco.model.ContentModel;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.namespace.NamespaceService;
@@ -45,17 +46,8 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
    public static String[] SUPPORTED_MIMETYPES = new String[] {
        "message/rfc822"};
    
-    private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E";
-    private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E";
-    private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E";
-    private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E";
-    private static final String SUBSTG_DATE = "__substg1.0_00470102";
-    
-    private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
-    private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
-    private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
-    private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
-    private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");
+    private static final String STREAM_PREFIX = "__substg1.0_";
+    private static final int STREAM_PREFIX_LENGTH = STREAM_PREFIX.length();

    // the CC: email addresses
    private ThreadLocal<List<String>> receipientEmails = new ThreadLocal<List<String>>();
@@ -73,47 +65,10 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
            {
                try
                {
-                    String name = event.getName();
-                    
-                    if (name.equals(SUBSTG_RECIPIENTEMAIL))         // a recipient email address
+                    if (event.getName().startsWith(STREAM_PREFIX))
                    {
-                        String emailAddress = readPlainTextStream(event.getStream());
-                        receipientEmails.get().add(convertExchangeAddress(emailAddress));
-                    }
-                    else if (name.equals(SUBSTG_RECEIVEDEMAIL))     // receiver email address
-                    {
-                        String emailAddress = readPlainTextStream(event.getStream());
-                        destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress));
-                    }
-                    else if (name.equals(SUBSTG_SENDEREMAIL))       // sender email - NOTE either email OR full Exchange data e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN
-                    {
-                        String emailAddress = readPlainTextStream(event.getStream());
-                        destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress));
-                    }
-                    else if (name.equals(SUBSTG_DATE))
-                    {
-                        // the date is not really plain text - but it's easier to parse as such
-                        String date = readPlainTextStream(event.getStream());
-                        int valueIndex = date.indexOf("l=");
-                        if (valueIndex != -1)
-                        {
-                            int dateIndex = date.indexOf('-', valueIndex);
-                            if (dateIndex != -1)
-                            {
-                                dateIndex++;
-                                String strYear = date.substring(dateIndex, dateIndex + 2);
-                                int year = Integer.parseInt(strYear) + (2000 - 1900);
-                                String strMonth = date.substring(dateIndex + 2, dateIndex + 4);
-                                int month = Integer.parseInt(strMonth) - 1;
-                                String strDay = date.substring(dateIndex + 4, dateIndex + 6);
-                                int day = Integer.parseInt(strDay);
-                                String strHour = date.substring(dateIndex + 6, dateIndex + 8);
-                                int hour = Integer.parseInt(strHour);
-                                String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
-                                int minute = Integer.parseInt(strMinute);
-                                destination.put(PROP_SENTDATE, new Date(year, month, day, hour, minute));
-                            }
-                        }
+                        StreamHandler handler = new StreamHandler(event.getName(), event.getStream());
+                        handler.process(destination);
                    }
                }
                catch (Exception ex)
@@ -145,7 +100,7 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
            // store multi-value extracted property
            if (receipientEmails.get().size() != 0)
            {
-                destination.put(PROP_ADDRESSEES, (Serializable)receipientEmails.get());
+                destination.put(ContentModel.PROP_ADDRESSEES, (Serializable)receipientEmails.get());
            }
        }
        finally
@@ -157,14 +112,6 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
        }
    }
    
-    private static String readPlainTextStream(DocumentInputStream stream)
-        throws IOException
-    {
-        byte[] data = new byte[stream.available()];
-        int read = stream.read(data);
-        return new String(data);
-    }
-    
    private static String convertExchangeAddress(String email)
    {
        if (email.lastIndexOf("/CN=") == -1)
@@ -177,4 +124,133 @@ public class MailMetadataExtracter extends AbstractMetadataExtracter
            return email.substring(email.lastIndexOf("/CN=") + 4);
        }
    }
+
+    // four-character encoding codes: characters 5-8 after the stream name prefix
+    private static final String ENCODING_TEXT = "001E";
+    private static final String ENCODING_BINARY = "0102";
+    private static final String ENCODING_UNICODE = "001F";
+
+    // four-character stream type codes: characters 1-4 after the stream name prefix
+    private static final String SUBSTG_MESSAGEBODY = "1000";
+    private static final String SUBSTG_RECIPIENTEMAIL = "39FE";
+    private static final String SUBSTG_RECEIVEDEMAIL = "0076";
+    private static final String SUBSTG_SENDEREMAIL = "0C1F";
+    private static final String SUBSTG_DATE = "0047";
+    private static final String SUBSTG_SUBJECT = "0037";
+
+    /**
+     * Handles a single named stream of the message: splits the stream name into
+     * its type and encoding codes and copies the values of the recognised stream
+     * types into the extracted properties.
+     */
+    private class StreamHandler
+    {
+        private final String type;
+        private final String encoding;
+        private final DocumentInputStream stream;
+
+        /**
+         * @param name the stream name; must start with the stream prefix followed by
+         *      at least eight characters (four for the type, four for the encoding)
+         * @param stream the stream content
+         */
+        StreamHandler(String name, DocumentInputStream stream)
+        {
+            this.type = name.substring(STREAM_PREFIX_LENGTH, STREAM_PREFIX_LENGTH + 4);
+            this.encoding = name.substring(STREAM_PREFIX_LENGTH + 4, STREAM_PREFIX_LENGTH + 8);
+            this.stream = stream;
+        }
+
+        /**
+         * Stores the value of the stream if its type is one of the recognised ones;
+         * streams of any other type are left unread. Recipient addresses are added to
+         * the enclosing extracter's thread-local list, all other values to the map.
+         *
+         * @param destination the map to receive the extracted properties
+         * @throws IOException if the stream cannot be read
+         */
+        void process(final Map<QName, Serializable> destination)
+            throws IOException
+        {
+            if (type.equals(SUBSTG_SENDEREMAIL))
+            {
+                destination.put(ContentModel.PROP_ORIGINATOR, convertExchangeAddress(extractText()));
+            }
+            else if (type.equals(SUBSTG_RECIPIENTEMAIL))
+            {
+                receipientEmails.get().add(convertExchangeAddress(extractText()));
+            }
+            else if (type.equals(SUBSTG_RECEIVEDEMAIL))
+            {
+                destination.put(ContentModel.PROP_ADDRESSEE, convertExchangeAddress(extractText()));
+            }
+            else if (type.equals(SUBSTG_SUBJECT))
+            {
+                destination.put(ContentModel.PROP_SUBJECT, extractText());
+            }
+            else if (type.equals(SUBSTG_DATE))
+            {
+                // the date is not really plain text - but it's easier to parse as such
+                String date = extractText();
+                int valueIndex = date.indexOf("l=");
+                if (valueIndex != -1)
+                {
+                    int dateIndex = date.indexOf('-', valueIndex);
+                    if (dateIndex != -1)
+                    {
+                        // two-digit fields follow the dash; java.util.Date counts years from 1900
+                        dateIndex++;
+                        String strYear = date.substring(dateIndex, dateIndex + 2);
+                        int year = Integer.parseInt(strYear) + (2000 - 1900);
+                        String strMonth = date.substring(dateIndex + 2, dateIndex + 4);
+                        int month = Integer.parseInt(strMonth) - 1;
+                        String strDay = date.substring(dateIndex + 4, dateIndex + 6);
+                        int day = Integer.parseInt(strDay);
+                        String strHour = date.substring(dateIndex + 6, dateIndex + 8);
+                        int hour = Integer.parseInt(strHour);
+                        // NOTE(review): characters +8..+10 are skipped and +10..+12 are used as
+                        // the minute - confirm this offset against the actual stream layout
+                        String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
+                        int minute = Integer.parseInt(strMinute);
+                        destination.put(ContentModel.PROP_SENTDATE, new Date(year, month, day, hour, minute));
+                    }
+                }
+            }
+        }
+
+        /**
+         * Reads the remaining content of the stream and converts it to text.
+         * Streams with the unicode encoding code are decoded as little-endian
+         * double-byte text; everything else is decoded with the platform default
+         * charset.
+         *
+         * @return the stream content as text
+         * @throws IOException if the stream cannot be read
+         */
+        private String extractText()
+            throws IOException
+        {
+            byte[] data = new byte[stream.available()];
+            // a single read is not guaranteed to fill the buffer, so read until full or EOF
+            int length = 0;
+            while (length < data.length)
+            {
+                int count = stream.read(data, length, data.length - length);
+                if (count <= 0)
+                {
+                    break;
+                }
+                length += count;
+            }
+
+            if (this.encoding.equals(ENCODING_UNICODE))
+            {
+                // decode both bytes of every character instead of discarding the high byte,
+                // which corrupted any non-ASCII text; a trailing odd byte is ignored
+                return new String(data, 0, length & ~1, "UTF-16LE");
+            }
+            // ENCODING_TEXT, ENCODING_BINARY and any unrecognised encoding code
+            return new String(data, 0, length);
+        }
+    }
 }
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@@ -1,72 +1,72 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.alfresco.service.cmr.repository.ContentIOException;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-
-/**
- * 
- * @author Jesper Steen Møller
- */
-public interface MetadataExtracter
-{
-    /**
-     * Provides the approximate accuracy with which this extracter can extract
-     * metadata for the mimetype.
-     * <p>
-     * 
-     * @param sourceMimetype the source mimetype
-     * @return Returns a score 0.0 to 1.0. 0.0 indicates that the extraction
-     *         cannot be performed at all. 1.0 indicates that the extraction can
-     *         be performed perfectly.
-     */
-    public double getReliability(String sourceMimetype);
-
-    /**
-     * Provides an estimate, usually a worst case guess, of how long an
-     * extraction will take.
-     * <p>
-     * This method is used to determine, up front, which of a set of equally
-     * reliant transformers will be used for a specific extraction.
-     * 
-     * @return Returns the approximate number of milliseconds per transformation
-     */
-    public long getExtractionTime();
-
-    /**
-     * Extracts the metadata from the content provided by the reader and source
-     * mimetype to the supplied map.
-     * <p>
-     * The extraction viability can be determined by an up front call to
-     * {@link #getReliability(String)}.
-     * <p>
-     * The source mimetype <b>must</b> be available on the
-     * {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
-     * of the reader.
-     * 
-     * @param reader the source of the content
-     * @param destination the destination of the extraction
-     * @throws ContentIOException if an IO exception occurs
-     */
-    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
-
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+
+/**
+ * Contract for components that extract metadata properties from content.
+ * @author Jesper Steen Møller
+ */
+public interface MetadataExtracter
+{
+    /**
+     * Provides the approximate accuracy with which this extracter can extract
+     * metadata for the mimetype.
+     * <p>
+     * The score is specific to the mimetype that is passed in.
+     * @param sourceMimetype the source mimetype
+     * @return Returns a score 0.0 to 1.0. 0.0 indicates that the extraction
+     *         cannot be performed at all. 1.0 indicates that the extraction can
+     *         be performed perfectly.
+     */
+    public double getReliability(String sourceMimetype);
+
+    /**
+     * Provides an estimate, usually a worst case guess, of how long an
+     * extraction will take.
+     * <p>
+     * This method is used to determine, up front, which of a set of equally
+     * reliable extracters will be used for a specific extraction.
+     * 
+     * @return Returns the approximate number of milliseconds per extraction
+     */
+    public long getExtractionTime();
+
+    /**
+     * Extracts the metadata from the content provided by the reader and source
+     * mimetype to the supplied map.
+     * <p>
+     * The extraction viability can be determined by an up front call to
+     * {@link #getReliability(String)}.
+     * <p>
+     * The source mimetype <b>must</b> be available on the
+     * {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
+     * of the reader.
+     * 
+     * @param reader the source of the content
+     * @param destination the destination of the extraction
+     * @throws ContentIOException if an IO exception occurs
+     */
+    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
+
+}
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -1,191 +1,172 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-
-import org.alfresco.error.AlfrescoRuntimeException;
-import org.alfresco.repo.content.MimetypeMap;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-/**
- * Holds and provides the most appropriate metadate extracter for a particular
- * mimetype.
- * <p>
- * The extracters themselves know how well they are able to extract metadata.
- * 
- * @see org.alfresco.repo.content.metadata.MetadataExtracter
- * @author Jesper Steen Møller
- */
-public class MetadataExtracterRegistry
-{
-    private static final Log logger = LogFactory.getLog(MetadataExtracterRegistry.class);
-
-    private List<MetadataExtracter> extracters;
-    private Map<String, MetadataExtracter> extracterCache;
-
-    private MimetypeMap mimetypeMap;
-    /** Controls read access to the cache */
-    private Lock extracterCacheReadLock;
-    /** controls write access to the cache */
-    private Lock extracterCacheWriteLock;
-
-    public MetadataExtracterRegistry()
-    {
-        // initialise lists
-        extracters = new ArrayList<MetadataExtracter>(10);
-        extracterCache = new HashMap<String, MetadataExtracter>(17);
-
-        // create lock objects for access to the cache
-        ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
-        extracterCacheReadLock = extractionCacheLock.readLock();
-        extracterCacheWriteLock = extractionCacheLock.writeLock();
-    }
-
-    /**
-     * The mimetype map that will be used to check requests against
-     * 
-     * @param mimetypeMap a map of mimetypes
-     */
-    public void setMimetypeMap(MimetypeMap mimetypeMap)
-    {
-        this.mimetypeMap = mimetypeMap;
-    }
-
-    /**
-     * Register an instance of an extracter for use
-     * 
-     * @param extracter an extracter
-     */
-    public void register(MetadataExtracter extracter)
-    {
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Registering metadata extracter: " + extracter);
-        }
-
-        extracterCacheWriteLock.lock();
-        try
-        {
-            extracters.add(extracter);
-            extracterCache.clear();
-        }
-        finally
-        {
-            extracterCacheWriteLock.unlock();
-        }
-    }
-
-    /**
-     * Gets the best metadata extracter. This is a combination of the most
-     * reliable and the most performant extracter.
-     * <p>
-     * The result is cached for quicker access next time.
-     * 
-     * @param mimetype the source MIME of the extraction
-     * @return Returns a metadata extracter that can extract metadata from the
-     *         chosen MIME type.
-     */
-    public MetadataExtracter getExtracter(String sourceMimetype)
-    {
-        // check that the mimetypes are valid
-        if (!mimetypeMap.getMimetypes().contains(sourceMimetype))
-        {
-            throw new AlfrescoRuntimeException("Unknown extraction source mimetype: " + sourceMimetype);
-        }
-
-        MetadataExtracter extracter = null;
-        extracterCacheReadLock.lock();
-        try
-        {
-            if (extracterCache.containsKey(sourceMimetype))
-            {
-                // the translation has been requested before
-                // it might have been null
-                return extracterCache.get(sourceMimetype);
-            }
-        }
-        finally
-        {
-            extracterCacheReadLock.unlock();
-        }
-
-        // the translation has not been requested before
-        // get a write lock on the cache
-        // no double check done as it is not an expensive task
-        extracterCacheWriteLock.lock();
-        try
-        {
-            // find the most suitable transformer - may be empty list
-            extracter = findBestExtracter(sourceMimetype);
-            // store the result even if it is null
-            extracterCache.put(sourceMimetype, extracter);
-            return extracter;
-        }
-        finally
-        {
-            extracterCacheWriteLock.unlock();
-        }
-    }
-
-    /**
-     * @param sourceMimetype The MIME type under examination
-     * @return The fastest of the most reliable extracters in <code>extracters</code>
-     *      for the given MIME type, or null if none is available.
-     */
-    private MetadataExtracter findBestExtracter(String sourceMimetype)
-    {
-        double bestReliability = -1;
-        long bestTime = Long.MAX_VALUE;
-        logger.debug("Finding best extracter for " + sourceMimetype);
-
-        MetadataExtracter bestExtracter = null;
-
-        for (MetadataExtracter ext : extracters)
-        {
-            double r = ext.getReliability(sourceMimetype);
-            if (r <= 0.0)
-            {
-                // extraction not achievable
-                continue;
-            }
-            else if (r == bestReliability)
-            {
-                long time = ext.getExtractionTime();
-                if (time < bestTime)
-                {
-                    bestExtracter = ext;
-                    bestTime = time;
-                }
-            }
-            else if (r > bestReliability)
-            {
-                bestExtracter = ext;
-                bestReliability = r;
-                bestTime = ext.getExtractionTime();
-            }
-        }
-        return bestExtracter;
-    }
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Holds and provides the most appropriate metadata extracter for a particular
+ * mimetype.
+ * <p>
+ * The extracters themselves know how well they are able to extract metadata.
+ * 
+ * @see org.alfresco.repo.content.metadata.MetadataExtracter
+ * @author Jesper Steen Møller
+ */
+public class MetadataExtracterRegistry
+{
+    private static final Log logger = LogFactory.getLog(MetadataExtracterRegistry.class);
+
+    private List<MetadataExtracter> extracters;
+    private Map<String, MetadataExtracter> extracterCache;
+
+    /** Controls read access to the cache */
+    private Lock extracterCacheReadLock;
+    /** Controls write access to the cache */
+    private Lock extracterCacheWriteLock;
+
+    public MetadataExtracterRegistry()
+    {
+        // initialise lists
+        extracters = new ArrayList<MetadataExtracter>(10);
+        extracterCache = new HashMap<String, MetadataExtracter>(17);
+
+        // create lock objects for access to the cache
+        ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
+        extracterCacheReadLock = extractionCacheLock.readLock();
+        extracterCacheWriteLock = extractionCacheLock.writeLock();
+    }
+
+    /**
+     * Registers an extracter for use and clears the cache of previous lookups
+     * so that the new extracter is considered by subsequent requests.
+     * @param extracter an extracter
+     */
+    public void register(MetadataExtracter extracter)
+    {
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Registering metadata extracter: " + extracter);
+        }
+
+        extracterCacheWriteLock.lock();
+        try
+        {
+            extracters.add(extracter);
+            extracterCache.clear();
+        }
+        finally
+        {
+            extracterCacheWriteLock.unlock();
+        }
+    }
+
+    /**
+     * Gets the best metadata extracter. This is a combination of the most
+     * reliable and the most performant extracter.
+     * <p>
+     * The result is cached for quicker access next time.
+     * 
+     * @param sourceMimetype the source MIME type of the extraction
+     * @return Returns a metadata extracter that can extract metadata from the
+     *         chosen MIME type, or <code>null</code> if none is available.
+     */
+    public MetadataExtracter getExtracter(String sourceMimetype)
+    {
+        MetadataExtracter extracter = null;
+        extracterCacheReadLock.lock();
+        try
+        {
+            if (extracterCache.containsKey(sourceMimetype))
+            {
+                // the extracter has been requested before
+                // it might have been null
+                return extracterCache.get(sourceMimetype);
+            }
+        }
+        finally
+        {
+            extracterCacheReadLock.unlock();
+        }
+
+        // the extracter has not been requested before
+        // get a write lock on the cache
+        // no double check done as it is not an expensive task
+        extracterCacheWriteLock.lock();
+        try
+        {
+            // find the most suitable extracter - may be null
+            extracter = findBestExtracter(sourceMimetype);
+            // store the result even if it is null
+            extracterCache.put(sourceMimetype, extracter);
+            return extracter;
+        }
+        finally
+        {
+            extracterCacheWriteLock.unlock();
+        }
+    }
+
+    /**
+     * @param sourceMimetype The MIME type under examination
+     * @return The fastest of the most reliable extracters in <code>extracters</code>
+     *      for the given MIME type, or null if none is available.
+     */
+    private MetadataExtracter findBestExtracter(String sourceMimetype)
+    {
+        double bestReliability = -1;
+        long bestTime = Long.MAX_VALUE;
+        logger.debug("Finding best extracter for " + sourceMimetype);
+
+        MetadataExtracter bestExtracter = null;
+
+        for (MetadataExtracter ext : extracters)
+        {
+            double r = ext.getReliability(sourceMimetype);
+            if (r <= 0.0)
+            {
+                // extraction not achievable
+                continue;
+            }
+            else if (r == bestReliability) // equally reliable: keep the faster one
+            {
+                long time = ext.getExtractionTime();
+                if (time < bestTime)
+                {
+                    bestExtracter = ext;
+                    bestTime = time;
+                }
+            }
+            else if (r > bestReliability) // more reliable: wins regardless of time
+            {
+                bestExtracter = ext;
+                bestReliability = r;
+                bestTime = ext.getExtractionTime();
+            }
+        }
+        return bestExtracter;
+    }
 }
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
@@ -1,101 +1,101 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Map;
-
-import org.alfresco.model.ContentModel;
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentIOException;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-import org.apache.poi.hpsf.PropertySet;
-import org.apache.poi.hpsf.PropertySetFactory;
-import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.poifs.eventfilesystem.POIFSReader;
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
-
-/**
- * Office file format Metadata Extracter
- * 
- * @author Jesper Steen Møller
- */
-public class OfficeMetadataExtracter extends AbstractMetadataExtracter
-{
-    public static String[] SUPPORTED_MIMETYPES = new String[] {
-        MimetypeMap.MIMETYPE_WORD,
-        MimetypeMap.MIMETYPE_EXCEL,
-        MimetypeMap.MIMETYPE_PPT};
-
-    public OfficeMetadataExtracter()
-    {
-        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
-    }
-
-    public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
-    {
-        POIFSReaderListener readerListener = new POIFSReaderListener()
-        {
-            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
-            {
-                try
-                {
-                    PropertySet ps = PropertySetFactory.create(event.getStream());
-                    if (ps instanceof SummaryInformation)
-                    {
-                        SummaryInformation si = (SummaryInformation) ps;
-                        
-                        // Titled aspect
-                        trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
-                        trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
-
-                        // Auditable aspect
-                        trimPut(ContentModel.PROP_CREATED, si.getCreateDateTime(), destination);
-                        trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination); 
-                        trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
-                    }
-                }
-                catch (Exception ex)
-                {
-                    throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
-                }
-            }
-        };
-        
-        InputStream is = null;
-        try
-        {
-            is = reader.getContentInputStream();
-            POIFSReader poiFSReader = new POIFSReader();
-            poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
-            poiFSReader.read(is);
-        }
-        finally
-        {
-            if (is != null)
-            {
-                try { is.close(); } catch (IOException e) {}
-            }
-        }
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.PropertySetFactory;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+
+/**
+ * Office file format Metadata Extracter for the Word, Excel and PPT mimetypes, built on POI.
+ * 
+ * @author Jesper Steen Møller
+ */
+public class OfficeMetadataExtracter extends AbstractMetadataExtracter
+{
+    public static String[] SUPPORTED_MIMETYPES = new String[] {
+        MimetypeMap.MIMETYPE_WORD,
+        MimetypeMap.MIMETYPE_EXCEL,
+        MimetypeMap.MIMETYPE_PPT};
+
+    public OfficeMetadataExtracter()
+    {
+        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
+    }
+
+    /** Reads the summary information stream via POI and hands title, subject, dates and author to trimPut. */
+    public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
+    {
+        POIFSReaderListener summaryListener = new POIFSReaderListener()
+        {
+            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
+            {
+                try
+                {
+                    PropertySet propertySet = PropertySetFactory.create(event.getStream());
+                    if (!(propertySet instanceof SummaryInformation))
+                    {
+                        return;
+                    }
+                    SummaryInformation summary = (SummaryInformation) propertySet;
+                    // Titled aspect
+                    trimPut(ContentModel.PROP_TITLE, summary.getTitle(), destination);
+                    trimPut(ContentModel.PROP_DESCRIPTION, summary.getSubject(), destination);
+                    // Auditable aspect
+                    trimPut(ContentModel.PROP_CREATED, summary.getCreateDateTime(), destination);
+                    trimPut(ContentModel.PROP_MODIFIED, summary.getLastSaveDateTime(), destination);
+                    trimPut(ContentModel.PROP_AUTHOR, summary.getAuthor(), destination);
+                }
+                catch (Exception ex)
+                {
+                    throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
+                }
+            }
+        };
+
+        InputStream contentStream = null;
+        try
+        {
+            contentStream = reader.getContentInputStream();
+            POIFSReader poiReader = new POIFSReader();
+            poiReader.registerListener(summaryListener, SummaryInformation.DEFAULT_STREAM_NAME);
+            poiReader.read(contentStream);
+        }
+        finally
+        {
+            if (contentStream != null)
+            {
+                try { contentStream.close(); } catch (IOException e) {}
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracter.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
+ * Copyright (C) 2005 Jesper Steen Møller
 *
 * Licensed under the Mozilla Public License version 1.1 
 * with a permitted attribution clause. You may obtain a
--- a/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenOfficeMetadataExtracterTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
+ * Copyright (C) 2005 Jesper Steen Møller
 *
 * Licensed under the Mozilla Public License version 1.1 
 * with a permitted attribution clause. You may obtain a
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -1,75 +1,75 @@
-/*
- * Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
- *
- * Licensed under the Mozilla Public License version 1.1 
- * with a permitted attribution clause. You may obtain a
- * copy of the License at
- *
- *   http://www.alfresco.org/legal/license.txt
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific
- * language governing permissions and limitations under the
- * License.
- */
-package org.alfresco.repo.content.metadata;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Serializable;
-import java.util.Calendar;
-import java.util.Map;
-
-import org.alfresco.model.ContentModel;
-import org.alfresco.repo.content.MimetypeMap;
-import org.alfresco.service.cmr.repository.ContentReader;
-import org.alfresco.service.namespace.QName;
-import org.pdfbox.pdmodel.PDDocument;
-import org.pdfbox.pdmodel.PDDocumentInformation;
-
-/**
- * 
- * @author Jesper Steen Møller
- */
-public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
-{
-    public PdfBoxMetadataExtracter()
-    {
-        super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
-    }
-
-    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
-    {
-        PDDocument pdf = null;
-        InputStream is = null;
-        try
-        {
-            is = reader.getContentInputStream();
-            // stream the document in
-            pdf = PDDocument.load(is);
-            // Scoop out the metadata
-            PDDocumentInformation docInfo = pdf.getDocumentInformation();
-
-            trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
-            trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
-            trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
-
-            Calendar created = docInfo.getCreationDate();
-            if (created != null)
-                destination.put(ContentModel.PROP_CREATED, created.getTime());
-        }
-        finally
-        {
-            if (is != null)
-            {
-                try { is.close(); } catch (IOException e) {}
-            }
-            if (pdf != null)
-            {
-                try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
-            }
-        }
-    }
-}
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Calendar;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.PDDocumentInformation;
+
+/**
+ * Metadata extracter for PDF content, reading the document information through PDFBox.
+ * @author Jesper Steen Møller
+ */
+public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
+{
+    public PdfBoxMetadataExtracter()
+    {
+        super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
+    }
+
+    /** Loads the PDF, hands author, title and subject to trimPut and stores a non-null creation date. */
+    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
+    {
+        PDDocument pdf = null;
+        InputStream is = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            pdf = PDDocument.load(is);
+            PDDocumentInformation docInfo = pdf.getDocumentInformation();
+            trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
+            trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
+            trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
+            Calendar created = docInfo.getCreationDate();
+            if (created != null)
+            {
+                destination.put(ContentModel.PROP_CREATED, created.getTime());
+            }
+        }
+        finally
+        {
+            // closing is best-effort: failures go to the class logger instead of stderr or nowhere
+            if (is != null)
+            {
+                try { is.close(); } catch (IOException e) { logger.debug("Failed to close PDF content stream", e); }
+            }
+            if (pdf != null)
+            {
+                try { pdf.close(); } catch (Throwable e) { logger.warn("Failed to close PDF document", e); }
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -1,43 +1,43 @@
-package org.alfresco.repo.content.metadata;
-
-import org.alfresco.repo.content.MimetypeMap;
-
-/**
- * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
- * 
- * @author Jesper Steen Møller
- */
-public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
-{
-    private MetadataExtracter extracter;
-
-    @Override
-    public void setUp() throws Exception
-    {
-        super.setUp();
-        extracter = new PdfBoxMetadataExtracter();
-    }
-
-    /**
-     * @return Returns the same transformer regardless - it is allowed
-     */
-    protected MetadataExtracter getExtracter()
-    {
-        return extracter;
-    }
-
-    public void testReliability() throws Exception
-    {
-        double reliability = 0.0;
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
-        assertEquals("Mimetype should not be supported", 0.0, reliability);
-
-        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
-        assertEquals("Mimetype should be supported", 1.0, reliability);
-    }
-
-    public void testPdfExtraction() throws Exception
-    {
-        testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
-    }
-}
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+
+/**
+ * Checks the reliability values and the PDF extraction of the PDFBox based extracter.
+ * 
+ * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
+ * @author Jesper Steen Møller
+ */
+public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private MetadataExtracter extracter;
+
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+        this.extracter = new PdfBoxMetadataExtracter();
+    }
+
+    /** @return the one extracter instance created in <code>setUp</code>, whatever the request */
+    protected MetadataExtracter getExtracter()
+    {
+        return this.extracter;
+    }
+
+    public void testReliability() throws Exception
+    {
+        // plain text must be reported with a reliability of 0.0
+        double plainTextReliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertEquals("Mimetype should not be supported", 0.0, plainTextReliability);
+
+        // PDF must be reported with a reliability of 1.0
+        double pdfReliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
+        assertEquals("Mimetype should be supported", 1.0, pdfReliability);
+    }
+
+    public void testPdfExtraction() throws Exception
+    {
+        testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
+    }
+}