Moving to root below branch label

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2005 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-07-24 17:32:48 +00:00 · 2005-12-08 07:13:07 +00:00
commit e1e6508fec
1095 changed files with 230566 additions and 0 deletions
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracter.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.alfresco.service.namespace.QName;
+
+/**
+ * Base class for {@link MetadataExtracter} implementations that handle a fixed
+ * set of mimetypes, all with the same reliability score and the same
+ * extraction-time estimate.
+ * <p>
+ * Subclasses supply the actual extraction logic and can use
+ * {@link #trimPut(QName, Object, Map)} to store extracted values.
+ * 
+ * @author Jesper Steen Møller
+ */
+abstract public class AbstractMetadataExtracter implements MetadataExtracter
+{
+
+    /** Mimetypes handled by this extracter; fixed at construction time. */
+    private final Set<String> mimetypes;
+    /** Score reported by {@link #getReliability(String)} for handled mimetypes. */
+    private final double reliability;
+    /** Estimate reported by {@link #getExtractionTime()}. */
+    private final long extractionTime;
+
+    /**
+     * @param mimetype the single mimetype this extracter handles
+     * @param reliability the score to report for that mimetype
+     * @param extractionTime the extraction time estimate to report
+     */
+    protected AbstractMetadataExtracter(String mimetype, double reliability, long extractionTime)
+    {
+        this(Collections.singleton(mimetype), reliability, extractionTime);
+    }
+
+    /**
+     * @param mimetypes the mimetypes this extracter handles; the set is copied,
+     *        so later changes made by the caller have no effect
+     * @param reliability the score to report for those mimetypes
+     * @param extractionTime the extraction time estimate to report
+     * @throws NullPointerException if <code>mimetypes</code> is <code>null</code>
+     */
+    protected AbstractMetadataExtracter(Set<String> mimetypes, double reliability, long extractionTime)
+    {
+        // Defensive copy: the supported mimetypes must not change behind our
+        // back if the caller keeps modifying the set it passed in.
+        this.mimetypes = Collections.unmodifiableSet(new java.util.HashSet<String>(mimetypes));
+        this.reliability = reliability;
+        this.extractionTime = extractionTime;
+    }
+
+    /**
+     * @param sourceMimetype the mimetype to check
+     * @return the configured reliability if the mimetype is one of those
+     *         handled by this extracter, otherwise <code>0.0</code>
+     */
+    public double getReliability(String sourceMimetype)
+    {
+        return mimetypes.contains(sourceMimetype) ? reliability : 0.0;
+    }
+
+    /**
+     * @return the extraction time estimate given at construction
+     */
+    public long getExtractionTime()
+    {
+        return extractionTime;
+    }
+
+    /**
+     * Stores a value in the map unless it is <code>null</code> or a string
+     * that is empty after trimming.
+     * <p>
+     * Strings are stored trimmed. Other {@link Serializable} values are stored
+     * as they are, and anything else is stored as its <code>toString()</code>
+     * representation.
+     * 
+     * @param prop Alfresco's <code>ContentModel.PROP_</code> to set.
+     * @param value Value to set it to
+     * @param destination Map into which to set it
+     * @return true, if set, false otherwise
+     */
+    protected boolean trimPut(QName prop, Object value, Map<QName, Serializable> destination)
+    {
+        if (value == null)
+        {
+            return false;
+        }
+        if (value instanceof String)
+        {
+            String svalue = ((String) value).trim();
+            if (svalue.length() == 0)
+            {
+                return false;
+            }
+            destination.put(prop, svalue);
+            return true;
+        }
+        if (value instanceof Serializable)
+        {
+            destination.put(prop, (Serializable) value);
+        }
+        else
+        {
+            // Not storable as-is; fall back to the textual form
+            destination.put(prop, value.toString());
+        }
+        return true;
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMetadataExtracterTest.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.filestore.FileContentReader;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.alfresco.util.BaseSpringTest;
+import org.alfresco.util.TempFileProvider;
+
+/**
+ * Provides a base set of tests for testing 
+ * {@link org.alfresco.repo.content.metadata.MetadataExtracter} implementations.
+ * <p>
+ * Subclasses provide the extracter under test through {@link #getExtracter()}
+ * and can use {@link #extractFromExtension(String, String)} together with
+ * {@link #testCommonMetadata(Map)} to check one of the <code>quick.*</code>
+ * sample files.
+ * 
+ * @author Jesper Steen Møller
+ */
+public abstract class AbstractMetadataExtracterTest extends BaseSpringTest
+{
+    protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
+    protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
+    protected static final String QUICK_CREATOR = "Nevin Nollop";
+    protected static final String[] QUICK_WORDS = new String[] { "quick", "brown", "fox", "jumps", "lazy", "dog" };
+
+    protected MimetypeMap mimetypeMap;
+    protected MetadataExtracter transformer;
+
+    /**
+     * @param mimetypeMap the mimetype map made available to the tests
+     */
+    public final void setMimetypeMap(MimetypeMap mimetypeMap)
+    {
+        this.mimetypeMap = mimetypeMap;
+    }
+
+    /**
+     * @return Returns the extracter that the tests must run against
+     */
+    protected abstract MetadataExtracter getExtracter();
+
+    /**
+     * Ensures that the temp locations are cleaned out before the tests start
+     */
+    @Override
+    protected void onSetUpInTransaction() throws Exception
+    {
+        // perform a little cleaning up
+        long now = System.currentTimeMillis();
+        TempFileProvider.TempFileCleanerJob.removeFiles(now);
+    }
+
+    /**
+     * Check that all objects are present
+     */
+    public void testSetUp() throws Exception
+    {
+        assertNotNull("MimetypeMap not present", mimetypeMap);
+        // check that the quick resources are available
+        File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile("txt");
+        assertNotNull("quick.* files should be available from Tests", sourceFile);
+    }
+
+    /**
+     * Helper method to load one of the "The quick brown fox" files from the
+     * classpath.
+     * 
+     * @param extension the extension of the file required
+     * @return Returns a test resource loaded from the classpath or
+     *         <tt>null</tt> if no resource could be found.
+     * @throws IOException
+     */
+    public static File loadQuickTestFile(String extension) throws IOException
+    {
+        URL url = AbstractMetadataExtracterTest.class.getClassLoader().getResource("quick/quick." + extension);
+        if (url == null)
+        {
+            return null;
+        }
+        File file;
+        try
+        {
+            // URL.getFile() leaves escapes such as %20 in place, so a classpath
+            // located under a directory with spaces would never be found.
+            // Going through a URI decodes the path properly.
+            file = new File(url.toURI());
+        }
+        catch (java.net.URISyntaxException e)
+        {
+            // The URL is not strictly formatted (e.g. unescaped spaces); the raw
+            // path is the best that can be done
+            file = new File(url.getFile());
+        }
+        catch (IllegalArgumentException e)
+        {
+            // Not a plain file: URL (e.g. a resource inside a jar); keep the
+            // original behaviour of trying the raw path
+            file = new File(url.getFile());
+        }
+        if (!file.exists())
+        {
+            return null;
+        }
+        return file;
+    }
+
+    /**
+     * Runs the extracter under test against the <code>quick.*</code> sample
+     * file with the given extension.
+     * 
+     * @param ext the extension of the sample file to load
+     * @param mimetype the mimetype to set on the reader handed to the extracter
+     * @return Returns the properties that the extracter put into the map
+     * @throws FileNotFoundException if the sample file is not available
+     */
+    public Map<QName, Serializable> extractFromExtension(String ext, String mimetype) throws Exception
+    {
+        Map<QName, Serializable> destination = new HashMap<QName, Serializable>();
+
+        // attempt to get a source file for each mimetype
+        File sourceFile = AbstractMetadataExtracterTest.loadQuickTestFile(ext);
+        if (sourceFile == null)
+        {
+            throw new FileNotFoundException("No quick." + ext + " file found for test");
+        }
+
+        // construct a reader onto the source file
+        ContentReader sourceReader = new FileContentReader(sourceFile);
+        sourceReader.setMimetype(mimetype);
+        getExtracter().extract(sourceReader, destination);
+        return destination;
+    }
+
+    /**
+     * Asserts that the title, description and creator in the given map match
+     * the values expected for the <code>quick.*</code> sample files.
+     * 
+     * @param destination the properties produced by an extraction
+     */
+    public void testCommonMetadata(Map<QName, Serializable> destination)
+    {
+        assertEquals(QUICK_TITLE, destination.get(ContentModel.PROP_TITLE));
+        assertEquals(QUICK_DESCRIPTION, destination.get(ContentModel.PROP_DESCRIPTION));
+        assertEquals(QUICK_CREATOR, destination.get(ContentModel.PROP_CREATOR));
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracter.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.swing.text.ChangedCharSetException;
+import javax.swing.text.MutableAttributeSet;
+import javax.swing.text.html.HTML;
+import javax.swing.text.html.HTMLEditorKit;
+import javax.swing.text.html.parser.ParserDelegator;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Extracts the title, creator and description of an HTML document using the
+ * Swing HTML parser.
+ * <p>
+ * The title is taken from the <code>title</code> element inside
+ * <code>head</code>; creator and description come from <code>meta</code>
+ * tags named <code>creator</code>, <code>author</code>,
+ * <code>dc.creator</code>, <code>description</code> or
+ * <code>dc.description</code>.
+ * 
+ * @author Jesper Steen Møller
+ */
+public class HtmlMetadataExtracter extends AbstractMetadataExtracter
+{
+
+    private static final Log logger = LogFactory.getLog(HtmlMetadataExtracter.class);
+
+    /** Upper bound on the number of times the content is parsed. */
+    private static final int MAX_PARSE_ATTEMPTS = 3;
+    /** Prefix of the charset parameter inside a content-type value. */
+    private static final String CHARSET_MARKER = "charset=";
+
+    public HtmlMetadataExtracter()
+    {
+        super(MimetypeMap.MIMETYPE_HTML, 1.0, 1000);
+    }
+
+    /**
+     * Parses the HTML content and copies the properties found into the
+     * destination map.
+     * <p>
+     * The first attempt reads the content with the platform default charset.
+     * If the parser reports that the document declares its own charset, the
+     * content is read again with that charset (when the JVM supports it) and
+     * further charset declarations are ignored. Nothing is written to the
+     * destination unless a parse runs to completion.
+     * 
+     * @param reader the source of the HTML content
+     * @param destination the map receiving the extracted properties
+     * @throws ContentIOException if reading or parsing the content fails
+     */
+    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
+    {
+        // null means "use the platform default charset"
+        String charsetGuess = null;
+        try
+        {
+            for (int tries = 0; tries < MAX_PARSE_ATTEMPTS; tries++)
+            {
+                // Fresh map and callback per attempt so that nothing from an
+                // aborted parse leaks into the next one
+                Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
+                Reader r = null;
+                InputStream cis = null;
+                try
+                {
+                    cis = reader.getContentInputStream();
+                    r = openReader(cis, charsetGuess);
+                    HTMLEditorKit.Parser parser = new ParserDelegator();
+                    // After the first attempt, charset declarations are ignored
+                    parser.parse(r, new MetadataCallback(tempDestination), tries > 0);
+                    destination.putAll(tempDestination);
+                    return;
+                }
+                catch (ChangedCharSetException ccse)
+                {
+                    charsetGuess = parseCharset(ccse.getCharSetSpec(), ccse.keyEqualsCharSet());
+                    // The stream has been consumed; get a new reader onto the same content
+                    reader = reader.getReader();
+                }
+                finally
+                {
+                    try
+                    {
+                        if (r != null)
+                        {
+                            r.close();
+                        }
+                    }
+                    finally
+                    {
+                        // Make sure the stream is released even if closing the reader failed
+                        if (cis != null)
+                        {
+                            cis.close();
+                        }
+                    }
+                }
+            }
+        }
+        catch (IOException e)
+        {
+            throw new ContentIOException("HTML metadata extraction failed: \n" + "   reader: " + reader, e);
+        }
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Gave up HTML metadata extraction after " + MAX_PARSE_ATTEMPTS + " attempts: " + reader);
+        }
+    }
+
+    /**
+     * Wraps the stream in a reader using the given charset, falling back to
+     * the platform default if no charset is given or it is not supported.
+     */
+    private Reader openReader(InputStream is, String charsetName)
+    {
+        if (charsetName != null)
+        {
+            try
+            {
+                return new InputStreamReader(is, charsetName);
+            }
+            catch (java.io.UnsupportedEncodingException e)
+            {
+                if (logger.isDebugEnabled())
+                {
+                    logger.debug("Charset declared by HTML document is not supported: " + charsetName);
+                }
+            }
+        }
+        return new InputStreamReader(is);
+    }
+
+    /**
+     * Pulls the charset name out of the specification reported by the parser.
+     * 
+     * @param charsetSpec the value carried by the {@link ChangedCharSetException}
+     * @param specIsCharset true if the specification is the charset name itself
+     *        rather than a content-type value
+     * @return the charset name, or <code>null</code> if none could be found
+     */
+    private static String parseCharset(String charsetSpec, boolean specIsCharset)
+    {
+        if (charsetSpec == null)
+        {
+            return null;
+        }
+        String charset = charsetSpec;
+        int begin = charsetSpec.toLowerCase(java.util.Locale.ENGLISH).indexOf(CHARSET_MARKER);
+        if (begin >= 0)
+        {
+            // e.g. "text/html; charset=ISO-8859-1" or just "charset=ISO-8859-1"
+            charset = charsetSpec.substring(begin + CHARSET_MARKER.length());
+            int end = charset.indexOf(';');
+            if (end >= 0)
+            {
+                charset = charset.substring(0, end);
+            }
+        }
+        else if (!specIsCharset)
+        {
+            // A content-type value without any charset parameter
+            return null;
+        }
+        charset = charset.trim();
+        if (charset.length() >= 2)
+        {
+            char first = charset.charAt(0);
+            char last = charset.charAt(charset.length() - 1);
+            if ((first == '"' || first == '\'') && first == last)
+            {
+                charset = charset.substring(1, charset.length() - 1).trim();
+            }
+        }
+        return (charset.length() == 0) ? null : charset;
+    }
+
+    /**
+     * Parser callback that collects the title and the creator/description
+     * <code>meta</code> values into a map. Holds per-parse state, so a new
+     * instance is needed for every parse.
+     */
+    private class MetadataCallback extends HTMLEditorKit.ParserCallback
+    {
+        private final Map<QName, Serializable> target;
+        /** Non-null only while inside a <code>title</code> element within <code>head</code>. */
+        private StringBuffer title = null;
+        private boolean inHead = false;
+
+        MetadataCallback(Map<QName, Serializable> target)
+        {
+            this.target = target;
+        }
+
+        @Override
+        public void handleText(char[] data, int pos)
+        {
+            if (title != null)
+            {
+                title.append(data);
+            }
+        }
+
+        // handleComment is left at its no-op default.
+        // Perhaps sniff for Office 9+ metadata in there?
+
+        @Override
+        public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
+        {
+            if (HTML.Tag.HEAD.equals(t))
+            {
+                inHead = true;
+            }
+            else if (HTML.Tag.TITLE.equals(t) && inHead)
+            {
+                title = new StringBuffer();
+            }
+            else
+            {
+                // meta tags may be reported as start tags as well
+                handleSimpleTag(t, a, pos);
+            }
+        }
+
+        @Override
+        public void handleEndTag(HTML.Tag t, int pos)
+        {
+            if (HTML.Tag.HEAD.equals(t))
+            {
+                inHead = false;
+            }
+            else if (HTML.Tag.TITLE.equals(t) && title != null)
+            {
+                // title is null when the closing tag has no tracked opening
+                // tag, e.g. a title element outside of head
+                trimPut(ContentModel.PROP_TITLE, title.toString(), target);
+                title = null;
+            }
+        }
+
+        @Override
+        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
+        {
+            if (!HTML.Tag.META.equals(t))
+            {
+                return;
+            }
+            Object nameO = a.getAttribute(HTML.Attribute.NAME);
+            Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
+            if (nameO == null || valueO == null)
+            {
+                return;
+            }
+
+            String name = nameO.toString();
+
+            if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
+                    || name.equalsIgnoreCase("dc.creator"))
+            {
+                trimPut(ContentModel.PROP_CREATOR, valueO, target);
+            }
+            if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
+            {
+                trimPut(ContentModel.PROP_DESCRIPTION, valueO, target);
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/HtmlMetadataExtracterTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Runs the common extracter tests against {@link HtmlMetadataExtracter}.
+ * 
+ * @see org.alfresco.repo.content.metadata.HtmlMetadataExtracter
+ * @author Jesper Steen Møller
+ */
+public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private static final Log logger = LogFactory.getLog(HtmlMetadataExtracterTest.class);
+    private MetadataExtracter extracter;
+
+    /**
+     * Runs the base class set-up and then creates the extracter under test.
+     */
+    @Override
+    public void onSetUpInTransaction() throws Exception
+    {
+        // Without this call the base class set-up would be silently skipped
+        super.onSetUpInTransaction();
+        extracter = new HtmlMetadataExtracter();
+    }
+
+    /**
+     * @return Returns the same transformer regardless - it is allowed
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return extracter;
+    }
+
+    /**
+     * Checks that only HTML is reported as supported.
+     */
+    public void testReliability() throws Exception
+    {
+        double reliability = 0.0;
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertEquals("Mimetype text should not be supported", 0.0, reliability);
+
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_HTML);
+        assertEquals("HTML should be supported", 1.0, reliability);
+    }
+
+    /**
+     * Checks the properties extracted from the <code>quick.html</code> sample.
+     */
+    public void testHtmlExtraction() throws Exception
+    {
+        testCommonMetadata(extractFromExtension("html", MimetypeMap.MIMETYPE_HTML));
+    }
+
+}
--- a/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MP3MetadataExtracter.java
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2005 Alfresco, Inc.
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.alfresco.util.GUID;
+import org.farng.mp3.AbstractMP3FragmentBody;
+import org.farng.mp3.MP3File;
+import org.farng.mp3.TagException;
+import org.farng.mp3.id3.AbstractID3v2;
+import org.farng.mp3.id3.AbstractID3v2Frame;
+import org.farng.mp3.id3.ID3v1;
+import org.farng.mp3.lyrics3.AbstractLyrics3;
+import org.farng.mp3.lyrics3.Lyrics3v2;
+import org.farng.mp3.lyrics3.Lyrics3v2Field;
+
+/**
+ * @author Roy Wetherall
+ */
+public class MP3MetadataExtracter extends AbstractMetadataExtracter
+{
+    private static final QName PROP_ALBUM_TITLE = QName.createQName("{music}albumTitle");
+    private static final QName PROP_SONG_TITLE = QName.createQName("{music}songTitle");;
+    private static final QName PROP_ARTIST = QName.createQName("{music}artist");;
+    private static final QName PROP_COMMENT = QName.createQName("{music}comment");;
+    private static final QName PROP_YEAR_RELEASED = QName.createQName("{music}yearReleased");;
+    private static final QName PROP_TRACK_NUMBER = QName.createQName("{music}trackNumber");;
+    private static final QName PROP_GENRE = QName.createQName("{music}genre");;
+    private static final QName PROP_COMPOSER = QName.createQName("{music}composer");;
+    private static final QName PROP_LYRICS = QName.createQName("{music}lyrics");;
+
+    public MP3MetadataExtracter()
+    {
+        super(MimetypeMap.MIMETYPE_MP3, 1.0, 1000);
+    }
+
+    /**
+     * @see org.alfresco.repo.content.metadata.MetadataExtracter#extract(org.alfresco.service.cmr.repository.ContentReader, java.util.Map)
+     */
+    public void extract(ContentReader reader,
+            Map<QName, Serializable> destination) throws ContentIOException
+    {
+        try
+        {
+            Map<QName, Serializable> props = new HashMap<QName, Serializable>();            
+            
+            // Create a temp file
+            File tempFile = File.createTempFile(GUID.generate(), ".tmp");
+            try
+            {
+                reader.getContent(tempFile);
+                
+                // Create the MP3 object from the file
+                MP3File mp3File = new MP3File(tempFile);
+                
+                ID3v1 id3v1 = mp3File.getID3v1Tag();
+                if (id3v1 != null)
+                {
+                    setTagValue(props, PROP_ALBUM_TITLE, id3v1.getAlbum());
+                    setTagValue(props, PROP_SONG_TITLE, id3v1.getTitle());
+                    setTagValue(props, PROP_ARTIST, id3v1.getArtist());
+                    setTagValue(props, PROP_COMMENT, id3v1.getComment());
+                    setTagValue(props, PROP_YEAR_RELEASED, id3v1.getYear());
+                    
+                    // TODO sort out the genre
+                    //setTagValue(props, MusicModel.PROP_GENRE, id3v1.getGenre());
+                    
+                    // TODO sort out the size
+                    //setTagValue(props, MusicModel.PROP_SIZE, id3v1.getSize());            
+                }
+                
+                AbstractID3v2 id3v2 = mp3File.getID3v2Tag();
+                if (id3v2 != null)
+                {
+                    setTagValue(props, PROP_SONG_TITLE, getID3V2Value(id3v2, "TIT2"));
+                    setTagValue(props, PROP_ARTIST, getID3V2Value(id3v2, "TPE1"));
+                    setTagValue(props, PROP_ALBUM_TITLE, getID3V2Value(id3v2, "TALB"));
+                    setTagValue(props, PROP_YEAR_RELEASED, getID3V2Value(id3v2, "TDRC"));
+                    setTagValue(props, PROP_COMMENT, getID3V2Value(id3v2, "COMM"));
+                    setTagValue(props, PROP_TRACK_NUMBER, getID3V2Value(id3v2, "TRCK"));
+                    setTagValue(props, PROP_GENRE, getID3V2Value(id3v2, "TCON"));
+                    setTagValue(props, PROP_COMPOSER, getID3V2Value(id3v2, "TCOM"));
+                    
+                    // TODO sort out the lyrics
+                    //System.out.println("Lyrics: " + getID3V2Value(id3v2, "SYLT"));
+                    //System.out.println("Lyrics: " + getID3V2Value(id3v2, "USLT"));
+                }
+                
+                AbstractLyrics3 lyrics3Tag = mp3File.getLyrics3Tag();
+                if (lyrics3Tag != null)
+                {
+                    System.out.println("Lyrics3 tag found.");
+                    if (lyrics3Tag instanceof Lyrics3v2)
+                    {
+                        setTagValue(props, PROP_SONG_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TIT2"));
+                        setTagValue(props, PROP_ARTIST, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TPE1"));
+                        setTagValue(props, PROP_ALBUM_TITLE, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TALB"));
+                        setTagValue(props, PROP_COMMENT, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "COMM"));
+                        setTagValue(props, PROP_LYRICS, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "SYLT"));
+                        setTagValue(props, PROP_COMPOSER, getLyrics3v2Value((Lyrics3v2)lyrics3Tag, "TCOM"));
+                    }
+                }
+                
+            }
+            finally
+            {
+                tempFile.delete();
+            }
+            
+            // Set the destination values
+            if (props.get(PROP_SONG_TITLE) != null)
+            {
+                destination.put(ContentModel.PROP_TITLE, props.get(PROP_SONG_TITLE));
+            }
+            if (props.get(PROP_ARTIST) != null)
+            {
+                destination.put(ContentModel.PROP_CREATOR, props.get(PROP_ARTIST));
+            }
+            String description = getDescription(props);
+            if (description != null)
+            {
+                destination.put(ContentModel.PROP_DESCRIPTION, description);
+            }
+        }       
+        catch (IOException ioException)
+        {
+            // TODO sort out exception handling
+            throw new RuntimeException("Error reading mp3 file.", ioException);
+        }
+        catch (TagException tagException)
+        {
+            // TODO sort out exception handling
+            throw new RuntimeException("Error reading mp3 tag information.", tagException);
+        }
+    }
+    
+
+    /**
+     * Generate the description
+     * 
+     * @param props     the properties extracted from the file
+     * @return          the description
+     */
+    private String getDescription(Map<QName, Serializable> props)
+    {
+        StringBuilder result = new StringBuilder();
+        if (props.get(PROP_SONG_TITLE) != null && props.get(PROP_ARTIST) != null && props.get(PROP_ALBUM_TITLE) != null)
+        {
+            result
+                .append(props.get(PROP_SONG_TITLE))
+                .append(" - ")
+                .append(props.get(PROP_ALBUM_TITLE))
+                .append(" (")
+                .append(props.get(PROP_ARTIST))
+                .append(")");
+                
+        }
+        
+        return result.toString();
+    }
+
+    /**
+     * 
+     * @param props
+     * @param propQName
+     * @param propvalue
+     */
+    private void setTagValue(Map<QName, Serializable> props, QName propQName, String propvalue)
+    {
+        if (propvalue != null && propvalue.length() != 0)
+        {
+            trimPut(propQName, propvalue, props);
+        }       
+    }
+
+    /**
+     * 
+     * @param lyrics3Tag
+     * @param name
+     * @return
+     */
+    private String getLyrics3v2Value(Lyrics3v2 lyrics3Tag, String name) 
+    {
+        String result = "";
+        Lyrics3v2Field field = lyrics3Tag.getField(name);
+        if (field != null)
+        {
+            AbstractMP3FragmentBody body = field.getBody();
+            if (body != null)
+            {
+                result = (String)body.getObject("Text");                
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Get the ID3V2 tag value in a safe way
+     * 
+     * @param id3v2
+     * @param name
+     * @return
+     */
+    private String getID3V2Value(AbstractID3v2 id3v2, String name)
+    {
+        String result = "";
+        
+        AbstractID3v2Frame frame = id3v2.getFrame(name);
+        if (frame != null)
+        {
+            AbstractMP3FragmentBody body = frame.getBody();
+            if (body != null)
+            {
+                result = (String)body.getObject("Text");                
+            }
+        }
+        
+        return result;
+    }
+
+}
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+
+/**
+ * Contract for components that pull metadata properties out of content.
+ * <p>
+ * An extracter reports how well it handles a given mimetype and roughly how
+ * long an extraction takes, so that a choice can be made between several
+ * extracters before any content is read.
+ * 
+ * @author Jesper Steen Møller
+ */
+public interface MetadataExtracter
+{
+    /**
+     * Reports how accurately this extracter can extract metadata from content
+     * of the given mimetype.
+     * 
+     * @param sourceMimetype the source mimetype
+     * @return Returns a score between 0.0 and 1.0, where 0.0 means that the
+     *         extraction cannot be performed at all and 1.0 means that it can
+     *         be performed perfectly.
+     */
+    double getReliability(String sourceMimetype);
+
+    /**
+     * Gives an estimate, usually a worst case guess, of the duration of an
+     * extraction.
+     * <p>
+     * The value is used up front to choose between extracters that are
+     * equally reliable for a specific extraction.
+     * 
+     * @return Returns the approximate number of milliseconds per extraction
+     */
+    long getExtractionTime();
+
+    /**
+     * Reads the content offered by the reader and puts the metadata found
+     * into the supplied map.
+     * <p>
+     * Whether the extraction is viable can be established beforehand by
+     * calling {@link #getReliability(String)}.
+     * <p>
+     * The source mimetype <b>must</b> be available on the
+     * {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
+     * of the reader.
+     * 
+     * @param reader the source of the content
+     * @param destination the destination of the extraction
+     * @throws ContentIOException if an IO exception occurs
+     */
+    void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
+
+}
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2005 Jesper Steen M<>ller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.alfresco.error.AlfrescoRuntimeException;
+import org.alfresco.repo.content.MimetypeMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.springframework.util.Assert;
+
+/**
+ * Holds and provides the most appropriate metadate extracter for a particular
+ * mimetype.
+ * <p>
+ * The extracters themselves know how well they are able to extract metadata.
+ * 
+ * @see org.alfresco.repo.content.metadata.MetadataExtracter
+ * @author Jesper Steen M<>ller
+ */
+public class MetadataExtracterRegistry
+{
+    private static final Log logger = LogFactory.getLog(MetadataExtracterRegistry.class);
+
+    private List<MetadataExtracter> extracters;
+    private Map<String, MetadataExtracter> extracterCache;
+
+    private MimetypeMap mimetypeMap;
+    /** Controls read access to the cache */
+    private Lock extracterCacheReadLock;
+    /** controls write access to the cache */
+    private Lock extracterCacheWriteLock;
+
+    /**
+     * @param mimetypeMap all the mimetypes available to the system
+     */
+    public MetadataExtracterRegistry(MimetypeMap mimetypeMap)
+    {
+        Assert.notNull(mimetypeMap, "The MimetypeMap is mandatory");
+        this.mimetypeMap = mimetypeMap;
+
+        extracters = Collections.emptyList(); // just in case it isn't set
+        extracterCache = new HashMap<String, MetadataExtracter>(17);
+
+        // create lock objects for access to the cache
+        ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
+        extracterCacheReadLock = extractionCacheLock.readLock();
+        extracterCacheWriteLock = extractionCacheLock.writeLock();
+    }
+
+    /**
+     * Gets the best metadata extracter. This is a combination of the most
+     * reliable and the most performant extracter.
+     * <p>
+     * The result is cached for quicker access next time.
+     * 
+     * @param mimetype the source MIME of the extraction
+     * @return Returns a metadata extracter that can extract metadata from the
+     *         chosen MIME type.
+     */
+    public MetadataExtracter getExtracter(String sourceMimetype)
+    {
+        // check that the mimetypes are valid
+        if (!mimetypeMap.getMimetypes().contains(sourceMimetype))
+        {
+            throw new AlfrescoRuntimeException("Unknown extraction source mimetype: " + sourceMimetype);
+        }
+
+        MetadataExtracter extracter = null;
+        extracterCacheReadLock.lock();
+        try
+        {
+            if (extracterCache.containsKey(sourceMimetype))
+            {
+                // the translation has been requested before
+                // it might have been null
+                return extracterCache.get(sourceMimetype);
+            }
+        }
+        finally
+        {
+            extracterCacheReadLock.unlock();
+        }
+
+        // the translation has not been requested before
+        // get a write lock on the cache
+        // no double check done as it is not an expensive task
+        extracterCacheWriteLock.lock();
+        try
+        {
+            // find the most suitable transformer - may be empty list
+            extracter = findBestExtracter(sourceMimetype);
+            // store the result even if it is null
+            extracterCache.put(sourceMimetype, extracter);
+            return extracter;
+        }
+        finally
+        {
+            extracterCacheWriteLock.unlock();
+        }
+    }
+
+    /**
+     * @param sourceMimetype The MIME type under examination
+     * @return The fastest of the most reliable extracters in
+     *         <code>extracters</code> for the given MIME type.
+     */
+    private MetadataExtracter findBestExtracter(String sourceMimetype)
+    {
+        double bestReliability = -1;
+        long bestTime = Long.MAX_VALUE;
+        logger.debug("Finding best extracter for " + sourceMimetype);
+
+        MetadataExtracter bestExtracter = null;
+
+        for (MetadataExtracter ext : extracters)
+        {
+            double r = ext.getReliability(sourceMimetype);
+            if (r == bestReliability)
+            {
+                long time = ext.getExtractionTime();
+                if (time < bestTime)
+                {
+                    bestExtracter = ext;
+                    bestTime = time;
+                }
+            }
+            else if (r > bestReliability)
+            {
+                bestExtracter = ext;
+                bestReliability = r;
+                bestTime = ext.getExtractionTime();
+            }
+        }
+        return bestExtracter;
+    }
+
+    /**
+     * Provides a list of self-discovering extracters.
+     * 
+     * @param transformers all the available extracters that the registry can
+     *        work with
+     */
+    public void setExtracters(List<MetadataExtracter> extracters)
+    {
+        logger.debug("Setting " + extracters.size() + "new extracters.");
+
+        extracterCacheWriteLock.lock();
+        try
+        {
+            this.extracters = extracters;
+            this.extracterCache.clear();
+        }
+        finally
+        {
+            extracterCacheWriteLock.unlock();
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracter.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2005 Jesper Steen M<>ller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.PropertySetFactory;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+
+/**
+ * Metadata extracter for the Word, Excel and PowerPoint mimetypes. It reads
+ * the summary information property set of the compound document using POI.
+ * 
+ * @author Jesper Steen M&oslash;ller
+ */
+public class OfficeMetadataExtracter extends AbstractMetadataExtracter
+{
+
+    private static final Log logger = LogFactory.getLog(OfficeMetadataExtracter.class);
+    private static final String[] mimeTypes = new String[] { MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_EXCEL,
+            MimetypeMap.MIMETYPE_PPT };
+
+    public OfficeMetadataExtracter()
+    {
+        super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.0, 1000);
+    }
+
+    /**
+     * Reads the summary information stream of the document and copies the
+     * title, subject, creation and modification details to the destination.
+     * 
+     * @param reader the source of the content
+     * @param destination the map that receives the extracted properties
+     * @throws ContentIOException if the document cannot be read or the
+     *         property set cannot be processed
+     */
+    public void extract(ContentReader reader, final Map<QName, Serializable> destination) throws ContentIOException
+    {
+        POIFSReaderListener readerListener = new POIFSReaderListener()
+        {
+            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
+            {
+                try
+                {
+                    PropertySet ps = PropertySetFactory.create(event.getStream());
+                    // the listener is only registered for the summary information
+                    // stream; the document summary (company, manager) is not
+                    // mapped to any property
+                    if (ps instanceof SummaryInformation)
+                    {
+                        SummaryInformation si = (SummaryInformation) ps;
+                        // Titled aspect
+                        trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
+                        trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
+
+                        // Auditable aspect
+                        trimPut(ContentModel.PROP_CREATED, si.getCreateDateTime(), destination);
+                        trimPut(ContentModel.PROP_CREATOR, si.getAuthor(), destination);
+                        trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
+                        trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(), destination);
+                    }
+                }
+                catch (Exception ex)
+                {
+                    throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
+                }
+            }
+        };
+        // fully qualified to avoid relying on an import that the file lacks
+        java.io.InputStream is = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            POIFSReader r = new POIFSReader();
+            r.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
+            r.read(is);
+        }
+        catch (IOException e)
+        {
+            throw new ContentIOException("Compound Document SummaryInformation metadata extraction failed: \n"
+                    + "   reader: " + reader,
+                    e);
+        }
+        finally
+        {
+            // the stream was opened here, so it is closed here as well
+            if (is != null)
+            {
+                try
+                {
+                    is.close();
+                }
+                catch (IOException e)
+                {
+                    // a failure to close must not mask the outcome of the extraction
+                    logger.debug("Failed to close content stream: " + reader, e);
+                }
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/OfficeMetadataExtracterTest.java
@@ -0,0 +1,60 @@
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Checks the reliability scores reported by the Office extracter and runs
+ * the common metadata checks against Word, Excel and PowerPoint documents.
+ * 
+ * @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
+ * @author Jesper Steen M&oslash;ller
+ */
+public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private static final Log logger = LogFactory.getLog(OfficeMetadataExtracterTest.class);
+    private MetadataExtracter extracter;
+
+    /**
+     * Creates a fresh extracter instance for the test.
+     */
+    public void onSetUpInTransaction() throws Exception
+    {
+        this.extracter = new OfficeMetadataExtracter();
+    }
+
+    /**
+     * @return Returns the same extracter regardless - it is allowed
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return this.extracter;
+    }
+
+    /**
+     * Plain text must score 0.0 while each of the Office mimetypes must
+     * score 1.0.
+     */
+    public void testReliability() throws Exception
+    {
+        assertEquals(
+                "Mimetype text should not be supported",
+                0.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN));
+        assertEquals(
+                "Word should be supported",
+                1.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_WORD));
+        assertEquals(
+                "Excel should be supported",
+                1.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_EXCEL));
+        assertEquals(
+                "PowerPoint should be supported",
+                1.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_PPT));
+    }
+
+    /** Runs the common metadata checks against the "doc" sample. */
+    public void testWordExtraction() throws Exception
+    {
+        testCommonMetadata(
+                extractFromExtension("doc", MimetypeMap.MIMETYPE_WORD));
+    }
+
+    /** Runs the common metadata checks against the "xls" sample. */
+    public void testExcelExtraction() throws Exception
+    {
+        testCommonMetadata(
+                extractFromExtension("xls", MimetypeMap.MIMETYPE_EXCEL));
+    }
+
+    /** Runs the common metadata checks against the "ppt" sample. */
+    public void testPowerPointExtraction() throws Exception
+    {
+        testCommonMetadata(
+                extractFromExtension("ppt", MimetypeMap.MIMETYPE_PPT));
+    }
+
+}
--- a/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/OpenDocumentMetadataExtracter.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2005 Antti Jokipii
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.catcode.odf.ODFMetaFileAnalyzer;
+import com.catcode.odf.OpenDocumentMetadata;
+
+/**
+ * Metadata extractor for the
+ * {@link org.alfresco.repo.content.MimetypeMap#MIMETYPE_OPENDOCUMENT_TEXT MIMETYPE_OPENDOCUMENT_XXX}
+ * mimetypes.
+ * 
+ * @author Antti Jokipii
+ */
+public class OpenDocumentMetadataExtracter extends AbstractMetadataExtracter
+{
+    private static final Log logger = LogFactory.getLog(OpenDocumentMetadataExtracter.class);
+
+    private static final String[] mimeTypes = new String[] {
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_MASTER,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_WEB,
+            MimetypeMap.MIMETYPE_OPENDOCUMENT_DATABASE, };
+
+    public OpenDocumentMetadataExtracter()
+    {
+        super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.00, 1000);
+    }
+
+    /**
+     * Analyzes the zipped document and copies the creator, title,
+     * description and creation date to the destination.
+     * 
+     * @param reader the source of the content
+     * @param destination the map that receives the extracted properties
+     * @throws ContentIOException if the analysis fails for any reason
+     */
+    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
+    {
+        ODFMetaFileAnalyzer analyzer = new ODFMetaFileAnalyzer();
+        // fully qualified to avoid relying on an import that the file lacks
+        java.io.InputStream is = null;
+        try
+        {
+            // stream the document in
+            is = reader.getContentInputStream();
+            OpenDocumentMetadata docInfo = analyzer.analyzeZip(is);
+
+            if (docInfo != null)
+            {
+                // set the metadata through the helper that the other
+                // extracters in this package use as well
+                // NOTE(review): trimPut is presumed to skip null values - confirm
+                // against AbstractMetadataExtracter
+                trimPut(ContentModel.PROP_CREATOR, docInfo.getCreator(), destination);
+                trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
+                trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getDescription(), destination);
+                trimPut(ContentModel.PROP_CREATED, docInfo.getCreationDate(), destination);
+            }
+        }
+        catch (Throwable e)
+        {
+            String message = "Metadata extraction failed: \n" +
+                    "   reader: " + reader;
+            logger.debug(message, e);
+            throw new ContentIOException(message, e);
+        }
+        finally
+        {
+            // the stream was opened here, so it is closed here as well
+            if (is != null)
+            {
+                try
+                {
+                    is.close();
+                }
+                catch (java.io.IOException e)
+                {
+                    // a failure to close must not mask the outcome of the extraction
+                    logger.debug("Failed to close content stream: " + reader, e);
+                }
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracter.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2005 Jesper Steen M<>ller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Calendar;
+import java.util.Map;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdmodel.PDDocumentInformation;
+
+/**
+ * Metadata extracter for PDF documents. It reads the document information
+ * using PDFBox.
+ * 
+ * @author Jesper Steen M&oslash;ller
+ */
+public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
+{
+
+    private static final Log logger = LogFactory.getLog(PdfBoxMetadataExtracter.class);
+
+    public PdfBoxMetadataExtracter()
+    {
+        super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
+    }
+
+    /**
+     * Loads the PDF and copies the author, title, subject and creation date
+     * of its document information to the destination.
+     * <p>
+     * Nothing is extracted if the mimetype of the reader is not PDF.
+     * 
+     * @param reader the source of the content
+     * @param destination the map that receives the extracted properties
+     * @throws ContentIOException if the document cannot be read
+     */
+    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
+    {
+        if (!MimetypeMap.MIMETYPE_PDF.equals(reader.getMimetype()))
+        {
+            logger.debug("No metadata extracted for " + reader.getMimetype());
+            return;
+        }
+        PDDocument pdf = null;
+        // fully qualified to avoid relying on an import that the file lacks
+        java.io.InputStream is = null;
+        try
+        {
+            // stream the document in
+            is = reader.getContentInputStream();
+            pdf = PDDocument.load(is);
+            // Scoop out the metadata
+            PDDocumentInformation docInfo = pdf.getDocumentInformation();
+
+            trimPut(ContentModel.PROP_CREATOR, docInfo.getAuthor(), destination);
+            trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
+            trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
+
+            Calendar created = docInfo.getCreationDate();
+            if (created != null)
+            {
+                destination.put(ContentModel.PROP_CREATED, created.getTime());
+            }
+        }
+        catch (IOException e)
+        {
+            // keep the cause so that the underlying failure can be diagnosed
+            throw new ContentIOException("PDF metadata extraction failed: \n" +
+                    "   reader: " + reader,
+                    e);
+        }
+        finally
+        {
+            if (pdf != null)
+            {
+                try
+                {
+                    pdf.close();
+                }
+                catch (Throwable e)
+                {
+                    // best effort: report through the logger instead of stderr
+                    logger.warn("Failed to close PDF document: " + reader, e);
+                }
+            }
+            // closed defensively; the stream was opened here
+            if (is != null)
+            {
+                try
+                {
+                    is.close();
+                }
+                catch (IOException e)
+                {
+                    logger.debug("Failed to close content stream: " + reader, e);
+                }
+            }
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/PdfBoxMetadataExtracterTest.java
@@ -0,0 +1,43 @@
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Checks the reliability scores reported by the PDF extracter and runs the
+ * common metadata checks against a PDF document.
+ * 
+ * @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
+ * @author Jesper Steen M&oslash;ller
+ */
+public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private static final Log logger = LogFactory.getLog(PdfBoxMetadataExtracterTest.class);
+    private MetadataExtracter extracter;
+
+    /**
+     * Creates a fresh extracter instance for the test.
+     */
+    public void onSetUpInTransaction() throws Exception
+    {
+        this.extracter = new PdfBoxMetadataExtracter();
+    }
+
+    /**
+     * @return Returns the same extracter regardless - it is allowed
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return this.extracter;
+    }
+
+    /**
+     * Plain text must score 0.0 while PDF must score 1.0.
+     */
+    public void testReliability() throws Exception
+    {
+        assertEquals(
+                "Mimetype should not be supported",
+                0.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN));
+        assertEquals(
+                "Mimetype should be supported",
+                1.0,
+                extracter.getReliability(MimetypeMap.MIMETYPE_PDF));
+    }
+
+    /** Runs the common metadata checks against the "pdf" sample. */
+    public void testPdfExtraction() throws Exception
+    {
+        testCommonMetadata(
+                extractFromExtension("pdf", MimetypeMap.MIMETYPE_PDF));
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/StringMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/StringMetadataExtracter.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2005 Jesper Steen M<>ller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Extracter for mimetypes that start with {@link #PREFIX_TEXT}. It claims a
+ * low reliability for such content and does not extract any properties.
+ * 
+ * @author Jesper Steen M&oslash;ller
+ */
+public class StringMetadataExtracter implements MetadataExtracter
+{
+    public static final String PREFIX_TEXT = "text/";
+
+    private static final Log logger = LogFactory.getLog(StringMetadataExtracter.class);
+
+    /**
+     * @param sourceMimetype the source mimetype
+     * @return Returns 0.1 for mimetypes starting with {@link #PREFIX_TEXT},
+     *         otherwise 0.0
+     */
+    public double getReliability(String sourceMimetype)
+    {
+        return sourceMimetype.startsWith(PREFIX_TEXT) ? 0.1 : 0.0;
+    }
+
+    /**
+     * @return Returns a fixed estimate of 1000 milliseconds
+     */
+    public long getExtractionTime()
+    {
+        return 1000;
+    }
+
+    /**
+     * Leaves the destination untouched; the call is only reported in the
+     * debug log.
+     */
+    public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
+    {
+        if (!logger.isDebugEnabled())
+        {
+            return;
+        }
+        logger.debug("No metadata extracted for " + reader.getMimetype());
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracter.java
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2005 Jesper Steen M<>ller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.Serializable;
+import java.net.ConnectException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+
+import net.sf.joott.uno.UnoConnection;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.alfresco.util.TempFileProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.sun.star.beans.PropertyValue;
+import com.sun.star.beans.XPropertySet;
+import com.sun.star.document.XDocumentInfoSupplier;
+import com.sun.star.frame.XComponentLoader;
+import com.sun.star.lang.XComponent;
+import com.sun.star.ucb.XFileIdentifierConverter;
+import com.sun.star.uno.UnoRuntime;
+
+/**
+ * 
+ * @author Jesper Steen M<>ller
+ */
+public class UnoMetadataExtracter extends AbstractMetadataExtracter
+{
+
+    private static final Log logger = LogFactory.getLog(UnoMetadataExtracter.class);
+
+    private static String[] mimeTypes = new String[] {
+        MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
+        MimetypeMap.MIMETYPE_OPENOFFICE_WRITER,
+    // Add the other OpenOffice.org stuff here
+    // In fact, other types may apply as well, but should be counted as lower
+    // quality since they involve conversion.
+    };
+
+    public UnoMetadataExtracter(MimetypeMap mimetypeMap, String connectionUrl)
+    {
+        super(new HashSet<String>(Arrays.asList(mimeTypes)), 1.00, 10000);
+        this.mimetypeMap = mimetypeMap;
+        init(connectionUrl);
+    }
+
+    public UnoMetadataExtracter(MimetypeMap mimetypeMap)
+    {
+        this(mimetypeMap, UnoConnection.DEFAULT_CONNECTION_STRING);
+    }
+
+    private MimetypeMap mimetypeMap;
+    private MyUnoConnection connection;
+    private boolean isConnected;
+
+    /**
+     * @param unoConnectionUrl the URL of the Uno server
+     */
+    private synchronized void init(String unoConnectionUrl)
+    {
+        connection = new MyUnoConnection(unoConnectionUrl);
+        // attempt to make an connection
+        try
+        {
+            connection.connect();
+            isConnected = true;
+        }
+        catch (ConnectException e)
+        {
+            isConnected = false;
+        }
+    }
+
+    /**
+     * @return Returns true if a connection to the Uno server could be
+     *         established
+     */
+    public boolean isConnected()
+    {
+        return isConnected;
+    }
+
+    public void extract(ContentReader reader, final Map<QName, Serializable> destination) throws ContentIOException
+    {
+        String sourceMimetype = reader.getMimetype();
+
+        // create temporary files to convert from and to
+        File tempFromFile = TempFileProvider.createTempFile("UnoContentTransformer", "."
+                + mimetypeMap.getExtension(sourceMimetype));
+        // download the content from the source reader
+        reader.getContent(tempFromFile);
+        String sourceUrl = tempFromFile.toString();
+        try
+        {
+            sourceUrl = toUrl(tempFromFile, connection);
+
+            // UNO Interprocess Bridge *should* be thread-safe, but...
+            synchronized (connection)
+            {
+                XComponentLoader desktop = connection.getDesktop();
+                XComponent document = desktop.loadComponentFromURL(
+                        sourceUrl,
+                        "_blank",
+                        0,
+                        new PropertyValue[] { property("Hidden", Boolean.TRUE) });
+                if (document == null)
+                {
+                    throw new FileNotFoundException("could not open source document: " + sourceUrl);
+                }
+                try
+                {
+                    XDocumentInfoSupplier infoSupplier = (XDocumentInfoSupplier) UnoRuntime.queryInterface(
+                            XDocumentInfoSupplier.class, document);
+                    XPropertySet propSet = (XPropertySet) UnoRuntime.queryInterface(
+                            XPropertySet.class,
+                            infoSupplier
+                            .getDocumentInfo());
+
+                    // Titled aspect
+                    trimPut(ContentModel.PROP_TITLE, propSet.getPropertyValue("Title"), destination);
+                    trimPut(ContentModel.PROP_DESCRIPTION, propSet.getPropertyValue("Subject"), destination);
+
+                    // Auditable aspect
+                    // trimPut(ContentModel.PROP_CREATED,
+                    // si.getCreateDateTime(), destination);
+                    trimPut(ContentModel.PROP_CREATOR, propSet.getPropertyValue("Author"), destination);
+                    // trimPut(ContentModel.PROP_MODIFIED,
+                    // si.getLastSaveDateTime(), destination);
+                    // trimPut(ContentModel.PROP_MODIFIER, si.getLastAuthor(),
+                    // destination);
+                }
+                finally
+                {
+                    document.dispose();
+                }
+            }
+        }
+        catch (Throwable e)
+        {
+            throw new ContentIOException("Conversion failed: \n" +
+                    "   source: " + sourceUrl + "\n",
+                    e);
+        }
+    }
+
+    public String toUrl(File file, MyUnoConnection connection) throws ConnectException
+    {
+        Object fcp = connection.getFileContentService();
+        XFileIdentifierConverter fic = (XFileIdentifierConverter) UnoRuntime.queryInterface(
+                XFileIdentifierConverter.class, fcp);
+        return fic.getFileURLFromSystemPath("", file.getAbsolutePath());
+    }
+
+    public double getReliability(String sourceMimetype)
+    {
+        if (isConnected())
+            return super.getReliability(sourceMimetype);
+        else
+            return 0.0;
+    }
+
+    private static PropertyValue property(String name, Object value)
+    {
+        PropertyValue property = new PropertyValue();
+        property.Name = name;
+        property.Value = value;
+        return property;
+    }
+
+    static class MyUnoConnection extends UnoConnection
+    {
+        public MyUnoConnection(String url)
+        {
+            super(url);
+        }
+
+        public Object getFileContentService() throws ConnectException
+        {
+            return getService("com.sun.star.ucb.FileContentProvider");
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/UnoMetadataExtracterTest.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * Licensed under the Mozilla Public License version 1.1 
+ * with a permitted attribution clause. You may obtain a
+ * copy of the License at
+ *
+ *   http://www.alfresco.org/legal/license.txt
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the
+ * License.
+ */
+package org.alfresco.repo.content.metadata;
+
+import org.alfresco.repo.content.MimetypeMap;
+
+/**
+ * Exercises {@link UnoMetadataExtracter} against plain text and OpenOffice Writer mimetypes.
+ * <p>
+ * Every test returns without asserting anything when the extracter reports that it
+ * is not connected.
+ *
+ * @see org.alfresco.repo.content.metadata.UnoMetadataExtracter
+ * @author Jesper Steen Møller
+ */
+public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
+{
+    private UnoMetadataExtracter extracter;
+
+    /**
+     * Creates the extracter under test from the <code>mimetypeMap</code> visible to this class.
+     */
+    public void onSetUpInTransaction() throws Exception
+    {
+        extracter = new UnoMetadataExtracter(mimetypeMap);
+    }
+
+    /**
+     * @return Returns the same extracter regardless - it is allowed
+     */
+    protected MetadataExtracter getExtracter()
+    {
+        return extracter;
+    }
+
+    /**
+     * Checks the reliability reported for one unsupported and two supported mimetypes.
+     */
+    public void testReliability() throws Exception
+    {
+        if (!extracter.isConnected())
+        {
+            // nothing can be verified without a connection - skip quietly
+            return;
+        }
+
+        double reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertEquals("Mimetype text should not be supported", 0.0, reliability);
+
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT);
+        assertEquals("OpenOffice 2.0 Writer (OpenDoc) should be supported", 1.0, reliability);
+
+        reliability = extracter.getReliability(MimetypeMap.MIMETYPE_OPENOFFICE_WRITER);
+        assertEquals("OpenOffice 1.0 Writer should be supported", 1.0, reliability);
+    }
+
+    /**
+     * Runs the common metadata checks on the result of extracting from the "odt" sample.
+     */
+    public void testOOo20WriterExtraction() throws Exception
+    {
+        if (!extracter.isConnected())
+        {
+            // nothing can be verified without a connection - skip quietly
+            return;
+        }
+
+        testCommonMetadata(extractFromExtension("odt", MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT));
+    }
+
+    /**
+     * Runs the common metadata checks on the result of extracting from the "sxw" sample.
+     */
+    public void testOOo10WriterExtraction() throws Exception
+    {
+        if (!extracter.isConnected())
+        {
+            // nothing can be verified without a connection - skip quietly
+            return;
+        }
+
+        testCommonMetadata(extractFromExtension("sxw", MimetypeMap.MIMETYPE_OPENOFFICE_WRITER));
+    }
+}