XPath-based XML metadata extractor

- No tests - Simple root element name redirector Some comments fleshed out on the new mapping metadata extractor git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5969 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-06-16 17:55:15 +00:00 · 2007-06-15 02:30:14 +00:00 · 2007-06-15 02:30:14 +00:00 · f770bb0190
commit f770bb0190
parent 1b97517ce6
7 changed files with 611 additions and 9 deletions
--- a/source/java/org/alfresco/repo/content/ContentWorker.java
+++ b/source/java/org/alfresco/repo/content/ContentWorker.java
@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2005-2007 Alfresco Software Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * As a special exception to the terms and conditions of version 2.0 of 
+ * the GPL, you may redistribute this Program in connection with Free/Libre 
+ * and Open Source Software ("FLOSS") applications as described in Alfresco's 
+ * FLOSS exception.  You should have recieved a copy of the text describing 
+ * the FLOSS exception, and it is also available here: 
+ * http://www.alfresco.com/legal/licensing"
+ */
+package org.alfresco.repo.content;
+
+/**
+ * An interface for instances that operate on content.  This is a marker interface
+ * (it declares no methods) for specific <i>worker</i> interfaces such as metadata
+ * extractors, content transformers and so forth.
+ * 
+ * @see ContentWorkerSelector
+ * @since 2.1
+ * @author Derek Hulley
+ */
+public interface ContentWorker
+{
+}
--- a/source/java/org/alfresco/repo/content/ContentWorkerSelector.java
+++ b/source/java/org/alfresco/repo/content/ContentWorkerSelector.java
@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2005-2007 Alfresco Software Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * As a special exception to the terms and conditions of version 2.0 of 
+ * the GPL, you may redistribute this Program in connection with Free/Libre 
+ * and Open Source Software ("FLOSS") applications as described in Alfresco's 
+ * FLOSS exception.  You should have recieved a copy of the text describing 
+ * the FLOSS exception, and it is also available here: 
+ * http://www.alfresco.com/legal/licensing"
+ */
+package org.alfresco.repo.content;
+
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+
+/**
+ * An interface for instances that are able to identify content based on the
+ * {@linkplain ContentReader content reader}.  This is specifically
+ * aimed at extractors, transformers, injectors and similar classes.
+ * <p>
+ * The notion of supplying some type of worker looks a bit odd here, but
+ * really an instance of this type will act as an optional factory.  Also,
+ * in the context of the calling class, the context and the generics will
+ * identify exactly which type is returned by the factory.
+ * 
+ * @param <W>           the type of worker that the selector provides
+ * 
+ * @since 2.1
+ * @author Derek Hulley
+ */
+public interface ContentWorkerSelector<W extends ContentWorker>
+{
+    /**
+     * Provides a worker appropriate to the given content, if possible.  The reader
+     * should only be used if absolutely required.  The caller should always request
+     * {@linkplain ContentReader#getReader() a new reader} or check the
+     * {@linkplain ContentReader#isClosed() reader's state}.
+     * 
+     * @param reader        the content reader, providing the actual stream metadata
+     *                      and even the stream, if required.
+     * @return              Return a worker that can operate on the content, or <tt>null</tt>
+     *                      if this selector doesn't support the content.
+     * @throws ContentIOException
+     *                      if the search fails
+     */
+    W getWorker(ContentReader reader);
+}
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java
@ -254,6 +254,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
     * be used to extract values from the documents.  The metadata extraction becomes fully
     * configuration-driven, i.e. declaring further mappings will result in more values being
     * extracted from the documents.
+     * <p>
+     * Most extractors will not be using this method.  For an example of its use, see the
+     * {@linkplain OpenDocumentMetadataExtracter OpenDocument extractor}, which uses the mapping
+     * to select specific user properties from a document.
     */
    protected final Map<String, Set<QName>> getMapping()
    {
@ -324,7 +328,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
        for (Map.Entry entry : mappingProperties.entrySet())
        {
            String propertyName = (String) entry.getKey();
-            if (propertyName.startsWith("namespace.prefix."))
+            if (propertyName.startsWith(NAMESPACE_PROPERTY_PREFIX))
            {
                String prefix = propertyName.substring(17);
                String namespace = (String) entry.getValue();
@ -677,15 +681,15 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
     * system administrators can accurately determine how to best enhance or override the
     * default mapping.
     * <p>
-     * If the default mapping is declared in a properties file, then the
-     * {@link #readMappingProperties(String)} method can be used to quickly generate the
-     * return value:
-     * <pre>
-     *      protected Map<String, Set<QName>> getDefaultMapping()
+     * If the default mapping is declared in a properties file other than the one named after
+     * the class, then the {@link #readMappingProperties(String)} method can be used to quickly
+     * generate the return value:
+     * <pre><code>
+     *      protected Map<String, Set<QName>> getDefaultMapping()
     *      {
     *          return readMappingProperties(DEFAULT_MAPPING);
     *      }
-     * </pre>
+     * </code></pre>
     * The map can also be created in code either statically or during the call.
     * 
     * @return              Returns the default, static mapping.  It may not be null.
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@ -28,17 +28,20 @@ import java.io.Serializable;
 import java.util.Map;
 import java.util.Set;

+import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.namespace.QName;

 /**
 * Interface for document property extracters.
+ * <p>
+ * Please pardon the incorrect spelling of <i>extractor</i>.
 * 
 * @author Jesper Steen Møller
 * @author Derek Hulley
 */
-public interface MetadataExtracter
+public interface MetadataExtracter extends ContentWorker
 {
    /**
     * A enumeration of functional property overwrite policies.  These determine whether extracted properties are
--- a/source/java/org/alfresco/repo/content/metadata/xml/RootElementNameMetadataExtracterSelector.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/RootElementNameMetadataExtracterSelector.java
@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2005-2007 Alfresco Software Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * As a special exception to the terms and conditions of version 2.0 of 
+ * the GPL, you may redistribute this Program in connection with Free/Libre 
+ * and Open Source Software ("FLOSS") applications as described in Alfresco's 
+ * FLOSS exception.  You should have recieved a copy of the text describing 
+ * the FLOSS exception, and it is also available here: 
+ * http://www.alfresco.com/legal/licensing"
+ */
+package org.alfresco.repo.content.metadata.xml;
+
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.alfresco.repo.content.ContentWorkerSelector;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.metadata.MetadataExtracter;
+import org.alfresco.service.cmr.repository.ContentIOException;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * A selector that looks at the root node of an XML document to determine which worker to provide.
+ * There are many ways to identify XML documents and this is probably the simplest.  Alternate
+ * implementations might execute a series of xpath statements or look for specific namespace
+ * declarations in the document.  The net result is the same, i.e. given an XML document, an
+ * extracter is provided to the caller.
+ * <p>
+ * In this selector, there is no guarantee that the different extracters will generate the same
+ * (or even nearly the same) metadata.  It is up to the configurer to ensure that if it is a
+ * requirement, but otherwise each extracter is responsible for its own mappings.  Mostly, though,
+ * a root node match will imply a structure that has the necessary metadata.
+ * 
+ * @since 2.1
+ * @author Derek Hulley
+ */
+public class RootElementNameMetadataExtracterSelector
+        extends DefaultHandler
+        implements ContentWorkerSelector<MetadataExtracter>
+{
+    private static final Log logger = LogFactory.getLog(RootElementNameMetadataExtracterSelector.class);
+
+    private SAXParserFactory saxParserFactory;
+    private Set<String> supportedMimetypes;
+    private Map<String, MetadataExtracter> extractersByRootElementName;
+
+    /**
+     * Creates a selector that uses the default SAX parser factory, supports only
+     * {@link MimetypeMap#MIMETYPE_XML} and has no extracters registered.
+     */
+    public RootElementNameMetadataExtracterSelector()
+    {
+        saxParserFactory = SAXParserFactory.newInstance();
+        supportedMimetypes = new HashSet<String>();
+        supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
+        extractersByRootElementName = Collections.emptyMap();
+    }
+
+    /**
+     * Set the SAX parser factory to use to parse the incoming documents.  If not set,
+     * the default system-wide SAX parser factory is used.
+     * 
+     * @param factoryClassName      A {@link SAXParserFactory} class name
+     * @throws IllegalArgumentException
+     *                              if the factory cannot be loaded; the original failure
+     *                              is attached as the cause
+     */
+    public void setSAXParserFactoryClass(String factoryClassName)
+    {
+        try
+        {
+            saxParserFactory = SAXParserFactory.newInstance(
+                    factoryClassName,
+                    this.getClass().getClassLoader());
+        }
+        catch (Throwable e)
+        {
+            // Keep the cause so that the reason for the load failure is not lost
+            throw new IllegalArgumentException(
+                    "Unable to load SAX parser factory from class: " + factoryClassName,
+                    e);
+        }
+    }
+
+    /**
+     * Optionally set the mimetypes supported.  They must be XML formats that the chosen
+     * parser will be able to handle.  Content with any other mimetype is never parsed
+     * and no worker is provided for it.
+     * 
+     * @param supportedMimetypes        the list of mimetypes.  The default is <b>text/xml</b>.
+     * @throws IllegalArgumentException if the set is <tt>null</tt>
+     */
+    public void setSupportedMimetypes(Set<String> supportedMimetypes)
+    {
+        if (supportedMimetypes == null)
+        {
+            throw new IllegalArgumentException("The supported mimetypes may not be null");
+        }
+        this.supportedMimetypes = supportedMimetypes;
+    }
+
+    /**
+     * Set the extractors to use.
+     * 
+     * @param extracters            a map of {@linkplain MetadataExtracter} instances
+     *                              keyed by root element name
+     * @throws IllegalArgumentException if the map is <tt>null</tt>
+     */
+    public void setExtracters(Map<String, MetadataExtracter> extracters)
+    {
+        if (extracters == null)
+        {
+            throw new IllegalArgumentException("The extracters map may not be null");
+        }
+        this.extractersByRootElementName = extracters;
+    }
+
+    /**
+     * Performs a match of the root element name to find the correct extracter.
+     * <p>
+     * The content is only parsed if the reader's mimetype is one of the
+     * {@linkplain #setSupportedMimetypes(Set) supported mimetypes}.  Parsing is aborted
+     * as soon as the first (root) element has been seen.
+     * 
+     * @param reader        the reader giving access to the mimetype and the XML stream
+     * @return              Returns the extracter registered against the root element name,
+     *                      or <tt>null</tt> if the mimetype is not supported or no extracter
+     *                      is registered for the root element
+     * @throws ContentIOException
+     *                      if the stream could not be read or parsed
+     */
+    public MetadataExtracter getWorker(ContentReader reader)
+    {
+        // Honour the configured mimetypes; a reader without a mimetype is treated as unsupported
+        if (!supportedMimetypes.contains(reader.getMimetype()))
+        {
+            return null;
+        }
+        MetadataExtracter extracter = null;
+        InputStream is = null;
+        String rootElementName = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            // NOTE(review): the parser runs with the factory's settings as-is; DTD and external
+            //               entity processing are not disabled here - confirm for untrusted content.
+            SAXParser saxParser = saxParserFactory.newSAXParser();
+            saxParser.parse(is, this);
+            // The parse completed without any element being reported: no match possible
+        }
+        catch (RootElementFoundException e)
+        {
+            // Expected path: startElement breaks out of the parse with the root element name
+            rootElementName = e.getElementName();
+            extracter = extractersByRootElementName.get(rootElementName);
+        }
+        catch (Throwable e)
+        {
+            throw new ContentIOException("Failed to extract root element from XML document", e);
+        }
+        finally
+        {
+            if (is != null)
+            {
+                // Best effort only: a failure to close must not mask the result or the real error
+                try { is.close(); } catch (Throwable ignored) {}
+            }
+        }
+        // Done
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("\n" +
+                    "Chosen metadata extracter for reader: \n" +
+                    "   Reader:       " + reader + "\n" +
+                    "   Root Element: " + rootElementName + "\n" +
+                    "   Extracter:    " + extracter);
+        }
+        return extracter;
+    }
+
+    /**
+     * Aborts the parse on the first element encountered, i.e. the root element, by throwing
+     * an exception that carries the element's name.
+     * <p>
+     * SAX only supplies <tt>localName</tt> when namespace processing is being performed;
+     * otherwise it is an empty string.  A {@link SAXParserFactory} is not namespace-aware
+     * unless configured to be, so the qualified name is used whenever the local name is
+     * not available.
+     */
+    @Override
+    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
+    {
+        String elementName = (localName == null || localName.length() == 0) ? qName : localName;
+        throw new RootElementFoundException(elementName);
+    }
+
+    /**
+     * An exception to break out of the XML parsing early
+     */
+    private static class RootElementFoundException extends SAXException
+    {
+        private static final long serialVersionUID = 6845880422947198814L;
+        private String elementName;
+        public RootElementFoundException(String elementName)
+        {
+            super(elementName);
+            this.elementName = elementName;
+        }
+        public String getElementName()
+        {
+            return elementName;
+        }
+    }
+}
--- a/source/java/org/alfresco/repo/content/metadata/xml/XPathMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/XPathMetadataExtracter.java
@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2005 Jesper Steen Møller
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * As a special exception to the terms and conditions of version 2.0 of 
+ * the GPL, you may redistribute this Program in connection with Free/Libre 
+ * and Open Source Software ("FLOSS") applications as described in Alfresco's 
+ * FLOSS exception.  You should have recieved a copy of the text describing 
+ * the FLOSS exception, and it is also available here: 
+ * http://www.alfresco.com/legal/licensing"
+ */
+package org.alfresco.repo.content.metadata.xml;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+import org.alfresco.error.AlfrescoRuntimeException;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.namespace.QName;
+import org.alfresco.util.ParameterCheck;
+import org.alfresco.util.PropertyCheck;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.w3c.dom.Document;
+
+/**
+ * An extracter that pulls values from XML documents using configurable XPath
+ * statements.  It is not possible to list a default set of mappings - this is
+ * down to the configuration only.
+ * <p>
+ * When an instance of this extracter is configured, XPath statements should be
+ * provided to extract all the available metadata.  The implementation is sensitive
+ * to what is actually requested by the
+ * {@linkplain AbstractMappingMetadataExtracter#setMapping(Map) configured mapping}
+ * and will only perform the queries necessary to fulfill the requirements.
+ * <p>
+ * To summarize, there are two configurations required for this class:
+ * <ul>
+ *   <li>
+ *     A mapping of all reasonable document properties to XPath statements.
+ *     See {@link #setXpathMappingProperties(Properties)}.
+ *   </li>
+ *   <li>
+ *     A mapping of document property names to Alfresco repository model QNames.
+ *     See {@link AbstractMappingMetadataExtracter#setMappingProperties(java.util.Properties)}.
+ *   </li>
+ * </ul>
+ * <p>
+ * The mapping of document properties to XPaths must look as follows:
+ * <pre>
+ *    # Get the author
+ *    author=/root/author/@name
+ * </pre>
+ * <p>
+ * The XPath statements are compiled when the instance is {@linkplain #init() initialized},
+ * not when the properties are set.
+ * 
+ * @author Derek Hulley
+ */
+public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter implements NamespaceContext
+{
+    public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_XML};
+
+    private static final Log logger = LogFactory.getLog(XPathMetadataExtracter.class);
+
+    private DocumentBuilder documentBuilder;
+    private XPathFactory xpathFactory;
+    /** The XPath mapping as configured; compiled into the maps below by {@link #init()} */
+    private Properties xpathMappingProperties;
+    private final Map<String, String> namespacesByPrefix = new HashMap<String, String>(7);
+    private final Map<String, XPathExpression> xpathExpressionMapping = new HashMap<String, XPathExpression>(17);
+
+    /**
+     * Default constructor
+     */
+    public XPathMetadataExtracter()
+    {
+        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
+    }
+
+    /** {@inheritDoc} */
+    public String getNamespaceURI(String prefix)
+    {
+        ParameterCheck.mandatoryString("prefix", prefix);
+        return namespacesByPrefix.get(prefix);
+    }
+
+    /** {@inheritDoc} */
+    public String getPrefix(String namespaceURI)
+    {
+        ParameterCheck.mandatoryString("namespaceURI", namespaceURI);
+        for (Map.Entry<String, String> entry : namespacesByPrefix.entrySet())
+        {
+            if (namespaceURI.equals(entry.getValue()))
+            {
+                return entry.getKey();
+            }
+        }
+        return null;
+    }
+
+    /** {@inheritDoc} */
+    public Iterator<String> getPrefixes(String namespaceURI)
+    {
+        ParameterCheck.mandatoryString("namespaceURI", namespaceURI);
+        List<String> prefixes = new ArrayList<String>(2);
+        for (Map.Entry<String, String> entry : namespacesByPrefix.entrySet())
+        {
+            if (namespaceURI.equals(entry.getValue()))
+            {
+                prefixes.add(entry.getKey());
+            }
+        }
+        return prefixes.iterator();
+    }
+
+    /**
+     * Set the properties file that maps document properties to the XPath statements
+     * necessary to retrieve them.  The properties are only stored here; they are read
+     * and the XPath statements compiled during {@link #init()}.
+     * <p> 
+     * The Xpath mapping is of the form:
+     * <pre>
+     * # Namespaces prefixes
+     * namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
+     * namespace.prefix.my=http://www....com/alfresco/1.0
+     * 
+     * # Mapping
+     * editor=/cm:some-xpath-1
+     * title=/my:some-xpath-2
+     * </pre>
+     * 
+     * @param xpathMappingProperties    the document property to XPath mapping (mandatory)
+     */
+    public void setXpathMappingProperties(Properties xpathMappingProperties)
+    {
+        this.xpathMappingProperties = xpathMappingProperties;
+    }
+
+    /**
+     * Checks the mandatory configuration, creates the XML helpers, initializes the base
+     * class and then compiles the configured XPath statements.
+     */
+    @Override
+    protected void init()
+    {
+        PropertyCheck.mandatory(this, "xpathMappingProperties", xpathMappingProperties);
+        try
+        {
+            // NOTE(review): the factory is used with its defaults, i.e. it is not namespace-aware
+            //               and DTD / external entity processing is not disabled - confirm that
+            //               this is intended for prefixed XPaths and for untrusted content.
+            documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            xpathFactory = XPathFactory.newInstance();
+        }
+        catch (Throwable e)
+        {
+            throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
+        }
+        super.init();
+        // The XPaths can only be compiled now: the XPath factory exists and the base class has
+        // been initialized (presumably what makes getMapping() usable - verify against the base class).
+        namespacesByPrefix.clear();
+        xpathExpressionMapping.clear();
+        readXPathMappingProperties(xpathMappingProperties);
+    }
+
+    /**
+     * It is not possible to have any default mappings, but something has to be returned.
+     * 
+     * @return          Returns an empty map
+     */
+    @Override
+    protected Map<String, Set<QName>> getDefaultMapping()
+    {
+        return Collections.emptyMap();
+    }
+
+    /**
+     * Parses the content into a DOM document and runs the compiled XPath statements against it.
+     * 
+     * @param reader    the reader from which the XML stream is taken
+     * @return          Returns the values keyed by document property name
+     */
+    @Override
+    protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
+    {
+        InputStream is = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            Document doc = documentBuilder.parse(is);
+            Map<String, Serializable> rawProperties = processDocument(doc);
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("\n" +
+                        "Extracted XML metadata: \n" +
+                        "   Reader:  " + reader + "\n" +
+                        "   Results: " + rawProperties);
+            }
+            return rawProperties;
+        }
+        finally
+        {
+            if (is != null)
+            {
+                // Best effort only: a failure to close must not mask the result or the real error
+                try { is.close(); } catch (IOException ignored) {}
+            }
+        }
+    }
+
+    /**
+     * Executes all the necessary XPath statements to extract values.
+     * 
+     * @param document  the parsed XML document
+     * @return          Returns the string result of each compiled XPath, keyed by document
+     *                  property name
+     */
+    protected Map<String, Serializable> processDocument(Document document) throws Throwable
+    {
+        Map<String, Serializable> rawProperties = super.newRawMap();
+
+        // Execute all the XPaths that we saved
+        for (Map.Entry<String, XPathExpression> element : xpathExpressionMapping.entrySet())
+        {
+            String documentProperty = element.getKey();
+            XPathExpression xpathExpression = element.getValue();
+            // Execute it
+            String value = xpathExpression.evaluate(document);
+            // Put the value
+            rawProperties.put(documentProperty, value);
+        }
+        // Done
+        return rawProperties;
+    }
+
+    /**
+     * Reads the namespace declarations and compiles the XPath statements found in the
+     * given properties.  Only document properties that are present in the
+     * {@linkplain #getMapping() final mapping} are compiled; all others are ignored.
+     * 
+     * @param xpathMappingProperties    the document property to XPath mapping
+     * @throws AlfrescoRuntimeException if an XPath statement cannot be compiled
+     * 
+     * @see #setXpathMappingProperties(Properties)
+     */
+    protected void readXPathMappingProperties(Properties xpathMappingProperties)
+    {
+        // Get the namespaces
+        for (Map.Entry<Object, Object> entry : xpathMappingProperties.entrySet())
+        {
+            String propertyName = (String) entry.getKey();
+            if (propertyName.startsWith(NAMESPACE_PROPERTY_PREFIX))
+            {
+                String prefix = propertyName.substring(NAMESPACE_PROPERTY_PREFIX.length());
+                String namespace = (String) entry.getValue();
+                namespacesByPrefix.put(prefix, namespace);
+            }
+        }
+        // Get the mapping that will be applied by the base class
+        Map<String, Set<QName>> finalMapping = getMapping();
+        // Create the mapping
+        for (Map.Entry<Object, Object> entry : xpathMappingProperties.entrySet())
+        {
+            String documentProperty = (String) entry.getKey();
+            String xpathStr = (String) entry.getValue();
+            if (documentProperty.startsWith(NAMESPACE_PROPERTY_PREFIX))
+            {
+                // Ignore these now
+                continue;
+            }
+            // If the property is not going to be mapped, then just ignore it too
+            if (!finalMapping.containsKey(documentProperty))
+            {
+                continue;
+            }
+            // Construct the XPath; prefixes are resolved against the namespaces read above
+            XPath xpath = xpathFactory.newXPath();
+            xpath.setNamespaceContext(this);
+            XPathExpression xpathExpression = null;
+            try
+            {
+                xpathExpression = xpath.compile(xpathStr);
+            }
+            catch (XPathExpressionException e)
+            {
+                // Keep the cause: it carries the detail of what is wrong with the expression
+                throw new AlfrescoRuntimeException(
+                        "Failed to compile XPath expression: \n" +
+                        "   Document property: " + documentProperty + "\n" +
+                        "   XPath:             " + xpathStr,
+                        e);
+            }
+            // Persist it
+            xpathExpressionMapping.put(documentProperty, xpathExpression);
+            if (logger.isDebugEnabled())
+            {
+                logger.debug("Added mapping from " + documentProperty + " to " + xpathExpression);
+            }
+        }
+        // Done
+    }
+}
--- a/source/java/org/alfresco/repo/content/transform/ContentTransformer.java
+++ b/source/java/org/alfresco/repo/content/transform/ContentTransformer.java
@ -26,6 +26,7 @@ package org.alfresco.repo.content.transform;

 import java.util.Map;

+import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentWriter;
@ -35,7 +36,7 @@ import org.alfresco.service.cmr.repository.ContentWriter;
 * 
 * @author Derek Hulley
 */
-public interface ContentTransformer
+public interface ContentTransformer extends ContentWorker
 {
    /**
     * Provides the approximate accuracy with which this transformer can