XML metadata extraction with sample.

Added tests into build. This is now ready for testing, comments and suggestions. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6056 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-09-17 14:21:39 +00:00 · 2007-06-21 16:09:03 +00:00
parent 757616bc85
commit 55a6e2f287
10 changed files with 789 additions and 53 deletions
--- a/config/alfresco/extension/xml-metadata-extracter-context.xml.sample
+++ b/config/alfresco/extension/xml-metadata-extracter-context.xml.sample
@@ -0,0 +1,95 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
 <!--
   Sample configuration of an XmlMetadataExtracter.
   This shows how XML metadata extraction can be set up to extract metadata from different
   formats of XML.
   Since: 2.1
   Author: Derek Hulley
 -->
 <beans>
   <!-- An extractor that operates on Alfresco Model XML -->
   <bean id="extracter.xml.sample.AlfrescoModelMetadataExtracter"
         class="org.alfresco.repo.content.metadata.xml.XPathMetadataExtracter"
         parent="baseMetadataExtracter"
         init-method="init" >
      <property name="mappingProperties">
         <!--
            The properties can also be specified using a properties file on the classpath, e.g.:
            <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
               <property name="location">
                  <value>classpath:alfresco/extension/xml-metadata/AlfrescoModel-xpath-mappings.properties</value>
               </property>
            </bean>
         -->
         <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
            <property name="properties">
               <props>
                  <prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
                  <prop key="author">cm:author</prop>
                  <prop key="title">cm:title</prop>
                  <prop key="description">cm:description</prop>
               </props>
            </property>
         </bean>
      </property>
      <property name="xpathMappingProperties">
         <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
            <property name="properties">
               <props>
                  <prop key="namespace.prefix.fm">http://www.alfresco.org/model/forum/1.0</prop>
                  <prop key="author">/model/author/text()</prop>
                  <prop key="title">/model/@name</prop>
                  <prop key="description">/model/description/text()</prop>
                  <prop key="version">/model/version/text()</prop>
               </props>
            </property>
         </bean>
      </property>
   </bean>
   <!--
      This selector examines the XML documents, executing the given XPath statements until a
      result is found.
   -->
   <bean
         id="extracter.xml.sample.selector.XPathSelector"
         class="org.alfresco.repo.content.selector.XPathContentWorkerSelector"
         init-method="init">
      <property name="workers">
         <map>
            <entry key="/my:test">
               <null />
            </entry>
            <entry key="/model">
               <ref bean="extracter.xml.sample.AlfrescoModelMetadataExtracter" />
            </entry>
         </map>
      </property>
   </bean>
   <!--
      This is the face of the XML metadata extraction.  It passes the XML document to each of
      the selectors, until one of them gives back a MetadataExtracter, which is then used as
      normal to extract the values.  The overwrite policy of the embedded extracters has no
      effect.  It is only this extracter's policy that is used.
   -->
   <bean
         id="extracter.xml.sample.XMLMetadataExtracter"
         class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter"
         parent="baseMetadataExtracter">
      <property name="overwritePolicy">
         <value>EAGER</value>
      </property>
      <property name="selectors">
         <list>
            <ref bean="extracter.xml.sample.selector.XPathSelector" />
         </list>
      </property>
   </bean>
 </beans>
--- a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java
@@ -93,7 +93,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
    private MetadataExtracterRegistry registry;
    private MimetypeService mimetypeService;
    private long extractionTime;
    private boolean initialized;
    private Set<String> supportedMimetypes;
@@ -101,12 +100,23 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
    private Map<String, Set<QName>> mapping;
    private boolean inheritDefaultMapping;
    /**
     * Default constructor.  If this is called, then {@link #isSupported(String)} should
     * be implemented.  This is useful when the list of supported mimetypes is not known
     * when the instance is constructed.  Alternatively, once the set becomes known, call
     * {@link #setSupportedMimetypes(Collection)}.
     *
     * @see #isSupported(String)
     * @see #setSupportedMimetypes(Collection)
     */
    protected AbstractMappingMetadataExtracter()
    {
        this(Collections.<String>emptySet());
    }
    /**
     * Constructor that can be used when the list of supported mimetypes is known up front.
     * 
     * @param supportedMimetypes    the set of mimetypes supported by default
     */
    protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes)
@@ -179,13 +189,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
    }
    /**
-     * @param overwritePolicy   the policy to apply when there are existing system properties
+     * Set the policy to use when existing values are encountered.  Depending on how the extracter
     * is called, this may not be relevant, i.e. an empty map of existing properties may be passed
     * in by the client code, which may follow its own overwrite strategy.
     * 
     * @param overwritePolicy       the policy to apply when there are existing system properties
     */
    public void setOverwritePolicy(OverwritePolicy overwritePolicy)
    {
        this.overwritePolicy = overwritePolicy;
    }
    /**
     * Set the policy to use when existing values are encountered.  Depending on how the extracter
     * is called, this may not be relevant, i.e. an empty map of existing properties may be passed
     * in by the client code, which may follow its own overwrite strategy.
     * 
     * @param overwritePolicyStr    the policy to apply when there are existing system properties
     */
    public void setOverwritePolicy(String overwritePolicyStr)
    {
        this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
    }
    /**
     * Set if the property mappings augment or override the mapping generically provided by the
     * extracter implementation.  The default is <tt>false</tt>, i.e. any mapping set completely
@@ -410,10 +436,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
        {
            registry.register(this);
        }
        else
        {
            logger.warn("No registry provided.  Not registering: " + this);
        }
    }
    /**
@@ -466,7 +488,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
    /** {@inheritDoc} */
    public long getExtractionTime()
    {
-        return extractionTime;
+        return 1000L;
    }
    /**
@@ -510,7 +532,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
    /**
     * {@inheritDoc}
     */
-    public final Map<QName, Serializable> extract(
+    public Map<QName, Serializable> extract(
            ContentReader reader,
            OverwritePolicy overwritePolicy,
            Map<QName, Serializable> destination,
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracter.java
@@ -222,6 +222,8 @@ public interface MetadataExtracter extends ContentWorker
     * reliant transformers will be used for a specific extraction.
     * 
     * @return Returns the approximate number of milliseconds per transformation
     * 
     * @deprecated          Generally not useful or used.  Extraction is normally specifically configured.
     */
    public long getExtractionTime();
--- a/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
+++ b/source/java/org/alfresco/repo/content/metadata/MetadataExtracterRegistry.java
@@ -145,7 +145,6 @@ public class MetadataExtracterRegistry
     */
    private MetadataExtracter findBestExtracter(String sourceMimetype)
    {
        long bestTime = Long.MAX_VALUE;
        logger.debug("Finding best extracter for " + sourceMimetype);
        MetadataExtracter bestExtracter = null;
@@ -157,12 +156,7 @@ public class MetadataExtracterRegistry
                // extraction not achievable
                continue;
            }
-            long time = ext.getExtractionTime();
+            bestExtracter = ext;
            if (time < bestTime)
            {
                bestExtracter = ext;
                bestTime = time;
            }
        }
        return bestExtracter;
    }
--- a/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracter.java
@@ -0,0 +1,191 @@
 /*
 * Copyright (C) 2005-2007 Alfresco Software Limited.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * As a special exception to the terms and conditions of version 2.0 of 
 * the GPL, you may redistribute this Program in connection with Free/Libre 
 * and Open Source Software ("FLOSS") applications as described in Alfresco's 
 * FLOSS exception.  You should have recieved a copy of the text describing 
 * the FLOSS exception, and it is also available here: 
 * http://www.alfresco.com/legal/licensing"
 */
 package org.alfresco.repo.content.metadata.xml;
 import java.io.Serializable;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import org.alfresco.repo.content.selector.ContentWorkerSelector;
 import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
 import org.alfresco.repo.content.metadata.MetadataExtracter;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.namespace.QName;
 import org.alfresco.util.PropertyCheck;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 /**
 * A metadata extracter that selects an appropriate worker for the extraction.
 * <p>
 * The {@linkplain #setSelectors(List) selectors} are used to find the extracter most
 * appropriate for a given XML document.  The chosen extracter is then asked to extract
 * the values, passing through the {@linkplain MetadataExtracter.OverwritePolicy overwrite policy}
 * as {@linkplain #setOverwritePolicy(String) set} on this instance.  The overwrite policy of the
 * embedded extracters is not relevant unless they are used separately in another context.
 * 
 * @see ContentWorkerSelector
 * @see MetadataExtracter
 * 
 * @since 2.1
 * @author Derek Hulley
 */
 public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter
 {
    public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_XML};
    // Log under this class's own category (previously logged as XPathMetadataExtracter)
    private static final Log logger = LogFactory.getLog(XmlMetadataExtracter.class);
    private List<ContentWorkerSelector<MetadataExtracter>> selectors;
    /**
     * Default constructor.  Registers {@link #SUPPORTED_MIMETYPES} as the supported mimetypes.
     */
    public XmlMetadataExtracter()
    {
        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
    }
    /**
     * Sets the list of metadata selectors to use to find the extracter to use, given
     * some content.  The evaluations are done in the order that they occur in the
     * list.
     * 
     * @param selectors     A list of selectors
     */
    public void setSelectors(List<ContentWorkerSelector<MetadataExtracter>> selectors)
    {
        this.selectors = selectors;
    }
    /**
     * Checks that the mandatory <tt>selectors</tt> property has been set before
     * delegating to the base class initialization.
     */
    @Override
    protected void init()
    {
        PropertyCheck.mandatory(this, "selectors", selectors);
        // Get the base class to set up its mappings
        super.init();
    }
    /**
     * It is not possible to have any default mappings, but something has to be returned.
     * 
     * @return              Returns an empty map
     */
    @Override
    protected Map<String, Set<QName>> getDefaultMapping()
    {
        return Collections.emptyMap();
    }
    /**
     * Selects an extracter to perform the work and redirects to it.
     * <p>
     * The selectors are consulted in order and the first one to provide an extracter wins.
     * If no selector provides an extracter, nothing is extracted.
     * 
     * @param reader            the source of the XML content
     * @param overwritePolicy   the policy passed through to the selected extracter
     * @param destination       the map passed through to the selected extracter
     * @param mapping           the mapping passed through to the selected extracter
     * @return                  Returns the selected extracter's result, or an empty map
     *                          if no extracter was selected
     */
    @Override
    public Map<QName, Serializable> extract(
            ContentReader reader,
            OverwritePolicy overwritePolicy,
            Map<QName, Serializable> destination,
            Map<String, Set<QName>> mapping)
    {
        MetadataExtracter extracter = selectExtracter(reader);
        Map<QName, Serializable> modifiedProperties = null;
        // Did we find anything?
        if (extracter == null)
        {
            // There will be no properties extracted
            modifiedProperties = Collections.emptyMap();
        }
        else
        {
            // An extracter was selected
            try
            {
                modifiedProperties = extracter.extract(reader, overwritePolicy, destination, mapping);
            }
            finally
            {
                if (reader.isChannelOpen())
                {
                    logger.error("Content reader not closed by MetadataExtractor: \n" +
                            "   Reader:   " + reader + "\n" +
                            "   extracter: " + extracter);
                }
            }
        }
        // Done
        if (logger.isDebugEnabled())
        {
            logger.debug("\n" +
                    "XML metadata extractor redirected: \n" +
                    "   Reader:    " + reader + "\n" +
                    "   Extracter: " + extracter + "\n" +
                    "   Extracted: " + modifiedProperties);
        }
        return modifiedProperties;
    }
    /**
     * Asks each selector in turn for an extracter and returns the first one provided.
     * 
     * @param reader        the reader from which a new reader is spawned for each selector
     * @return              Returns the first extracter provided by a selector, or
     *                      <tt>null</tt> if none of the selectors provided one
     */
    private MetadataExtracter selectExtracter(ContentReader reader)
    {
        for (ContentWorkerSelector<MetadataExtracter> selector : selectors)
        {
            // Each selector is handed a newly spawned reader; the reader given to this
            // method is the one later passed to the selected extracter.
            ContentReader spawnedReader = reader.getReader();
            MetadataExtracter extracter = null;
            try
            {
                extracter = selector.getWorker(spawnedReader);
            }
            finally
            {
                // Check the reader that the selector actually used, not the original
                if (spawnedReader.isChannelOpen())
                {
                    logger.error("Content reader not closed by MetadataExtractor selector: \n" +
                            "   reader:   " + spawnedReader + "\n" +
                            "   selector: " + selector);
                }
            }
            // Just take the first successful one
            if (extracter != null)
            {
                if (logger.isDebugEnabled())
                {
                    logger.debug("\n" +
                            "Found metadata extracter to process XML document: \n" +
                            "   Selector: " + selector + "\n" +
                            "   Document: " + reader);
                }
                return extracter;
            }
        }
        // No selector was able to provide an extracter
        return null;
    }
    /**
     * This is not required as the
     * {@link #extract(ContentReader, OverwritePolicy, Map, Map) extract} method is
     * overridden and redirects the work to the selected extracter.
     * 
     * @throws UnsupportedOperationException    always
     */
    protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
    {
        throw new UnsupportedOperationException();
    }
 }
--- a/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracterTest.java
@@ -31,10 +31,27 @@ import java.net.URL;
 import junit.framework.TestCase;
 import org.alfresco.model.ContentModel;
 import org.alfresco.repo.action.ActionImpl;
 import org.alfresco.repo.action.executer.ActionExecuter;
 import org.alfresco.repo.action.executer.SetPropertyValueActionExecuter;
 import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.repo.content.filestore.FileContentReader;
 import org.alfresco.repo.content.metadata.MetadataExtracter;
 import org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector;
 import org.alfresco.repo.content.selector.XPathContentWorkerSelector;
 import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
 import org.alfresco.repo.security.authentication.AuthenticationComponent;
 import org.alfresco.service.ServiceRegistry;
 import org.alfresco.service.cmr.action.Action;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentService;
 import org.alfresco.service.cmr.repository.ContentWriter;
 import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.cmr.repository.NodeService;
 import org.alfresco.service.cmr.repository.StoreRef;
 import org.alfresco.service.namespace.NamespaceService;
 import org.alfresco.service.namespace.QName;
 import org.alfresco.util.GUID;
 import org.alfresco.util.PropertyMap;
 import org.springframework.context.ApplicationContext;
 import org.springframework.context.support.ClassPathXmlApplicationContext;
@@ -54,8 +71,13 @@ public class XmlMetadataExtracterTest extends TestCase
    private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
    private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);
-    private XPathMetadataExtracter alfrescoModelMetadataExtractor;
+    private ServiceRegistry serviceRegistry;
-    private XPathMetadataExtracter eclipseProjectMetadataExtractor;
+    private AuthenticationComponent authenticationComponent;
    private XPathMetadataExtracter alfrescoModelMetadataExtracter;
    private XPathMetadataExtracter eclipseProjectMetadataExtracter;
    private RootElementNameContentWorkerSelector<MetadataExtracter> rootElementNameMetadataExtracterSelector;
    private XPathContentWorkerSelector<MetadataExtracter> xpathMetadataExtracterSelector;
    private XmlMetadataExtracter xmlMetadataExtracter;
    /**
     * Get a reader for a file that should be on the classpath.
@@ -78,16 +100,30 @@ public class XmlMetadataExtracterTest extends TestCase
    }
    @Override
    @SuppressWarnings("unchecked")
    public void setUp() throws Exception
    {
-        alfrescoModelMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
+        serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
-        eclipseProjectMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
+        authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
        alfrescoModelMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
        eclipseProjectMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
        rootElementNameMetadataExtracterSelector = (RootElementNameContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.RootElementSelector");
        xpathMetadataExtracterSelector = (XPathContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.XPathSelector");
        xmlMetadataExtracter = (XmlMetadataExtracter) ctx.getBean("extracter.xml.XMLMetadataExtracter");
        authenticationComponent.setSystemUserAsCurrentUser();
    }
    @Override
    public void tearDown() throws Exception
    {
        // Best-effort cleanup: any failure while clearing the security context is swallowed
        try
        {
            authenticationComponent.clearCurrentSecurityContext();
        }
        catch (Throwable ignored)
        {
            // Deliberately ignored
        }
    }
    public void testSetUp()
    {
-        assertNotNull(alfrescoModelMetadataExtractor);
+        assertNotNull(alfrescoModelMetadataExtracter);
-        assertNotNull(eclipseProjectMetadataExtractor);
+        assertNotNull(eclipseProjectMetadataExtracter);
    }
    public void testExtractAlfresocModel() throws Exception
@@ -98,7 +134,7 @@ public class XmlMetadataExtracterTest extends TestCase
        // Pass it to the extracter
        PropertyMap checkProperties = new PropertyMap();
-        alfrescoModelMetadataExtractor.extract(reader, checkProperties);
+        alfrescoModelMetadataExtracter.extract(reader, checkProperties);
        // Check the values
        assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
@@ -114,10 +150,128 @@ public class XmlMetadataExtracterTest extends TestCase
        // Pass it to the extracter
        PropertyMap checkProperties = new PropertyMap();
-        eclipseProjectMetadataExtractor.extract(reader, checkProperties);
+        eclipseProjectMetadataExtracter.extract(reader, checkProperties);
        // Check the values
        assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
        assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
    }
    /**
     * Checks that the root element name selector picks the expected extracter instance for
     * each of the sample documents and that it leaves the read channel closed.
     */
    public void testRootElementNameSelector() throws Exception
    {
        // Load the example files
        ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
        assertTrue(alfrescoModelReader.exists());
        ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
        assertTrue(eclipseProjectReader.exists());
        // Check with an alfresco model document
        MetadataExtracter alfrescoModelExtracter = rootElementNameMetadataExtracterSelector.getWorker(alfrescoModelReader);
        assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
        // assertSame reports both instances on failure, unlike assertTrue(a == b)
        assertSame("Incorrect extracter instance selected", alfrescoModelMetadataExtracter, alfrescoModelExtracter);
        assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
        // Check with an eclipse project document
        MetadataExtracter eclipseProjectExtracter = rootElementNameMetadataExtracterSelector.getWorker(eclipseProjectReader);
        assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
        assertSame("Incorrect extracter instance selected", eclipseProjectMetadataExtracter, eclipseProjectExtracter);
        assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
    }
    /**
     * Checks that the XPath selector picks the expected extracter instance for each of the
     * sample documents and that it leaves the read channel closed.
     */
    public void testXpathSelector() throws Exception
    {
        // Load the example files
        ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
        assertTrue(alfrescoModelReader.exists());
        ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
        assertTrue(eclipseProjectReader.exists());
        // Check with an alfresco model document
        MetadataExtracter alfrescoModelExtracter = xpathMetadataExtracterSelector.getWorker(alfrescoModelReader);
        assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
        // assertSame reports both instances on failure, unlike assertTrue(a == b)
        assertSame("Incorrect extracter instance selected", alfrescoModelMetadataExtracter, alfrescoModelExtracter);
        assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
        // Check with an eclipse project document
        MetadataExtracter eclipseProjectExtracter = xpathMetadataExtracterSelector.getWorker(eclipseProjectReader);
        assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
        assertSame("Incorrect extracter instance selected", eclipseProjectMetadataExtracter, eclipseProjectExtracter);
        assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
    }
    /**
     * Runs both sample documents through the XML metadata extracter and checks the
     * values that come back for each.
     */
    public void testXmlMetadataExtracter() throws Exception
    {
        // Both sample documents must be available before anything is extracted
        ContentReader modelReader = getReader(FILE_ALFRESCO_MODEL);
        assertTrue(modelReader.exists());
        ContentReader projectReader = getReader(FILE_ECLIPSE_PROJECT);
        assertTrue(projectReader.exists());

        // Alfresco Model document
        PropertyMap modelProperties = new PropertyMap();
        xmlMetadataExtracter.extract(modelReader, modelProperties);
        assertEquals("Gavin Cornwell", modelProperties.get(ContentModel.PROP_AUTHOR));
        assertEquals("fm:forummodel", modelProperties.get(ContentModel.PROP_TITLE));
        assertEquals("Forum Model", modelProperties.get(ContentModel.PROP_DESCRIPTION));

        // Eclipse Project document
        PropertyMap projectProperties = new PropertyMap();
        xmlMetadataExtracter.extract(projectReader, projectProperties);
        assertEquals("Repository", projectProperties.get(ContentModel.PROP_TITLE));
        assertEquals("JavaCC Nature", projectProperties.get(ContentModel.PROP_DESCRIPTION));
    }
    /**
     * Tests metadata extraction using an action with an EAGER MetadataExtracter for XML.
     * <p>
     * A content node is created with a title and description, the Alfresco model sample is
     * written as its content, the <tt>extract-metadata</tt> action executer is run against
     * it, and the node's title and description are then checked.
     */
    public void testLifecycleOfXmlMetadataExtraction() throws Exception
    {
        NodeService nodeService = serviceRegistry.getNodeService();
        ContentService contentService = serviceRegistry.getContentService();
        ActionExecuter actionExecuter = (ActionExecuter) ctx.getBean("extract-metadata");
        Action extractAction = new ActionImpl(null, GUID.generate(), SetPropertyValueActionExecuter.NAME, null);

        // Create the store on first use, then fetch its root either way
        StoreRef storeRef = new StoreRef("test", getName());
        if (!nodeService.exists(storeRef))
        {
            nodeService.createStore("test", getName());
        }
        NodeRef rootNodeRef = nodeService.getRootNode(storeRef);

        // Initial values that the extraction is expected to replace
        PropertyMap initialProperties = new PropertyMap();
        initialProperties.put(ContentModel.PROP_TITLE, "My title");
        initialProperties.put(ContentModel.PROP_DESCRIPTION, "My description");
        NodeRef contentNodeRef = nodeService.createNode(
                rootNodeRef,
                ContentModel.ASSOC_CHILDREN,
                QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, getName()),
                ContentModel.TYPE_CONTENT,
                initialProperties).getChildRef();

        // Write the sample model document as the node's content
        ContentReader modelReader = getReader(FILE_ALFRESCO_MODEL);
        assertTrue(modelReader.exists());
        ContentWriter contentWriter = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
        contentWriter.setEncoding("UTF-8");
        contentWriter.setMimetype(MimetypeMap.MIMETYPE_XML);
        contentWriter.putContent(modelReader);

        // Run the extraction
        actionExecuter.execute(extractAction, contentNodeRef);

        // The EAGER overwrite policy should have replaced the required properties
        String actualTitle = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_TITLE);
        String actualDescription = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_DESCRIPTION);
        assertEquals("fm:forummodel", actualTitle);
        assertEquals("Forum Model", actualDescription);
    }
 }
--- a/source/java/org/alfresco/repo/content/selector/ContentWorkerSelector.java
+++ b/source/java/org/alfresco/repo/content/selector/ContentWorkerSelector.java
@@ -22,8 +22,9 @@
 * the FLOSS exception, and it is also available here: 
 * http://www.alfresco.com/legal/licensing"
 */
-package org.alfresco.repo.content;
+package org.alfresco.repo.content.selector;
 import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
--- a/source/java/org/alfresco/repo/content/metadata/xml/RootElementNameMetadataExtracterSelector.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/RootElementNameMetadataExtracterSelector.java
@@ -22,7 +22,7 @@
 * the FLOSS exception, and it is also available here: 
 * http://www.alfresco.com/legal/licensing"
 */
-package org.alfresco.repo.content.metadata.xml;
+package org.alfresco.repo.content.selector;
 import java.io.InputStream;
 import java.util.Collections;
@@ -33,11 +33,11 @@ import java.util.Set;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
-import org.alfresco.repo.content.ContentWorkerSelector;
+import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.repo.content.metadata.MetadataExtracter;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.util.PropertyCheck;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.xml.sax.Attributes;
@@ -48,33 +48,38 @@ import org.xml.sax.helpers.DefaultHandler;
 * A selector that looks at the root node of an XML document to determine which worker to provide.
 * There are many ways to identify XML documents and this is probably the simplest.  Alternate
 * implementations might execute a series of xpath statements or look for specific namespace
- * declarations in the document.  The net result is the same, i.e. given an XML document, an
+ * declarations in the document.  The net result is the same, i.e. given an XML document, a
- * extracter is provided to the caller.
+ * worker is provided to the caller.
 * <p>
 * In this selector, there is no guarantee that the different extracters will generate the same
 * (or even nearly the same) metadata.  It is up to the configurer to ensure that if it is a
 * requirement, but otherwise each extracter is responsible for its own mappings.  Mostly, though,
 * a root node match will imply a structure that has the necessary metadata.
 * 
 * @since 2.1
 * @author Derek Hulley
 */
-public class RootElementNameMetadataExtracterSelector
+public class RootElementNameContentWorkerSelector<W extends ContentWorker>
        extends DefaultHandler
-        implements ContentWorkerSelector<MetadataExtracter>
+        implements ContentWorkerSelector<ContentWorker>
 {
-    private static Log logger = LogFactory.getLog(RootElementNameMetadataExtracterSelector.class);
+    private static Log logger = LogFactory.getLog(RootElementNameContentWorkerSelector.class);
    private SAXParserFactory saxParserFactory;
    private Set<String> supportedMimetypes;
-    private Map<String, MetadataExtracter> extractersByRootElementName;
+    private Map<String, W> workersByRootElementName;
-    public RootElementNameMetadataExtracterSelector()
+    public RootElementNameContentWorkerSelector()
    {
        saxParserFactory = SAXParserFactory.newInstance();
        supportedMimetypes = new HashSet<String>();
        supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
-        extractersByRootElementName = Collections.emptyMap();
+        workersByRootElementName = Collections.emptyMap();
    }
    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder(50);
        sb.append("RootElementNameContentWorkerSelector")
          .append("[ workers=").append(workersByRootElementName)
          .append("]");
        return sb.toString();
    }
    /**
@@ -89,26 +94,35 @@ public class RootElementNameMetadataExtracterSelector
    }
    /**
-     * Set the extractors to use.
+     * Set the workers to choose from.
     * 
-     * @param extracters            a map of {@linkplain MetadataExtracter} instances
+     * @param workers               a map of {@linkplain ContentWorker} instances
     *                              keyed by root element name
     */
-    public void setExtracters(Map<String, MetadataExtracter> extracters)
+    public void setWorkers(Map<String, W> workers)
    {
-        this.extractersByRootElementName = extracters;
+        this.workersByRootElementName = workers;
    }
    /**
-     * Performs a match of the root element name to find the correct extracter.
+     * Checks the configuration.
     */
-    public MetadataExtracter getWorker(ContentReader reader)
+    public void init()
    {
        PropertyCheck.mandatory(this, "workers", workersByRootElementName);
        PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
    }
    /**
     * Performs a match of the root element name to find the correct content worker.
     */
    public W getWorker(ContentReader reader)
    {
        if (!supportedMimetypes.contains(reader.getMimetype()))
        {
            return null;
        }
-        MetadataExtracter extracter = null;
+        W worker = null;
        InputStream is = null;
        String rootElementName = null;
        try
@@ -121,11 +135,15 @@ public class RootElementNameMetadataExtracterSelector
        catch (RootElementFoundException e)
        {
            rootElementName = e.getElementName();
-            extracter = extractersByRootElementName.get(rootElementName);
+            worker = workersByRootElementName.get(rootElementName);
        }
        catch (Throwable e)
        {
-            throw new ContentIOException("Failed to extract root element from XML document", e);
+            throw new ContentIOException("\n" +
                    "Failed to extract root element from XML document: \n" +
                    "   Reader:   " + reader + "\n" +
                    "   Selector: " + this,
                    e);
        }
        finally
        {
@@ -138,18 +156,18 @@ public class RootElementNameMetadataExtracterSelector
        if (logger.isDebugEnabled())
        {
            logger.debug("\n" +
-                    "Chosen metadata extracter for reader: \n" +
+                    "Chosen content worker for reader: \n" +
                    "   Reader:       " + reader + "\n" +
                    "   Root Element: " + rootElementName + "\n" +
-                    "   Extracter:    " + extracter);
+                    "   Worker:       " + worker);
        }
-        return extracter;
+        return worker;
    }
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
    {
-        throw new RootElementFoundException(localName);
+        throw new RootElementFoundException(qName);
    }
    /**
--- a/source/java/org/alfresco/repo/content/selector/XPathContentWorkerSelector.java
+++ b/source/java/org/alfresco/repo/content/selector/XPathContentWorkerSelector.java
@@ -0,0 +1,204 @@
 /*
 * Copyright (C) 2005-2007 Alfresco Software Limited.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * As a special exception to the terms and conditions of version 2.0 of 
 * the GPL, you may redistribute this Program in connection with Free/Libre 
 * and Open Source Software ("FLOSS") applications as described in Alfresco's 
 * FLOSS exception.  You should have received a copy of the text describing 
 * the FLOSS exception, and it is also available here: 
 * http://www.alfresco.com/legal/licensing"
 */
 package org.alfresco.repo.content.selector;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
 import org.alfresco.error.AlfrescoRuntimeException;
 import org.alfresco.repo.content.ContentWorker;
 import org.alfresco.repo.content.MimetypeMap;
 import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.util.PropertyCheck;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.w3c.dom.Document;
 /**
 * A selector that executes a set of XPath statements against the XML document to determine
 * which content worker to provide.  The XPath rules are simple, i.e. if an XML node is
 * found by the XPath statement, then it is considered to be a hit and the corresponding
 * worker is returned.
 * <p>
 * Currently, the only namespaces supported are those contained in the XML documents being
 * tested.
 * <p>
 * NOTE(review): a single {@link DocumentBuilder} is created in the constructor and reused by
 * every call to {@link #getWorker(ContentReader)}.  JAXP does not guarantee that a
 * <tt>DocumentBuilder</tt> is thread-safe, so confirm that an instance of this selector is
 * never used by more than one thread at a time.
 * 
 * @param <W>       the type of content worker provided by this selector
 * 
 * @since 2.1
 * @author Derek Hulley
 */
 public class XPathContentWorkerSelector<W extends ContentWorker> implements ContentWorkerSelector
 {
    private static final Log logger = LogFactory.getLog(XPathContentWorkerSelector.class);
    /** the parser used to build a DOM from the content being examined */
    private DocumentBuilder documentBuilder;
    /** the factory for the XPath evaluators that execute the configured statements */
    private XPathFactory xpathFactory;
    /** the mimetypes that this selector will attempt to parse */
    private Set<String> supportedMimetypes;
    /** the workers to choose from, keyed by the XPath statement that selects them */
    private Map<String, W> workersByXPath;
    /**
     * Creates the XML parser and XPath factory and defaults the supported mimetypes
     * to {@link MimetypeMap#MIMETYPE_XML}.
     * 
     * @throws AlfrescoRuntimeException     if the parser or XPath factory cannot be created
     */
    public XPathContentWorkerSelector()
    {
        try
        {
            documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            xpathFactory = XPathFactory.newInstance();
        }
        catch (Throwable e)
        {
            throw new AlfrescoRuntimeException("Failed to initialize XPathContentWorkerSelector", e);
        }
        supportedMimetypes = new HashSet<String>();
        supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
    }
    /**
     * @return      Returns the selector name followed by the configured map of workers
     */
    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder(50);
        sb.append("XPathContentWorkerSelector")
          .append("[ workers=").append(workersByXPath)
          .append("]");
        return sb.toString();
    }
    /**
     * Optionally set the mimetypes supported.  They must be XML formats that the chosen
     * parser will be able to handle.
     * 
     * @param supportedMimetypes        the list of mimetypes.  The default is <b>text/xml</b>.
     */
    public void setSupportedMimetypes(Set<String> supportedMimetypes)
    {
        this.supportedMimetypes = supportedMimetypes;
    }
    /**
     * Set the workers to use.  All the XPath statements provided must be compatible with
     * a return value of type {@linkplain XPathConstants#NODE NODE}.
     * <p>
     * The statements are executed in the iteration order of the map provided, so a map with
     * a predictable iteration order is required if the order of evaluation matters.
     * 
     * @param workers            a map of {@linkplain ContentWorker} instances
     *                           keyed by XPath statements
     */
    public void setWorkers(Map<String, W> workers)
    {
        this.workersByXPath = workers;
    }
    /**
     * Checks the configuration, i.e. that both the workers and the supported mimetypes
     * have been set.
     */
    public void init()
    {
        PropertyCheck.mandatory(this, "workers", workersByXPath);
        PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
    }
    /**
     * Execute the XPath statements, in order, against the document.  Any statements that fail
     * to run will be ignored.
     * 
     * @param reader            the reader giving access to the XML content
     * @return                  Returns the worker mapped to the first XPath statement that
     *                          found a node, or <tt>null</tt> if the mimetype of the reader
     *                          is not supported or if no statement found a node.  The
     *                          matched statement may itself be mapped to <tt>null</tt>.
     * @throws ContentIOException   if the content could not be read or parsed
     */
    public W getWorker(ContentReader reader)
    {
        if (!supportedMimetypes.contains(reader.getMimetype()))
        {
            return null;
        }
        W worker = null;
        InputStream is = null;
        String xpath = null;
        try
        {
            is = reader.getContentInputStream();
            Document doc = documentBuilder.parse(is);
            // Execute the statements
            Map.Entry<String, W> match = processDocument(doc);
            if (match != null)
            {
                // Keep the statement as well so that the debug output can report it
                xpath = match.getKey();
                worker = match.getValue();
            }
        }
        catch (Throwable e)
        {
            throw new ContentIOException("\n" +
                    "Failed to execute XPaths against XML document: \n" +
                    "   Reader:   " + reader + "\n" +
                    "   Selector: " + this,
                    e);
        }
        finally
        {
            if (is != null)
            {
                // Best effort: a failure to close must not mask the outcome of the selection
                try { is.close(); } catch (IOException e) {}
            }
        }
        // Done
        if (logger.isDebugEnabled())
        {
            logger.debug("\n" +
                    "Chosen content worker for reader: \n" +
                    "   Reader:       " + reader + "\n" +
                    "   XPath:        " + xpath + "\n" +
                    "   Worker:    " + worker);
        }
        return worker;
    }
    /**
     * Check the given document against the list of XPath statements provided.
     * 
     * @param doc               the XML document
     * @return                  Returns the first entry (XPath statement and content worker)
     *                          whose statement found a node or <tt>null</tt> if none did
     */
    private Map.Entry<String, W> processDocument(Document doc)
    {
        for (Map.Entry<String, W> entry : workersByXPath.entrySet())
        {
            try
            {
                // Execute the statement
                Object ret = xpathFactory.newXPath().evaluate(entry.getKey(), doc, XPathConstants.NODE);
                if (ret != null)
                {
                    // We found one
                    return entry;
                }
            }
            catch (XPathExpressionException e)
            {
                // We accept this and move on
            }
        }
        // Nothing found
        return null;
    }
 }
--- a/source/test-resources/xml-metadata/xml-metadata-test-context.xml
+++ b/source/test-resources/xml-metadata/xml-metadata-test-context.xml
@@ -47,4 +47,59 @@
      </property>
   </bean>
   <!--
      A selector that checks root element names.
      The 'workers' map is keyed by root element name and each value is the extracter bean
      to provide for documents with that root element.  The BOGUS_ROOT_ELEMENT key is
      mapped to a null worker.
   -->
   <bean
         id="extracter.xml.selector.RootElementSelector"
         class="org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector"
         init-method="init">
      <property name="workers">
         <map>
            <entry key="BOGUS_ROOT_ELEMENT">
               <null />
            </entry>
            <entry key="model">
               <ref bean="extracter.xml.AlfrescoModelMetadataExtracter" />
            </entry>
            <entry key="projectDescription">
               <ref bean="extracter.xml.EclipseProjectMetadataExtracter" />
            </entry>
         </map>
      </property>
   </bean>
   <!--
      A selector that executes XPath statements.
      The 'workers' map is keyed by XPath statement and each value is the extracter bean to
      provide when that statement finds a node in the document.  The '/my:test' statement
      is mapped to a null worker.
   -->
   <bean
         id="extracter.xml.selector.XPathSelector"
         class="org.alfresco.repo.content.selector.XPathContentWorkerSelector"
         init-method="init">
      <property name="workers">
         <map>
            <entry key="/my:test">
               <null />
            </entry>
            <entry key="/model[@name='fm:forummodel']">
               <ref bean="extracter.xml.AlfrescoModelMetadataExtracter" />
            </entry>
            <entry key="/projectDescription">
               <ref bean="extracter.xml.EclipseProjectMetadataExtracter" />
            </entry>
         </map>
      </property>
   </bean>
   <!--
      The wrapper XML metadata extracter.
      Only the XPath selector is listed under 'selectors'; the root element selector bean
      defined above is not referenced by this bean.
   -->
   <bean
         id="extracter.xml.XMLMetadataExtracter"
         class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter"
         parent="baseMetadataExtracter">
      <property name="overwritePolicy">
         <value>EAGER</value>
      </property>
      <property name="selectors">
         <list>
            <ref bean="extracter.xml.selector.XPathSelector" />
         </list>
      </property>
   </bean>
 </beans>