Test configurable XML metadata extractors.

This doesn't include the wrapper extractor yet, but there are two extractors that extract metadata from Eclipse Project XML and Alfresco Model XML files.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5980 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-07-24 17:32:48 +00:00 · 2007-06-15 15:11:54 +00:00
parent 94862d134e
commit f1b226a279
9 changed files with 314 additions and 30 deletions
--- a/source/java/org/alfresco/repo/content/metadata/xml/XPathMetadataExtracter.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/XPathMetadataExtracter.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2005 Jesper Steen Møller
+ * Copyright (C) 2005-2007 Alfresco Software Limited.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
@@ -42,6 +42,7 @@ import javax.xml.namespace.NamespaceContext;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
@@ -80,12 +81,12 @@ import org.w3c.dom.Document;
 *   </li>
 * </ul>
 * <p>
- * The mapping of document properties to XPaths must look as follows:
- * <pre>
- *    # Get the author
- *    author=/root/author@name
- * </pre>
+ * All values are extracted as text values and therefore all XPath statements must evaluate to a node
+ * that can be rendered as text.
 * 
+ * @see AbstractMappingMetadataExtracter#setMappingProperties(Properties)
+ * @see #setXpathMappingProperties(Properties)
+ * @since 2.1
 * @author Derek Hulley
 */
 public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter implements NamespaceContext
@@ -105,13 +106,27 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
    public XPathMetadataExtracter()
    {
        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
+        try
+        {
+            documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            xpathFactory = XPathFactory.newInstance();
+        }
+        catch (Throwable e)
+        {
+            throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
+        }
    }

    /** {@inheritDoc} */
    public String getNamespaceURI(String prefix)
    {
        ParameterCheck.mandatoryString("prefix", prefix);
-        return namespacesByPrefix.get(prefix);
+        String namespace = namespacesByPrefix.get(prefix);
+        if (namespace == null)
+        {
+            throw new AlfrescoRuntimeException("Prefix '" + prefix + "' is not associated with a namespace.");
+        }
+        return namespace;
    }

    /** {@inheritDoc} */
@@ -150,12 +165,11 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
     * The Xpath mapping is of the form:
     * <pre>
     * # Namespaces prefixes
-     * namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     * namespace.prefix.my=http://www....com/alfresco/1.0
     * 
     * # Mapping
-     * editor=/cm:some-xpath-1
-     * title=/my:some-xpath-2
+     * editor=/my:example-element/@my:editor
+     * title=/my:example-element/text()
     * </pre>
     */
    public void setXpathMappingProperties(Properties xpathMappingProperties)
@@ -169,16 +183,18 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
    protected void init()
    {
        PropertyCheck.mandatory(this, "xpathMappingProperties", xpathExpressionMapping);
-        try
-        {
-            documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-            xpathFactory = XPathFactory.newInstance();
-        }
-        catch (Throwable e)
-        {
-            throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
-        }
+        // Get the base class to set up its mappings
        super.init();
+        // Remove all XPath expressions that aren't going to be used
+        Map<String, Set<QName>> mapping = getMapping();
+        Set<String> xpathExpressionMappingKeys = new HashSet<String>(xpathExpressionMapping.keySet());
+        for (String xpathMappingKey : xpathExpressionMappingKeys)
+        {
+            if (!mapping.containsKey(xpathMappingKey))
+            {
+                xpathExpressionMapping.remove(xpathMappingKey);
+            }
+        }
    }

    /**
@@ -232,7 +248,7 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
            String documentProperty = element.getKey();
            XPathExpression xpathExpression = element.getValue();
            // Execute it
-            String value = xpathExpression.evaluate(document);
+            String value = (String) xpathExpression.evaluate(document, XPathConstants.STRING);
            // Put the value
            rawProperties.put(documentProperty, value);
        }
@@ -258,8 +274,6 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
                namespacesByPrefix.put(prefix, namespace);
            }
        }
-        // Get the mapping that will be applied by the base class
-        Map<String, Set<QName>> finalMapping = getMapping();
        // Create the mapping
        for (Map.Entry entry : xpathMappingProperties.entrySet())
        {
@@ -270,11 +284,6 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
                // Ignore these now
                continue;
            }
-            // If the property is not going to be mapped, then just ignore it too
-            if (!finalMapping.containsKey(documentProperty))
-            {
-                continue;
-            }
            // Construct the XPath
            XPath xpath = xpathFactory.newXPath();
            xpath.setNamespaceContext(this);
@@ -285,10 +294,12 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
            }
            catch (XPathExpressionException e)
            {
-                throw new AlfrescoRuntimeException(
-                        "Failed to path XPath expression: \n" +
+                throw new AlfrescoRuntimeException("\n" +
+                        "Failed to create XPath expression: \n" +
                        "   Document property: " + documentProperty + "\n" +
-                        "   XPath:             " + xpathStr);
+                        "   XPath:             " + xpathStr + "\n" +
+                        "   Error: " + e.getMessage(),
+                        e);
            }
            // Persist it
            xpathExpressionMapping.put(documentProperty, xpathExpression);
--- a/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracterTest.java
+++ b/source/java/org/alfresco/repo/content/metadata/xml/XmlMetadataExtracterTest.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2005-2007 Alfresco Software Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * As a special exception to the terms and conditions of version 2.0 of 
+ * the GPL, you may redistribute this Program in connection with Free/Libre 
+ * and Open Source Software ("FLOSS") applications as described in Alfresco's 
+ * FLOSS exception.  You should have received a copy of the text describing 
+ * the FLOSS exception, and it is also available here: 
+ * http://www.alfresco.com/legal/licensing
+ */
+package org.alfresco.repo.content.metadata.xml;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.net.URL;
+
+import junit.framework.TestCase;
+
+import org.alfresco.model.ContentModel;
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.filestore.FileContentReader;
+import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.util.PropertyMap;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+/**
+ * Tests various aspects of XML metadata extraction.
+ * 
+ * @see XPathMetadataExtracter
+ * 
+ * @author Derek Hulley
+ */
+public class XmlMetadataExtracterTest extends TestCase
+{
+    private static final String FILE_ALFRESCO_MODEL = "xml-metadata/alfresco-model-sample.xml";
+    private static final String FILE_ECLIPSE_PROJECT = "xml-metadata/eclipse-project-sample.xml";
+    
+    private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
+    private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);
+    
+    private XPathMetadataExtracter alfrescoModelMetadataExtractor;
+    private XPathMetadataExtracter eclipseProjectMetadataExtractor;
+
+    /**
+     * Get a reader for a file that should be on the classpath.
+     */
+    private static final ContentReader getReader(String fileName) throws FileNotFoundException
+    {
+        URL url = AbstractContentTransformerTest.class.getClassLoader().getResource(fileName);
+        if (url == null)
+        {
+            throw new FileNotFoundException("Could not find file on classpath: " + fileName);
+        }
+        File file = new File(url.getFile());
+        if (!file.exists())
+        {
+            throw new FileNotFoundException("Could not find file on classpath: " + fileName);
+        }
+        ContentReader reader = new FileContentReader(file);
+        reader.setMimetype(MimetypeMap.MIMETYPE_XML);
+        return reader;
+    }
+    
+    @Override
+    public void setUp() throws Exception
+    {
+        alfrescoModelMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
+        eclipseProjectMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
+    }
+
+    public void testSetUp()
+    {
+        assertNotNull(alfrescoModelMetadataExtractor);
+        assertNotNull(eclipseProjectMetadataExtractor);
+    }
+    
+    public void testExtractAlfresocModel() throws Exception
+    {
+        // Load the example file
+        ContentReader reader = getReader(FILE_ALFRESCO_MODEL);
+        assertTrue(reader.exists());
+        
+        // Pass it to the extracter
+        PropertyMap checkProperties = new PropertyMap();
+        alfrescoModelMetadataExtractor.extract(reader, checkProperties);
+        
+        // Check the values
+        assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
+        assertEquals("fm:forummodel", checkProperties.get(ContentModel.PROP_TITLE));
+        assertEquals("Forum Model", checkProperties.get(ContentModel.PROP_DESCRIPTION));
+    }
+    
+    public void testExtractEclipseProject() throws Exception
+    {
+        // Load the example file
+        ContentReader reader = getReader(FILE_ECLIPSE_PROJECT);
+        assertTrue(reader.exists());
+        
+        // Pass it to the extracter
+        PropertyMap checkProperties = new PropertyMap();
+        eclipseProjectMetadataExtractor.extract(reader, checkProperties);
+        
+        // Check the values
+        assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
+        assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
+    }
+}