Test configurable XML metadata extractors.

This doesn't include the wrapper extractor yet, but there are two extractors that extract metadata from Eclipse Project XML and Alfresco Model XML files.


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5980 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2007-06-15 15:11:54 +00:00
parent 94862d134e
commit f1b226a279
9 changed files with 314 additions and 30 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005-2007 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -42,6 +42,7 @@ import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
@@ -80,12 +81,12 @@ import org.w3c.dom.Document;
* </li>
* </ul>
* <p>
* The mapping of document properties to XPaths must look as follows:
* <pre>
* # Get the author
* author=/root/author/@name
* </pre>
* All values are extracted as text values and therefore all XPath statements must evaluate to a node
* that can be rendered as text.
*
* @see AbstractMappingMetadataExtracter#setMappingProperties(Properties)
* @see #setXpathMappingProperties(Properties)
* @since 2.1
* @author Derek Hulley
*/
public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter implements NamespaceContext
@@ -105,13 +106,27 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
public XPathMetadataExtracter()
{
// Pass the set of SUPPORTED_MIMETYPES to the base class constructor
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
try
{
// Create the XML document builder and the XPath factory once, at construction time
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
xpathFactory = XPathFactory.newInstance();
}
catch (Throwable e)
{
// Any failure to create the XML helpers is rethrown as a runtime exception, keeping the cause
throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
}
}
/** {@inheritDoc} */
public String getNamespaceURI(String prefix)
{
ParameterCheck.mandatoryString("prefix", prefix);
return namespacesByPrefix.get(prefix);
String namespace = namespacesByPrefix.get(prefix);
if (namespace == null)
{
throw new AlfrescoRuntimeException("Prefix '" + prefix + "' is not associated with a namespace.");
}
return namespace;
}
/** {@inheritDoc} */
@@ -150,12 +165,11 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
* The Xpath mapping is of the form:
* <pre>
* # Namespaces prefixes
* namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
* namespace.prefix.my=http://www....com/alfresco/1.0
*
* # Mapping
* editor=/cm:some-xpath-1
* title=/my:some-xpath-2
* editor=/my:example-element/@cm:editor
* title=/my:example-element/text()
* </pre>
*/
public void setXpathMappingProperties(Properties xpathMappingProperties)
@@ -169,16 +183,18 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
protected void init()
{
PropertyCheck.mandatory(this, "xpathMappingProperties", xpathExpressionMapping);
try
{
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
xpathFactory = XPathFactory.newInstance();
}
catch (Throwable e)
{
throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
}
// Get the base class to set up its mappings
super.init();
// Remove all XPath expressions that aren't going to be used
Map<String, Set<QName>> mapping = getMapping();
Set<String> xpathExpressionMappingKeys = new HashSet<String>(xpathExpressionMapping.keySet());
for (String xpathMappingKey : xpathExpressionMappingKeys)
{
if (!mapping.containsKey(xpathMappingKey))
{
xpathExpressionMapping.remove(xpathMappingKey);
}
}
}
/**
@@ -232,7 +248,7 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
String documentProperty = element.getKey();
XPathExpression xpathExpression = element.getValue();
// Execute it
String value = xpathExpression.evaluate(document);
String value = (String) xpathExpression.evaluate(document, XPathConstants.STRING);
// Put the value
rawProperties.put(documentProperty, value);
}
@@ -258,8 +274,6 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
namespacesByPrefix.put(prefix, namespace);
}
}
// Get the mapping that will be applied by the base class
Map<String, Set<QName>> finalMapping = getMapping();
// Create the mapping
for (Map.Entry entry : xpathMappingProperties.entrySet())
{
@@ -270,11 +284,6 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
// Ignore these now
continue;
}
// If the property is not going to be mapped, then just ignore it too
if (!finalMapping.containsKey(documentProperty))
{
continue;
}
// Construct the XPath
XPath xpath = xpathFactory.newXPath();
xpath.setNamespaceContext(this);
@@ -285,10 +294,12 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
}
catch (XPathExpressionException e)
{
throw new AlfrescoRuntimeException(
"Failed to path XPath expression: \n" +
throw new AlfrescoRuntimeException("\n" +
"Failed to create XPath expression: \n" +
" Document property: " + documentProperty + "\n" +
" XPath: " + xpathStr);
" XPath: " + xpathStr + "\n" +
" Error: " + e.getMessage(),
e);
}
// Persist it
xpathExpressionMapping.put(documentProperty, xpathExpression);

View File

@@ -0,0 +1,123 @@
/*
* Copyright (C) 2005-2007 Alfresco Software Limited.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have received a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing
*/
package org.alfresco.repo.content.metadata.xml;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.net.URL;
import junit.framework.TestCase;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.PropertyMap;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Tests various aspects of XML metadata extraction.
 * <p>
 * Two extracter beans are fetched from the Spring context at {@link #CTX_LOCATION}
 * and run against sample XML files loaded from the classpath.
 *
 * @see XPathMetadataExtracter
 *
 * @author Derek Hulley
 */
public class XmlMetadataExtracterTest extends TestCase
{
    private static final String FILE_ALFRESCO_MODEL = "xml-metadata/alfresco-model-sample.xml";
    private static final String FILE_ECLIPSE_PROJECT = "xml-metadata/eclipse-project-sample.xml";

    private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
    private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);

    private XPathMetadataExtracter alfrescoModelMetadataExtractor;
    private XPathMetadataExtracter eclipseProjectMetadataExtractor;

    /**
     * Converts a classpath resource URL to a local file.
     * <p>
     * {@link URL#getFile()} returns the path in its escaped form (e.g. <tt>%20</tt> for a space),
     * so the URL is first resolved via its URI, which decodes the path.  If the URL cannot be
     * expressed as a hierarchical <tt>file:</tt> URI, the raw path is used instead and the
     * caller's existence check decides the outcome.
     *
     * @param url       the resource URL, never <tt>null</tt>
     * @return          the file that the URL refers to (it may not exist)
     */
    private static File toFile(URL url)
    {
        try
        {
            return new File(url.toURI());
        }
        catch (URISyntaxException e)
        {
            // The URL is not a well-formed URI: use the raw path
            return new File(url.getFile());
        }
        catch (IllegalArgumentException e)
        {
            // Not a hierarchical file: URI (e.g. a resource inside a jar): use the raw path
            return new File(url.getFile());
        }
    }

    /**
     * Get a reader for a file that should be on the classpath.
     *
     * @param fileName  the classpath-relative name of the resource
     * @return          a reader onto the file with the mimetype set to XML
     * @throws FileNotFoundException    if the resource is not on the classpath or
     *                                  does not resolve to an existing file
     */
    private static ContentReader getReader(String fileName) throws FileNotFoundException
    {
        URL url = AbstractContentTransformerTest.class.getClassLoader().getResource(fileName);
        if (url == null)
        {
            throw new FileNotFoundException("Could not find file on classpath: " + fileName);
        }
        File file = toFile(url);
        if (!file.exists())
        {
            throw new FileNotFoundException("Could not find file on classpath: " + fileName);
        }
        ContentReader reader = new FileContentReader(file);
        reader.setMimetype(MimetypeMap.MIMETYPE_XML);
        return reader;
    }

    /**
     * Fetches the two extracter beans under test from the shared application context.
     */
    @Override
    public void setUp() throws Exception
    {
        alfrescoModelMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
        eclipseProjectMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
    }

    /**
     * Checks that both extracter beans were found in the context.
     */
    public void testSetUp()
    {
        assertNotNull(alfrescoModelMetadataExtractor);
        assertNotNull(eclipseProjectMetadataExtractor);
    }

    /**
     * Extracts author, title and description from the sample Alfresco model file.
     */
    public void testExtractAlfresocModel() throws Exception
    {
        // Load the example file
        ContentReader reader = getReader(FILE_ALFRESCO_MODEL);
        assertTrue(reader.exists());

        // Pass it to the extracter
        PropertyMap checkProperties = new PropertyMap();
        alfrescoModelMetadataExtractor.extract(reader, checkProperties);

        // Check the values
        assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
        assertEquals("fm:forummodel", checkProperties.get(ContentModel.PROP_TITLE));
        assertEquals("Forum Model", checkProperties.get(ContentModel.PROP_DESCRIPTION));
    }

    /**
     * Extracts title and description from the sample Eclipse project file.
     */
    public void testExtractEclipseProject() throws Exception
    {
        // Load the example file
        ContentReader reader = getReader(FILE_ECLIPSE_PROJECT);
        assertTrue(reader.exists());

        // Pass it to the extracter
        PropertyMap checkProperties = new PropertyMap();
        eclipseProjectMetadataExtractor.extract(reader, checkProperties);

        // Check the values
        assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
        assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
    }
}