mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-06-16 17:55:15 +00:00
XPath-based XML metadata extractor
- No tests - Simple root element name redirector Some comments fleshed out on the new mapping metadata extractor git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5969 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
parent
1b97517ce6
commit
f770bb0190
38
source/java/org/alfresco/repo/content/ContentWorker.java
Normal file
38
source/java/org/alfresco/repo/content/ContentWorker.java
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
/**
 * Marker interface for components that operate on content.
 * <p>
 * It declares no methods of its own.  Its purpose is to give the more specific
 * <i>worker</i> interfaces - metadata extractors, content transformers and so
 * forth - a common supertype.
 *
 * @see ContentWorkerSelector
 * @since 2.1
 * @author Derek Hulley
 */
public interface ContentWorker
{
    // Intentionally empty: this type only tags worker implementations.
}
|
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
|
||||
/**
|
||||
* An interface instances that are able to identify content based on the
|
||||
* {@linkplain ContentReader content reader}. This is specifically
|
||||
* aimed at extractors, transformers, injectors and similar classes.
|
||||
* <p>
|
||||
* The notion of supplying some type of worker looks a bit odd here, but
|
||||
* really an instance of this type will act as an optional factory. Also,
|
||||
* in the context of the calling class, the context and the generics will
|
||||
* identify exactly which type is returned by the factory.
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public interface ContentWorkerSelector<W extends ContentWorker>
|
||||
{
|
||||
/**
|
||||
* Provides an worker appropriate to the given content, if possible. The reader
|
||||
* should only be used if absolutely required. The caller should always request
|
||||
* {@linkplain ContentReader#getReader() a new reader} or check the
|
||||
* {@linkplain ContentReader#isClosed() reader's state}.
|
||||
*
|
||||
* @param reader the content reader, providing the actual stream metadata
|
||||
* and even the stream, if required.
|
||||
* @return Return a worker that can operate on the content, or <tt>null</tt>
|
||||
* if this identifier doesn't support the content.
|
||||
* @throws ContentIOException
|
||||
* if the search fails
|
||||
*/
|
||||
W getWorker(ContentReader reader);
|
||||
}
|
@ -254,6 +254,10 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
* be used to extract values from the documents. The metadata extraction becomes fully
|
||||
* configuration-driven, i.e. declaring further mappings will result in more values being
|
||||
* extracted from the documents.
|
||||
* <p>
|
||||
* Most extractors will not be using this method. For an example of its use, see the
|
||||
* {@linkplain OpenDocumentMetadataExtracter OpenDocument extractor}, which uses the mapping
|
||||
* to select specific user properties from a document.
|
||||
*/
|
||||
protected final Map<String, Set<QName>> getMapping()
|
||||
{
|
||||
@ -324,7 +328,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
for (Map.Entry entry : mappingProperties.entrySet())
|
||||
{
|
||||
String propertyName = (String) entry.getKey();
|
||||
if (propertyName.startsWith("namespace.prefix."))
|
||||
if (propertyName.startsWith(NAMESPACE_PROPERTY_PREFIX))
|
||||
{
|
||||
String prefix = propertyName.substring(17);
|
||||
String namespace = (String) entry.getValue();
|
||||
@ -677,15 +681,15 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
* system administrators can accurately determine how to best enhance or override the
|
||||
* default mapping.
|
||||
* <p>
|
||||
* If the default mapping is declared in a properties file, then the
|
||||
* {@link #readMappingProperties(String)} method can be used to quickly generate the
|
||||
* return value:
|
||||
* <pre>
|
||||
* protected Map<String, Set<QName>> getDefaultMapping()
|
||||
* If the default mapping is declared in a properties file other than the one named after
|
||||
* the class, then the {@link #readMappingProperties(String)} method can be used to quickly
|
||||
* generate the return value:
|
||||
* <pre><code>
|
||||
* protected Map<String, Set<QName>> getDefaultMapping()
|
||||
* {
|
||||
* return readMappingProperties(DEFAULT_MAPPING);
|
||||
* }
|
||||
* </pre>
|
||||
* </code></pre>
|
||||
* The map can also be created in code either statically or during the call.
|
||||
*
|
||||
* @return Returns the default, static mapping. It may not be null.
|
||||
|
@ -28,17 +28,20 @@ import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.repo.content.ContentWorker;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
* Interface for document property extracters.
|
||||
* <p>
|
||||
* Please pardon the incorrect spelling of <i>extractor</i>.
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public interface MetadataExtracter
|
||||
public interface MetadataExtracter extends ContentWorker
|
||||
{
|
||||
/**
|
||||
* An enumeration of functional property overwrite policies. These determine whether extracted properties are
|
||||
|
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata.xml;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import org.alfresco.repo.content.ContentWorkerSelector;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
/**
|
||||
* A selector that looks at the root node of an XML document to determine which worker to provide.
|
||||
* There are many ways to identify XML documents and this is probably the simplest. Alternate
|
||||
* implementations might execute a series of xpath statements or look for specific namespace
|
||||
* declarations in the document. The net result is the same, i.e. given an XML document, an
|
||||
* extracter is provided to the caller.
|
||||
* <p>
|
||||
* In this selector, there is no guarantee that the different extracters will generate the same
|
||||
* (or even nearly the same) metadata. It is up to the configurer to ensure that if it is a
|
||||
* requirement, but otherwise each extracter is responsible for its own mappings. Mostly, though,
|
||||
* a root node match will imply a structure that has the necessary metadata.
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class RootElementNameMetadataExtracterSelector
|
||||
extends DefaultHandler
|
||||
implements ContentWorkerSelector<MetadataExtracter>
|
||||
{
|
||||
private static Log logger = LogFactory.getLog(RootElementNameMetadataExtracterSelector.class);
|
||||
|
||||
private SAXParserFactory saxParserFactory;
|
||||
private Set<String> supportedMimetypes;
|
||||
private Map<String, MetadataExtracter> extractersByRootElementName;
|
||||
|
||||
public RootElementNameMetadataExtracterSelector()
|
||||
{
|
||||
saxParserFactory = SAXParserFactory.newInstance();
|
||||
supportedMimetypes = new HashSet<String>();
|
||||
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
||||
extractersByRootElementName = Collections.emptyMap();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the SAX parser factory to use to parse the incoming documents. If not set,
|
||||
* the default system-wide SAX parser factory is used.
|
||||
*
|
||||
* @param factoryClassName A {@link SAXParserFactory} class name
|
||||
*/
|
||||
public void setSAXParserFactoryClass(String factoryClassName)
|
||||
{
|
||||
try
|
||||
{
|
||||
saxParserFactory = SAXParserFactory.newInstance(
|
||||
factoryClassName,
|
||||
this.getClass().getClassLoader());
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new IllegalArgumentException("Unable to load SAX parser factory from class: " + factoryClassName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Optionally set the mimetypes supported. They must be XML formats that the chosen
|
||||
* parser will be able to handle.
|
||||
*
|
||||
* @param supportedMimetypes the list of mimetypes. The default is <b>text/xml</b>.
|
||||
*/
|
||||
public void setSupportedMimetypes(Set<String> supportedMimetypes)
|
||||
{
|
||||
this.supportedMimetypes = supportedMimetypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the extractors to use.
|
||||
*
|
||||
* @param extracters a map of {@linkplain MetadataExtracter} instances
|
||||
* keyed by root element name
|
||||
*/
|
||||
public void setExtracters(Map<String, MetadataExtracter> extracters)
|
||||
{
|
||||
this.extractersByRootElementName = extracters;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a match of the root element name to find the correct extracter.
|
||||
*/
|
||||
public MetadataExtracter getWorker(ContentReader reader)
|
||||
{
|
||||
/*
|
||||
* Is xml the only mimetype to support?
|
||||
*/
|
||||
if (!reader.getMimetype().equals(MimetypeMap.MIMETYPE_XML))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
MetadataExtracter extracter = null;
|
||||
InputStream is = null;
|
||||
String rootElementName = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
SAXParser saxParser = saxParserFactory.newSAXParser();
|
||||
saxParser.parse(is, this);
|
||||
// No match possible
|
||||
}
|
||||
catch (RootElementFoundException e)
|
||||
{
|
||||
rootElementName = e.getElementName();
|
||||
extracter = extractersByRootElementName.get(rootElementName);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new ContentIOException("Failed to extract root element from XML document", e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (Throwable e) {}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"Chosen metadata extracter for reader: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Root Element: " + rootElementName + "\n" +
|
||||
" Extracter: " + extracter);
|
||||
}
|
||||
return extracter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
|
||||
{
|
||||
throw new RootElementFoundException(localName);
|
||||
}
|
||||
|
||||
/**
|
||||
* An exception to break out of the XML parsing early
|
||||
*/
|
||||
private static class RootElementFoundException extends SAXException
|
||||
{
|
||||
private static final long serialVersionUID = 6845880422947198814L;
|
||||
private String elementName;
|
||||
public RootElementFoundException(String elementName)
|
||||
{
|
||||
super(elementName);
|
||||
this.elementName = elementName;
|
||||
}
|
||||
public String getElementName()
|
||||
{
|
||||
return elementName;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,302 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata.xml;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.xml.namespace.NamespaceContext;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.ParameterCheck;
|
||||
import org.alfresco.util.PropertyCheck;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.w3c.dom.Document;
|
||||
|
||||
/**
|
||||
* An extracter that pulls values from XML documents using configurable XPath
|
||||
* statements. It is not possible to list a default set of mappings - this is
|
||||
* down to the configuration only.
|
||||
* <p>
|
||||
* When an instance of this extracter is configured, XPath statements should be
|
||||
* provided to extract all the available metadata. The implementation is sensitive
|
||||
* to what is actually requested by the
|
||||
* {@linkplain AbstractMappingMetadataExtracter#setMapping(Map) configured mapping}
|
||||
* and will only perform the queries necessary to fulfill the requirements.
|
||||
* <p>
|
||||
* To summarize, there are two configurations required for this class:
|
||||
* <ul>
|
||||
* <li>
|
||||
* A mapping of all reasonable document properties to XPath statements.
|
||||
* See {@link AbstractMappingMetadataExtracter#setMappingProperties(java.util.Properties)}.
|
||||
* </li>
|
||||
* <li>
|
||||
* A mapping of document property names to Alfresco repository model QNames.
|
||||
* See {@link #setXPathMappingProperties(Properties).}
|
||||
* </li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The mapping of document properties to XPaths must look as follows:
|
||||
* <pre>
|
||||
* # Get the author
|
||||
* author=/root/author@name
|
||||
* </pre>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter implements NamespaceContext
|
||||
{
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_XML};
|
||||
|
||||
private static Log logger = LogFactory.getLog(XPathMetadataExtracter.class);
|
||||
|
||||
private DocumentBuilder documentBuilder;
|
||||
private XPathFactory xpathFactory;
|
||||
private Map<String, String> namespacesByPrefix;
|
||||
private Map<String, XPathExpression> xpathExpressionMapping;
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*/
|
||||
public XPathMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String getNamespaceURI(String prefix)
|
||||
{
|
||||
ParameterCheck.mandatoryString("prefix", prefix);
|
||||
return namespacesByPrefix.get(prefix);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String getPrefix(String namespaceURI)
|
||||
{
|
||||
ParameterCheck.mandatoryString("namespaceURI", namespaceURI);
|
||||
for (Map.Entry<String, String> entry : namespacesByPrefix.entrySet())
|
||||
{
|
||||
if (namespaceURI.equals(entry.getValue()))
|
||||
{
|
||||
return entry.getKey();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public Iterator getPrefixes(String namespaceURI)
|
||||
{
|
||||
ParameterCheck.mandatoryString("namespaceURI", namespaceURI);
|
||||
List<String> prefixes = new ArrayList<String>(2);
|
||||
for (Map.Entry<String, String> entry : namespacesByPrefix.entrySet())
|
||||
{
|
||||
if (namespaceURI.equals(entry.getValue()))
|
||||
{
|
||||
prefixes.add(entry.getKey());
|
||||
}
|
||||
}
|
||||
return prefixes.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the properties file that maps document properties to the XPath statements
|
||||
* necessary to retrieve them.
|
||||
* <p>
|
||||
* The Xpath mapping is of the form:
|
||||
* <pre>
|
||||
* # Namespaces prefixes
|
||||
* namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
* namespace.prefix.my=http://www....com/alfresco/1.0
|
||||
*
|
||||
* # Mapping
|
||||
* editor=/cm:some-xpath-1
|
||||
* title=/my:some-xpath-2
|
||||
* </pre>
|
||||
*/
|
||||
public void setXpathMappingProperties(Properties xpathMappingProperties)
|
||||
{
|
||||
namespacesByPrefix = new HashMap<String, String>(7);
|
||||
xpathExpressionMapping = new HashMap<String, XPathExpression>(17);
|
||||
readXPathMappingProperties(xpathMappingProperties);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init()
|
||||
{
|
||||
PropertyCheck.mandatory(this, "xpathMappingProperties", xpathExpressionMapping);
|
||||
try
|
||||
{
|
||||
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
||||
xpathFactory = XPathFactory.newInstance();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Failed to initialize XML metadata extractor", e);
|
||||
}
|
||||
super.init();
|
||||
}
|
||||
|
||||
/**
|
||||
* It is not possible to have any default mappings, but something has to be returned.
|
||||
*
|
||||
* @return Returns an empty map
|
||||
*/
|
||||
@Override
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
Document doc = documentBuilder.parse(is);
|
||||
Map<String, Serializable> rawProperties = processDocument(doc);
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"Extracted XML metadata: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Results: " + rawProperties);
|
||||
}
|
||||
return rawProperties;
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes all the necessary XPath statements to extract values.
|
||||
*/
|
||||
protected Map<String, Serializable> processDocument(Document document) throws Throwable
|
||||
{
|
||||
Map<String, Serializable> rawProperties = super.newRawMap();
|
||||
|
||||
// Execute all the XPaths that we saved
|
||||
for (Map.Entry<String, XPathExpression> element : xpathExpressionMapping.entrySet())
|
||||
{
|
||||
String documentProperty = element.getKey();
|
||||
XPathExpression xpathExpression = element.getValue();
|
||||
// Execute it
|
||||
String value = xpathExpression.evaluate(document);
|
||||
// Put the value
|
||||
rawProperties.put(documentProperty, value);
|
||||
}
|
||||
// Done
|
||||
return rawProperties;
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility method to convert mapping properties to the Map form.
|
||||
*
|
||||
* @see #setMappingProperties(Properties)
|
||||
*/
|
||||
protected void readXPathMappingProperties(Properties xpathMappingProperties)
|
||||
{
|
||||
// Get the namespaces
|
||||
for (Map.Entry entry : xpathMappingProperties.entrySet())
|
||||
{
|
||||
String propertyName = (String) entry.getKey();
|
||||
if (propertyName.startsWith("namespace.prefix."))
|
||||
{
|
||||
String prefix = propertyName.substring(17);
|
||||
String namespace = (String) entry.getValue();
|
||||
namespacesByPrefix.put(prefix, namespace);
|
||||
}
|
||||
}
|
||||
// Get the mapping that will be applied by the base class
|
||||
Map<String, Set<QName>> finalMapping = getMapping();
|
||||
// Create the mapping
|
||||
for (Map.Entry entry : xpathMappingProperties.entrySet())
|
||||
{
|
||||
String documentProperty = (String) entry.getKey();
|
||||
String xpathStr = (String) entry.getValue();
|
||||
if (documentProperty.startsWith(NAMESPACE_PROPERTY_PREFIX))
|
||||
{
|
||||
// Ignore these now
|
||||
continue;
|
||||
}
|
||||
// If the property is not going to be mapped, then just ignore it too
|
||||
if (!finalMapping.containsKey(documentProperty))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Construct the XPath
|
||||
XPath xpath = xpathFactory.newXPath();
|
||||
xpath.setNamespaceContext(this);
|
||||
XPathExpression xpathExpression = null;
|
||||
try
|
||||
{
|
||||
xpathExpression = xpath.compile(xpathStr);
|
||||
}
|
||||
catch (XPathExpressionException e)
|
||||
{
|
||||
throw new AlfrescoRuntimeException(
|
||||
"Failed to path XPath expression: \n" +
|
||||
" Document property: " + documentProperty + "\n" +
|
||||
" XPath: " + xpathStr);
|
||||
}
|
||||
// Persist it
|
||||
xpathExpressionMapping.put(documentProperty, xpathExpression);
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Added mapping from " + documentProperty + " to " + xpathExpression);
|
||||
}
|
||||
}
|
||||
// Done
|
||||
}
|
||||
}
|
@ -26,6 +26,7 @@ package org.alfresco.repo.content.transform;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.repo.content.ContentWorker;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
@ -35,7 +36,7 @@ import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
*
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public interface ContentTransformer
|
||||
public interface ContentTransformer extends ContentWorker
|
||||
{
|
||||
/**
|
||||
* Provides the approximate accuracy with which this transformer can
|
||||
|
Loading…
x
Reference in New Issue
Block a user