mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-09-17 14:21:39 +00:00
XML metadata extraction with sample.
Added tests into build. This is now ready for testing, comments and suggestions. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6056 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -0,0 +1,95 @@
|
|||||||
|
<?xml version='1.0' encoding='UTF-8'?>
|
||||||
|
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Sample configuration of an XmlMetadataExtracter.
|
||||||
|
|
||||||
|
This shows how XML metadata extraction can be set up to extract metadata from different
|
||||||
|
formats of XML.
|
||||||
|
|
||||||
|
Since: 2.1
|
||||||
|
Author: Derek Hulley
|
||||||
|
-->
|
||||||
|
<beans>
|
||||||
|
|
||||||
|
<!-- An extractor that operates on Alfresco Model XML -->
|
||||||
|
<bean id="extracter.xml.sample.AlfrescoModelMetadataExtracter"
|
||||||
|
class="org.alfresco.repo.content.metadata.xml.XPathMetadataExtracter"
|
||||||
|
parent="baseMetadataExtracter"
|
||||||
|
init-method="init" >
|
||||||
|
<property name="mappingProperties">
|
||||||
|
<!--
|
||||||
|
The properties can also be specified using a properties file on the classpath, e.g.:
|
||||||
|
<bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
|
||||||
|
<property name="location">
|
||||||
|
<value>classpath:alfresco/extension/xml-metadata/AlfrescoModel-xpath-mappings.properties</value>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
-->
|
||||||
|
<bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
|
||||||
|
<property name="properties">
|
||||||
|
<props>
|
||||||
|
<prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
|
||||||
|
<prop key="author">cm:author</prop>
|
||||||
|
<prop key="title">cm:title</prop>
|
||||||
|
<prop key="description">cm:description</prop>
|
||||||
|
</props>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
</property>
|
||||||
|
<property name="xpathMappingProperties">
|
||||||
|
<bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
|
||||||
|
<property name="properties">
|
||||||
|
<props>
|
||||||
|
<prop key="namespace.prefix.fm">http://www.alfresco.org/model/forum/1.0</prop>
|
||||||
|
<prop key="author">/model/author/text()</prop>
|
||||||
|
<prop key="title">/model/@name</prop>
|
||||||
|
<prop key="description">/model/description/text()</prop>
|
||||||
|
<prop key="version">/model/version/text()</prop>
|
||||||
|
</props>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This selector examines the XML documents, executing the given XPath statements until a
|
||||||
|
result is found.
|
||||||
|
-->
|
||||||
|
<bean
|
||||||
|
id="extracter.xml.sample.selector.XPathSelector"
|
||||||
|
class="org.alfresco.repo.content.selector.XPathContentWorkerSelector"
|
||||||
|
init-method="init">
|
||||||
|
<property name="workers">
|
||||||
|
<map>
|
||||||
|
<entry key="/my:test">
|
||||||
|
<null />
|
||||||
|
</entry>
|
||||||
|
<entry key="/model">
|
||||||
|
<ref bean="extracter.xml.sample.AlfrescoModelMetadataExtracter" />
|
||||||
|
</entry>
|
||||||
|
</map>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This is the face of the XML metadata extraction. It passes the XML document to each of
|
||||||
|
the selectors, until one of them gives back a MetadataExtracter, which is then used as
|
||||||
|
normal to extract the values. The overwrite policy of the embedded extracters has no
|
||||||
|
effect. It is only this extracter's policy that is used.
|
||||||
|
-->
|
||||||
|
<bean
|
||||||
|
id="extracter.xml.sample.XMLMetadataExtracter"
|
||||||
|
class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter"
|
||||||
|
parent="baseMetadataExtracter">
|
||||||
|
<property name="overwritePolicy">
|
||||||
|
<value>EAGER</value>
|
||||||
|
</property>
|
||||||
|
<property name="selectors">
|
||||||
|
<list>
|
||||||
|
<ref bean="extracter.xml.sample.selector.XPathSelector" />
|
||||||
|
</list>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
</beans>
|
@@ -93,7 +93,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
|
|
||||||
private MetadataExtracterRegistry registry;
|
private MetadataExtracterRegistry registry;
|
||||||
private MimetypeService mimetypeService;
|
private MimetypeService mimetypeService;
|
||||||
private long extractionTime;
|
|
||||||
private boolean initialized;
|
private boolean initialized;
|
||||||
|
|
||||||
private Set<String> supportedMimetypes;
|
private Set<String> supportedMimetypes;
|
||||||
@@ -101,12 +100,23 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
private Map<String, Set<QName>> mapping;
|
private Map<String, Set<QName>> mapping;
|
||||||
private boolean inheritDefaultMapping;
|
private boolean inheritDefaultMapping;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default constructor. If this is called, then {@link #isSupported(String)} should
|
||||||
|
* be implemented. This is useful when the list of supported mimetypes is not known
|
||||||
|
* when the instance is constructed. Alternatively, once the set becomes known, call
|
||||||
|
* {@link #setSupportedMimetypes(Collection)}.
|
||||||
|
*
|
||||||
|
* @see #isSupported(String)
|
||||||
|
* @see #setSupportedMimetypes(Collection)
|
||||||
|
*/
|
||||||
protected AbstractMappingMetadataExtracter()
|
protected AbstractMappingMetadataExtracter()
|
||||||
{
|
{
|
||||||
this(Collections.<String>emptySet());
|
this(Collections.<String>emptySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Constructor that can be used when the list of supported mimetypes is known up front.
|
||||||
|
*
|
||||||
* @param supportedMimetypes the set of mimetypes supported by default
|
* @param supportedMimetypes the set of mimetypes supported by default
|
||||||
*/
|
*/
|
||||||
protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes)
|
protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes)
|
||||||
@@ -179,13 +189,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param overwritePolicy the policy to apply when there are existing system properties
|
* Set the policy to use when existing values are encountered. Depending on how the extracter
|
||||||
|
* is called, this may not be relevant, i.e. an empty map of existing properties may be passed
|
||||||
|
* in by the client code, which may follow its own overwrite strategy.
|
||||||
|
*
|
||||||
|
* @param overwritePolicy the policy to apply when there are existing system properties
|
||||||
*/
|
*/
|
||||||
public void setOverwritePolicy(OverwritePolicy overwritePolicy)
|
public void setOverwritePolicy(OverwritePolicy overwritePolicy)
|
||||||
{
|
{
|
||||||
this.overwritePolicy = overwritePolicy;
|
this.overwritePolicy = overwritePolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the policy to use when existing values are encountered. Depending on how the extracer
|
||||||
|
* is called, this may not be relevant, i.e an empty map of existing properties may be passed
|
||||||
|
* in by the client code, which may follow its own overwrite strategy.
|
||||||
|
*
|
||||||
|
* @param overwritePolicyStr the policy to apply when there are existing system properties
|
||||||
|
*/
|
||||||
|
public void setOverwritePolicy(String overwritePolicyStr)
|
||||||
|
{
|
||||||
|
this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set if the property mappings augment or override the mapping generically provided by the
|
* Set if the property mappings augment or override the mapping generically provided by the
|
||||||
* extracter implementation. The default is <tt>false</tt>, i.e. any mapping set completely
|
* extracter implementation. The default is <tt>false</tt>, i.e. any mapping set completely
|
||||||
@@ -410,10 +436,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
{
|
{
|
||||||
registry.register(this);
|
registry.register(this);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
logger.warn("No registry provided. Not registering: " + this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -466,7 +488,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
public long getExtractionTime()
|
public long getExtractionTime()
|
||||||
{
|
{
|
||||||
return extractionTime;
|
return 1000L;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -510,7 +532,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
|||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
public final Map<QName, Serializable> extract(
|
public Map<QName, Serializable> extract(
|
||||||
ContentReader reader,
|
ContentReader reader,
|
||||||
OverwritePolicy overwritePolicy,
|
OverwritePolicy overwritePolicy,
|
||||||
Map<QName, Serializable> destination,
|
Map<QName, Serializable> destination,
|
||||||
|
@@ -222,6 +222,8 @@ public interface MetadataExtracter extends ContentWorker
|
|||||||
* reliant transformers will be used for a specific extraction.
|
* reliant transformers will be used for a specific extraction.
|
||||||
*
|
*
|
||||||
* @return Returns the approximate number of milliseconds per transformation
|
* @return Returns the approximate number of milliseconds per transformation
|
||||||
|
*
|
||||||
|
* @deprecated Generally not useful or used. Extraction is normally specifically configured.
|
||||||
*/
|
*/
|
||||||
public long getExtractionTime();
|
public long getExtractionTime();
|
||||||
|
|
||||||
|
@@ -145,7 +145,6 @@ public class MetadataExtracterRegistry
|
|||||||
*/
|
*/
|
||||||
private MetadataExtracter findBestExtracter(String sourceMimetype)
|
private MetadataExtracter findBestExtracter(String sourceMimetype)
|
||||||
{
|
{
|
||||||
long bestTime = Long.MAX_VALUE;
|
|
||||||
logger.debug("Finding best extracter for " + sourceMimetype);
|
logger.debug("Finding best extracter for " + sourceMimetype);
|
||||||
|
|
||||||
MetadataExtracter bestExtracter = null;
|
MetadataExtracter bestExtracter = null;
|
||||||
@@ -157,12 +156,7 @@ public class MetadataExtracterRegistry
|
|||||||
// extraction not achievable
|
// extraction not achievable
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
long time = ext.getExtractionTime();
|
bestExtracter = ext;
|
||||||
if (time < bestTime)
|
|
||||||
{
|
|
||||||
bestExtracter = ext;
|
|
||||||
bestTime = time;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return bestExtracter;
|
return bestExtracter;
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,191 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
|
||||||
|
* As a special exception to the terms and conditions of version 2.0 of
|
||||||
|
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||||
|
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||||
|
* FLOSS exception. You should have recieved a copy of the text describing
|
||||||
|
* the FLOSS exception, and it is also available here:
|
||||||
|
* http://www.alfresco.com/legal/licensing"
|
||||||
|
*/
|
||||||
|
package org.alfresco.repo.content.metadata.xml;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.alfresco.repo.content.selector.ContentWorkerSelector;
|
||||||
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
|
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
|
||||||
|
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||||
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
import org.alfresco.service.namespace.QName;
|
||||||
|
import org.alfresco.util.PropertyCheck;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A metadata extractor that selects an appropriate worker for the extraction.
|
||||||
|
* <p>
|
||||||
|
* The {@linkplain #setSelectors(List) selectors} are used to find an extracter most
|
||||||
|
* appropriate for a given XML document. The chosen extracter is then asked to extract
|
||||||
|
* the values, passing through the {@linkplain MetadataExtracter.OverwritePolicy overwrite policy}
|
||||||
|
* as {@linkplain #setOverwritePolicy(String)} on this instance. The overwrite policy of the
|
||||||
|
* embedded extracters is not relevant unless they are used separately in another context.
|
||||||
|
*
|
||||||
|
* @see ContentWorkerSelector
|
||||||
|
* @see MetadataExtracter
|
||||||
|
*
|
||||||
|
* @since 2.1
|
||||||
|
* @author Derek Hulley
|
||||||
|
*/
|
||||||
|
public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||||
|
{
|
||||||
|
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_XML};
|
||||||
|
|
||||||
|
private static Log logger = LogFactory.getLog(XPathMetadataExtracter.class);
|
||||||
|
|
||||||
|
private List<ContentWorkerSelector<MetadataExtracter>> selectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default constructor
|
||||||
|
*/
|
||||||
|
public XmlMetadataExtracter()
|
||||||
|
{
|
||||||
|
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the list of metadata selectors to use to find the extracter to use, given
|
||||||
|
* some content. The evaluations are done in the order that they occur in the
|
||||||
|
* list.
|
||||||
|
*
|
||||||
|
* @param selectors A list of selectors
|
||||||
|
*/
|
||||||
|
public void setSelectors(List<ContentWorkerSelector<MetadataExtracter>> selectors)
|
||||||
|
{
|
||||||
|
this.selectors = selectors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void init()
|
||||||
|
{
|
||||||
|
PropertyCheck.mandatory(this, "selectors", selectors);
|
||||||
|
// Get the base class to set up its mappings
|
||||||
|
super.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It is not possible to have any default mappings, but something has to be returned.
|
||||||
|
*
|
||||||
|
* @return Returns an empty map
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Map<String, Set<QName>> getDefaultMapping()
|
||||||
|
{
|
||||||
|
return Collections.emptyMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Selects and extracter to perform the work and redirects to it.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Map<QName, Serializable> extract(
|
||||||
|
ContentReader reader,
|
||||||
|
OverwritePolicy overwritePolicy,
|
||||||
|
Map<QName, Serializable> destination,
|
||||||
|
Map<String, Set<QName>> mapping)
|
||||||
|
{
|
||||||
|
MetadataExtracter extracter = null;
|
||||||
|
// Select a worker
|
||||||
|
for (ContentWorkerSelector<MetadataExtracter> selector : selectors)
|
||||||
|
{
|
||||||
|
ContentReader spawnedReader = reader.getReader();
|
||||||
|
try
|
||||||
|
{
|
||||||
|
extracter = selector.getWorker(spawnedReader);
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
if (reader.isChannelOpen())
|
||||||
|
{
|
||||||
|
logger.error("Content reader not closed by MetadataExtractor selector: \n" +
|
||||||
|
" reader: " + reader + "\n" +
|
||||||
|
" selector: " + selector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Just take the first successful one
|
||||||
|
if (extracter != null)
|
||||||
|
{
|
||||||
|
if (logger.isDebugEnabled())
|
||||||
|
{
|
||||||
|
logger.debug("\n" +
|
||||||
|
"Found metadata extracter to process XML document: \n" +
|
||||||
|
" Selector: " + selector + "\n" +
|
||||||
|
" Document: " + reader);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Map<QName, Serializable> modifiedProperties = null;
|
||||||
|
// Did we find anything?
|
||||||
|
if (extracter == null)
|
||||||
|
{
|
||||||
|
// There will be no properties extracted
|
||||||
|
modifiedProperties = Collections.emptyMap();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// An extractor was selected
|
||||||
|
try
|
||||||
|
{
|
||||||
|
modifiedProperties = extracter.extract(reader, overwritePolicy, destination, mapping);
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
if (reader.isChannelOpen())
|
||||||
|
{
|
||||||
|
logger.error("Content reader not closed by MetadataExtractor: \n" +
|
||||||
|
" Reader: " + reader + "\n" +
|
||||||
|
" extracter: " + extracter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Done
|
||||||
|
if (logger.isDebugEnabled())
|
||||||
|
{
|
||||||
|
logger.debug("\n" +
|
||||||
|
"XML metadata extractor redirected: \n" +
|
||||||
|
" Reader: " + reader + "\n" +
|
||||||
|
" Extracter: " + extracter + "\n" +
|
||||||
|
" Extracted: " + modifiedProperties);
|
||||||
|
}
|
||||||
|
return modifiedProperties;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is not required as the
|
||||||
|
*/
|
||||||
|
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||||
|
{
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
@@ -31,10 +31,27 @@ import java.net.URL;
|
|||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.alfresco.model.ContentModel;
|
import org.alfresco.model.ContentModel;
|
||||||
|
import org.alfresco.repo.action.ActionImpl;
|
||||||
|
import org.alfresco.repo.action.executer.ActionExecuter;
|
||||||
|
import org.alfresco.repo.action.executer.SetPropertyValueActionExecuter;
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||||
|
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||||
|
import org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector;
|
||||||
|
import org.alfresco.repo.content.selector.XPathContentWorkerSelector;
|
||||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||||
|
import org.alfresco.repo.security.authentication.AuthenticationComponent;
|
||||||
|
import org.alfresco.service.ServiceRegistry;
|
||||||
|
import org.alfresco.service.cmr.action.Action;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
import org.alfresco.service.cmr.repository.ContentService;
|
||||||
|
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||||
|
import org.alfresco.service.cmr.repository.NodeRef;
|
||||||
|
import org.alfresco.service.cmr.repository.NodeService;
|
||||||
|
import org.alfresco.service.cmr.repository.StoreRef;
|
||||||
|
import org.alfresco.service.namespace.NamespaceService;
|
||||||
|
import org.alfresco.service.namespace.QName;
|
||||||
|
import org.alfresco.util.GUID;
|
||||||
import org.alfresco.util.PropertyMap;
|
import org.alfresco.util.PropertyMap;
|
||||||
import org.springframework.context.ApplicationContext;
|
import org.springframework.context.ApplicationContext;
|
||||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||||
@@ -54,8 +71,13 @@ public class XmlMetadataExtracterTest extends TestCase
|
|||||||
private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
|
private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
|
||||||
private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);
|
private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);
|
||||||
|
|
||||||
private XPathMetadataExtracter alfrescoModelMetadataExtractor;
|
private ServiceRegistry serviceRegistry;
|
||||||
private XPathMetadataExtracter eclipseProjectMetadataExtractor;
|
private AuthenticationComponent authenticationComponent;
|
||||||
|
private XPathMetadataExtracter alfrescoModelMetadataExtracter;
|
||||||
|
private XPathMetadataExtracter eclipseProjectMetadataExtracter;
|
||||||
|
private RootElementNameContentWorkerSelector<MetadataExtracter> rootElementNameMetadataExtracterSelector;
|
||||||
|
private XPathContentWorkerSelector<MetadataExtracter> xpathMetadataExtracterSelector;
|
||||||
|
private XmlMetadataExtracter xmlMetadataExtracter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a reader for a file that should be on the classpath.
|
* Get a reader for a file that should be on the classpath.
|
||||||
@@ -78,16 +100,30 @@ public class XmlMetadataExtracterTest extends TestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
public void setUp() throws Exception
|
public void setUp() throws Exception
|
||||||
{
|
{
|
||||||
alfrescoModelMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
|
serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
|
||||||
eclipseProjectMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
|
authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
|
||||||
|
alfrescoModelMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
|
||||||
|
eclipseProjectMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
|
||||||
|
rootElementNameMetadataExtracterSelector = (RootElementNameContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.RootElementSelector");
|
||||||
|
xpathMetadataExtracterSelector = (XPathContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.XPathSelector");
|
||||||
|
xmlMetadataExtracter = (XmlMetadataExtracter) ctx.getBean("extracter.xml.XMLMetadataExtracter");
|
||||||
|
|
||||||
|
authenticationComponent.setSystemUserAsCurrentUser();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void tearDown() throws Exception
|
||||||
|
{
|
||||||
|
try { authenticationComponent.clearCurrentSecurityContext(); } catch (Throwable e) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSetUp()
|
public void testSetUp()
|
||||||
{
|
{
|
||||||
assertNotNull(alfrescoModelMetadataExtractor);
|
assertNotNull(alfrescoModelMetadataExtracter);
|
||||||
assertNotNull(eclipseProjectMetadataExtractor);
|
assertNotNull(eclipseProjectMetadataExtracter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExtractAlfresocModel() throws Exception
|
public void testExtractAlfresocModel() throws Exception
|
||||||
@@ -98,7 +134,7 @@ public class XmlMetadataExtracterTest extends TestCase
|
|||||||
|
|
||||||
// Pass it to the extracter
|
// Pass it to the extracter
|
||||||
PropertyMap checkProperties = new PropertyMap();
|
PropertyMap checkProperties = new PropertyMap();
|
||||||
alfrescoModelMetadataExtractor.extract(reader, checkProperties);
|
alfrescoModelMetadataExtracter.extract(reader, checkProperties);
|
||||||
|
|
||||||
// Check the values
|
// Check the values
|
||||||
assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
|
assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
|
||||||
@@ -114,10 +150,128 @@ public class XmlMetadataExtracterTest extends TestCase
|
|||||||
|
|
||||||
// Pass it to the extracter
|
// Pass it to the extracter
|
||||||
PropertyMap checkProperties = new PropertyMap();
|
PropertyMap checkProperties = new PropertyMap();
|
||||||
eclipseProjectMetadataExtractor.extract(reader, checkProperties);
|
eclipseProjectMetadataExtracter.extract(reader, checkProperties);
|
||||||
|
|
||||||
// Check the values
|
// Check the values
|
||||||
assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
|
assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
|
||||||
assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
|
assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRootElementNameSelector() throws Exception
|
||||||
|
{
|
||||||
|
// Load the example files
|
||||||
|
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||||
|
assertTrue(alfrescoModelReader.exists());
|
||||||
|
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||||
|
assertTrue(eclipseProjectReader.exists());
|
||||||
|
|
||||||
|
// Check with an alfresco model document
|
||||||
|
MetadataExtracter alfrescoModelExtracter = rootElementNameMetadataExtracterSelector.getWorker(alfrescoModelReader);
|
||||||
|
assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
|
||||||
|
assertTrue("Incorrect extracter instance selected", alfrescoModelMetadataExtracter == alfrescoModelExtracter);
|
||||||
|
assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
|
||||||
|
|
||||||
|
// Check with an eclipse project document
|
||||||
|
MetadataExtracter eclipseProjectExtracter = rootElementNameMetadataExtracterSelector.getWorker(eclipseProjectReader);
|
||||||
|
assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
|
||||||
|
assertTrue("Incorrect extracter instance selected", eclipseProjectMetadataExtracter == eclipseProjectExtracter);
|
||||||
|
assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testXpathSelector() throws Exception
|
||||||
|
{
|
||||||
|
// Load the example files
|
||||||
|
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||||
|
assertTrue(alfrescoModelReader.exists());
|
||||||
|
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||||
|
assertTrue(eclipseProjectReader.exists());
|
||||||
|
|
||||||
|
// Check with an alfresco model document
|
||||||
|
MetadataExtracter alfrescoModelExtracter = xpathMetadataExtracterSelector.getWorker(alfrescoModelReader);
|
||||||
|
assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
|
||||||
|
assertTrue("Incorrect extracter instance selected", alfrescoModelMetadataExtracter == alfrescoModelExtracter);
|
||||||
|
assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
|
||||||
|
|
||||||
|
// Check with an eclipse project document
|
||||||
|
MetadataExtracter eclipseProjectExtracter = xpathMetadataExtracterSelector.getWorker(eclipseProjectReader);
|
||||||
|
assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
|
||||||
|
assertTrue("Incorrect extracter instance selected", eclipseProjectMetadataExtracter == eclipseProjectExtracter);
|
||||||
|
assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testXmlMetadataExtracter() throws Exception
|
||||||
|
{
|
||||||
|
// Load the example files
|
||||||
|
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||||
|
assertTrue(alfrescoModelReader.exists());
|
||||||
|
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||||
|
assertTrue(eclipseProjectReader.exists());
|
||||||
|
|
||||||
|
// Pass the Alfresco Model xml to the extractor
|
||||||
|
PropertyMap checkAlfrescoModelProperties = new PropertyMap();
|
||||||
|
xmlMetadataExtracter.extract(alfrescoModelReader, checkAlfrescoModelProperties);
|
||||||
|
// Check the values
|
||||||
|
assertEquals("Gavin Cornwell", checkAlfrescoModelProperties.get(ContentModel.PROP_AUTHOR));
|
||||||
|
assertEquals("fm:forummodel", checkAlfrescoModelProperties.get(ContentModel.PROP_TITLE));
|
||||||
|
assertEquals("Forum Model", checkAlfrescoModelProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||||
|
|
||||||
|
// Pass the Eclipse Project xml to the extractor
|
||||||
|
PropertyMap checkEclipseProjectProperties = new PropertyMap();
|
||||||
|
xmlMetadataExtracter.extract(eclipseProjectReader, checkEclipseProjectProperties);
|
||||||
|
// Check the values
|
||||||
|
assertEquals("Repository", checkEclipseProjectProperties.get(ContentModel.PROP_TITLE));
|
||||||
|
assertEquals("JavaCC Nature", checkEclipseProjectProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests metadata extraction using an action with an EAGER MetadataExtracter for XML.
|
||||||
|
*/
|
||||||
|
public void testLifecycleOfXmlMetadataExtraction() throws Exception
|
||||||
|
{
|
||||||
|
NodeService nodeService = serviceRegistry.getNodeService();
|
||||||
|
ContentService contentService = serviceRegistry.getContentService();
|
||||||
|
ActionExecuter executer = (ActionExecuter) ctx.getBean("extract-metadata");
|
||||||
|
Action action = new ActionImpl(null, GUID.generate(), SetPropertyValueActionExecuter.NAME, null);
|
||||||
|
|
||||||
|
StoreRef storeRef = new StoreRef("test", getName());
|
||||||
|
NodeRef rootNodeRef = null;
|
||||||
|
if (nodeService.exists(storeRef))
|
||||||
|
{
|
||||||
|
rootNodeRef = nodeService.getRootNode(storeRef);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nodeService.createStore("test", getName());
|
||||||
|
rootNodeRef = nodeService.getRootNode(storeRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up some properties
|
||||||
|
PropertyMap properties = new PropertyMap();
|
||||||
|
properties.put(ContentModel.PROP_TITLE, "My title");
|
||||||
|
properties.put(ContentModel.PROP_DESCRIPTION, "My description");
|
||||||
|
|
||||||
|
NodeRef contentNodeRef = nodeService.createNode(
|
||||||
|
rootNodeRef,
|
||||||
|
ContentModel.ASSOC_CHILDREN,
|
||||||
|
QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, getName()),
|
||||||
|
ContentModel.TYPE_CONTENT,
|
||||||
|
properties).getChildRef();
|
||||||
|
// Add some content
|
||||||
|
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||||
|
assertTrue(alfrescoModelReader.exists());
|
||||||
|
ContentWriter writer = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
|
||||||
|
writer.setEncoding("UTF-8");
|
||||||
|
writer.setMimetype(MimetypeMap.MIMETYPE_XML);
|
||||||
|
writer.putContent(alfrescoModelReader);
|
||||||
|
|
||||||
|
// Execute the action
|
||||||
|
executer.execute(action, contentNodeRef);
|
||||||
|
|
||||||
|
// Check the node's properties. The EAGER overwrite policy should have replaced the required
|
||||||
|
// properties.
|
||||||
|
String checkTitle = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_TITLE);
|
||||||
|
String checkDescription = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_DESCRIPTION);
|
||||||
|
assertEquals("fm:forummodel", checkTitle);
|
||||||
|
assertEquals("Forum Model", checkDescription);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -22,8 +22,9 @@
|
|||||||
* the FLOSS exception, and it is also available here:
|
* the FLOSS exception, and it is also available here:
|
||||||
* http://www.alfresco.com/legal/licensing"
|
* http://www.alfresco.com/legal/licensing"
|
||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content;
|
package org.alfresco.repo.content.selector;
|
||||||
|
|
||||||
|
import org.alfresco.repo.content.ContentWorker;
|
||||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
|
@@ -22,7 +22,7 @@
|
|||||||
* the FLOSS exception, and it is also available here:
|
* the FLOSS exception, and it is also available here:
|
||||||
* http://www.alfresco.com/legal/licensing"
|
* http://www.alfresco.com/legal/licensing"
|
||||||
*/
|
*/
|
||||||
package org.alfresco.repo.content.metadata.xml;
|
package org.alfresco.repo.content.selector;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@@ -33,11 +33,11 @@ import java.util.Set;
|
|||||||
import javax.xml.parsers.SAXParser;
|
import javax.xml.parsers.SAXParser;
|
||||||
import javax.xml.parsers.SAXParserFactory;
|
import javax.xml.parsers.SAXParserFactory;
|
||||||
|
|
||||||
import org.alfresco.repo.content.ContentWorkerSelector;
|
import org.alfresco.repo.content.ContentWorker;
|
||||||
import org.alfresco.repo.content.MimetypeMap;
|
import org.alfresco.repo.content.MimetypeMap;
|
||||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
|
||||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||||
import org.alfresco.service.cmr.repository.ContentReader;
|
import org.alfresco.service.cmr.repository.ContentReader;
|
||||||
|
import org.alfresco.util.PropertyCheck;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
@@ -48,33 +48,38 @@ import org.xml.sax.helpers.DefaultHandler;
|
|||||||
* A selector that looks at the root node of an XML document to determine which worker to provide.
|
* A selector that looks at the root node of an XML document to determine which worker to provide.
|
||||||
* There are many ways to identify XML documents and this is probably the simplest. Alternate
|
* There are many ways to identify XML documents and this is probably the simplest. Alternate
|
||||||
* implementations might execute a series of xpath statements or look for specific namespace
|
* implementations might execute a series of xpath statements or look for specific namespace
|
||||||
* declarations in the document. The net result is the same, i.e. given an XML document, an
|
* declarations in the document. The net result is the same, i.e. given an XML document, a
|
||||||
* extracter is provided to the caller.
|
* worker is provided to the caller.
|
||||||
* <p>
|
|
||||||
* In this selector, there is no guarantee that the different extracters will generate the same
|
|
||||||
* (or even nearly the same) metadata. It is up to the configurer to ensure that if it is a
|
|
||||||
* requirement, but otherwise each extracter is responsible for its own mappings. Mostly, though,
|
|
||||||
* a root node match will imply a structure that has the necessary metadata.
|
|
||||||
*
|
*
|
||||||
* @since 2.1
|
* @since 2.1
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
*/
|
*/
|
||||||
public class RootElementNameMetadataExtracterSelector
|
public class RootElementNameContentWorkerSelector<W extends ContentWorker>
|
||||||
extends DefaultHandler
|
extends DefaultHandler
|
||||||
implements ContentWorkerSelector<MetadataExtracter>
|
implements ContentWorkerSelector<ContentWorker>
|
||||||
{
|
{
|
||||||
private static Log logger = LogFactory.getLog(RootElementNameMetadataExtracterSelector.class);
|
private static Log logger = LogFactory.getLog(RootElementNameContentWorkerSelector.class);
|
||||||
|
|
||||||
private SAXParserFactory saxParserFactory;
|
private SAXParserFactory saxParserFactory;
|
||||||
private Set<String> supportedMimetypes;
|
private Set<String> supportedMimetypes;
|
||||||
private Map<String, MetadataExtracter> extractersByRootElementName;
|
private Map<String, W> workersByRootElementName;
|
||||||
|
|
||||||
public RootElementNameMetadataExtracterSelector()
|
public RootElementNameContentWorkerSelector()
|
||||||
{
|
{
|
||||||
saxParserFactory = SAXParserFactory.newInstance();
|
saxParserFactory = SAXParserFactory.newInstance();
|
||||||
supportedMimetypes = new HashSet<String>();
|
supportedMimetypes = new HashSet<String>();
|
||||||
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
||||||
extractersByRootElementName = Collections.emptyMap();
|
workersByRootElementName = Collections.emptyMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
StringBuilder sb = new StringBuilder(50);
|
||||||
|
sb.append("RootElementNameContentWorkerSelector")
|
||||||
|
.append("[ workers=").append(workersByRootElementName)
|
||||||
|
.append("]");
|
||||||
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -89,26 +94,35 @@ public class RootElementNameMetadataExtracterSelector
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the extractors to use.
|
* Set the workers to choose from.
|
||||||
*
|
*
|
||||||
* @param extracters a map of {@linkplain MetadataExtracter} instances
|
* @param workers a map of {@linkplain ContentWorker} instances
|
||||||
* keyed by root element name
|
* keyed by root element name
|
||||||
*/
|
*/
|
||||||
public void setExtracters(Map<String, MetadataExtracter> extracters)
|
public void setWorkers(Map<String, W> workers)
|
||||||
{
|
{
|
||||||
this.extractersByRootElementName = extracters;
|
this.workersByRootElementName = workers;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a match of the root element name to find the correct extracter.
|
* Checks the configuration.
|
||||||
*/
|
*/
|
||||||
public MetadataExtracter getWorker(ContentReader reader)
|
public void init()
|
||||||
|
{
|
||||||
|
PropertyCheck.mandatory(this, "workers", workersByRootElementName);
|
||||||
|
PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a match of the root element name to find the correct content worker.
|
||||||
|
*/
|
||||||
|
public W getWorker(ContentReader reader)
|
||||||
{
|
{
|
||||||
if (!supportedMimetypes.contains(reader.getMimetype()))
|
if (!supportedMimetypes.contains(reader.getMimetype()))
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
MetadataExtracter extracter = null;
|
W worker = null;
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
String rootElementName = null;
|
String rootElementName = null;
|
||||||
try
|
try
|
||||||
@@ -121,11 +135,15 @@ public class RootElementNameMetadataExtracterSelector
|
|||||||
catch (RootElementFoundException e)
|
catch (RootElementFoundException e)
|
||||||
{
|
{
|
||||||
rootElementName = e.getElementName();
|
rootElementName = e.getElementName();
|
||||||
extracter = extractersByRootElementName.get(rootElementName);
|
worker = workersByRootElementName.get(rootElementName);
|
||||||
}
|
}
|
||||||
catch (Throwable e)
|
catch (Throwable e)
|
||||||
{
|
{
|
||||||
throw new ContentIOException("Failed to extract root element from XML document", e);
|
throw new ContentIOException("\n" +
|
||||||
|
"Failed to extract root element from XML document: \n" +
|
||||||
|
" Reader: " + reader + "\n" +
|
||||||
|
" Selector: " + this,
|
||||||
|
e);
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
@@ -138,18 +156,18 @@ public class RootElementNameMetadataExtracterSelector
|
|||||||
if (logger.isDebugEnabled())
|
if (logger.isDebugEnabled())
|
||||||
{
|
{
|
||||||
logger.debug("\n" +
|
logger.debug("\n" +
|
||||||
"Chosen metadata extracter for reader: \n" +
|
"Chosen content worker for reader: \n" +
|
||||||
" Reader: " + reader + "\n" +
|
" Reader: " + reader + "\n" +
|
||||||
" Root Element: " + rootElementName + "\n" +
|
" Root Element: " + rootElementName + "\n" +
|
||||||
" Extracter: " + extracter);
|
" Worker: " + worker);
|
||||||
}
|
}
|
||||||
return extracter;
|
return worker;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
|
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
|
||||||
{
|
{
|
||||||
throw new RootElementFoundException(localName);
|
throw new RootElementFoundException(qName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
@@ -0,0 +1,204 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
|
||||||
|
* As a special exception to the terms and conditions of version 2.0 of
|
||||||
|
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||||
|
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||||
|
* FLOSS exception. You should have received a copy of the text describing
|
||||||
|
* the FLOSS exception, and it is also available here:
|
||||||
|
* http://www.alfresco.com/legal/licensing"
|
||||||
|
*/
|
||||||
|
package org.alfresco.repo.content.selector;
|
||||||
|
|
||||||
|
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.ContentWorker;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.PropertyCheck;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
|
||||||
|
/**
|
||||||
|
* A selector that executes a set of XPath statements against the XML document to determine
|
||||||
|
* which content worker to provide. The XPath rules are simple, i.e. if an XML node is
|
||||||
|
* found by the XPath statement, then it is considered to be a hit and the corresponding
|
||||||
|
* worker is returned.
|
||||||
|
* <p>
|
||||||
|
* Currently, the only namespaces supported are those contained in the XML documents being
|
||||||
|
* tested.
|
||||||
|
*
|
||||||
|
* @since 2.1
|
||||||
|
* @author Derek Hulley
|
||||||
|
*/
|
||||||
|
public class XPathContentWorkerSelector<W extends ContentWorker> implements ContentWorkerSelector
|
||||||
|
{
|
||||||
|
private static Log logger = LogFactory.getLog(XPathContentWorkerSelector.class);
|
||||||
|
|
||||||
|
private DocumentBuilder documentBuilder;
|
||||||
|
private XPathFactory xpathFactory;
|
||||||
|
private Set<String> supportedMimetypes;
|
||||||
|
private Map<String, W> workersByXPath;
|
||||||
|
|
||||||
|
public XPathContentWorkerSelector()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
||||||
|
xpathFactory = XPathFactory.newInstance();
|
||||||
|
}
|
||||||
|
catch (Throwable e)
|
||||||
|
{
|
||||||
|
throw new AlfrescoRuntimeException("Failed to initialize XPathContentWorkerSelector", e);
|
||||||
|
}
|
||||||
|
supportedMimetypes = new HashSet<String>();
|
||||||
|
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
StringBuilder sb = new StringBuilder(50);
|
||||||
|
sb.append("XPathContentWorkerSelector")
|
||||||
|
.append("[ workers=").append(workersByXPath)
|
||||||
|
.append("]");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optionally set the mimetypes supported. They must be XML formats that the chosen
|
||||||
|
* parser will be able to handle.
|
||||||
|
*
|
||||||
|
* @param supportedMimetypes the list of mimetypes. The default is <b>text/xml</b>.
|
||||||
|
*/
|
||||||
|
public void setSupportedMimetypes(Set<String> supportedMimetypes)
|
||||||
|
{
|
||||||
|
this.supportedMimetypes = supportedMimetypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the workers to use. All the XPath statements provided must be compatible with
|
||||||
|
* a return value of type {@linkplain XPathConstants#NODE NODE}.
|
||||||
|
*
|
||||||
|
* @param workers a map of {@linkplain ContentWorker} instances
|
||||||
|
* keyed by XPath statements
|
||||||
|
*/
|
||||||
|
public void setWorkers(Map<String, W> workers)
|
||||||
|
{
|
||||||
|
this.workersByXPath = workers;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the configuration.
|
||||||
|
*/
|
||||||
|
public void init()
|
||||||
|
{
|
||||||
|
PropertyCheck.mandatory(this, "workers", workersByXPath);
|
||||||
|
PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute the XPath statements, in order, against the document. Any statements that fail
|
||||||
|
* to run will be ignored.
|
||||||
|
*/
|
||||||
|
public W getWorker(ContentReader reader)
|
||||||
|
{
|
||||||
|
if (!supportedMimetypes.contains(reader.getMimetype()))
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
W worker = null;
|
||||||
|
InputStream is = null;
|
||||||
|
String xpath = null;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
is = reader.getContentInputStream();
|
||||||
|
Document doc = documentBuilder.parse(is);
|
||||||
|
// Execute the statements
|
||||||
|
worker = processDocument(doc);
|
||||||
|
}
|
||||||
|
catch (Throwable e)
|
||||||
|
{
|
||||||
|
throw new ContentIOException("\n" +
|
||||||
|
"Failed to XPaths against XML document: \n" +
|
||||||
|
" Reader: " + reader + "\n" +
|
||||||
|
" Selector: " + this,
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
if (is != null)
|
||||||
|
{
|
||||||
|
try { is.close(); } catch (IOException e) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Done
|
||||||
|
if (logger.isDebugEnabled())
|
||||||
|
{
|
||||||
|
logger.debug("\n" +
|
||||||
|
"Chosen content worker for reader: \n" +
|
||||||
|
" Reader: " + reader + "\n" +
|
||||||
|
" XPath: " + xpath + "\n" +
|
||||||
|
" Worker: " + worker);
|
||||||
|
}
|
||||||
|
return worker;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the given document against the list of XPath statements provided.
|
||||||
|
*
|
||||||
|
* @param document the XML document
|
||||||
|
* @return Returns a content worker that was matched or <tt>null</tt>
|
||||||
|
*/
|
||||||
|
private W processDocument(Document doc)
|
||||||
|
{
|
||||||
|
for (Map.Entry<String, W> entry : workersByXPath.entrySet())
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
String xpath = entry.getKey();
|
||||||
|
W worker = entry.getValue();
|
||||||
|
// Execute the statement
|
||||||
|
Object ret = xpathFactory.newXPath().evaluate(xpath, doc, XPathConstants.NODE);
|
||||||
|
if (ret != null)
|
||||||
|
{
|
||||||
|
// We found one
|
||||||
|
return worker;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (XPathExpressionException e)
|
||||||
|
{
|
||||||
|
// We accept this and move on
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Nothing found
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@@ -47,4 +47,59 @@
|
|||||||
</property>
|
</property>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
|
<!-- A selector that checks root element names -->
|
||||||
|
<bean
|
||||||
|
id="extracter.xml.selector.RootElementSelector"
|
||||||
|
class="org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector"
|
||||||
|
init-method="init">
|
||||||
|
<property name="workers">
|
||||||
|
<map>
|
||||||
|
<entry key="BOGUS_ROOT_ELEMENT">
|
||||||
|
<null />
|
||||||
|
</entry>
|
||||||
|
<entry key="model">
|
||||||
|
<ref bean="extracter.xml.AlfrescoModelMetadataExtracter" />
|
||||||
|
</entry>
|
||||||
|
<entry key="projectDescription">
|
||||||
|
<ref bean="extracter.xml.EclipseProjectMetadataExtracter" />
|
||||||
|
</entry>
|
||||||
|
</map>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<!-- A selector that executes XPath statements -->
|
||||||
|
<bean
|
||||||
|
id="extracter.xml.selector.XPathSelector"
|
||||||
|
class="org.alfresco.repo.content.selector.XPathContentWorkerSelector"
|
||||||
|
init-method="init">
|
||||||
|
<property name="workers">
|
||||||
|
<map>
|
||||||
|
<entry key="/my:test">
|
||||||
|
<null />
|
||||||
|
</entry>
|
||||||
|
<entry key="/model[@name='fm:forummodel']">
|
||||||
|
<ref bean="extracter.xml.AlfrescoModelMetadataExtracter" />
|
||||||
|
</entry>
|
||||||
|
<entry key="/projectDescription">
|
||||||
|
<ref bean="extracter.xml.EclipseProjectMetadataExtracter" />
|
||||||
|
</entry>
|
||||||
|
</map>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<!-- The wrapper XML metadata extracter -->
|
||||||
|
<bean
|
||||||
|
id="extracter.xml.XMLMetadataExtracter"
|
||||||
|
class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter"
|
||||||
|
parent="baseMetadataExtracter">
|
||||||
|
<property name="overwritePolicy">
|
||||||
|
<value>EAGER</value>
|
||||||
|
</property>
|
||||||
|
<property name="selectors">
|
||||||
|
<list>
|
||||||
|
<ref bean="extracter.xml.selector.XPathSelector" />
|
||||||
|
</list>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
</beans>
|
</beans>
|
Reference in New Issue
Block a user