mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
XML metadata extraction with sample.
Added tests into build. This is now ready for testing, comments and suggestions. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@6056 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -93,7 +93,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
|
||||
private MetadataExtracterRegistry registry;
|
||||
private MimetypeService mimetypeService;
|
||||
private long extractionTime;
|
||||
private boolean initialized;
|
||||
|
||||
private Set<String> supportedMimetypes;
|
||||
@@ -101,12 +100,23 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
private Map<String, Set<QName>> mapping;
|
||||
private boolean inheritDefaultMapping;
|
||||
|
||||
/**
|
||||
* Default constructor. If this is called, then {@link #isSupported(String)} should
|
||||
* be implemented. This is useful when the list of supported mimetypes is not known
|
||||
* when the instance is constructed. Alternatively, once the set becomes known, call
|
||||
* {@link #setSupportedMimetypes(Collection)}.
|
||||
*
|
||||
* @see #isSupported(String)
|
||||
* @see #setSupportedMimetypes(Collection)
|
||||
*/
|
||||
protected AbstractMappingMetadataExtracter()
|
||||
{
|
||||
this(Collections.<String>emptySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that can be used when the list of supported mimetypes is known up front.
|
||||
*
|
||||
* @param supportedMimetypes the set of mimetypes supported by default
|
||||
*/
|
||||
protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes)
|
||||
@@ -179,13 +189,29 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
}
|
||||
|
||||
/**
|
||||
* @param overwritePolicy the policy to apply when there are existing system properties
|
||||
* Set the policy to use when existing values are encountered. Depending on how the extracter
|
||||
* is called, this may not be relevant, i.e. an empty map of existing properties may be passed
|
||||
* in by the client code, which may follow its own overwrite strategy.
|
||||
*
|
||||
* @param overwritePolicy the policy to apply when there are existing system properties
|
||||
*/
|
||||
public void setOverwritePolicy(OverwritePolicy overwritePolicy)
|
||||
{
|
||||
this.overwritePolicy = overwritePolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the policy to use when existing values are encountered. Depending on how the extracter
|
||||
* is called, this may not be relevant, i.e. an empty map of existing properties may be passed
|
||||
* in by the client code, which may follow its own overwrite strategy.
|
||||
*
|
||||
* @param overwritePolicyStr the policy to apply when there are existing system properties
|
||||
*/
|
||||
public void setOverwritePolicy(String overwritePolicyStr)
|
||||
{
|
||||
this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set if the property mappings augment or override the mapping generically provided by the
|
||||
* extracter implementation. The default is <tt>false</tt>, i.e. any mapping set completely
|
||||
@@ -410,10 +436,6 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
{
|
||||
registry.register(this);
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.warn("No registry provided. Not registering: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -466,7 +488,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/** {@inheritDoc} */
|
||||
public long getExtractionTime()
|
||||
{
|
||||
return extractionTime;
|
||||
return 1000L;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -510,7 +532,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public final Map<QName, Serializable> extract(
|
||||
public Map<QName, Serializable> extract(
|
||||
ContentReader reader,
|
||||
OverwritePolicy overwritePolicy,
|
||||
Map<QName, Serializable> destination,
|
||||
|
@@ -222,6 +222,8 @@ public interface MetadataExtracter extends ContentWorker
|
||||
* reliant transformers will be used for a specific extraction.
|
||||
*
|
||||
* @return Returns the approximate number of milliseconds per transformation
|
||||
*
|
||||
* @deprecated Generally not useful or used. Extraction is normally specifically configured.
|
||||
*/
|
||||
public long getExtractionTime();
|
||||
|
||||
|
@@ -145,7 +145,6 @@ public class MetadataExtracterRegistry
|
||||
*/
|
||||
private MetadataExtracter findBestExtracter(String sourceMimetype)
|
||||
{
|
||||
long bestTime = Long.MAX_VALUE;
|
||||
logger.debug("Finding best extracter for " + sourceMimetype);
|
||||
|
||||
MetadataExtracter bestExtracter = null;
|
||||
@@ -157,12 +156,7 @@ public class MetadataExtracterRegistry
|
||||
// extraction not achievable
|
||||
continue;
|
||||
}
|
||||
long time = ext.getExtractionTime();
|
||||
if (time < bestTime)
|
||||
{
|
||||
bestExtracter = ext;
|
||||
bestTime = time;
|
||||
}
|
||||
bestExtracter = ext;
|
||||
}
|
||||
return bestExtracter;
|
||||
}
|
||||
|
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata.xml;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.repo.content.selector.ContentWorkerSelector;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.PropertyCheck;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* A metadata extractor that selects an appropiate workder for the extraction.
|
||||
* <p>
|
||||
* The {@linkplain #setSelectors(List) selectors} are used to find an extracter most
|
||||
* appropriate of a given XML document. The chosen extracter is then asked to extract
|
||||
* the values, passing through the {@linkplain MetadataExtracter.OverwritePolicy overwrite policy}
|
||||
* as {@linkplain #setOverwritePolicy(String)} on this instance. The overwrite policy of the
|
||||
* embedded extracters is not relevant unless they are used separately in another context.
|
||||
*
|
||||
* @see ContentWorkerSelector
|
||||
* @see MetadataExtracter
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class XmlMetadataExtracter extends AbstractMappingMetadataExtracter
|
||||
{
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {MimetypeMap.MIMETYPE_XML};
|
||||
|
||||
private static Log logger = LogFactory.getLog(XPathMetadataExtracter.class);
|
||||
|
||||
private List<ContentWorkerSelector<MetadataExtracter>> selectors;
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*/
|
||||
public XmlMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the list of metadata selectors to use to find the extracter to use, given
|
||||
* some content. The evaluations are done in the order that they occur in the
|
||||
* list.
|
||||
*
|
||||
* @param selectors A list of selectors
|
||||
*/
|
||||
public void setSelectors(List<ContentWorkerSelector<MetadataExtracter>> selectors)
|
||||
{
|
||||
this.selectors = selectors;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init()
|
||||
{
|
||||
PropertyCheck.mandatory(this, "selectors", selectors);
|
||||
// Get the base class to set up its mappings
|
||||
super.init();
|
||||
}
|
||||
|
||||
/**
|
||||
* It is not possible to have any default mappings, but something has to be returned.
|
||||
*
|
||||
* @return Returns an empty map
|
||||
*/
|
||||
@Override
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects and extracter to perform the work and redirects to it.
|
||||
*/
|
||||
@Override
|
||||
public Map<QName, Serializable> extract(
|
||||
ContentReader reader,
|
||||
OverwritePolicy overwritePolicy,
|
||||
Map<QName, Serializable> destination,
|
||||
Map<String, Set<QName>> mapping)
|
||||
{
|
||||
MetadataExtracter extracter = null;
|
||||
// Select a worker
|
||||
for (ContentWorkerSelector<MetadataExtracter> selector : selectors)
|
||||
{
|
||||
ContentReader spawnedReader = reader.getReader();
|
||||
try
|
||||
{
|
||||
extracter = selector.getWorker(spawnedReader);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (reader.isChannelOpen())
|
||||
{
|
||||
logger.error("Content reader not closed by MetadataExtractor selector: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" selector: " + selector);
|
||||
}
|
||||
}
|
||||
// Just take the first successful one
|
||||
if (extracter != null)
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"Found metadata extracter to process XML document: \n" +
|
||||
" Selector: " + selector + "\n" +
|
||||
" Document: " + reader);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
Map<QName, Serializable> modifiedProperties = null;
|
||||
// Did we find anything?
|
||||
if (extracter == null)
|
||||
{
|
||||
// There will be no properties extracted
|
||||
modifiedProperties = Collections.emptyMap();
|
||||
}
|
||||
else
|
||||
{
|
||||
// An extractor was selected
|
||||
try
|
||||
{
|
||||
modifiedProperties = extracter.extract(reader, overwritePolicy, destination, mapping);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (reader.isChannelOpen())
|
||||
{
|
||||
logger.error("Content reader not closed by MetadataExtractor: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" extracter: " + extracter);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"XML metadata extractor redirected: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Extracter: " + extracter + "\n" +
|
||||
" Extracted: " + modifiedProperties);
|
||||
}
|
||||
return modifiedProperties;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is not required as the
|
||||
*/
|
||||
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
@@ -31,10 +31,27 @@ import java.net.URL;
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.action.ActionImpl;
|
||||
import org.alfresco.repo.action.executer.ActionExecuter;
|
||||
import org.alfresco.repo.action.executer.SetPropertyValueActionExecuter;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||
import org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector;
|
||||
import org.alfresco.repo.content.selector.XPathContentWorkerSelector;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.repo.security.authentication.AuthenticationComponent;
|
||||
import org.alfresco.service.ServiceRegistry;
|
||||
import org.alfresco.service.cmr.action.Action;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentService;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.namespace.NamespaceService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.GUID;
|
||||
import org.alfresco.util.PropertyMap;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
@@ -54,8 +71,13 @@ public class XmlMetadataExtracterTest extends TestCase
|
||||
private static final String CTX_LOCATION = "classpath:xml-metadata/xml-metadata-test-context.xml";
|
||||
private static final ApplicationContext ctx = new ClassPathXmlApplicationContext(CTX_LOCATION);
|
||||
|
||||
private XPathMetadataExtracter alfrescoModelMetadataExtractor;
|
||||
private XPathMetadataExtracter eclipseProjectMetadataExtractor;
|
||||
private ServiceRegistry serviceRegistry;
|
||||
private AuthenticationComponent authenticationComponent;
|
||||
private XPathMetadataExtracter alfrescoModelMetadataExtracter;
|
||||
private XPathMetadataExtracter eclipseProjectMetadataExtracter;
|
||||
private RootElementNameContentWorkerSelector<MetadataExtracter> rootElementNameMetadataExtracterSelector;
|
||||
private XPathContentWorkerSelector<MetadataExtracter> xpathMetadataExtracterSelector;
|
||||
private XmlMetadataExtracter xmlMetadataExtracter;
|
||||
|
||||
/**
|
||||
* Get a reader for a file that should be on the classpath.
|
||||
@@ -78,16 +100,30 @@ public class XmlMetadataExtracterTest extends TestCase
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
alfrescoModelMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
|
||||
eclipseProjectMetadataExtractor = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
|
||||
serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
|
||||
authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
|
||||
alfrescoModelMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
|
||||
eclipseProjectMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
|
||||
rootElementNameMetadataExtracterSelector = (RootElementNameContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.RootElementSelector");
|
||||
xpathMetadataExtracterSelector = (XPathContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.XPathSelector");
|
||||
xmlMetadataExtracter = (XmlMetadataExtracter) ctx.getBean("extracter.xml.XMLMetadataExtracter");
|
||||
|
||||
authenticationComponent.setSystemUserAsCurrentUser();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception
|
||||
{
|
||||
try { authenticationComponent.clearCurrentSecurityContext(); } catch (Throwable e) {}
|
||||
}
|
||||
|
||||
public void testSetUp()
|
||||
{
|
||||
assertNotNull(alfrescoModelMetadataExtractor);
|
||||
assertNotNull(eclipseProjectMetadataExtractor);
|
||||
assertNotNull(alfrescoModelMetadataExtracter);
|
||||
assertNotNull(eclipseProjectMetadataExtracter);
|
||||
}
|
||||
|
||||
public void testExtractAlfresocModel() throws Exception
|
||||
@@ -98,7 +134,7 @@ public class XmlMetadataExtracterTest extends TestCase
|
||||
|
||||
// Pass it to the extracter
|
||||
PropertyMap checkProperties = new PropertyMap();
|
||||
alfrescoModelMetadataExtractor.extract(reader, checkProperties);
|
||||
alfrescoModelMetadataExtracter.extract(reader, checkProperties);
|
||||
|
||||
// Check the values
|
||||
assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
|
||||
@@ -114,10 +150,128 @@ public class XmlMetadataExtracterTest extends TestCase
|
||||
|
||||
// Pass it to the extracter
|
||||
PropertyMap checkProperties = new PropertyMap();
|
||||
eclipseProjectMetadataExtractor.extract(reader, checkProperties);
|
||||
eclipseProjectMetadataExtracter.extract(reader, checkProperties);
|
||||
|
||||
// Check the values
|
||||
assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
|
||||
assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||
}
|
||||
|
||||
public void testRootElementNameSelector() throws Exception
|
||||
{
|
||||
// Load the example files
|
||||
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||
assertTrue(alfrescoModelReader.exists());
|
||||
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||
assertTrue(eclipseProjectReader.exists());
|
||||
|
||||
// Check with an alfresco model document
|
||||
MetadataExtracter alfrescoModelExtracter = rootElementNameMetadataExtracterSelector.getWorker(alfrescoModelReader);
|
||||
assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
|
||||
assertTrue("Incorrect extracter instance selected", alfrescoModelMetadataExtracter == alfrescoModelExtracter);
|
||||
assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
|
||||
|
||||
// Check with an eclipse project document
|
||||
MetadataExtracter eclipseProjectExtracter = rootElementNameMetadataExtracterSelector.getWorker(eclipseProjectReader);
|
||||
assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
|
||||
assertTrue("Incorrect extracter instance selected", eclipseProjectMetadataExtracter == eclipseProjectExtracter);
|
||||
assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
|
||||
}
|
||||
|
||||
public void testXpathSelector() throws Exception
|
||||
{
|
||||
// Load the example files
|
||||
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||
assertTrue(alfrescoModelReader.exists());
|
||||
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||
assertTrue(eclipseProjectReader.exists());
|
||||
|
||||
// Check with an alfresco model document
|
||||
MetadataExtracter alfrescoModelExtracter = xpathMetadataExtracterSelector.getWorker(alfrescoModelReader);
|
||||
assertNotNull("Failed to select correct extracter", alfrescoModelExtracter);
|
||||
assertTrue("Incorrect extracter instance selected", alfrescoModelMetadataExtracter == alfrescoModelExtracter);
|
||||
assertFalse("Read channel not closed", alfrescoModelReader.isChannelOpen());
|
||||
|
||||
// Check with an eclipse project document
|
||||
MetadataExtracter eclipseProjectExtracter = xpathMetadataExtracterSelector.getWorker(eclipseProjectReader);
|
||||
assertNotNull("Failed to select correct extracter", eclipseProjectExtracter);
|
||||
assertTrue("Incorrect extracter instance selected", eclipseProjectMetadataExtracter == eclipseProjectExtracter);
|
||||
assertFalse("Read channel not closed", eclipseProjectReader.isChannelOpen());
|
||||
}
|
||||
|
||||
public void testXmlMetadataExtracter() throws Exception
|
||||
{
|
||||
// Load the example files
|
||||
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||
assertTrue(alfrescoModelReader.exists());
|
||||
ContentReader eclipseProjectReader = getReader(FILE_ECLIPSE_PROJECT);
|
||||
assertTrue(eclipseProjectReader.exists());
|
||||
|
||||
// Pass the Alfresco Model xml to the extractor
|
||||
PropertyMap checkAlfrescoModelProperties = new PropertyMap();
|
||||
xmlMetadataExtracter.extract(alfrescoModelReader, checkAlfrescoModelProperties);
|
||||
// Check the values
|
||||
assertEquals("Gavin Cornwell", checkAlfrescoModelProperties.get(ContentModel.PROP_AUTHOR));
|
||||
assertEquals("fm:forummodel", checkAlfrescoModelProperties.get(ContentModel.PROP_TITLE));
|
||||
assertEquals("Forum Model", checkAlfrescoModelProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||
|
||||
// Pass the Eclipse Project xml to the extractor
|
||||
PropertyMap checkEclipseProjectProperties = new PropertyMap();
|
||||
xmlMetadataExtracter.extract(eclipseProjectReader, checkEclipseProjectProperties);
|
||||
// Check the values
|
||||
assertEquals("Repository", checkEclipseProjectProperties.get(ContentModel.PROP_TITLE));
|
||||
assertEquals("JavaCC Nature", checkEclipseProjectProperties.get(ContentModel.PROP_DESCRIPTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests metadata extraction using an action with an EAGER MetadataExtracter for XML.
|
||||
*/
|
||||
public void testLifecycleOfXmlMetadataExtraction() throws Exception
|
||||
{
|
||||
NodeService nodeService = serviceRegistry.getNodeService();
|
||||
ContentService contentService = serviceRegistry.getContentService();
|
||||
ActionExecuter executer = (ActionExecuter) ctx.getBean("extract-metadata");
|
||||
Action action = new ActionImpl(null, GUID.generate(), SetPropertyValueActionExecuter.NAME, null);
|
||||
|
||||
StoreRef storeRef = new StoreRef("test", getName());
|
||||
NodeRef rootNodeRef = null;
|
||||
if (nodeService.exists(storeRef))
|
||||
{
|
||||
rootNodeRef = nodeService.getRootNode(storeRef);
|
||||
}
|
||||
else
|
||||
{
|
||||
nodeService.createStore("test", getName());
|
||||
rootNodeRef = nodeService.getRootNode(storeRef);
|
||||
}
|
||||
|
||||
// Set up some properties
|
||||
PropertyMap properties = new PropertyMap();
|
||||
properties.put(ContentModel.PROP_TITLE, "My title");
|
||||
properties.put(ContentModel.PROP_DESCRIPTION, "My description");
|
||||
|
||||
NodeRef contentNodeRef = nodeService.createNode(
|
||||
rootNodeRef,
|
||||
ContentModel.ASSOC_CHILDREN,
|
||||
QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, getName()),
|
||||
ContentModel.TYPE_CONTENT,
|
||||
properties).getChildRef();
|
||||
// Add some content
|
||||
ContentReader alfrescoModelReader = getReader(FILE_ALFRESCO_MODEL);
|
||||
assertTrue(alfrescoModelReader.exists());
|
||||
ContentWriter writer = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
|
||||
writer.setEncoding("UTF-8");
|
||||
writer.setMimetype(MimetypeMap.MIMETYPE_XML);
|
||||
writer.putContent(alfrescoModelReader);
|
||||
|
||||
// Execute the action
|
||||
executer.execute(action, contentNodeRef);
|
||||
|
||||
// Check the node's properties. The EAGER overwrite policy should have replaced the required
|
||||
// properties.
|
||||
String checkTitle = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_TITLE);
|
||||
String checkDescription = (String) nodeService.getProperty(contentNodeRef, ContentModel.PROP_DESCRIPTION);
|
||||
assertEquals("fm:forummodel", checkTitle);
|
||||
assertEquals("Forum Model", checkDescription);
|
||||
}
|
||||
}
|
||||
|
@@ -22,8 +22,9 @@
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
package org.alfresco.repo.content.selector;
|
||||
|
||||
import org.alfresco.repo.content.ContentWorker;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
|
@@ -22,7 +22,7 @@
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata.xml;
|
||||
package org.alfresco.repo.content.selector;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
@@ -33,11 +33,11 @@ import java.util.Set;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import org.alfresco.repo.content.ContentWorkerSelector;
|
||||
import org.alfresco.repo.content.ContentWorker;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracter;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.util.PropertyCheck;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.xml.sax.Attributes;
|
||||
@@ -48,33 +48,38 @@ import org.xml.sax.helpers.DefaultHandler;
|
||||
* A selector that looks at the root node of an XML document to determine which worker to provide.
|
||||
* There are many ways to identify XML documents and this is probably the simplest. Alternate
|
||||
* implementations might execute a series of xpath statements or look for specific namespace
|
||||
* declarations in the document. The net result is the same, i.e. given an XML document, an
|
||||
* extracter is provided to the caller.
|
||||
* <p>
|
||||
* In this selector, there is no guarantee that the different extracters will generate the same
|
||||
* (or even nearly the same) metadata. It is up to the configurer to ensure that if it is a
|
||||
* requirement, but otherwise each extracter is responsible for its own mappings. Mostly, though,
|
||||
* a root node match will imply a structure that has the necessary metadata.
|
||||
* declarations in the document. The net result is the same, i.e. given an XML document, a
|
||||
* worker is provided to the caller.
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class RootElementNameMetadataExtracterSelector
|
||||
public class RootElementNameContentWorkerSelector<W extends ContentWorker>
|
||||
extends DefaultHandler
|
||||
implements ContentWorkerSelector<MetadataExtracter>
|
||||
implements ContentWorkerSelector<ContentWorker>
|
||||
{
|
||||
private static Log logger = LogFactory.getLog(RootElementNameMetadataExtracterSelector.class);
|
||||
private static Log logger = LogFactory.getLog(RootElementNameContentWorkerSelector.class);
|
||||
|
||||
private SAXParserFactory saxParserFactory;
|
||||
private Set<String> supportedMimetypes;
|
||||
private Map<String, MetadataExtracter> extractersByRootElementName;
|
||||
private Map<String, W> workersByRootElementName;
|
||||
|
||||
public RootElementNameMetadataExtracterSelector()
|
||||
public RootElementNameContentWorkerSelector()
|
||||
{
|
||||
saxParserFactory = SAXParserFactory.newInstance();
|
||||
supportedMimetypes = new HashSet<String>();
|
||||
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
||||
extractersByRootElementName = Collections.emptyMap();
|
||||
workersByRootElementName = Collections.emptyMap();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
StringBuilder sb = new StringBuilder(50);
|
||||
sb.append("RootElementNameContentWorkerSelector")
|
||||
.append("[ workers=").append(workersByRootElementName)
|
||||
.append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -89,26 +94,35 @@ public class RootElementNameMetadataExtracterSelector
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the extractors to use.
|
||||
* Set the workers to choose from.
|
||||
*
|
||||
* @param extracters a map of {@linkplain MetadataExtracter} instances
|
||||
* @param workers a map of {@linkplain ContentWorker} instances
|
||||
* keyed by root element name
|
||||
*/
|
||||
public void setExtracters(Map<String, MetadataExtracter> extracters)
|
||||
public void setWorkers(Map<String, W> workers)
|
||||
{
|
||||
this.extractersByRootElementName = extracters;
|
||||
this.workersByRootElementName = workers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a match of the root element name to find the correct extracter.
|
||||
* Checks the configuration.
|
||||
*/
|
||||
public MetadataExtracter getWorker(ContentReader reader)
|
||||
public void init()
|
||||
{
|
||||
PropertyCheck.mandatory(this, "workers", workersByRootElementName);
|
||||
PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a match of the root element name to find the correct content worker.
|
||||
*/
|
||||
public W getWorker(ContentReader reader)
|
||||
{
|
||||
if (!supportedMimetypes.contains(reader.getMimetype()))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
MetadataExtracter extracter = null;
|
||||
W worker = null;
|
||||
InputStream is = null;
|
||||
String rootElementName = null;
|
||||
try
|
||||
@@ -121,11 +135,15 @@ public class RootElementNameMetadataExtracterSelector
|
||||
catch (RootElementFoundException e)
|
||||
{
|
||||
rootElementName = e.getElementName();
|
||||
extracter = extractersByRootElementName.get(rootElementName);
|
||||
worker = workersByRootElementName.get(rootElementName);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new ContentIOException("Failed to extract root element from XML document", e);
|
||||
throw new ContentIOException("\n" +
|
||||
"Failed to extract root element from XML document: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Selector: " + this,
|
||||
e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
@@ -138,18 +156,18 @@ public class RootElementNameMetadataExtracterSelector
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"Chosen metadata extracter for reader: \n" +
|
||||
"Chosen content worker for reader: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Root Element: " + rootElementName + "\n" +
|
||||
" Extracter: " + extracter);
|
||||
" Worker: " + worker);
|
||||
}
|
||||
return extracter;
|
||||
return worker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
|
||||
{
|
||||
throw new RootElementFoundException(localName);
|
||||
throw new RootElementFoundException(qName);
|
||||
}
|
||||
|
||||
/**
|
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2007 Alfresco Software Limited.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
* As a special exception to the terms and conditions of version 2.0 of
|
||||
* the GPL, you may redistribute this Program in connection with Free/Libre
|
||||
* and Open Source Software ("FLOSS") applications as described in Alfresco's
|
||||
* FLOSS exception. You should have recieved a copy of the text describing
|
||||
* the FLOSS exception, and it is also available here:
|
||||
* http://www.alfresco.com/legal/licensing"
|
||||
*/
|
||||
package org.alfresco.repo.content.selector;
|
||||
|
||||
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.ContentWorker;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.PropertyCheck;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
|
||||
|
||||
/**
|
||||
* A selector that executes a set of XPath statements against the XML document to determine
|
||||
* which content worker to provide. The XPath rules are simple, i.e. if an XML node is
|
||||
* found by the XPath statement, then it is considered to be a hit and the corresponding
|
||||
* worker is returned.
|
||||
* <p>
|
||||
* Currently, the only namespaces supported are those contained in the XML documents being
|
||||
* tested.
|
||||
*
|
||||
* @since 2.1
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class XPathContentWorkerSelector<W extends ContentWorker> implements ContentWorkerSelector
|
||||
{
|
||||
private static Log logger = LogFactory.getLog(XPathContentWorkerSelector.class);
|
||||
|
||||
private DocumentBuilder documentBuilder;
|
||||
private XPathFactory xpathFactory;
|
||||
private Set<String> supportedMimetypes;
|
||||
private Map<String, W> workersByXPath;
|
||||
|
||||
public XPathContentWorkerSelector()
|
||||
{
|
||||
try
|
||||
{
|
||||
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
||||
xpathFactory = XPathFactory.newInstance();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Failed to initialize XPathContentWorkerSelector", e);
|
||||
}
|
||||
supportedMimetypes = new HashSet<String>();
|
||||
supportedMimetypes.add(MimetypeMap.MIMETYPE_XML);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
StringBuilder sb = new StringBuilder(50);
|
||||
sb.append("XPathContentWorkerSelector")
|
||||
.append("[ workers=").append(workersByXPath)
|
||||
.append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optionally set the mimetypes supported. They must be XML formats that the chosen
|
||||
* parser will be able to handle.
|
||||
*
|
||||
* @param supportedMimetypes the list of mimetypes. The default is <b>text/xml</b>.
|
||||
*/
|
||||
public void setSupportedMimetypes(Set<String> supportedMimetypes)
|
||||
{
|
||||
this.supportedMimetypes = supportedMimetypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the workers to use. All the XPath statements provided must be compatible with
|
||||
* a return value of type {@linkplain XPathConstants#NODE NODE}.
|
||||
*
|
||||
* @param workers a map of {@linkplain ContentWorker} instances
|
||||
* keyed by XPath statements
|
||||
*/
|
||||
public void setWorkers(Map<String, W> workers)
|
||||
{
|
||||
this.workersByXPath = workers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the configuration.
|
||||
*/
|
||||
public void init()
|
||||
{
|
||||
PropertyCheck.mandatory(this, "workers", workersByXPath);
|
||||
PropertyCheck.mandatory(this, "supportedMimetypes", supportedMimetypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the XPath statements, in order, against the document. Any statements that fail
|
||||
* to run will be ignored.
|
||||
*/
|
||||
public W getWorker(ContentReader reader)
|
||||
{
|
||||
if (!supportedMimetypes.contains(reader.getMimetype()))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
W worker = null;
|
||||
InputStream is = null;
|
||||
String xpath = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
Document doc = documentBuilder.parse(is);
|
||||
// Execute the statements
|
||||
worker = processDocument(doc);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new ContentIOException("\n" +
|
||||
"Failed to XPaths against XML document: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" Selector: " + this,
|
||||
e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
// Done
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("\n" +
|
||||
"Chosen content worker for reader: \n" +
|
||||
" Reader: " + reader + "\n" +
|
||||
" XPath: " + xpath + "\n" +
|
||||
" Worker: " + worker);
|
||||
}
|
||||
return worker;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the given document against the list of XPath statements provided.
|
||||
*
|
||||
* @param document the XML document
|
||||
* @return Returns a content worker that was matched or <tt>null</tt>
|
||||
*/
|
||||
private W processDocument(Document doc)
|
||||
{
|
||||
for (Map.Entry<String, W> entry : workersByXPath.entrySet())
|
||||
{
|
||||
try
|
||||
{
|
||||
String xpath = entry.getKey();
|
||||
W worker = entry.getValue();
|
||||
// Execute the statement
|
||||
Object ret = xpathFactory.newXPath().evaluate(xpath, doc, XPathConstants.NODE);
|
||||
if (ret != null)
|
||||
{
|
||||
// We found one
|
||||
return worker;
|
||||
}
|
||||
}
|
||||
catch (XPathExpressionException e)
|
||||
{
|
||||
// We accept this and move on
|
||||
}
|
||||
}
|
||||
// Nothing found
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -47,4 +47,59 @@
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
<!-- Selector that checks root element names; each map key is mapped to the worker to use -->
<bean id="extracter.xml.selector.RootElementSelector"
      class="org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector"
      init-method="init">
   <property name="workers">
      <map>
         <!-- Entry that is mapped to no worker -->
         <entry key="BOGUS_ROOT_ELEMENT"><null /></entry>
         <entry key="model"><ref bean="extracter.xml.AlfrescoModelMetadataExtracter" /></entry>
         <entry key="projectDescription"><ref bean="extracter.xml.EclipseProjectMetadataExtracter" /></entry>
      </map>
   </property>
</bean>
|
||||
|
||||
<!-- Selector that executes XPath statements; each map key is an XPath statement mapped to the worker to use -->
<bean id="extracter.xml.selector.XPathSelector"
      class="org.alfresco.repo.content.selector.XPathContentWorkerSelector"
      init-method="init">
   <property name="workers">
      <map>
         <!-- Statement that is mapped to no worker -->
         <entry key="/my:test"><null /></entry>
         <entry key="/model[@name='fm:forummodel']"><ref bean="extracter.xml.AlfrescoModelMetadataExtracter" /></entry>
         <entry key="/projectDescription"><ref bean="extracter.xml.EclipseProjectMetadataExtracter" /></entry>
      </map>
   </property>
</bean>
|
||||
|
||||
<!-- The wrapper XML metadata extracter; only the XPath selector is wired into its selector list -->
<bean id="extracter.xml.XMLMetadataExtracter"
      class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter"
      parent="baseMetadataExtracter">
   <property name="overwritePolicy"><value>EAGER</value></property>
   <property name="selectors">
      <list>
         <ref bean="extracter.xml.selector.XPathSelector" />
      </list>
   </property>
</bean>
|
||||
|
||||
</beans>
|
Reference in New Issue
Block a user