Merged DEV/SWIFT to HEAD

25834: ALF-7070: more tweaks to node properties serialization
          ALF-7071: initial checkin
          SOLR API client library: node metadata, node text content
   25869: ALF-6862 - When performing XML Metadata Extraction on a file with a DTD,
                     if the DTD cannot be found then re-try the extraction with a parser that ignores DTDs.
          Includes unit tests for a file with and without a DTD, showing we now correctly process both.
   25892: OpenCMIS
          - add multi-filing support to CMIS getObjectParents()
          - update OpenCMIS libraries
   25905: Push the DataList model namespace definition into a constant in NameSpaceService, following the usual pattern,
          rather than hard-coding it in a util class
   25922: (RECORD ONLY) Fix version number

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@28115 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2011-05-30 16:15:56 +00:00
parent a5f1ef9735
commit bdd75588c2
11 changed files with 241 additions and 41 deletions

View File

@@ -57,11 +57,11 @@ public abstract class AbstractMetadataExtracterTest extends TestCase
*/
protected ApplicationContext ctx;
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
protected static final String QUICK_CREATOR = "Nevin Nollop";
protected static final String QUICK_CREATOR_EMAIL = "nevin.nollop@alfresco.com";
protected static final String QUICK_PREVIOUS_AUTHOR = "Derek Hulley";
public static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
public static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
public static final String QUICK_CREATOR = "Nevin Nollop";
public static final String QUICK_CREATOR_EMAIL = "nevin.nollop@alfresco.com";
public static final String QUICK_PREVIOUS_AUTHOR = "Derek Hulley";
protected MimetypeMap mimetypeMap;
protected DictionaryService dictionaryService;

View File

@@ -18,6 +18,7 @@
*/
package org.alfresco.repo.content.metadata.xml;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
@@ -46,10 +47,10 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.springframework.extensions.surf.util.ParameterCheck;
import org.alfresco.util.PropertyCheck;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.extensions.surf.util.ParameterCheck;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -92,6 +93,7 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
private static Log logger = LogFactory.getLog(XPathMetadataExtracter.class);
private DocumentBuilder documentBuilder;
private DocumentBuilder dtdIgnoringDocumentBuilder;
private XPathFactory xpathFactory;
private Map<String, String> namespacesByPrefix;
private Map<String, XPathExpression> xpathExpressionMapping;
@@ -104,7 +106,14 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
try
{
documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
DocumentBuilderFactory normalFactory = DocumentBuilderFactory.newInstance();
documentBuilder = normalFactory.newDocumentBuilder();
DocumentBuilderFactory dtdIgnoringFactory = DocumentBuilderFactory.newInstance();
dtdIgnoringFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
dtdIgnoringFactory.setFeature("http://xml.org/sax/features/validation", false);
dtdIgnoringDocumentBuilder = dtdIgnoringFactory.newDocumentBuilder();
xpathFactory = XPathFactory.newInstance();
}
catch (Throwable e)
@@ -211,7 +220,22 @@ public class XPathMetadataExtracter extends AbstractMappingMetadataExtracter imp
try
{
is = reader.getContentInputStream();
Document doc = documentBuilder.parse(is);
Document doc;
try
{
// Try with the default settings
doc = documentBuilder.parse(is);
}
catch(FileNotFoundException e)
{
// The XML depends on a DTD we don't have available
// Try to parse it without using DTDs. (This may mean we miss
// out on some entities, but it's better than nothing!)
is = reader.getReader().getContentInputStream();
doc = dtdIgnoringDocumentBuilder.parse(is);
}
Map<String, Serializable> rawProperties = processDocument(doc);
if (logger.isDebugEnabled())
{

View File

@@ -18,6 +18,9 @@
*/
package org.alfresco.repo.content.metadata.xml;
import static org.alfresco.repo.content.metadata.AbstractMetadataExtracterTest.QUICK_DESCRIPTION;
import static org.alfresco.repo.content.metadata.AbstractMetadataExtracterTest.QUICK_TITLE;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URL;
@@ -30,6 +33,7 @@ import org.alfresco.repo.action.executer.ActionExecuter;
import org.alfresco.repo.action.executer.SetPropertyValueActionExecuter;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.content.metadata.MetadataExtracter;
import org.alfresco.repo.content.selector.RootElementNameContentWorkerSelector;
import org.alfresco.repo.content.selector.XPathContentWorkerSelector;
@@ -40,6 +44,7 @@ import org.alfresco.service.cmr.action.Action;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MLText;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.StoreRef;
@@ -60,6 +65,7 @@ import org.springframework.context.support.ClassPathXmlApplicationContext;
public class XmlMetadataExtracterTest extends TestCase
{
private static final String FILE_ALFRESCO_MODEL = "xml-metadata/alfresco-model-sample.xml";
private static final String FILE_DITA_FILE = "xml-metadata/dita-concept-quick.xml";
private static final String FILE_ECLIPSE_PROJECT = "xml-metadata/eclipse-project-sample.xml";
private static final String FILE_EMPTY = "xml-metadata/empty-sample.xml";
private static final String FILE_MALFORMED = "xml-metadata/malformed-sample.xml";
@@ -71,6 +77,7 @@ public class XmlMetadataExtracterTest extends TestCase
private AuthenticationComponent authenticationComponent;
private XPathMetadataExtracter alfrescoModelMetadataExtracter;
private XPathMetadataExtracter eclipseProjectMetadataExtracter;
private XPathMetadataExtracter ditaConceptMetadataExtracter;
private RootElementNameContentWorkerSelector<MetadataExtracter> rootElementNameMetadataExtracterSelector;
private XPathContentWorkerSelector<MetadataExtracter> xpathMetadataExtracterSelector;
private XmlMetadataExtracter xmlMetadataExtracter;
@@ -103,6 +110,7 @@ public class XmlMetadataExtracterTest extends TestCase
authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
alfrescoModelMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.AlfrescoModelMetadataExtracter");
eclipseProjectMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.EclipseProjectMetadataExtracter");
ditaConceptMetadataExtracter = (XPathMetadataExtracter) ctx.getBean("extracter.xml.DITAConceptMetadataExtracter");
rootElementNameMetadataExtracterSelector = (RootElementNameContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.RootElementSelector");
xpathMetadataExtracterSelector = (XPathContentWorkerSelector<MetadataExtracter>) ctx.getBean("extracter.xml.selector.XPathSelector");
xmlMetadataExtracter = (XmlMetadataExtracter) ctx.getBean("extracter.xml.XMLMetadataExtracter");
@@ -133,9 +141,9 @@ public class XmlMetadataExtracterTest extends TestCase
alfrescoModelMetadataExtracter.extract(reader, checkProperties);
// Check the values
assertEquals("Gavin Cornwell", checkProperties.get(ContentModel.PROP_AUTHOR));
assertEquals("fm:forummodel", checkProperties.get(ContentModel.PROP_TITLE));
assertEquals("Forum Model", checkProperties.get(ContentModel.PROP_DESCRIPTION));
assertEquals("Gavin Cornwell", getPropertyValue(checkProperties, ContentModel.PROP_AUTHOR));
assertEquals("fm:forummodel", getPropertyValue(checkProperties, ContentModel.PROP_TITLE));
assertEquals("Forum Model", getPropertyValue(checkProperties, ContentModel.PROP_DESCRIPTION));
}
public void testExtractEclipseProject() throws Exception
@@ -149,8 +157,53 @@ public class XmlMetadataExtracterTest extends TestCase
eclipseProjectMetadataExtracter.extract(reader, checkProperties);
// Check the values
assertEquals("Repository", checkProperties.get(ContentModel.PROP_TITLE));
assertEquals("JavaCC Nature", checkProperties.get(ContentModel.PROP_DESCRIPTION));
assertEquals("Repository", getPropertyValue(checkProperties, ContentModel.PROP_TITLE));
assertEquals("JavaCC Nature", getPropertyValue(checkProperties, ContentModel.PROP_DESCRIPTION));
}
/**
 * Extracts metadata from the DITA sample exactly as stored, i.e. with its
 * DOCTYPE declaration still in place.
 */
public void testDITAFileWithDoctype() throws Exception
{
    ContentReader originalReader = getReader(FILE_DITA_FILE);
    assertTrue(originalReader.exists());

    // Confirm that the sample really does carry a DOCTYPE declaration
    String xml = originalReader.getContentString();
    boolean hasDoctype = xml.contains("<!DOCTYPE");
    assertTrue("DOCTYPE should be present but wasn't", hasDoctype);

    // The content was read above, so obtain a new reader for the extraction itself
    ContentReader freshReader = originalReader.getReader();
    doTestDITAFile(freshReader);
}
/**
 * Extracts metadata from a temporary copy of the DITA sample whose DOCTYPE
 * declaration has been stripped out.
 */
public void testDITAFileWithoutDoctype() throws Exception
{
    ContentReader reader = getReader(FILE_DITA_FILE);
    assertTrue(reader.exists());

    // Munge the file to skip the doctype.
    // (?s) lets '.' match line breaks, so a declaration wrapped over several lines is removed too.
    String contents = reader.getContentString();
    String noDocType = contents.replaceAll("(?s)<!DOCTYPE.*?>", "");
    // Guard against silently running the extraction on content that still has its DOCTYPE
    assertFalse("DOCTYPE should have been removed but wasn't", noDocType.contains("<!DOCTYPE"));

    // Write the stripped content to a temporary file, keeping the original encoding and mimetype
    File tmp = File.createTempFile("alfresco", ".xml");
    tmp.deleteOnExit();
    ContentWriter w = new FileContentWriter(tmp);
    w.setEncoding(reader.getEncoding());
    w.setMimetype(reader.getMimetype());
    w.putContent(noDocType);

    // Now test extraction
    doTestDITAFile(w.getReader());
}
/**
 * Runs the DITA concept extracter over the supplied content and checks the
 * title and description that come back.
 *
 * @param reader the DITA content to extract from
 */
private void doTestDITAFile(ContentReader reader) throws Exception
{
    PropertyMap extracted = new PropertyMap();
    ditaConceptMetadataExtracter.extract(reader, extracted);

    // Title first, then description
    String title = getPropertyValue(extracted, ContentModel.PROP_TITLE);
    assertEquals(QUICK_TITLE, title);
    String description = getPropertyValue(extracted, ContentModel.PROP_DESCRIPTION);
    assertEquals(QUICK_DESCRIPTION, description);
}
public void testEmptyFile() throws Exception
@@ -239,16 +292,16 @@ public class XmlMetadataExtracterTest extends TestCase
PropertyMap checkAlfrescoModelProperties = new PropertyMap();
xmlMetadataExtracter.extract(alfrescoModelReader, checkAlfrescoModelProperties);
// Check the values
assertEquals("Gavin Cornwell", checkAlfrescoModelProperties.get(ContentModel.PROP_AUTHOR));
assertEquals("fm:forummodel", checkAlfrescoModelProperties.get(ContentModel.PROP_TITLE));
assertEquals("Forum Model", checkAlfrescoModelProperties.get(ContentModel.PROP_DESCRIPTION));
assertEquals("Gavin Cornwell", getPropertyValue(checkAlfrescoModelProperties, ContentModel.PROP_AUTHOR));
assertEquals("fm:forummodel", getPropertyValue(checkAlfrescoModelProperties, ContentModel.PROP_TITLE));
assertEquals("Forum Model", getPropertyValue(checkAlfrescoModelProperties, ContentModel.PROP_DESCRIPTION));
// Pass the Eclipse Project xml to the extractor
PropertyMap checkEclipseProjectProperties = new PropertyMap();
xmlMetadataExtracter.extract(eclipseProjectReader, checkEclipseProjectProperties);
// Check the values
assertEquals("Repository", checkEclipseProjectProperties.get(ContentModel.PROP_TITLE));
assertEquals("JavaCC Nature", checkEclipseProjectProperties.get(ContentModel.PROP_DESCRIPTION));
assertEquals("Repository", getPropertyValue(checkEclipseProjectProperties, ContentModel.PROP_TITLE));
assertEquals("JavaCC Nature", getPropertyValue(checkEclipseProjectProperties, ContentModel.PROP_DESCRIPTION));
}
/**
@@ -302,4 +355,16 @@ public class XmlMetadataExtracterTest extends TestCase
assertEquals("fm:forummodel", checkTitle);
assertEquals("Forum Model", checkDescription);
}
/**
 * Fetches a property from the map as a String, failing the test if it is absent.
 * An {@link MLText} value is reduced to its default value; any other non-String
 * value is converted with <code>toString()</code>.
 *
 * @param properties the extracted properties
 * @param qname the property to look up
 * @return the property value as a String
 */
private String getPropertyValue(PropertyMap properties, QName qname)
{
    Object value = properties.get(qname);
    assertNotNull("Property " + qname + " missing, properties are " + properties.keySet(), value);

    if (value instanceof MLText)
    {
        return ((MLText) value).getDefaultValue();
    }
    return (value instanceof String) ? (String) value : value.toString();
}
}