mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-08-07 17:49:17 +00:00
Add support for real image extraction to the html document rendering engine, and unit tests
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@22459 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -19,11 +19,15 @@
|
||||
|
||||
package org.alfresco.repo.rendition.executer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.TransformerConfigurationException;
|
||||
@@ -42,12 +46,14 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* This class provides a way to turn documents supported by the
|
||||
@@ -106,62 +112,8 @@ public class HTMLRenderingEngine extends AbstractRenderingEngine
|
||||
}
|
||||
|
||||
// Make the HTML Version using Tika
|
||||
// This will also extract out any images as found
|
||||
generateHTML(p, context);
|
||||
|
||||
// Extract out any images
|
||||
// TODO
|
||||
boolean hasImages = true; // TODO
|
||||
if(hasImages)
|
||||
{
|
||||
Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
|
||||
NodeRef imgFolder = null;
|
||||
|
||||
// Extract into it
|
||||
boolean donePrimary = false;
|
||||
for(String fakeContent : new String[] {"Test1","Test2"})
|
||||
{
|
||||
if(imgFolder == null)
|
||||
imgFolder = createImagesDirectory(context);
|
||||
|
||||
// Create the node if needed
|
||||
NodeRef img = nodeService.getChildByName(
|
||||
imgFolder, ContentModel.ASSOC_CONTAINS, fakeContent
|
||||
);
|
||||
if(img == null)
|
||||
{
|
||||
properties.clear();
|
||||
properties.put(ContentModel.PROP_NAME, fakeContent);
|
||||
img = nodeService.createNode(
|
||||
imgFolder,
|
||||
ContentModel.ASSOC_CONTAINS,
|
||||
QName.createQName(fakeContent),
|
||||
ContentModel.TYPE_CONTENT,
|
||||
properties
|
||||
).getChildRef();
|
||||
}
|
||||
|
||||
// If we can, associate it with the rendered HTML, so
|
||||
// that they're properly linked
|
||||
QName assocType = SECONDARY_IMAGE;
|
||||
if(!donePrimary)
|
||||
{
|
||||
assocType = PRIMARY_IMAGE;
|
||||
donePrimary = true;
|
||||
}
|
||||
if(dictionaryService.getAssociation(assocType) != null)
|
||||
{
|
||||
nodeService.createAssociation(
|
||||
context.getDestinationNode(), img, assocType
|
||||
);
|
||||
}
|
||||
|
||||
// Put the image into the node
|
||||
ContentWriter writer = contentService.getWriter(
|
||||
img, ContentModel.PROP_CONTENT, true
|
||||
);
|
||||
writer.putContent(fakeContent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -211,6 +163,52 @@ public class HTMLRenderingEngine extends AbstractRenderingEngine
|
||||
return imgFolder;
|
||||
}
|
||||
|
||||
private NodeRef createEmbeddedImage(NodeRef imgFolder, boolean primary,
|
||||
String filename, String contentType, InputStream imageSource,
|
||||
RenderingContext context)
|
||||
{
|
||||
// Create the node if needed
|
||||
NodeRef img = nodeService.getChildByName(
|
||||
imgFolder, ContentModel.ASSOC_CONTAINS, filename
|
||||
);
|
||||
if(img == null)
|
||||
{
|
||||
Map<QName,Serializable> properties = new HashMap<QName,Serializable>();
|
||||
properties.put(ContentModel.PROP_NAME, filename);
|
||||
img = nodeService.createNode(
|
||||
imgFolder,
|
||||
ContentModel.ASSOC_CONTAINS,
|
||||
QName.createQName(filename),
|
||||
ContentModel.TYPE_CONTENT,
|
||||
properties
|
||||
).getChildRef();
|
||||
}
|
||||
|
||||
// If we can, associate it with the rendered HTML, so
|
||||
// that they're properly linked
|
||||
QName assocType = SECONDARY_IMAGE;
|
||||
if(primary)
|
||||
{
|
||||
assocType = PRIMARY_IMAGE;
|
||||
}
|
||||
if(dictionaryService.getAssociation(assocType) != null)
|
||||
{
|
||||
nodeService.createAssociation(
|
||||
context.getDestinationNode(), img, assocType
|
||||
);
|
||||
}
|
||||
|
||||
// Put the image into the node
|
||||
ContentWriter writer = contentService.getWriter(
|
||||
img, ContentModel.PROP_CONTENT, true
|
||||
);
|
||||
writer.setMimetype(contentType);
|
||||
writer.putContent(imageSource);
|
||||
|
||||
// All done
|
||||
return img;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a Tika-compatible SAX content handler, which will
|
||||
* be used to generate+capture the XHTML
|
||||
@@ -241,10 +239,13 @@ public class HTMLRenderingEngine extends AbstractRenderingEngine
|
||||
{
|
||||
// Setup things to parse with
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext parseContext = new ParseContext();
|
||||
StringWriter sw = new StringWriter();
|
||||
ContentHandler handler = buildContentHandler(sw);
|
||||
|
||||
// Our parse context needs to extract images
|
||||
ParseContext parseContext = new ParseContext();
|
||||
parseContext.set(Parser.class, new TikaImageExtractingParser(context));
|
||||
|
||||
// Parse
|
||||
try {
|
||||
p.parse(
|
||||
@@ -259,4 +260,91 @@ public class HTMLRenderingEngine extends AbstractRenderingEngine
|
||||
ContentWriter contentWriter = context.makeContentWriter();
|
||||
contentWriter.putContent( sw.toString() );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A nested Tika parser which extracts out any
|
||||
* images as they come past.
|
||||
*/
|
||||
@SuppressWarnings("serial")
|
||||
private class TikaImageExtractingParser implements Parser {
|
||||
private Set<MediaType> types;
|
||||
|
||||
private RenderingContext renderingContext;
|
||||
private NodeRef imgFolder = null;
|
||||
private int count = 0;
|
||||
|
||||
private TikaImageExtractingParser(RenderingContext renderingContext) {
|
||||
this.renderingContext = renderingContext;
|
||||
|
||||
// Our expected types
|
||||
types = new HashSet<MediaType>();
|
||||
types.add(MediaType.image("bmp"));
|
||||
types.add(MediaType.image("gif"));
|
||||
types.add(MediaType.image("jpg"));
|
||||
types.add(MediaType.image("jpeg"));
|
||||
types.add(MediaType.image("png"));
|
||||
types.add(MediaType.image("tiff"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||
return types;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(InputStream stream, ContentHandler handler,
|
||||
Metadata metadata, ParseContext context) throws IOException,
|
||||
SAXException, TikaException {
|
||||
// Is it a supported image?
|
||||
String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
|
||||
String type = metadata.get(Metadata.CONTENT_TYPE);
|
||||
boolean accept = false;
|
||||
|
||||
if(type != null) {
|
||||
for(MediaType mt : types) {
|
||||
if(mt.toString().equals(type)) {
|
||||
accept = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(filename != null) {
|
||||
for(MediaType mt : types) {
|
||||
String ext = "." + mt.getSubtype();
|
||||
if(filename.endsWith(ext)) {
|
||||
accept = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!accept)
|
||||
return;
|
||||
|
||||
handleImage(stream, filename, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(InputStream stream, ContentHandler handler,
|
||||
Metadata metadata) throws IOException, SAXException, TikaException {
|
||||
parse(stream, handler, metadata, new ParseContext());
|
||||
}
|
||||
|
||||
private void handleImage(InputStream stream, String filename, String type) {
|
||||
count++;
|
||||
|
||||
// Do we already have the folder? If not, create it
|
||||
if(imgFolder == null) {
|
||||
imgFolder = createImagesDirectory(renderingContext);
|
||||
}
|
||||
|
||||
// Give it a sensible name if needed
|
||||
if(filename == null) {
|
||||
filename = "image-" + count + ".";
|
||||
filename += type.substring(type.indexOf('/')+1);
|
||||
}
|
||||
|
||||
// Save the image
|
||||
createEmbeddedImage(imgFolder, (count==1), filename, type, stream, renderingContext);
|
||||
}
|
||||
}
|
||||
}
|
@@ -29,6 +29,7 @@ import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.repo.model.Repository;
|
||||
import org.alfresco.repo.security.authentication.AuthenticationUtil;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.rendition.RenditionDefinition;
|
||||
import org.alfresco.service.cmr.rendition.RenditionService;
|
||||
import org.alfresco.service.cmr.repository.ChildAssociationRef;
|
||||
@@ -51,6 +52,7 @@ public class HTMLRenderingEngineTest extends BaseAlfrescoSpringTest
|
||||
{
|
||||
private final static Log log = LogFactory.getLog(HTMLRenderingEngineTest.class);
|
||||
private NodeRef companyHome;
|
||||
private DictionaryService dictionaryService;
|
||||
private RenditionService renditionService;
|
||||
private Repository repositoryHelper;
|
||||
|
||||
@@ -75,6 +77,7 @@ public class HTMLRenderingEngineTest extends BaseAlfrescoSpringTest
|
||||
this.contentService = (ContentService) this.applicationContext.getBean("ContentService");
|
||||
this.renditionService = (RenditionService) this.applicationContext.getBean("RenditionService");
|
||||
this.repositoryHelper = (Repository) this.applicationContext.getBean("repositoryHelper");
|
||||
this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService");
|
||||
this.companyHome = repositoryHelper.getCompanyHome();
|
||||
|
||||
createTargetFolder();
|
||||
@@ -241,8 +244,13 @@ public class HTMLRenderingEngineTest extends BaseAlfrescoSpringTest
|
||||
|
||||
// Check we didn't get an image folder, only the html
|
||||
int numItems = nodeService.getChildAssocs(targetFolder).size();
|
||||
// TODO - Enable this when proper folder stuff is in place
|
||||
// assertEquals(numItemsStart+1, numItems);
|
||||
assertEquals(numItemsStart+1, numItems);
|
||||
|
||||
// Check that the html lacks img tags
|
||||
assertEquals(
|
||||
"Unexpected img tag in html:\n" + html,
|
||||
false, html.contains("<img")
|
||||
);
|
||||
|
||||
// Check we didn't get any images
|
||||
for(ChildAssociationRef ref : nodeService.getChildAssocs(htmlNode))
|
||||
@@ -264,7 +272,93 @@ public class HTMLRenderingEngineTest extends BaseAlfrescoSpringTest
|
||||
*/
|
||||
public void testDocWithOneImages() throws Exception
|
||||
{
|
||||
|
||||
RenditionDefinition def = renditionService.createRenditionDefinition(
|
||||
QName.createQName("Test"), HTMLRenderingEngine.NAME);
|
||||
def.setParameterValue(
|
||||
RenditionService.PARAM_DESTINATION_PATH_TEMPLATE,
|
||||
targetFolderPath + "/${name}.html"
|
||||
);
|
||||
|
||||
for(String name : new String[] {"quickImg1.doc","quickImg1.docx"})
|
||||
{
|
||||
sourceDoc = createForDoc(name);
|
||||
|
||||
String baseName = name.substring(0, name.lastIndexOf('.'));
|
||||
|
||||
int numItemsStart = nodeService.getChildAssocs(targetFolder).size();
|
||||
|
||||
ChildAssociationRef rendition = renditionService.render(sourceDoc, def);
|
||||
assertNotNull(rendition);
|
||||
|
||||
// Check it was created
|
||||
NodeRef htmlNode = rendition.getChildRef();
|
||||
assertEquals(true, nodeService.exists(htmlNode));
|
||||
|
||||
// Check it got the right name
|
||||
assertEquals(
|
||||
baseName + ".html",
|
||||
nodeService.getProperty(htmlNode, ContentModel.PROP_NAME)
|
||||
);
|
||||
|
||||
// Check it ended up in the right place
|
||||
assertEquals(
|
||||
"Should have been in " + targetFolderPath + " but was in" +
|
||||
nodeService.getPath(htmlNode),
|
||||
targetFolder,
|
||||
nodeService.getPrimaryParent(htmlNode).getParentRef()
|
||||
);
|
||||
|
||||
// Check it got the right contents
|
||||
ContentReader reader = contentService.getReader(
|
||||
htmlNode, ContentModel.PROP_CONTENT
|
||||
);
|
||||
String html = reader.getContentString();
|
||||
assertEquals("<?xml", html.substring(0, 5));
|
||||
|
||||
// Check that the html has the img tags
|
||||
// assertEquals(
|
||||
// "Couldn't find img tag in html:\n" + html,
|
||||
// true, html.contains("<img")
|
||||
// );
|
||||
|
||||
// Check we got an image folder
|
||||
int numItems = nodeService.getChildAssocs(targetFolder).size();
|
||||
assertEquals(numItemsStart+2, numItems);
|
||||
|
||||
// Check the name of the image folder
|
||||
NodeRef imgFolder = null;
|
||||
for(ChildAssociationRef ref : nodeService.getChildAssocs(targetFolder)) {
|
||||
if(nodeService.getProperty(ref.getChildRef(), ContentModel.PROP_NAME).equals(
|
||||
baseName + "_files"
|
||||
)) {
|
||||
imgFolder = ref.getChildRef();
|
||||
}
|
||||
}
|
||||
assertNotNull("Couldn't find new folder named " + baseName + "_files", imgFolder);
|
||||
|
||||
// Check the contents
|
||||
assertEquals(1, nodeService.getChildAssocs(imgFolder).size());
|
||||
|
||||
|
||||
// Check the associations if supported
|
||||
if(dictionaryService.getAssociation(HTMLRenderingEngine.PRIMARY_IMAGE) != null)
|
||||
{
|
||||
boolean hasPrimary = false;
|
||||
boolean hasSecondary = false;
|
||||
for(ChildAssociationRef ref : nodeService.getChildAssocs(htmlNode))
|
||||
{
|
||||
if(ref.getTypeQName().equals(HTMLRenderingEngine.PRIMARY_IMAGE))
|
||||
hasPrimary = true;
|
||||
if(ref.getTypeQName().equals(HTMLRenderingEngine.SECONDARY_IMAGE))
|
||||
hasSecondary = true;
|
||||
}
|
||||
assertEquals(true, hasPrimary);
|
||||
assertEquals(false, hasSecondary);
|
||||
}
|
||||
|
||||
// All done
|
||||
tidyUpSourceDoc();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user