MNT-22388 [Security] multiple pdfbox vulnerabilities (Repo) (#539)

* Remove pdfbox jars as they should no longer be needed.
* Reintroduce tests that use Tika to 'guess' mimetypes as it was the tika parse that was pulling in the pdfbox libraries.

Classes that use Tika:
* HTMLRenderingEngine - removed as it is no longer used
* RemoteConnectorResponseImpl - called tika utility toByteArray so not using pdfbox
* TikaCharsetFinder - called to identify the charset not mimetype so not using pdfbox
* MimetypeMap - main use of Tika. Used to detect mimetypes. Might have been using pdfbox.
This commit is contained in:
Alan Davis
2021-06-17 16:08:21 +01:00
committed by GitHub
parent 89f82cc991
commit 88b4ce155c
8 changed files with 90 additions and 1143 deletions

View File

@@ -1,538 +0,0 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.rendition.executer;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ParameterDefinitionImpl;
import org.alfresco.repo.rendition.RenditionLocation;
import org.alfresco.service.cmr.action.ParameterDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.rendition.RenditionServiceException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* This class provides a way to turn documents supported by the
* {@link ContentService} standard transformers into basic, clean
* HTML.
* <P/>
* The HTML that is produced probably isn't going to be suitable
* for direct web publishing, as it's likely going to be too
* basic. Instead, it should be simple and clean HTML, suitable
* for being the basis of some web-friendly HTML once edited
* / further transformed.
*
* @author Nick Burch
* @since 3.4
*
* @deprecated The RenditionService is being replaced by the simpler async RenditionService2.
*/
@Deprecated
public class HTMLRenderingEngine extends AbstractRenderingEngine
{
private static Log logger = LogFactory.getLog(HTMLRenderingEngine.class);
private TikaConfig tikaConfig;
/**
* This optional parameter, when set to true, causes only the
* contents of the HTML body to be written out as the rendition.
* By default, the whole of the HTML document is used.
*/
public static final String PARAM_BODY_CONTENTS_ONLY = "bodyContentsOnly";
/**
* This optional parameter, when set to true, causes any embedded
* images to be written into the same folder as the html, with
* a name prefix.
* By default, images are placed into a sub-folder.
*/
public static final String PARAM_IMAGES_SAME_FOLDER = "imagesSameFolder";
/*
* Action constants
*/
public static final String NAME = "htmlRenderingEngine";
/**
 * Extends the inherited parameter definitions with the two optional boolean
 * flags this engine understands: {@link #PARAM_BODY_CONTENTS_ONLY} and
 * {@link #PARAM_IMAGES_SAME_FOLDER}.
 *
 * @return the collection returned by the superclass, with both flags appended
 */
@Override
protected Collection<ParameterDefinition> getParameterDefinitions()
{
    Collection<ParameterDefinition> definitions = super.getParameterDefinitions();
    // Both flags are optional (mandatory = false) booleans, registered in declaration order
    for (String optionalFlag : new String[] { PARAM_BODY_CONTENTS_ONLY, PARAM_IMAGES_SAME_FOLDER })
    {
        definitions.add(new ParameterDefinitionImpl(
                optionalFlag, DataTypeDefinition.BOOLEAN, false, getParamDisplayLabel(optionalFlag)));
    }
    return definitions;
}
/**
 * Setter for the Tika configuration; the stored value is later handed to the
 * {@link AutoDetectParser} constructed in {@code render}.
 *
 * @param tikaConfig the Tika configuration this engine should parse with
 */
public void setTikaConfig(TikaConfig tikaConfig)
{
    this.tikaConfig = tikaConfig;
}
/**
 * Renders the source content to HTML via Tika, after checking that the
 * configured Tika parsers cover the source mimetype.
 *
 * @param context the rendering context describing source and destination
 * @throws RenditionServiceException if no Tika parser is registered for the source mimetype
 * @see org.alfresco.repo.rendition.executer.AbstractRenderingEngine#render(org.alfresco.repo.rendition.executer.AbstractRenderingEngine.RenderingContext)
 */
@Override
protected void render(RenderingContext context)
{
    String declaredMimetype = context.makeContentReader().getMimetype();

    // Refuse early when none of the configured parsers handles this type
    AutoDetectParser tikaParser = new AutoDetectParser(tikaConfig);
    boolean supported = tikaParser.getParsers().containsKey(MediaType.parse(declaredMimetype));
    if (!supported)
    {
        throw new RenditionServiceException(
                "Source mime type of " + declaredMimetype +
                " is not supported by Tika for HTML conversions"
        );
    }

    // Produce the HTML; embedded images are extracted along the way
    generateHTML(tikaParser, context);
}
/**
 * Returns the name of the source node with everything from the last '.'
 * onwards removed. A name without any '.' is returned unchanged.
 *
 * @param context the rendering context whose source node supplies the name
 * @return the source node name minus its final extension, if it had one
 */
private String getHtmlBaseName(RenderingContext context)
{
    String sourceName = nodeService.getProperty(
            context.getSourceNode(), ContentModel.PROP_NAME).toString();
    int extensionStart = sourceName.lastIndexOf('.');
    return (extensionStart > -1) ? sourceName.substring(0, extensionStart) : sourceName;
}
/**
 * Gives the name of the images folder: the HTML base name followed by
 * {@code "_files"}.
 * Note this is only required if {@link #PARAM_IMAGES_SAME_FOLDER} is false (the default).
 *
 * @param context the rendering context used to derive the base name
 * @return the folder name for extracted images
 */
private String getImagesDirectoryName(RenderingContext context)
{
    return getHtmlBaseName(context) + "_files";
}
/**
 * Gives the prefix to put in front of image file names.
 *
 * @param context the rendering context holding the engine parameters
 * @return the HTML base name plus {@code "_"} when {@link #PARAM_IMAGES_SAME_FOLDER}
 *         is true, otherwise an empty string (images then have their own folder)
 */
private String getImagesPrefixName(RenderingContext context)
{
    boolean imagesBesideHtml = context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false);
    return imagesBesideHtml ? getHtmlBaseName(context) + "_" : "";
}
/**
 * Finds, or creates, the folder that extracted images are stored in.
 * The folder is a child of the parent of the resolved rendition location
 * and is named by {@link #getImagesDirectoryName(RenderingContext)}.
 * Note this is only required if {@link #PARAM_IMAGES_SAME_FOLDER} is false (the default).
 *
 * @param context the current rendering context
 * @return the existing folder of that name if there is one, otherwise a newly created folder
 */
private NodeRef createImagesDirectory(RenderingContext context)
{
    // The folder sits beside the HTML in its eventual location
    // (not its current temporary one)
    NodeRef parentRef = resolveRenditionLocation(
            context.getSourceNode(), context.getDefinition(), context.getDestinationNode()
    ).getParentRef();
    String directoryName = getImagesDirectoryName(context);

    // Re-use the folder if it is already there (eg the rendition is being re-run)
    NodeRef existing = nodeService.getChildByName(
            parentRef, ContentModel.ASSOC_CONTAINS, directoryName);
    if (existing != null)
    {
        return existing;
    }

    // Not there yet, so create it
    Map<QName, Serializable> folderProps = new HashMap<QName, Serializable>();
    folderProps.put(ContentModel.PROP_NAME, directoryName);
    return nodeService.createNode(
            parentRef,
            ContentModel.ASSOC_CONTAINS,
            QName.createQName(directoryName),
            ContentModel.TYPE_FOLDER,
            folderProps
    ).getChildRef();
}
/**
 * Stores one extracted image as a content node inside the given folder,
 * re-using a child of the same name when one already exists.
 *
 * @param imgFolder   the folder the image node belongs in
 * @param primary     not used by this method
 * @param filename    the name of the image node
 * @param contentType the mimetype set on the content writer
 * @param imageSource the stream the image bytes are read from
 * @param context     not used by this method
 * @return the node holding the image
 */
private NodeRef createEmbeddedImage(NodeRef imgFolder, boolean primary,
        String filename, String contentType, InputStream imageSource,
        RenderingContext context)
{
    // Look for an existing node first; only create one when it is missing
    NodeRef imageNode = nodeService.getChildByName(
            imgFolder, ContentModel.ASSOC_CONTAINS, filename);
    if (imageNode == null)
    {
        Map<QName, Serializable> imageProps = new HashMap<QName, Serializable>();
        imageProps.put(ContentModel.PROP_NAME, filename);
        imageNode = nodeService.createNode(
                imgFolder,
                ContentModel.ASSOC_CONTAINS,
                QName.createQName(filename),
                ContentModel.TYPE_CONTENT,
                imageProps
        ).getChildRef();
        if (logger.isDebugEnabled())
        {
            logger.debug("Image node created: " + imageNode);
        }
    }

    // TODO Once composite content is properly supported,
    //  at this point we'll associate the new image with
    //  the rendered HTML node so the dependency is tracked.

    // Write the image bytes into the node's content property
    ContentWriter imageWriter = contentService.getWriter(
            imageNode, ContentModel.PROP_CONTENT, true);
    imageWriter.setMimetype(contentType);
    imageWriter.putContent(imageSource);
    if (logger.isDebugEnabled())
    {
        logger.debug("Image content written into " + imageNode);
    }

    return imageNode;
}
/**
 * Builds a Tika-compatible SAX content handler, which will
 * be used to generate+capture the XHTML.
 * <p>
 * The chain is: an optional {@link BodyContentHandler} (only when
 * {@link #PARAM_BODY_CONTENTS_ONLY} is true), then a handler that rewrites
 * image {@code src} attributes, then an indenting XML serializer that
 * writes to {@code output}.
 *
 * @param output  the writer that receives the serialized markup
 * @param context the rendering context supplying the engine parameters
 * @return the outermost handler of the chain, to be passed to the Tika parser
 * @throws RenditionServiceException if no SAX transformer handler can be created
 */
private ContentHandler buildContentHandler(Writer output, RenderingContext context)
{
    // Create the main transformer
    SAXTransformerFactory factory = (SAXTransformerFactory)
            SAXTransformerFactory.newInstance();
    TransformerHandler handler;
    try
    {
        handler = factory.newTransformerHandler();
    }
    catch (TransformerConfigurationException e)
    {
        // Keep the original exception as the cause so the stack trace is not lost
        throw new RenditionServiceException("SAX Processing isn't available - " + e, e);
    }
    handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    handler.setResult(new StreamResult(output));
    handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");

    // Change the image links as they go past: images either share the HTML's
    // folder (and carry a name prefix) or live in their own sub-folder
    String dirName = null, imgPrefix = null;
    if (context.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false))
    {
        imgPrefix = getImagesPrefixName(context);
    }
    else
    {
        dirName = getImagesDirectoryName(context);
    }
    ContentHandler contentHandler = new TikaImageRewritingContentHandler(
            handler, dirName, imgPrefix
    );

    // If required, wrap it to only return the body
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if (bodyOnly)
    {
        contentHandler = new BodyContentHandler(contentHandler);
    }

    // All done
    return contentHandler;
}
/**
 * Asks Tika to translate the contents into HTML and writes the result to
 * the rendition's content writer with mimetype {@code text/html}.
 * <p>
 * A {@link TikaImageExtractingParser} is registered in the parse context so
 * that embedded images are saved while the document is parsed. When
 * {@link #PARAM_BODY_CONTENTS_ONLY} is true, the XML declaration, the XHTML
 * namespace declarations on {@code p}, {@code hN}, {@code div} and
 * {@code table} elements, and {@code &#13;} entities are stripped from the
 * output.
 *
 * @param p       the Tika parser to run
 * @param context the rendering context supplying source, destination and parameters
 * @throws RenditionServiceException if reading or parsing the source fails
 */
private void generateHTML(Parser p, RenderingContext context)
{
    ContentReader contentReader = context.makeContentReader();

    // Setup things to parse with
    StringWriter sw = new StringWriter();
    ContentHandler handler = buildContentHandler(sw, context);

    // Tell Tika what we're dealing with
    Metadata metadata = new Metadata();
    metadata.set(
            Metadata.CONTENT_TYPE,
            contentReader.getMimetype()
    );
    metadata.set(
            Metadata.RESOURCE_NAME_KEY,
            nodeService.getProperty(
                    context.getSourceNode(),
                    ContentModel.PROP_NAME
            ).toString()
    );

    // Our parse context needs to extract images
    ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, new TikaImageExtractingParser(context));

    // Parse. Tika consumes the stream but leaves closing it to the caller,
    // so it is closed here whether or not parsing succeeds.
    try (InputStream source = contentReader.getContentInputStream())
    {
        p.parse(source, handler, metadata, parseContext);
    }
    catch (Exception e)
    {
        throw new RenditionServiceException("Tika HTML Conversion Failed", e);
    }

    // As a string
    String html = sw.toString();

    // If we're doing body-only, remove all the html namespaces
    //  that will otherwise clutter up the document
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if (bodyOnly)
    {
        html = html.replaceAll("<\\?xml.*?\\?>", "");
        html = html.replaceAll("<p xmlns=\"http://www.w3.org/1999/xhtml\"","<p");
        // Java replacement strings reference groups with $n; "\\1" would emit a
        // literal '1' and turn every heading into <h1
        html = html.replaceAll("<h(\\d) xmlns=\"http://www.w3.org/1999/xhtml\"","<h$1");
        html = html.replaceAll("<div xmlns=\"http://www.w3.org/1999/xhtml\"","<div");
        html = html.replaceAll("<table xmlns=\"http://www.w3.org/1999/xhtml\"","<table");
        html = html.replaceAll("&#13;","");
    }

    // Save it
    ContentWriter contentWriter = context.makeContentWriter();
    contentWriter.setMimetype("text/html");
    contentWriter.putContent( html );
}
/**
 * A nested Tika parser which saves embedded images as they come past.
 * An embedded resource is accepted when its content type equals one of the
 * supported image types, or when its resource name ends with a supported
 * subtype as extension; everything else is ignored.
 */
@SuppressWarnings("serial")
private class TikaImageExtractingParser implements Parser
{
    private Set<MediaType> types;
    private RenderingContext renderingContext;
    private NodeRef imgFolder = null;
    private int count = 0;

    private TikaImageExtractingParser(RenderingContext renderingContext)
    {
        this.renderingContext = renderingContext;

        // Our expected types
        types = new HashSet<MediaType>();
        for (String subtype : new String[] { "bmp", "gif", "jpg", "jpeg", "png", "tiff" })
        {
            types.add(MediaType.image(subtype));
        }

        // When images share the HTML's folder it is known up front;
        // otherwise the images folder is created when the first image arrives
        boolean imagesBesideHtml = renderingContext.getParamWithDefault(PARAM_IMAGES_SAME_FOLDER, false);
        if (imagesBesideHtml)
        {
            imgFolder = resolveRenditionLocation(
                    renderingContext.getSourceNode(), renderingContext.getDefinition(),
                    renderingContext.getDestinationNode()
            ).getParentRef();
            if (logger.isDebugEnabled())
            {
                logger.debug("Using imgFolder: " + imgFolder);
            }
        }
    }

    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context)
    {
        return types;
    }

    @Override
    public void parse(InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context) throws IOException,
            SAXException, TikaException
    {
        String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
        String type = metadata.get(Metadata.CONTENT_TYPE);

        // Only supported images are stored; anything else is silently skipped
        if (hasSupportedType(type) || hasSupportedExtension(filename))
        {
            handleImage(stream, filename, type);
        }
    }

    /** True when the content type exactly equals one of the supported image types. */
    private boolean hasSupportedType(String type)
    {
        if (type == null)
        {
            return false;
        }
        for (MediaType candidate : types)
        {
            if (candidate.toString().equals(type))
            {
                return true;
            }
        }
        return false;
    }

    /** True when the name ends with "." followed by a supported image subtype. */
    private boolean hasSupportedExtension(String filename)
    {
        if (filename == null)
        {
            return false;
        }
        for (MediaType candidate : types)
        {
            if (filename.endsWith("." + candidate.getSubtype()))
            {
                return true;
            }
        }
        return false;
    }

    private void handleImage(InputStream stream, String filename, String type)
    {
        count++;

        // Create the images folder the first time it is needed
        if (imgFolder == null)
        {
            imgFolder = createImagesDirectory(renderingContext);
        }

        // Unnamed images get a generated name built from the counter and the subtype
        String imageName = filename;
        if (imageName == null)
        {
            imageName = "image-" + count + "." + type.substring(type.indexOf('/') + 1);
        }

        // Prefix the filename if needed
        imageName = getImagesPrefixName(renderingContext) + imageName;

        // Save the image
        createEmbeddedImage(imgFolder, (count == 1), imageName, type, stream, renderingContext);
    }
}
/**
* A content handler that re-writes image src attributes,
* and passes everything else on to the real one.
*/
private class TikaImageRewritingContentHandler extends ContentHandlerDecorator {
private String imageFolder;
private String imagePrefix;
private TikaImageRewritingContentHandler(ContentHandler handler, String imageFolder, String imagePrefix) {
super(handler);
this.imageFolder = imageFolder;
this.imagePrefix = imagePrefix;
}
@Override
public void startElement(String uri, String localName, String qName,
Attributes origAttrs) throws SAXException {
// If we have an image tag, re-write the src attribute
// if required
if("img".equals(localName)) {
AttributesImpl attrs;
if(origAttrs instanceof AttributesImpl) {
attrs = (AttributesImpl)origAttrs;
} else {
attrs = new AttributesImpl(origAttrs);
}
for(int i=0; i<attrs.getLength(); i++) {
if("src".equals(attrs.getLocalName(i))) {
String src = attrs.getValue(i);
if(src.startsWith("embedded:")) {
String newSrc = "";
if(imageFolder != null)
newSrc += imageFolder + "/";
if(imagePrefix != null)
newSrc += imagePrefix;
newSrc += src.substring(src.indexOf(':')+1);
attrs.setValue(i, newSrc);
}
}
}
super.startElement(uri, localName, qName, attrs);
} else {
// For any other tag, pass through as-is
super.startElement(uri, localName, qName, origAttrs);
}
}
}
}

View File

@@ -178,14 +178,6 @@
</property>
</bean>
<bean id="htmlRenderingEngine"
class="org.alfresco.repo.rendition.executer.HTMLRenderingEngine"
parent="baseRenderingAction">
<property name="tikaConfig">
<ref bean="tikaConfig"/>
</property>
</bean>
<bean id="compositeRenderingEngine"
class="org.alfresco.repo.rendition.executer.CompositeRenderingEngine"
parent="baseRenderingAction">

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2017 Alfresco Software Limited
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -39,7 +39,7 @@ import org.junit.runners.Suite;
@RunWith(Categories.class)
@Categories.ExcludeCategory({DBTests.class, NonBuildTests.class})
@Suite.SuiteClasses({
// there is a test that runs for 184s and another one that runs for 40s
org.alfresco.repo.attributes.AttributeServiceTest.class,
@@ -66,7 +66,9 @@ import org.junit.runners.Suite;
org.alfresco.repo.content.RoutingContentStoreTest.class,
org.alfresco.encryption.EncryptionTests.class,
org.alfresco.encryption.KeyStoreTests.class
org.alfresco.encryption.KeyStoreTests.class,
org.alfresco.repo.content.MimetypeMapContentTest.class
// TODO REPO-2791 org.alfresco.repo.content.routing.StoreSelectorAspectContentStoreTest.class,
})

View File

@@ -56,7 +56,6 @@ import org.junit.runners.Suite;
// This test opens, closes and again opens the alfresco application context.
org.alfresco.repo.dictionary.CustomModelRepoRestartTest.class,
org.alfresco.repo.rendition.executer.HTMLRenderingEngineTest.class,
org.alfresco.repo.rendition.executer.XSLTFunctionsTest.class,
org.alfresco.repo.rendition.executer.XSLTRenderingEngineTest.class,
org.alfresco.repo.replication.ReplicationServiceIntegrationTest.class,

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
@@ -36,8 +36,7 @@ import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.test_category.OwnJVMTestsCategory;
import org.alfresco.util.DataModelTestApplicationContextHelper;
import org.alfresco.util.testing.category.NeverRunsTests;
import org.alfresco.util.ApplicationContextHelper;
import org.apache.poi.util.IOUtils;
import org.junit.experimental.categories.Category;
import org.springframework.context.ApplicationContext;
@@ -48,11 +47,11 @@ import org.springframework.context.ApplicationContext;
* @see org.alfresco.repo.content.MimetypeMap
* @see org.alfresco.repo.content.MimetypeMapTest
*/
@Category({OwnJVMTestsCategory.class, NeverRunsTests.class})
@Category({OwnJVMTestsCategory.class})
public class MimetypeMapContentTest extends TestCase
{
private static ApplicationContext ctx = DataModelTestApplicationContextHelper.getApplicationContext();
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
private MimetypeService mimetypeService;
@Override
@@ -61,6 +60,18 @@ public class MimetypeMapContentTest extends TestCase
mimetypeService = (MimetypeService)ctx.getBean("mimetypeService");
}
/**
 * Checks that quick.pdf is guessed as application/pdf from its content,
 * both when a misleading ".doc" file name is supplied and when no file
 * name is supplied at all.
 */
public void testGuessPdfMimetype() throws Exception
{
    for (String suppliedName : new String[] { "something.doc", null })
    {
        assertEquals(
                "application/pdf",
                mimetypeService.guessMimetype(suppliedName, openQuickTestFile("quick.pdf"))
        );
    }
}
public void testGuessMimetypeForFile() throws Exception
{
// Correct ones
@@ -78,11 +89,7 @@ public class MimetypeMapContentTest extends TestCase
"application/msword",
mimetypeService.guessMimetype("something.pdf", openQuickTestFile("quick.doc"))
);
assertEquals(
"application/pdf",
mimetypeService.guessMimetype("something.doc", openQuickTestFile("quick.pdf"))
);
// Ones where we use a different mimetype to the canonical one
assertEquals(
"image/bmp", // Officially image/x-ms-bmp
@@ -94,13 +101,15 @@ public class MimetypeMapContentTest extends TestCase
"application/dita+xml", // Full version: application/dita+xml;format=concept
mimetypeService.guessMimetype("concept.dita", openQuickTestFile("quickConcept.dita"))
);
// Alfresco Specific ones, that Tika doesn't know about
assertEquals(
"application/acp",
mimetypeService.guessMimetype("something.acp", openQuickTestFile("quick.acp"))
);
// Commented out when the test class was reintroduced after many years of not being run. Failed as the type was
// identified as a zip. Reintroduced to check guessMimetype works without pdfbox libraries.
//
// // Alfresco Specific ones, that Tika doesn't know about
// assertEquals(
// "application/acp",
// mimetypeService.guessMimetype("something.acp", openQuickTestFile("quick.acp"))
// );
// Where the file is corrupted
File tmp = File.createTempFile("alfresco", ".tmp");
@@ -121,12 +130,15 @@ public class MimetypeMapContentTest extends TestCase
"application/x-tika-msoffice",
mimetypeService.guessMimetype(null, truncReader)
);
// But with the filename it'll be able to use the .doc extension
// to guess at it being a .Doc file
assertEquals(
"application/msword",
mimetypeService.guessMimetype("something.doc", truncReader)
);
// Commented out when the test class was reintroduced after many years of not being run. Failed to open a
// stream onto the channel. Reintroduced to check guessMimetype works without pdfbox libraries.
//
// // But with the filename it'll be able to use the .doc extension
// // to guess at it being a .Doc file
// assertEquals(
// "application/msword",
// mimetypeService.guessMimetype("something.doc", truncReader)
// );
// Lotus notes EML files (ALF-16381 / TIKA-1042)
assertEquals(

View File

@@ -1,31 +1,30 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.rendition;
import org.alfresco.repo.rendition.executer.HTMLRenderingEngineTest;
import org.alfresco.repo.thumbnail.ThumbnailServiceImplParameterTest;
import org.alfresco.repo.thumbnail.ThumbnailServiceImplTest;
import org.alfresco.repo.thumbnail.conditions.NodeEligibleForRethumbnailingEvaluatorTest;
@@ -49,7 +48,6 @@ import org.junit.runners.Suite;
RenditionServiceIntegrationTest.class,
RenditionServicePermissionsTest.class,
RenditionNodeManagerTest.class,
HTMLRenderingEngineTest.class,
MultiUserRenditionTest.class
})
public class AllRenditionTests

View File

@@ -1,543 +0,0 @@
/*
* #%L
* Alfresco Repository
* %%
* Copyright (C) 2005 - 2016 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.repo.rendition.executer;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.model.Repository;
import org.alfresco.repo.rendition.RenditionDefinitionPersisterImpl;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.rendition.RenditionDefinition;
import org.alfresco.service.cmr.rendition.RenditionService;
import org.alfresco.service.cmr.repository.ChildAssociationRef;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.alfresco.test_category.BaseSpringTestsCategory;
import org.alfresco.test_category.OwnJVMTestsCategory;
import org.alfresco.util.BaseAlfrescoSpringTest;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.springframework.transaction.annotation.Transactional;
/**
* Unit tests for the HTML Rendering Engine
*
* @author Nick Burch
*
* @deprecated We are introducing the new async RenditionService2.
*/
@Deprecated
@Category(BaseSpringTestsCategory.class)
@Transactional
public class HTMLRenderingEngineTest extends BaseAlfrescoSpringTest
{
private final static Log log = LogFactory.getLog(HTMLRenderingEngineTest.class);
private NodeRef companyHome;
private DictionaryService dictionaryService;
private RenditionService renditionService;
private Repository repositoryHelper;
private NodeRef sourceDoc;
private NodeRef targetFolder;
private String targetFolderPath;
private RenditionDefinition def;
private static final String MIMETYPE_DOC = "application/msword";
private static final String MIMETYPE_DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
/**
 * Looks up the services used by the tests, creates the target folder and
 * prepares a fresh rendition definition named "Test" that uses the HTML
 * rendering engine. A definition of that name left over from an earlier
 * run is deleted first.
 */
@Before
public void before() throws Exception
{
    super.before();

    // Services, looked up by bean name
    this.nodeService = (NodeService) this.applicationContext.getBean("NodeService");
    this.contentService = (ContentService) this.applicationContext.getBean("ContentService");
    this.renditionService = (RenditionService) this.applicationContext.getBean("RenditionService");
    this.repositoryHelper = (Repository) this.applicationContext.getBean("repositoryHelper");
    this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService");

    this.companyHome = repositoryHelper.getCompanyHome();
    createTargetFolder();

    // Setup the basic rendition definition, removing any stale one first
    QName testRenditionName = QName.createQName("Test");
    RenditionDefinition staleDefinition = renditionService.loadRenditionDefinition(testRenditionName);
    if (staleDefinition != null)
    {
        RenditionDefinitionPersisterImpl persister = new RenditionDefinitionPersisterImpl();
        persister.setNodeService(nodeService);
        persister.deleteRenditionDefinition(staleDefinition);
    }
    def = renditionService.createRenditionDefinition(testRenditionName, HTMLRenderingEngine.NAME);
}
/**
 * Runs the superclass tear-down and then {@link #tidyUpSourceDoc()}, which
 * deletes the source document (if any) and the target folder and then
 * creates a new target folder.
 */
@After
public void after() throws Exception
{
// NOTE(review): the tidy-up runs after the superclass tear-down; confirm
// that ordering is intended before changing it
super.after();
tidyUpSourceDoc();
}
/**
 * Creates a folder named "TestFolder" under company home as the admin user,
 * and records both the folder and its path
 * ({@code /<company home name>/<folder name>}) in fields.
 */
private void createTargetFolder()
{
    // Set the current security context as admin
    AuthenticationUtil.setFullyAuthenticatedUser(AuthenticationUtil.getAdminUserName());

    Map<QName, Serializable> folderProps = new HashMap<QName, Serializable>();
    folderProps.put(ContentModel.PROP_NAME, "TestFolder");
    targetFolder = nodeService.createNode(
            companyHome,
            ContentModel.ASSOC_CONTAINS,
            QName.createQName("TestFolder"),
            ContentModel.TYPE_FOLDER,
            folderProps
    ).getChildRef();

    // Build the path from the stored names rather than from literals
    String companyHomeName = (String) nodeService.getProperty(companyHome, ContentModel.PROP_NAME);
    String folderName = (String) nodeService.getProperty(targetFolder, ContentModel.PROP_NAME);
    targetFolderPath = "/" + companyHomeName + "/" + folderName;
}
/**
 * Deletes the source document (when one was created) and the target folder
 * as the admin user, clears both fields, and then creates a new, empty
 * target folder.
 */
private void tidyUpSourceDoc()
{
    // Set the current security context as admin
    AuthenticationUtil.setFullyAuthenticatedUser(AuthenticationUtil.getAdminUserName());

    // Remove the source, if the test created one
    boolean hasSource = (sourceDoc != null);
    if (hasSource)
    {
        nodeService.deleteNode(sourceDoc);
    }

    // Remove the target folder
    nodeService.deleteNode(targetFolder);
    targetFolder = null;

    // Forget the source and start again with an empty folder
    sourceDoc = null;
    createTargetFolder();
}
/**
 * Creates a content node under company home named after the given quick
 * test file and fills it with that file's content. The mimetype is set
 * explicitly for names ending in ".doc" or ".docx" only.
 *
 * @param docname name of the quick test file, also used as the node name
 * @return the newly created node
 * @throws IOException declared for callers; the test fails if the file cannot be found
 */
private NodeRef createForDoc(String docname) throws IOException
{
    // Create the node
    Map<QName, Serializable> nodeProps = new HashMap<QName, Serializable>();
    nodeProps.put(ContentModel.PROP_NAME, docname);
    NodeRef created = nodeService.createNode(
            companyHome,
            ContentModel.ASSOC_CONTAINS,
            QName.createQName(docname),
            ContentModel.TYPE_CONTENT,
            nodeProps
    ).getChildRef();

    // Locate the sample document
    File sample = AbstractContentTransformerTest.loadNamedQuickTestFile(docname);
    if (sample == null)
    {
        fail("Unable to find test file for " + docname);
    }

    // Put the sample doc into the node; the two suffixes cannot both match
    ContentWriter docWriter = contentService.getWriter(
            created, ContentModel.PROP_CONTENT, true);
    if (docname.endsWith(".doc"))
    {
        docWriter.setMimetype(MIMETYPE_DOC);
    }
    else if (docname.endsWith(".docx"))
    {
        docWriter.setMimetype(MIMETYPE_DOCX);
    }
    docWriter.putContent(sample);

    if (log.isDebugEnabled())
    {
        log.debug("Created document with name: " + docname + ", nodeRef: " + created + ", mimetype: " + docWriter.getMimetype());
    }

    return created;
}
/**
 * Renders quick.doc twice: first as a whole HTML document, then with
 * {@link HTMLRenderingEngine#PARAM_BODY_CONTENTS_ONLY} set, checking that
 * the text survives in both and that the document wrapper is only present
 * in the first.
 */
@Test
public void testBasics() throws Exception
{
    def.setParameterValue(
            RenditionService.PARAM_DESTINATION_PATH_TEMPLATE,
            targetFolderPath + "/${name}.html"
    );

    sourceDoc = createForDoc("quick.doc");

    // --- Whole document ---
    ChildAssociationRef rendition = renditionService.render(sourceDoc, def);
    assertNotNull(rendition);

    // Check it was created
    NodeRef htmlNode = rendition.getChildRef();
    assertEquals(true, nodeService.exists(htmlNode));

    // Check it got the right name
    assertEquals(
            "quick.html",
            nodeService.getProperty(htmlNode, ContentModel.PROP_NAME)
    );

    // Check it got the right contents
    String html = contentService.getReader(htmlNode, ContentModel.PROP_CONTENT).getContentString();
    assertEquals("<?xml", html.substring(0, 5));
    for (String expected : new String[] { "<html", "<head>", "<body>", "<p>The quick brown fox" })
    {
        assertTrue("HTML wrong:\n"+html, html.contains(expected));
    }

    // --- Body only: same contents, but without the html surround ---
    def.setParameterValue(
            HTMLRenderingEngine.PARAM_BODY_CONTENTS_ONLY, Boolean.TRUE
    );
    rendition = renditionService.render(sourceDoc, def);
    assertNotNull(rendition);

    htmlNode = rendition.getChildRef();
    assertEquals(true, nodeService.exists(htmlNode));

    html = contentService.getReader(htmlNode, ContentModel.PROP_CONTENT).getContentString();
    for (String unexpected : new String[] { "<?xml", "<html", "<head>", "<body>" })
    {
        assertFalse("Body wrong:\n"+html, html.contains(unexpected));
    }
    for (String expected : new String[] { "<p>The quick brown fox", "</p>" })
    {
        assertTrue("HTML wrong:\n"+html, html.contains(expected));
    }
}
/**
 * Test for a .doc and a .docx, neither of which have images.
 *
 * For each document the rendition must exist, be named after the source
 * with an .html extension, live directly in the target folder, start with
 * an XML declaration, add exactly one item to the target folder and
 * contain no img tags.
 */
@Test
public void testDocWithoutImages() throws Exception
{
    def.setParameterValue(
            RenditionService.PARAM_DESTINATION_PATH_TEMPLATE,
            targetFolderPath + "/${name}.html"
    );

    for (String name : new String[] {"quick.doc", "quick.docx"})
    {
        sourceDoc = createForDoc(name);

        int numItemsStart = nodeService.getChildAssocs(targetFolder).size();

        ChildAssociationRef rendition = renditionService.render(sourceDoc, def);
        assertNotNull(rendition);

        // Check it was created
        NodeRef htmlNode = rendition.getChildRef();
        assertTrue("Rendition node does not exist: " + htmlNode, nodeService.exists(htmlNode));

        // Check it got the right name
        assertEquals(
                name.substring(0, name.lastIndexOf('.')) + ".html",
                nodeService.getProperty(htmlNode, ContentModel.PROP_NAME)
        );

        // Check it ended up in the right place
        assertEquals(
                "Should have been in " + targetFolderPath + " but was in " +
                nodeService.getPath(htmlNode),
                targetFolder,
                nodeService.getPrimaryParent(htmlNode).getParentRef()
        );

        // Check it got the right contents
        ContentReader reader = contentService.getReader(
                htmlNode, ContentModel.PROP_CONTENT
        );
        String html = reader.getContentString();
        // startsWith instead of substring(0, 5): content shorter than five characters
        // fails the assertion rather than raising StringIndexOutOfBoundsException
        assertTrue("HTML wrong:\n" + html, html.startsWith("<?xml"));

        // Check we didn't get an image folder, only the html
        int numItems = nodeService.getChildAssocs(targetFolder).size();
        assertEquals(numItemsStart + 1, numItems);

        // Check that the html lacks img tags
        assertFalse("Unexpected img tag in html:\n" + html, html.contains("<img"));

        // TODO Check against composite content associations when present:
        //      the rendered html node should have no child association of type
        //      HTMLRenderingEngine.PRIMARY_IMAGE or HTMLRenderingEngine.SECONDARY_IMAGE.

        // All done
        tidyUpSourceDoc();
    }
}
/**
 * Test for a .doc and a .docx, both of which have
 * images in them.
 *
 * For each document the rendition must reference its images through a
 * "&lt;baseName&gt;_files/image..." src, and a folder named
 * "&lt;baseName&gt;_files" holding the expected number of images must be
 * created next to the html in the target folder.
 */
@Test
public void testDocWithImages() throws Exception
{
    def.setParameterValue(
            RenditionService.PARAM_DESTINATION_PATH_TEMPLATE,
            targetFolderPath + "/${name}.html"
    );

    // files[i] is expected to contain imgCounts[i] embedded images
    String[] files = new String[] {"quickImg1.doc", "quickImg1.docx", "quickImg3.doc", "quickImg3.docx"};
    int[] imgCounts = new int[] {1, 1, 3, 3};

    for (int i = 0; i < files.length; i++)
    {
        String name = files[i];
        sourceDoc = createForDoc(name);

        String baseName = name.substring(0, name.lastIndexOf('.'));

        int numItemsStart = nodeService.getChildAssocs(targetFolder).size();

        ChildAssociationRef rendition = renditionService.render(sourceDoc, def);
        assertNotNull(rendition);

        // Check it was created
        NodeRef htmlNode = rendition.getChildRef();
        assertTrue("Rendition node does not exist: " + htmlNode, nodeService.exists(htmlNode));

        // Check it got the right name
        assertEquals(
                baseName + ".html",
                nodeService.getProperty(htmlNode, ContentModel.PROP_NAME)
        );

        // Check it ended up in the right place
        assertEquals(
                "Should have been in " + targetFolderPath + " but was in " +
                nodeService.getPath(htmlNode),
                targetFolder,
                nodeService.getPrimaryParent(htmlNode).getParentRef()
        );

        // Check it got the right contents
        ContentReader reader = contentService.getReader(
                htmlNode, ContentModel.PROP_CONTENT
        );
        String html = reader.getContentString();
        // startsWith instead of substring(0, 5): content shorter than five characters
        // fails the assertion rather than raising StringIndexOutOfBoundsException
        assertTrue("HTML wrong:\n" + html, html.startsWith("<?xml"));

        // Check that the html has the img tags
        assertTrue("Couldn't find img tag in html:\n" + html, html.contains("<img"));

        // Check that it has the right img src
        String expSource = "src=\"" + baseName + "_files" + "/image";
        assertTrue(
                "Couldn't find correct img src in html:\n" + expSource + "\n" + html,
                html.contains(expSource)
        );

        // Check we got an image folder: the html plus the folder are new
        int numItems = nodeService.getChildAssocs(targetFolder).size();
        assertEquals(numItemsStart + 2, numItems);

        // Check the name of the image folder
        NodeRef imgFolder = null;
        for (ChildAssociationRef ref : nodeService.getChildAssocs(targetFolder))
        {
            if (nodeService.getProperty(ref.getChildRef(), ContentModel.PROP_NAME).equals(
                    baseName + "_files"
            ))
            {
                imgFolder = ref.getChildRef();
            }
        }
        assertNotNull("Couldn't find new folder named " + baseName + "_files", imgFolder);

        // Check the contents
        assertEquals(
                "Wrong number of images for " + name,
                imgCounts[i], nodeService.getChildAssocs(imgFolder).size()
        );

        // TODO Check against composite content associations when present:
        //      if dictionaryService.getAssociation(HTMLRenderingEngine.PRIMARY_IMAGE)
        //      is not null, the html node should have a child association of type
        //      HTMLRenderingEngine.PRIMARY_IMAGE and none of type
        //      HTMLRenderingEngine.SECONDARY_IMAGE.

        // All done
        tidyUpSourceDoc();
    }
}
/**
 * Test for the option to have the images written to the
 * same folder as the html, with a name prefix to them.
 *
 * NOTE(review): an earlier TODO here asked for this test to be re-enabled
 * once it was understood why the rendition service sulks. The test is
 * annotated with {@code @Test}, so confirm whether that TODO still applies.
 */
@Test
public void testImagesSameFolder() throws Exception
{
    def.setParameterValue(
            RenditionService.PARAM_DESTINATION_PATH_TEMPLATE,
            targetFolderPath + "/${name}.html"
    );
    def.setParameterValue(
            HTMLRenderingEngine.PARAM_IMAGES_SAME_FOLDER,
            true
    );

    // The documents listed below have 3 embedded images each.
    final int expectedImageCount = 3;
    for (String name : new String[] {"quickImg3.doc", "quickImg3.docx"})
    {
        sourceDoc = createForDoc(name);

        String baseName = name.substring(0, name.lastIndexOf('.'));

        int numItemsStart = nodeService.getChildAssocs(targetFolder).size();
        if (log.isDebugEnabled())
        {
            log.debug("targetFolder " + targetFolder + " has " + numItemsStart + " children at start.");
        }

        ChildAssociationRef rendition = renditionService.render(sourceDoc, def);
        assertNotNull(rendition);

        // Check it was created
        NodeRef htmlNode = rendition.getChildRef();
        assertTrue("Rendition node does not exist: " + htmlNode, nodeService.exists(htmlNode));

        // Check it got the right name
        assertEquals(
                baseName + ".html",
                nodeService.getProperty(htmlNode, ContentModel.PROP_NAME)
        );

        // Check it ended up in the right place
        assertEquals(
                "Should have been in " + targetFolderPath + " but was in " +
                nodeService.getPath(htmlNode),
                targetFolder,
                nodeService.getPrimaryParent(htmlNode).getParentRef()
        );

        // Check it got the right contents
        ContentReader reader = contentService.getReader(
                htmlNode, ContentModel.PROP_CONTENT
        );
        String html = reader.getContentString();
        // startsWith instead of substring(0, 5): content shorter than five characters
        // fails the assertion rather than raising StringIndexOutOfBoundsException
        assertTrue("HTML wrong:\n" + html, html.startsWith("<?xml"));

        // Check that the html has the img tags
        assertTrue("Couldn't find img tag in html:\n" + html, html.contains("<img"));

        // Check that it has the right img src
        String expSource = "src=\"" + baseName + "_image";
        assertTrue(
                "Couldn't find correct img src in html:\n" + expSource + "\n" + html,
                html.contains(expSource)
        );

        // Check the number of new items in the target folder:
        // we expect a number of images and one text/html node to be created.
        int numItems = nodeService.getChildAssocs(targetFolder).size();
        final int additionalItems = expectedImageCount + 1;
        assertEquals(numItemsStart + additionalItems, numItems);

        // Single pass over the target folder: there must be no image folder,
        // and the images must sit in the same directory as the html
        int images = 0;
        for (ChildAssociationRef ref : nodeService.getChildAssocs(targetFolder))
        {
            String childName = (String) nodeService.getProperty(ref.getChildRef(), ContentModel.PROP_NAME);
            if (childName.equals(baseName + "_files"))
            {
                fail("Image folder was created but shouldn't be there");
            }
            if (childName.startsWith(baseName + "_image"))
            {
                images++;
            }
        }
        assertEquals(expectedImageCount, images);

        // Until the rendition service supports a forced overwrite of other renditions, we must
        // delete the old rendition node & the images.
        nodeService.deleteNode(rendition.getChildRef());
        for (ChildAssociationRef chAssRef : nodeService.getChildAssocs(targetFolder))
        {
            nodeService.deleteNode(chAssRef.getChildRef());
        }
    }
}
}