MNT-16381 Transformers must validate the content stream mimetype

- Addition of AbstractContentTransformer2.strictMimeTypeCheck(...) to check that the
     declared mimetype is the same as the Tika derived mimetype. Controlled by a new alfresco
     global property content.transformer.strict.mimetype.check (set to true by default).
   - Added the UnsupportedTransformationException to a number of transformation callers as
     they should have been handling it already.
   - Modified DifferrentMimeTypeTest to test the new strictMimeTypeCheck.

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@128168 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Alan Davis
2016-06-15 16:11:15 +00:00
parent c4c30abae0
commit 60a8b718bc
8 changed files with 177 additions and 69 deletions

View File

@@ -475,7 +475,8 @@
<property name="registerTransformer">
<value>true</value>
</property>
<property name="retryTransformOnDifferentMimeType" value="${content.transformer.retryOn.different.mimetype}"/>
<property name="strictMimeTypeCheck" value="${content.transformer.strict.mimetype.check}"/>
<property name="retryTransformOnDifferentMimeType" value="${content.transformer.retryOn.different.mimetype}"/>
<property name="metadataExtracterConfig">
<ref bean="metadataExtracterConfig" />
</property>

View File

@@ -1057,8 +1057,19 @@ people.search.honor.hint.useCQ=true
# Delays cron jobs after bootstrap to allow server to fully come up before jobs start
system.cronJob.startDelayMinutes=1
#
# Check that the declared mimetype (of the Node) is the same as the derived
# mimetype of the content (via Tika) before a transformation takes place.
# Only files in the repository (not intermediate files in a transformer
# pipeline) are checked. This property provides a trade off between a
# security check and a relatively expensive (Tika) operation.
#
content.transformer.strict.mimetype.check=true
#
# Enable transformation retrying if the file has MIME type differ than file extension.
# Ignored if content.transformer.strict.mimetype.check is true as these transformations
# will not take place.
#
content.transformer.retryOn.different.mimetype=true

View File

@@ -70,6 +70,7 @@ public abstract class AbstractContentTransformer2 extends AbstractContentTransfo
private ContentTransformerRegistry registry;
private boolean registerTransformer;
private boolean retryTransformOnDifferentMimeType;
private boolean strictMimeTypeCheck;
MetadataExtracterConfig metadataExtracterConfig;
/**
* A flag that indicates that the transformer should be started in it own Thread so
@@ -248,6 +249,11 @@ public abstract class AbstractContentTransformer2 extends AbstractContentTransfo
targetMimetype, reader.getSize(), options);
}
// MNT-16381: check the mimetype of the file supplied by the user
// matches the sourceMimetype of the reader. Intermediate files are
// not checked.
strictMimeTypeCheck(reader, options);
// Check the transformability
checkTransformable(reader, writer, options);
@@ -338,10 +344,10 @@ public abstract class AbstractContentTransformer2 extends AbstractContentTransfo
String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());
// Report the error
if(differentType == null)
if (differentType == null)
{
transformerDebug.debug(" Failed", e);
throw new ContentIOException("Content conversion failed: \n" +
transformerDebug.debug(" Failed", e);
throw new ContentIOException("Content conversion failed: \n" +
" reader: " + reader + "\n" +
" writer: " + writer + "\n" +
" options: " + options.toString(false) + "\n" +
@@ -432,6 +438,26 @@ public abstract class AbstractContentTransformer2 extends AbstractContentTransfo
}
}
private void strictMimeTypeCheck(ContentReader reader, TransformationOptions options)
throws UnsupportedTransformationException
{
if (strictMimeTypeCheck && depth.get() == 1)
{
String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());
if (differentType != null)
{
String fileName = transformerDebug.getFileName(options, true, 0);
String readerSourceMimetype = reader.getMimetype();
String message = "Transformation of ("+fileName+
") has not taken place because the declared mimetype ("+
readerSourceMimetype+") does not match the detected mimetype ("+
differentType+").";
logger.warn(message);
throw new UnsupportedTransformationException(message);
}
}
}
/**
* Cancels <code>task</code> and closes content accessors
*
@@ -630,4 +656,14 @@ public abstract class AbstractContentTransformer2 extends AbstractContentTransfo
{
this.retryTransformOnDifferentMimeType = retryTransformOnDifferentMimeType;
}
public boolean getStrictMimeTypeCheck()
{
return strictMimeTypeCheck;
}
public void setStrictMimeTypeCheck(boolean strictMimeTypeCheck)
{
this.strictMimeTypeCheck = strictMimeTypeCheck;
}
}

View File

@@ -2675,45 +2675,14 @@ public class ScriptNode implements Scopeable, NamespacePrefixResolverProvider
final NodeRef sourceNodeRef = nodeRef;
// the delegate definition for transforming a document
Transformer transformer = new Transformer()
Transformer transformer = new AbstractTransformer()
{
public ScriptNode transform(ContentService contentService, NodeRef nodeRef, ContentReader reader,
ContentWriter writer)
protected void doTransform(ContentService contentService,
ContentReader reader, ContentWriter writer)
{
ScriptNode transformedNode = null;
TransformationOptions options = new TransformationOptions();
options.setSourceNodeRef(sourceNodeRef);
try
{
contentService.transform(reader, writer, options);
transformedNode = newInstance(nodeRef, services, scope);
}
catch (NoTransformerException e)
{
// ignore
}
catch (AlfrescoRuntimeException e)
{
Throwable rootCause = ((AlfrescoRuntimeException)e).getRootCause();
String message = rootCause.getMessage();
message = message == null ? "" : message;
if (rootCause instanceof UnimportantTransformException)
{
logger.debug(message);
// ignore
}
else if (rootCause instanceof UnsupportedTransformationException)
{
logger.error(message);
// ignore
}
else
{
throw e;
}
}
return transformedNode;
contentService.transform(reader, writer, options);
}
};
@@ -2828,10 +2797,10 @@ public class ScriptNode implements Scopeable, NamespacePrefixResolverProvider
final NodeRef sourceNodeRef = nodeRef;
// the delegate definition for transforming an image
Transformer transformer = new Transformer()
Transformer transformer = new AbstractTransformer()
{
public ScriptNode transform(ContentService contentService, NodeRef nodeRef, ContentReader reader,
ContentWriter writer)
protected void doTransform(ContentService contentService,
ContentReader reader, ContentWriter writer)
{
ImageTransformationOptions imageOptions = new ImageTransformationOptions();
imageOptions.setSourceNodeRef(sourceNodeRef);
@@ -2841,8 +2810,6 @@ public class ScriptNode implements Scopeable, NamespacePrefixResolverProvider
imageOptions.setCommandOptions(options);
}
contentService.getImageTransformer().transform(reader, writer, imageOptions);
return newInstance(nodeRef, services, scope);
}
};
@@ -4114,9 +4081,52 @@ public class ScriptNode implements Scopeable, NamespacePrefixResolverProvider
*
* @return Node representing the transformed entity
*/
ScriptNode transform(ContentService contentService, NodeRef noderef, ContentReader reader, ContentWriter writer);
ScriptNode transform(ContentService contentService, NodeRef noderef,
ContentReader reader, ContentWriter writer);
}
private abstract class AbstractTransformer implements Transformer
{
public ScriptNode transform(ContentService contentService, NodeRef nodeRef,
ContentReader reader, ContentWriter writer)
{
ScriptNode transformedNode = null;
try
{
doTransform(contentService, reader, writer);
transformedNode = newInstance(nodeRef, services, scope);
}
catch (NoTransformerException e)
{
// ignore
}
catch (AlfrescoRuntimeException e)
{
Throwable rootCause = ((AlfrescoRuntimeException)e).getRootCause();
String message = rootCause.getMessage();
message = message == null ? "" : message;
if (rootCause instanceof UnimportantTransformException)
{
logger.debug(message);
// ignore
}
else if (rootCause instanceof UnsupportedTransformationException)
{
logger.error(message);
// ignore
}
else
{
throw e;
}
}
return transformedNode;
}
protected abstract void doTransform(ContentService contentService,
ContentReader reader, ContentWriter writer);
};
/**
* NamespacePrefixResolverProvider getter implementation

View File

@@ -39,6 +39,7 @@ import org.alfresco.repo.action.ParameterDefinitionImpl;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.repo.content.transform.TransformerConfig;
import org.alfresco.repo.content.transform.TransformerDebug;
import org.alfresco.repo.content.transform.UnsupportedTransformationException;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.service.cmr.action.ActionServiceException;
import org.alfresco.service.cmr.action.ActionTrackingService;
@@ -446,7 +447,7 @@ public abstract class AbstractTransformationRenderingEngine extends AbstractRend
contentService.transform(contentReader, tempContentWriter, options);
return tempContentWriter;
}
catch (NoTransformerException ntx)
catch (NoTransformerException|UnsupportedTransformationException ntx)
{
{
logger.debug("No transformer found to execute rule: \n" + " reader: " + contentReader + "\n"

View File

@@ -54,6 +54,7 @@ import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.repo.content.transform.TransformerDebug;
import org.alfresco.repo.content.transform.UnsupportedTransformationException;
import org.alfresco.repo.dictionary.IndexTokenisationMode;
import org.alfresco.repo.search.AspectIndexFilter;
import org.alfresco.repo.search.IndexerException;
@@ -83,6 +84,7 @@ import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.InvalidNodeRefException;
import org.alfresco.service.cmr.repository.MLText;
import org.alfresco.service.cmr.repository.NoTransformerException;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path;
@@ -1573,7 +1575,7 @@ public class ADMLuceneIndexerImpl extends AbstractLuceneIndexerImpl<NodeRef> imp
+ " transformer: " + transformer + "\n" + " temp writer: " + writer);
}
}
catch (ContentIOException e)
catch (ContentIOException|NoTransformerException|UnsupportedTransformationException e)
{
// log it
if (s_logger.isInfoEnabled())

View File

@@ -34,10 +34,9 @@ import java.util.Set;
import org.alfresco.model.ApplicationModel;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.transform.UnsupportedTransformationException;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.model.FileInfo;
import org.alfresco.service.cmr.model.FileNotFoundException;
import org.alfresco.service.cmr.repository.ChildAssociationRef;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.ContentReader;
@@ -635,7 +634,7 @@ public abstract class BaseContentNode implements TemplateContent
}
}
}
catch (NoTransformerException e)
catch (NoTransformerException|UnsupportedTransformationException e)
{
// ignore
}

View File

@@ -25,16 +25,28 @@
*/
package org.alfresco.repo.content.transform;
import junit.framework.TestCase;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.*;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.model.Repository;
import org.alfresco.repo.security.authentication.AuthenticationUtil;
import org.alfresco.repo.transaction.RetryingTransactionHelper;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.repository.*;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.TransformationOptions;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
import org.alfresco.service.transaction.TransactionService;
@@ -46,20 +58,20 @@ import org.junit.After;
import org.junit.Before;
import org.springframework.context.ApplicationContext;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import junit.framework.TestCase;
/**
* Tests the ContentTransformer correct work if the file extension is different from its MIME type
* Tests that ContentTransformers only correctly process source nodes if the
* mimetype of the node matches the content. This is the opposite of what this
* test class was originally written to prove for MNT-11015. The current version
* was reworked for MNT-16381.
*/
public class DifferrentMimeTypeTest extends TestCase
{
private static Log log = LogFactory.getLog(DifferrentMimeTypeTest.class);
private ContentTransformer contentTransformer;
private AbstractContentTransformer2 contentTransformer;
private TransformationOptions options;
private ServiceRegistry serviceRegistry;
private MimetypeService mimetypeService;
@@ -97,9 +109,9 @@ public class DifferrentMimeTypeTest extends TestCase
public void testDifferentMimeType() throws IOException
{
final QName propertyQName = ContentModel.PROP_CONTENT;
String sourceMimeType = mimetypeService.guessMimetype(testFile.getName());
String fileName = testFile.getName();
String sourceMimeType = mimetypeService.guessMimetype(fileName);
String targetMimeType = MimetypeMap.MIMETYPE_IMAGE_JPEG;
// mimetypeService.guessMimetype returns file mime type based entirely on the file extension
@@ -126,19 +138,51 @@ public class DifferrentMimeTypeTest extends TestCase
outputWriter.setMimetype(targetMimeType);
// verify that there is a desired transformer for actual file MIME type
ContentTransformer actualTransformer = this.registry.getTransformer(mimetypeService.getMimetypeIfNotMatches(contentReader),contentReader.getSize(), targetMimeType, options);
String assertMessageActualTransformer = "Transformer not found for converting " + mimetypeService.getMimetypeIfNotMatches(contentReader) + " to " + targetMimeType;
String actualSourceMimetype = mimetypeService.getMimetypeIfNotMatches(contentReader);
ContentTransformer actualTransformer = this.registry.getTransformer(actualSourceMimetype,contentReader.getSize(), targetMimeType, options);
String assertMessageActualTransformer = "Transformer not found for converting " + actualSourceMimetype + " to " + targetMimeType;
assertNotNull(assertMessageActualTransformer, actualTransformer);
contentTransformer = this.registry.getTransformer(sourceMimeType, contentReader.getSize(), targetMimeType, options);
contentTransformer = (AbstractContentTransformer2)registry.getTransformer(sourceMimeType, contentReader.getSize(), targetMimeType, options);
String assertMessageContentTransformer = "Transformer not found for converting " +sourceMimeType + " to " + targetMimeType;
assertNotNull(assertMessageContentTransformer, contentTransformer);
// Try to transform file with inaccurate MIME type
contentTransformer.transform(contentReader, outputWriter, options);
boolean originalStrict = contentTransformer.getStrictMimeTypeCheck();
assertTrue("Content Transformations should be 'strict' by default", originalStrict);
for (boolean strict: new boolean[] {false, true})
{
try
{
contentTransformer.setStrictMimeTypeCheck(strict);
contentTransformer.transform(contentReader.getReader(), outputWriter, options);
if (strict)
{
fail("The contentTransformer should have failed with an UnsupportedTransformationException");
}
// After successful transformation image size should be grater than 0
assertTrue("File transformation failed. Output file size is '0'", outputWriter.getSize() > 0);
}
catch (UnsupportedTransformationException e)
{
if (!strict)
{
fail("The contentTransformer should NOT have failed with an UnsupportedTransformationException "+e);
}
String message = e.getMessage();
assertTrue("Message should contain the original filename ("+fileName+")", message.contains(fileName));
assertTrue("Message should contain the declared source mimetype ("+sourceMimeType+")", message.contains(sourceMimeType));
assertTrue("Message should contain the detected source mimetype ("+actualSourceMimetype+")", message.contains(actualSourceMimetype));
}
finally
{
((AbstractContentTransformer2)contentTransformer).setStrictMimeTypeCheck(originalStrict);
}
}
// After successful transformation image size should be grater than 0
assertTrue("File transformation failed. Output file size is '0'", outputWriter.getSize() > 0);
// Try to transform file with accurate MIME type
contentReader.setMimetype(actualSourceMimetype);
actualTransformer.transform(contentReader, outputWriter, options);
}
public void testSetUp()
@@ -190,6 +234,10 @@ public class DifferrentMimeTypeTest extends TestCase
ContentModel.TYPE_CONTENT,
props).getChildRef();
// Make sure the error message contains the file name. Without this it is null.
nodeService.setProperty(node, ContentModel.PROP_NAME, fileName);
options.setSourceNodeRef(node);
// node should be removed after tests
nodesToDeleteAfterTest.add(node);