mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
ALF-17703: TikaPoweredMetadataExtracter Destroys Content on Failed Embed
- Removed catch of exception and closing of output stream in TikaPoweredMetadataExtracter to allow AbstractMappingMetadataExtracter to better handle the error
- Added catch of ContentIOException during construction of error details in AbstractMappingMetadataExtracter
- Added ContentMetadataEmbedderTest to test a failing embedder

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@45949 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -26,6 +26,7 @@ import org.alfresco.repo.action.evaluator.ComparePropertyValueEvaluatorTest;
|
||||
import org.alfresco.repo.action.evaluator.HasAspectEvaluatorTest;
|
||||
import org.alfresco.repo.action.evaluator.IsSubTypeEvaluatorTest;
|
||||
import org.alfresco.repo.action.executer.AddFeaturesActionExecuterTest;
|
||||
import org.alfresco.repo.action.executer.ContentMetadataEmbedderTest;
|
||||
import org.alfresco.repo.action.executer.ContentMetadataExtracterTest;
|
||||
import org.alfresco.repo.action.executer.RemoveFeaturesActionExecuterTest;
|
||||
import org.alfresco.repo.action.executer.SetPropertyValueActionExecuterTest;
|
||||
@@ -66,6 +67,7 @@ public class ActionTestSuite extends TestSuite
|
||||
suite.addTestSuite(SetPropertyValueActionExecuterTest.class);
|
||||
suite.addTestSuite(AddFeaturesActionExecuterTest.class);
|
||||
suite.addTestSuite(ContentMetadataExtracterTest.class);
|
||||
suite.addTestSuite(ContentMetadataEmbedderTest.class);
|
||||
suite.addTestSuite(SpecialiseTypeActionExecuterTest.class);
|
||||
suite.addTestSuite(RemoveFeaturesActionExecuterTest.class);
|
||||
suite.addTestSuite(ActionTrackingServiceImplTest.class);
|
||||
|
@@ -140,7 +140,7 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug(
|
||||
"Meetadata embedding failed: \n" +
|
||||
"Metadata embedding failed: \n" +
|
||||
" Extracter: " + this + "\n" +
|
||||
" Node: " + actionedUponNodeRef + "\n" +
|
||||
" Content: " + writer,
|
||||
|
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2012 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.action.executer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.action.ActionImpl;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.metadata.MetadataExtracterRegistry;
|
||||
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.repo.security.authentication.AuthenticationComponent;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentService;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.BaseSpringTest;
|
||||
import org.alfresco.util.GUID;
|
||||
import org.apache.tika.embedder.Embedder;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
|
||||
/**
|
||||
* Test of the ActionExecuter for embedding metadata
|
||||
*
|
||||
* @author Ray Gauss II
|
||||
*/
|
||||
public class ContentMetadataEmbedderTest extends BaseSpringTest
|
||||
{
|
||||
|
||||
private NodeService nodeService;
|
||||
private ContentService contentService;
|
||||
private DictionaryService dictionaryService;
|
||||
private MimetypeService mimetypeService;
|
||||
private StoreRef testStoreRef;
|
||||
private NodeRef rootNodeRef;
|
||||
private NodeRef nodeRef;
|
||||
|
||||
private ContentMetadataEmbedder executer;
|
||||
|
||||
private final static String ID = GUID.generate();
|
||||
|
||||
@Override
|
||||
protected void onSetUpInTransaction() throws Exception
|
||||
{
|
||||
this.nodeService = (NodeService) this.applicationContext.getBean("nodeService");
|
||||
this.contentService = (ContentService) this.applicationContext.getBean("contentService");
|
||||
this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService");
|
||||
this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService");
|
||||
|
||||
AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent");
|
||||
authenticationComponent.setSystemUserAsCurrentUser();
|
||||
|
||||
// Create the store and get the root node
|
||||
this.testStoreRef = this.nodeService.createStore(
|
||||
StoreRef.PROTOCOL_WORKSPACE,
|
||||
"Test_" + System.currentTimeMillis());
|
||||
this.rootNodeRef = this.nodeService.getRootNode(this.testStoreRef);
|
||||
|
||||
// Create the node used for tests
|
||||
this.nodeRef = this.nodeService.createNode(
|
||||
this.rootNodeRef, ContentModel.ASSOC_CHILDREN,
|
||||
QName.createQName("{test}testnode"),
|
||||
ContentModel.TYPE_CONTENT).getChildRef();
|
||||
|
||||
// Setup the content from the PDF test data
|
||||
ContentWriter cw = this.contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);
|
||||
cw.setMimetype(MimetypeMap.MIMETYPE_PDF);
|
||||
cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf"));
|
||||
|
||||
// Get the executer instance
|
||||
this.executer = (ContentMetadataEmbedder) this.applicationContext.getBean("embed-metadata");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that a failing embedder does not destroy the original content
|
||||
*/
|
||||
public void testFailingEmbedder()
|
||||
{
|
||||
MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry");
|
||||
FailingEmbedder embedder = new FailingEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF));
|
||||
embedder.setRegistry(registry);
|
||||
embedder.setDictionaryService(this.dictionaryService);
|
||||
embedder.setMimetypeService(this.mimetypeService);
|
||||
embedder.register();
|
||||
|
||||
String myCreator = "Embedded creator";
|
||||
|
||||
// Get the old props
|
||||
Map<QName, Serializable> props = this.nodeService.getProperties(this.nodeRef);
|
||||
props.put(ContentModel.PROP_AUTHOR, myCreator);
|
||||
this.nodeService.setProperties(this.nodeRef, props);
|
||||
|
||||
// Execute the action
|
||||
ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null);
|
||||
|
||||
ContentReader origReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT);
|
||||
long origSize = origReader.getSize();
|
||||
assertTrue(origSize > 0);
|
||||
|
||||
this.executer.execute(action, this.nodeRef);
|
||||
|
||||
ContentReader embeddedReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT);
|
||||
|
||||
assertEquals("The original content should remain unchanged on embed failures", origSize, embeddedReader.getSize());
|
||||
}
|
||||
|
||||
/**
|
||||
* Tika-powered embedder which fails upon calling embed on its {@link FailingTikaEmbedder}
|
||||
*/
|
||||
private class FailingEmbedder extends TikaPoweredMetadataExtracter
|
||||
{
|
||||
|
||||
/**
|
||||
* Constructor for setting supported extract and embed mimetypes
|
||||
*
|
||||
* @param mimetypes the supported extract and embed mimetypes
|
||||
*/
|
||||
public FailingEmbedder(Collection<String> mimetypes)
|
||||
{
|
||||
super(
|
||||
new HashSet<String>(mimetypes),
|
||||
new HashSet<String>(mimetypes));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Embedder getEmbedder()
|
||||
{
|
||||
return new FailingTikaEmbedder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Set<QName>> readMappingProperties(String propertiesUrl)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Set<QName>> getDefaultMapping()
|
||||
{
|
||||
Map<String, Set<QName>> mapping = new HashMap<String, Set<QName>>(1);
|
||||
Set<QName> qnames = new HashSet<QName>(1);
|
||||
qnames.add(ContentModel.PROP_AUTHOR);
|
||||
mapping.put("author", qnames);
|
||||
return mapping;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tika metadata embedder which fails on a call to embed.
|
||||
*/
|
||||
private class FailingTikaEmbedder implements Embedder
|
||||
{
|
||||
private static final long serialVersionUID = -4954679684941467571L;
|
||||
|
||||
@Override
|
||||
public Set<MediaType> getSupportedEmbedTypes(ParseContext context)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context)
|
||||
throws IOException, TikaException
|
||||
{
|
||||
throw new IOException("Forced failure");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -42,6 +42,7 @@ import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.ContentWriter;
|
||||
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
|
||||
@@ -1115,12 +1116,19 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
{
|
||||
// Ask Tika to detect the document, and report back on if
|
||||
// the current mime type is plausible
|
||||
String typeErrorMessage = null;
|
||||
String typeErrorMessage = "";
|
||||
String differentType = null;
|
||||
if(mimetypeService != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
differentType = mimetypeService.getMimetypeIfNotMatches(writer.getReader());
|
||||
}
|
||||
catch (ContentIOException cioe)
|
||||
{
|
||||
// Embedding failed and writer is empty
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.info("Unable to verify mimetype of " + writer.getReader() +
|
||||
@@ -1144,7 +1152,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.warn(
|
||||
logger.error(
|
||||
"Metadata embedding failed (turn on DEBUG for full error): \n" +
|
||||
" Extracter: " + this + "\n" +
|
||||
" Content: " + writer + "\n" +
|
||||
|
@@ -375,9 +375,7 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
{
|
||||
return;
|
||||
}
|
||||
OutputStream outputStream = null;
|
||||
try
|
||||
{
|
||||
|
||||
Metadata metadataToEmbed = new Metadata();
|
||||
for (String metadataKey : properties.keySet())
|
||||
{
|
||||
@@ -415,22 +413,9 @@ public abstract class TikaPoweredMetadataExtracter
|
||||
}
|
||||
}
|
||||
InputStream inputStream = getInputStream(reader);
|
||||
outputStream = writer.getContentOutputStream();
|
||||
OutputStream outputStream = writer.getContentOutputStream();
|
||||
embedder.embed(metadataToEmbed, inputStream, outputStream, null);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (outputStream != null)
|
||||
{
|
||||
try { outputStream.close(); } catch (Throwable e) {}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* This content handler will capture entries from within
|
||||
|
Reference in New Issue
Block a user