diff --git a/source/java/org/alfresco/repo/action/ActionTestSuite.java b/source/java/org/alfresco/repo/action/ActionTestSuite.java index b82fa4fcb5..e67e08da96 100644 --- a/source/java/org/alfresco/repo/action/ActionTestSuite.java +++ b/source/java/org/alfresco/repo/action/ActionTestSuite.java @@ -26,6 +26,7 @@ import org.alfresco.repo.action.evaluator.ComparePropertyValueEvaluatorTest; import org.alfresco.repo.action.evaluator.HasAspectEvaluatorTest; import org.alfresco.repo.action.evaluator.IsSubTypeEvaluatorTest; import org.alfresco.repo.action.executer.AddFeaturesActionExecuterTest; +import org.alfresco.repo.action.executer.ContentMetadataEmbedderTest; import org.alfresco.repo.action.executer.ContentMetadataExtracterTest; import org.alfresco.repo.action.executer.RemoveFeaturesActionExecuterTest; import org.alfresco.repo.action.executer.SetPropertyValueActionExecuterTest; @@ -66,6 +67,7 @@ public class ActionTestSuite extends TestSuite suite.addTestSuite(SetPropertyValueActionExecuterTest.class); suite.addTestSuite(AddFeaturesActionExecuterTest.class); suite.addTestSuite(ContentMetadataExtracterTest.class); + suite.addTestSuite(ContentMetadataEmbedderTest.class); suite.addTestSuite(SpecialiseTypeActionExecuterTest.class); suite.addTestSuite(RemoveFeaturesActionExecuterTest.class); suite.addTestSuite(ActionTrackingServiceImplTest.class); diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java index be59ff45be..33a99241c9 100644 --- a/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedder.java @@ -140,7 +140,7 @@ public class ContentMetadataEmbedder extends ActionExecuterAbstractBase if (logger.isDebugEnabled()) { logger.debug( - "Meetadata embedding failed: \n" + + "Metadata embedding failed: \n" + " Extracter: " + this + "\n" + " Node: " + actionedUponNodeRef + 
"\n" + " Content: " + writer, diff --git a/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java b/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java new file mode 100644 index 0000000000..0581036577 --- /dev/null +++ b/source/java/org/alfresco/repo/action/executer/ContentMetadataEmbedderTest.java @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2005-2012 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. 
+ */ +package org.alfresco.repo.action.executer; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.alfresco.model.ContentModel; +import org.alfresco.repo.action.ActionImpl; +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.repo.content.metadata.MetadataExtracterRegistry; +import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter; +import org.alfresco.repo.content.transform.AbstractContentTransformerTest; +import org.alfresco.repo.security.authentication.AuthenticationComponent; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentService; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.MimetypeService; +import org.alfresco.service.cmr.repository.NodeRef; +import org.alfresco.service.cmr.repository.NodeService; +import org.alfresco.service.cmr.repository.StoreRef; +import org.alfresco.service.namespace.QName; +import org.alfresco.util.BaseSpringTest; +import org.alfresco.util.GUID; +import org.apache.tika.embedder.Embedder; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; + +/** + * Test of the ActionExecuter for embedding metadata + * + * @author Ray Gauss II + */ +public class ContentMetadataEmbedderTest extends BaseSpringTest +{ + + private NodeService nodeService; + private ContentService contentService; + private DictionaryService dictionaryService; + private MimetypeService mimetypeService; + private StoreRef testStoreRef; + private NodeRef rootNodeRef; + 
private NodeRef nodeRef; + + private ContentMetadataEmbedder executer; + + private final static String ID = GUID.generate(); + + @Override + protected void onSetUpInTransaction() throws Exception + { + this.nodeService = (NodeService) this.applicationContext.getBean("nodeService"); + this.contentService = (ContentService) this.applicationContext.getBean("contentService"); + this.dictionaryService = (DictionaryService) this.applicationContext.getBean("dictionaryService"); + this.mimetypeService = (MimetypeService) this.applicationContext.getBean("mimetypeService"); + + AuthenticationComponent authenticationComponent = (AuthenticationComponent)applicationContext.getBean("authenticationComponent"); + authenticationComponent.setSystemUserAsCurrentUser(); + + // Create the store and get the root node + this.testStoreRef = this.nodeService.createStore( + StoreRef.PROTOCOL_WORKSPACE, + "Test_" + System.currentTimeMillis()); + this.rootNodeRef = this.nodeService.getRootNode(this.testStoreRef); + + // Create the node used for tests + this.nodeRef = this.nodeService.createNode( + this.rootNodeRef, ContentModel.ASSOC_CHILDREN, + QName.createQName("{test}testnode"), + ContentModel.TYPE_CONTENT).getChildRef(); + + // Setup the content from the PDF test data + ContentWriter cw = this.contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true); + cw.setMimetype(MimetypeMap.MIMETYPE_PDF); + cw.putContent(AbstractContentTransformerTest.loadQuickTestFile("pdf")); + + // Get the executer instance + this.executer = (ContentMetadataEmbedder) this.applicationContext.getBean("embed-metadata"); + } + + /** + * Test that a failing embedder does not destroy the original content + */ + public void testFailingEmbedder() + { + MetadataExtracterRegistry registry = (MetadataExtracterRegistry) applicationContext.getBean("metadataExtracterRegistry"); + FailingEmbedder embedder = new FailingEmbedder(Arrays.asList(MimetypeMap.MIMETYPE_PDF)); + embedder.setRegistry(registry); + 
embedder.setDictionaryService(this.dictionaryService); + embedder.setMimetypeService(this.mimetypeService); + embedder.register(); + + String myCreator = "Embedded creator"; + + // Get the old props + Map props = this.nodeService.getProperties(this.nodeRef); + props.put(ContentModel.PROP_AUTHOR, myCreator); + this.nodeService.setProperties(this.nodeRef, props); + + // Execute the action + ActionImpl action = new ActionImpl(null, ID, SetPropertyValueActionExecuter.NAME, null); + + ContentReader origReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT); + long origSize = origReader.getSize(); + assertTrue(origSize > 0); + + this.executer.execute(action, this.nodeRef); + + ContentReader embeddedReader = this.contentService.getReader(this.nodeRef, ContentModel.PROP_CONTENT); + + assertEquals("The original content should remain unchanged on embed failures", origSize, embeddedReader.getSize()); + } + + /** + * Tika-powered embedder which fails upon calling embed on its {@link FailingTikaEmbedder} + */ + private class FailingEmbedder extends TikaPoweredMetadataExtracter + { + + /** + * Constructor for setting supported extract and embed mimetypes + * + * @param mimetypes the supported extract and embed mimetypes + */ + public FailingEmbedder(Collection mimetypes) + { + super( + new HashSet(mimetypes), + new HashSet(mimetypes)); + } + + @Override + protected Parser getParser() + { + return null; + } + + @Override + protected Embedder getEmbedder() + { + return new FailingTikaEmbedder(); + } + + @Override + protected Map> readMappingProperties(String propertiesUrl) + { + return null; + } + + @Override + protected Map> getDefaultMapping() + { + Map> mapping = new HashMap>(1); + Set qnames = new HashSet(1); + qnames.add(ContentModel.PROP_AUTHOR); + mapping.put("author", qnames); + return mapping; + } + } + + /** + * Tika metadata embedder which fails on a call to embed. 
+ */ + private class FailingTikaEmbedder implements Embedder + { + private static final long serialVersionUID = -4954679684941467571L; + + @Override + public Set getSupportedEmbedTypes(ParseContext context) + { + return null; + } + + @Override + public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context) + throws IOException, TikaException + { + throw new IOException("Forced failure"); + } + } + +} diff --git a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java index c4635e1f2c..8f4c3eab48 100644 --- a/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/AbstractMappingMetadataExtracter.java @@ -42,6 +42,7 @@ import org.alfresco.model.ContentModel; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; +import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.MalformedNodeRefException; @@ -1115,11 +1116,18 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac { // Ask Tika to detect the document, and report back on if // the current mime type is plausible - String typeErrorMessage = null; + String typeErrorMessage = ""; String differentType = null; if(mimetypeService != null) { - differentType = mimetypeService.getMimetypeIfNotMatches(writer.getReader()); + try + { + differentType = mimetypeService.getMimetypeIfNotMatches(writer.getReader()); + } + catch (ContentIOException cioe) + { + // Embedding failed and writer is empty + } } else { @@ -1144,7 +1152,7 @@ abstract public class 
AbstractMappingMetadataExtracter implements MetadataExtrac } else { - logger.warn( + logger.error( "Metadata embedding failed (turn on DEBUG for full error): \n" + " Extracter: " + this + "\n" + " Content: " + writer + "\n" + diff --git a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java index b527fc2ddf..d546f26126 100644 --- a/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java +++ b/source/java/org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.java @@ -375,38 +375,23 @@ public abstract class TikaPoweredMetadataExtracter { return; } - OutputStream outputStream = null; - try + + Metadata metadataToEmbed = new Metadata(); + for (String metadataKey : properties.keySet()) { - Metadata metadataToEmbed = new Metadata(); - for (String metadataKey : properties.keySet()) + Serializable value = properties.get(metadataKey); + if (value == null) { - Serializable value = properties.get(metadataKey); - if (value == null) - { - continue; - } - if (value instanceof Collection) - { - for (Object singleValue : (Collection) value) - { - try - { - // Convert to a string value for Tika - metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); - } - catch (TypeConversionException e) - { - logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); - } - } - } - else + continue; + } + if (value instanceof Collection) + { + for (Object singleValue : (Collection) value) { try { // Convert to a string value for Tika - metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); + metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue)); } catch (TypeConversionException e) { @@ -414,22 +399,22 @@ public abstract class TikaPoweredMetadataExtracter } } } - InputStream inputStream = getInputStream(reader); - 
outputStream = writer.getContentOutputStream(); - embedder.embed(metadataToEmbed, inputStream, outputStream, null); - } - catch (Exception e) - { - logger.error(e.getMessage(), e); - } - finally - { - if (outputStream != null) + else { - try { outputStream.close(); } catch (Throwable e) {} + try + { + // Convert to a string value for Tika + metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value)); + } + catch (TypeConversionException e) + { + logger.info("Could not convert " + metadataKey + ": " + e.getMessage()); + } } } - + InputStream inputStream = getInputStream(reader); + OutputStream outputStream = writer.getContentOutputStream(); + embedder.embed(metadataToEmbed, inputStream, outputStream, null); } /**