ALF-17703: TikaPoweredMetadataExtracter Destroys Content on Failed Embed

- Removed the catch of the exception and the closing of the output stream in TikaPoweredMetadataExtracter so that AbstractMappingMetadataExtracter can handle the error properly
- Added a catch of ContentIOException during construction of the error details in AbstractMappingMetadataExtracter
- Added ContentMetadataEmbedderTest to test a failing embedder

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@45949 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
Ray Gauss
2013-01-28 19:30:31 +00:00
parent c7aea42dc2
commit adef628ee9
5 changed files with 248 additions and 43 deletions
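
The failure mode behind ALF-17703: when an embedder threw part-way through, TikaPoweredMetadataExtracter caught the exception itself and then closed the ContentWriter's output stream in a finally block, so an empty stream was committed over the node's existing content. A failing embedder is enough to reproduce it. The sketch below is illustrative only (the class name is made up here, and it assumes the Tika Embedder interface that the extracter delegates to); the real coverage is in the new ContentMetadataEmbedderTest.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Set;

import org.apache.tika.embedder.Embedder;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;

/**
 * Embedder that always fails before writing a single byte, used to exercise
 * the extracter's error handling. Illustrative only; the real coverage lives
 * in ContentMetadataEmbedderTest.
 */
public class AlwaysFailingEmbedder implements Embedder
{
    @Override
    public Set<MediaType> getSupportedEmbedTypes(ParseContext context)
    {
        // Claim a single generic type so the embedder can be wired in anywhere
        return Collections.singleton(MediaType.OCTET_STREAM);
    }

    @Override
    public void embed(Metadata metadata, InputStream originalStream, OutputStream outputStream, ParseContext context)
            throws IOException
    {
        // Fail without writing anything: the old extracter code still closed the
        // empty output stream in its finally block, wiping the original content.
        throw new IOException("Simulated embed failure");
    }
}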


@@ -42,6 +42,7 @@ import org.alfresco.model.ContentModel;
 import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
 import org.alfresco.service.cmr.dictionary.DictionaryService;
 import org.alfresco.service.cmr.dictionary.PropertyDefinition;
+import org.alfresco.service.cmr.repository.ContentIOException;
 import org.alfresco.service.cmr.repository.ContentReader;
 import org.alfresco.service.cmr.repository.ContentWriter;
 import org.alfresco.service.cmr.repository.MalformedNodeRefException;
@@ -1115,11 +1116,18 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
             {
                 // Ask Tika to detect the document, and report back on if
                 // the current mime type is plausible
-                String typeErrorMessage = null;
+                String typeErrorMessage = "";
                 String differentType = null;
                 if(mimetypeService != null)
                 {
-                    differentType = mimetypeService.getMimetypeIfNotMatches(writer.getReader());
+                    try
+                    {
+                        differentType = mimetypeService.getMimetypeIfNotMatches(writer.getReader());
+                    }
+                    catch (ContentIOException cioe)
+                    {
+                        // Embedding failed and writer is empty
+                    }
                 }
                 else
                 {
@@ -1144,7 +1152,7 @@ abstract public class AbstractMappingMetadataExtracter implements MetadataExtrac
             }
             else
             {
-                logger.warn(
+                logger.error(
                         "Metadata embedding failed (turn on DEBUG for full error): \n" +
                         " Extracter: " + this + "\n" +
                         " Content: " + writer + "\n" +


@@ -375,38 +375,23 @@ public abstract class TikaPoweredMetadataExtracter
         {
             return;
         }
-        OutputStream outputStream = null;
-        try
+        Metadata metadataToEmbed = new Metadata();
+        for (String metadataKey : properties.keySet())
         {
-            Metadata metadataToEmbed = new Metadata();
-            for (String metadataKey : properties.keySet())
+            Serializable value = properties.get(metadataKey);
+            if (value == null)
             {
-                Serializable value = properties.get(metadataKey);
-                if (value == null)
-                {
-                    continue;
-                }
-                if (value instanceof Collection<?>)
-                {
-                    for (Object singleValue : (Collection<?>) value)
-                    {
-                        try
-                        {
-                            // Convert to a string value for Tika
-                            metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue));
-                        }
-                        catch (TypeConversionException e)
-                        {
-                            logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
-                        }
-                    }
-                }
-                else
+                continue;
+            }
+            if (value instanceof Collection<?>)
+            {
+                for (Object singleValue : (Collection<?>) value)
                 {
                     try
                     {
                         // Convert to a string value for Tika
-                        metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value));
+                        metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, singleValue));
                     }
                     catch (TypeConversionException e)
                     {
@@ -414,22 +399,22 @@ public abstract class TikaPoweredMetadataExtracter
                     }
                 }
             }
-            InputStream inputStream = getInputStream(reader);
-            outputStream = writer.getContentOutputStream();
-            embedder.embed(metadataToEmbed, inputStream, outputStream, null);
-        }
-        catch (Exception e)
-        {
-            logger.error(e.getMessage(), e);
-        }
-        finally
-        {
-            if (outputStream != null)
+            else
             {
-                try { outputStream.close(); } catch (Throwable e) {}
+                try
+                {
+                    // Convert to a string value for Tika
+                    metadataToEmbed.add(metadataKey, DefaultTypeConverter.INSTANCE.convert(String.class, value));
+                }
+                catch (TypeConversionException e)
+                {
+                    logger.info("Could not convert " + metadataKey + ": " + e.getMessage());
+                }
             }
         }
+        InputStream inputStream = getInputStream(reader);
+        OutputStream outputStream = writer.getContentOutputStream();
+        embedder.embed(metadataToEmbed, inputStream, outputStream, null);
     }
 
     /**
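
With the catch and the stream-closing finally block gone from embedInternal, a failed embed now propagates up to AbstractMappingMetadataExtracter, and the destination stream is no longer force-closed (and thereby committed) around a failure. A JUnit-style sketch of that behaviour at the Tika level, reusing the AlwaysFailingEmbedder sketched earlier (this is not the actual ContentMetadataEmbedderTest, which exercises repository nodes):

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.tika.metadata.Metadata;
import org.junit.Test;

public class FailingEmbedderSketchTest
{
    @Test
    public void failedEmbedPropagatesAndWritesNothing()
    {
        AlwaysFailingEmbedder embedder = new AlwaysFailingEmbedder();
        ByteArrayInputStream source = new ByteArrayInputStream("original content".getBytes());
        ByteArrayOutputStream destination = new ByteArrayOutputStream();
        try
        {
            // Mirrors the call in embedInternal: embed(metadata, in, out, null)
            embedder.embed(new Metadata(), source, destination, null);
            fail("The embedder should have thrown");
        }
        catch (IOException expected)
        {
            // The exception now reaches the caller instead of being swallowed
        }
        // Nothing was written, so nothing empty can be committed over the content
        assertEquals(0, destination.size());
    }
}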