Fix for ALF-7523: Share Preview for RFC822 messages

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@40549 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Jamal Kaabi-Mofrad
2012-08-17 10:57:45 +00:00
parent 5df32e4d2d
commit ed0425319f
5 changed files with 168 additions and 18 deletions

View File

@@ -16,10 +16,11 @@
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.content.transform;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Properties;
import javax.mail.MessagingException;
@@ -32,24 +33,25 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.TransformationOptions;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.txt.Icu4jEncodingDetector;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of
* RFC822 email messages.
* Searches for all text content parts, and returns them. Any
* attachments are ignored.
*
* TIKA Note - could be replaced with the Tika email parser. Would
* require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing
* text and html related parsers.
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
* messages. Searches for all text content parts, and returns them. Any
* attachments are ignored. TIKA Note - could be replaced with the Tika email
* parser. Would require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing text and html
* related parsers.
*/
public class EMLTransformer extends AbstractContentTransformer2
{
@Override
public boolean isTransformableMimetype(String sourceMimetype, String targetMimetype, TransformationOptions options)
{
if (!MimetypeMap.MIMETYPE_RFC822.equals(sourceMimetype) || !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
if (!MimetypeMap.MIMETYPE_RFC822.equals(sourceMimetype)
|| !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
{
// only support RFC822 -> TEXT
return false;
@@ -61,15 +63,24 @@ public class EMLTransformer extends AbstractContentTransformer2
}
@Override
protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options) throws Exception
protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options)
throws Exception
{
InputStream is = null;
TikaInputStream tikaInputStream = null;
try
{
is = reader.getContentInputStream();
// wrap the given stream to a TikaInputStream instance
tikaInputStream = TikaInputStream.get(reader.getContentInputStream());
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()), is);
final Icu4jEncodingDetector encodingDetector = new Icu4jEncodingDetector();
final Charset charset = encodingDetector.detect(tikaInputStream, new Metadata());
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()), tikaInputStream);
if (charset != null)
{
mimeMessage.setHeader("Content-Type", "text/plain; charset=" + charset.name());
mimeMessage.setHeader("Content-Transfer-Encoding", "quoted-printable");
}
final StringBuilder sb = new StringBuilder();
Object content = mimeMessage.getContent();
if (content instanceof Multipart)
@@ -80,16 +91,16 @@ public class EMLTransformer extends AbstractContentTransformer2
{
sb.append(content.toString());
}
writer.putContent(sb.toString());
}
finally
{
if (is != null)
if (tikaInputStream != null)
{
try
{
is.close();
// it closes any other resources associated with it
tikaInputStream.close();
}
catch (IOException e)
{

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2005-2012 Alfresco Software Limited.
*
* This file is part of Alfresco
*
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.TransformationOptions;
import org.alfresco.util.TempFileProvider;
/**
 * Tests for {@link EMLTransformer}: checks which mimetype pairs it reports as
 * transformable and that RFC822 (.eml) test files are converted to plain text
 * containing the expected phrases.
 *
 * @see org.alfresco.repo.content.transform.EMLTransformer
 *
 * @author Jamal Kaabi-Mofrad
 */
public class EMLTransformerTest extends AbstractContentTransformerTest
{
    /** Phrase expected in the text produced from the "eml" quick test file. */
    private static final String QUICK_EML_CONTENT = "Gym class featuring a brown fox and lazy dog";

    /** Phrase (with accented characters) expected in the text produced from the "spanish.eml" quick test file. */
    private static final String QUICK_EML_CONTENT_SPANISH_UNICODE = "El r\u00E1pido zorro marr\u00F3n salta sobre el perro perezoso";

    /** Transformer under test; created afresh in {@link #setUp()}. */
    private EMLTransformer transformer;

    /**
     * Creates the transformer under test and hands it the mimetype service and
     * transformer debug objects (both declared outside this file).
     */
    @Override
    public void setUp() throws Exception
    {
        super.setUp();

        transformer = new EMLTransformer();
        transformer.setMimetypeService(mimetypeService);
        transformer.setTransformerDebug(transformerDebug);
    }

    /**
     * Always returns the single transformer under test, whatever mimetypes are requested.
     */
    @Override
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Text to RFC822 must be rejected; RFC822 to text must be accepted.
     */
    public void testIsTransformable() throws Exception
    {
        assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, -1, MimetypeMap.MIMETYPE_RFC822,
                new TransformationOptions()));
        assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_RFC822, -1, MimetypeMap.MIMETYPE_TEXT_PLAIN,
                new TransformationOptions()));
    }

    /**
     * Test transforming a valid eml file to text
     */
    public void testRFC822ToText() throws Exception
    {
        String text = transformQuickEmlToText("eml", "test");

        assertTrue("Transformed text does not contain the expected phrase", text.contains(QUICK_EML_CONTENT));
    }

    /**
     * Test transforming a non-ascii eml file to text
     */
    public void testNonAsciiRFC822ToText() throws Exception
    {
        String text = transformQuickEmlToText("spanish.eml", "test2");

        // Compare against the Java string itself. Encoding it to UTF-8 bytes and
        // decoding those bytes with the platform default charset would corrupt the
        // accented characters on any JVM whose default charset is not UTF-8.
        assertTrue("Transformed text does not contain the expected non-ASCII phrase",
                text.contains(QUICK_EML_CONTENT_SPANISH_UNICODE));
    }

    /**
     * Runs the transformer on one of the quick test files and returns the
     * resulting plain text as read back from the target file.
     *
     * @param quickFileExtension value passed to loadQuickTestFile (declared outside this file)
     *                           to locate the source message, e.g. "eml"
     * @param tempFilePrefix     prefix for the temporary ".txt" target file
     * @return the content string of the transformed target file
     * @throws Exception if loading, transforming or reading back fails
     */
    private String transformQuickEmlToText(String quickFileExtension, String tempFilePrefix) throws Exception
    {
        File emlSourceFile = loadQuickTestFile(quickFileExtension);
        // Fail with a readable message rather than an obscure error further down
        // if the fixture could not be located.
        assertTrue("Quick test file not found for extension: " + quickFileExtension, emlSourceFile != null);

        File txtTargetFile = TempFileProvider.createTempFile(tempFilePrefix, ".txt");

        ContentReader sourceReader = new FileContentReader(emlSourceFile);
        sourceReader.setMimetype(MimetypeMap.MIMETYPE_RFC822);

        ContentWriter targetWriter = new FileContentWriter(txtTargetFile);
        targetWriter.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);

        transformer.transform(sourceReader, targetWriter);

        // A fresh reader is created for the target file to read the result back.
        ContentReader resultReader = new FileContentReader(txtTargetFile);
        resultReader.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
        return resultReader.getContentString();
    }
}