diff --git a/config/alfresco/swf-transform-context.xml b/config/alfresco/swf-transform-context.xml
index 7974682e35..47557ca148 100644
--- a/config/alfresco/swf-transform-context.xml
+++ b/config/alfresco/swf-transform-context.xml
@@ -198,5 +198,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain
+ application/pdf
+
+
+
diff --git a/source/java/org/alfresco/repo/content/transform/EMLTransformer.java b/source/java/org/alfresco/repo/content/transform/EMLTransformer.java
index d10ae47912..8866123dd8 100644
--- a/source/java/org/alfresco/repo/content/transform/EMLTransformer.java
+++ b/source/java/org/alfresco/repo/content/transform/EMLTransformer.java
@@ -16,10 +16,11 @@
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
*/
+
package org.alfresco.repo.content.transform;
import java.io.IOException;
-import java.io.InputStream;
+import java.nio.charset.Charset;
import java.util.Properties;
import javax.mail.MessagingException;
@@ -32,24 +33,25 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.TransformationOptions;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.txt.Icu4jEncodingDetector;
/**
- * Uses javax.mail.MimeMessage to generate plain text versions of
- * RFC822 email messages.
- * Searches for all text content parts, and returns them. Any
- * attachments are ignored.
- *
- * TIKA Note - could be replaced with the Tika email parser. Would
- * require a recursing parser to be specified, but not the full
- * Auto one (we don't want attachments), just one containing
- * text and html related parsers.
+ * Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
+ * messages. Searches for all text content parts and returns them; any
+ * attachments are ignored.
+ * <p>
+ * TIKA note: could be replaced with the Tika email parser. That would require a recursing
+ * parser, but not the full Auto one (we don't want attachments): only text and HTML parsers.
*/
public class EMLTransformer extends AbstractContentTransformer2
{
@Override
public boolean isTransformableMimetype(String sourceMimetype, String targetMimetype, TransformationOptions options)
{
- if (!MimetypeMap.MIMETYPE_RFC822.equals(sourceMimetype) || !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
+ if (!MimetypeMap.MIMETYPE_RFC822.equals(sourceMimetype)
+ || !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
{
// only support RFC822 -> TEXT
return false;
@@ -61,15 +63,24 @@ public class EMLTransformer extends AbstractContentTransformer2
}
@Override
- protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options) throws Exception
+ protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options)
+ throws Exception
{
- InputStream is = null;
+ TikaInputStream tikaInputStream = null;
try
{
- is = reader.getContentInputStream();
+ // wrap the given stream to a TikaInputStream instance
+ tikaInputStream = TikaInputStream.get(reader.getContentInputStream());
- MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()), is);
+ final Icu4jEncodingDetector encodingDetector = new Icu4jEncodingDetector();
+ final Charset charset = encodingDetector.detect(tikaInputStream, new Metadata());
+ MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()), tikaInputStream);
+ if (charset != null)
+ {
+ mimeMessage.setHeader("Content-Type", "text/plain; charset=" + charset.name());
+ mimeMessage.setHeader("Content-Transfer-Encoding", "quoted-printable");
+ }
final StringBuilder sb = new StringBuilder();
Object content = mimeMessage.getContent();
if (content instanceof Multipart)
@@ -80,16 +91,16 @@ public class EMLTransformer extends AbstractContentTransformer2
{
sb.append(content.toString());
}
-
writer.putContent(sb.toString());
}
finally
{
- if (is != null)
+ if (tikaInputStream != null)
{
try
{
- is.close();
+ // it closes any other resources associated with it
+ tikaInputStream.close();
}
catch (IOException e)
{
diff --git a/source/java/org/alfresco/repo/content/transform/EMLTransformerTest.java b/source/java/org/alfresco/repo/content/transform/EMLTransformerTest.java
new file mode 100644
index 0000000000..2eafdb7b67
--- /dev/null
+++ b/source/java/org/alfresco/repo/content/transform/EMLTransformerTest.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2005-2012 Alfresco Software Limited.
+ *
+ * This file is part of Alfresco
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.alfresco.repo.content.transform;
+
+import java.io.File;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.repo.content.filestore.FileContentReader;
+import org.alfresco.repo.content.filestore.FileContentWriter;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.ContentWriter;
+import org.alfresco.service.cmr.repository.TransformationOptions;
+import org.alfresco.util.TempFileProvider;
+
+/**
+ * Tests {@link EMLTransformer}, which converts RFC822 (.eml) messages to plain text.
+ *
+ * @author Jamal Kaabi-Mofrad
+ */
+public class EMLTransformerTest extends AbstractContentTransformerTest
+{
+    private static final String QUICK_EML_CONTENT = "Gym class featuring a brown fox and lazy dog";
+    // Accented letters are written as Unicode escapes so the constant does not depend on the source file encoding.
+    private static final String QUICK_EML_CONTENT_SPANISH_UNICODE = "El r\u00E1pido zorro marr\u00F3n salta sobre el perro perezoso";
+
+    private EMLTransformer transformer;
+
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+        // mimetypeService and transformerDebug are not declared here; presumably inherited and initialised by super.setUp()
+        transformer = new EMLTransformer();
+        transformer.setMimetypeService(mimetypeService);
+        transformer.setTransformerDebug(transformerDebug);
+    }
+
+    @Override
+    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
+    {
+        return transformer;
+    }
+
+    public void testIsTransformable() throws Exception
+    {
+        assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, -1, MimetypeMap.MIMETYPE_RFC822,
+                new TransformationOptions()));
+        assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_RFC822, -1, MimetypeMap.MIMETYPE_TEXT_PLAIN,
+                new TransformationOptions()));
+    }
+
+    /**
+     * Transforms the quick "eml" test file to plain text and checks that the expected body text is present.
+     */
+    public void testRFC822ToText() throws Exception
+    {
+        File emlSourceFile = loadQuickTestFile("eml");
+        File txtTargetFile = TempFileProvider.createTempFile("test", ".txt");
+        ContentReader reader = new FileContentReader(emlSourceFile);
+        reader.setMimetype(MimetypeMap.MIMETYPE_RFC822);
+        ContentWriter writer = new FileContentWriter(txtTargetFile);
+        writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+
+        transformer.transform(reader, writer);
+
+        ContentReader reader2 = new FileContentReader(txtTargetFile);
+        reader2.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertTrue(reader2.getContentString().contains(QUICK_EML_CONTENT));
+    }
+
+    /**
+     * Transforms the non-ASCII "spanish.eml" test file to plain text and checks that the accented text is present.
+     */
+    public void testNonAsciiRFC822ToText() throws Exception
+    {
+        File emlSourceFile = loadQuickTestFile("spanish.eml");
+        File txtTargetFile = TempFileProvider.createTempFile("test2", ".txt");
+        ContentReader reader = new FileContentReader(emlSourceFile);
+        reader.setMimetype(MimetypeMap.MIMETYPE_RFC822);
+        ContentWriter writer = new FileContentWriter(txtTargetFile);
+        writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+
+        transformer.transform(reader, writer);
+        // Compare with the constant itself: decoding its UTF-8 bytes with the platform default charset garbles it on non-UTF-8 JVMs.
+        ContentReader reader2 = new FileContentReader(txtTargetFile);
+        reader2.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
+        assertTrue(reader2.getContentString().contains(QUICK_EML_CONTENT_SPANISH_UNICODE));
+    }
+}
diff --git a/source/test-resources/quick/quick.eml b/source/test-resources/quick/quick.eml
index 8a25c8e33e..934d80d884 100644
--- a/source/test-resources/quick/quick.eml
+++ b/source/test-resources/quick/quick.eml
@@ -6,3 +6,5 @@ Date: Fri, 4 Jun 2004 14:23:22 +0200
Subject: The quick brown fox jumps over the lazy dog
Gym class featuring a brown fox and lazy dog
+
+The quick brown fox jumps over the lazy dog
diff --git a/source/test-resources/quick/quick.spanish.eml b/source/test-resources/quick/quick.spanish.eml
new file mode 100644
index 0000000000..0cc1a72707
--- /dev/null
+++ b/source/test-resources/quick/quick.spanish.eml
@@ -0,0 +1,12 @@
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID:
+Subject: quick test
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/alternative;
+
+El rápido zorro marrón salta sobre el perro perezoso
+