REPO-4616: Add EMLTransformer to the Misc T-Engine (#96)

* REPO-4616: Add EMLTransformer to the Misc T-Engine
This commit is contained in:
eknizat
2019-08-28 09:59:54 +01:00
committed by GitHub
parent c2f475cd57
commit 3806ef2f25
15 changed files with 616 additions and 59 deletions

View File

@@ -31,11 +31,12 @@ import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_KEYNOTE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_NUMBERS;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_PAGES;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_RFC822;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
@@ -105,7 +106,6 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
@@ -124,6 +124,116 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
/**
 * Sends the plain eml test file through the transform endpoint, asking for text,
 * and verifies that the message body text appears in the response.
 */
@Test
public void testRFC822ToText() throws Exception
{
    final byte[] emlBytes = readTestFile("eml");

    final MvcResult response = sendRequest("eml", null, MIMETYPE_RFC822,
        "txt", MIMETYPE_TEXT_PLAIN, null, emlBytes);

    final String body = response.getResponse().getContentAsString();
    assertTrue("Content from eml transform didn't contain expected value. ",
        body.contains("Gym class featuring a brown fox and lazy dog"));
}
/**
 * Sends an eml file whose body contains non-ASCII (Spanish) characters and verifies
 * that the accented text is present once the response bytes are decoded as UTF-8.
 */
@Test
public void testNonAsciiRFC822ToText() throws Exception
{
    final byte[] emlBytes = readTestFile("spanish.eml");

    final MvcResult response = sendRequest("eml", null, MIMETYPE_RFC822,
        "txt", MIMETYPE_TEXT_PLAIN, null, emlBytes);

    // Decode the raw bytes explicitly rather than relying on the response's own charset.
    final byte[] rawBody = response.getResponse().getContentAsByteArray();
    final String decodedBody = new String(rawBody, "UTF-8");

    assertTrue("Content from eml transform didn't contain expected value. ",
        decodedBody.contains("El r\u00E1pido zorro marr\u00F3n salta sobre el perro perezoso"));
}
/**
 * Sends an eml file that carries a text attachment and verifies that the mail body
 * is present in the output while the attachment's content is not.
 */
@Test
public void testRFC822WithAttachmentToText() throws Exception
{
    final byte[] emlBytes = readTestFile("attachment.eml");

    final MvcResult response = sendRequest("eml", null, MIMETYPE_RFC822,
        "txt", MIMETYPE_TEXT_PLAIN, null, emlBytes);

    final String body = response.getResponse().getContentAsString();

    // The mail text must be there ...
    assertTrue("Content from eml transform didn't contain expected value. ",
        body.contains("Mail with attachment content"));
    // ... and the attached file's text must not.
    assertFalse(body.contains("File attachment content"));
}
/**
 * Sends an eml file of mimetype multipart/alternative and verifies that the text
 * of its plain text part appears in the response.
 */
@Test
public void testRFC822AlternativeToText() throws Exception
{
    final byte[] emlBytes = readTestFile("alternative.eml");

    final MvcResult response = sendRequest("eml", null, MIMETYPE_RFC822,
        "txt", MIMETYPE_TEXT_PLAIN, null, emlBytes);

    final String body = response.getResponse().getContentAsString();
    assertTrue("Content from eml transform didn't contain expected value. ",
        body.contains("alternative plain text"));
}
/**
 * Sends an eml file in which the multipart/alternative section is nested inside
 * another multipart and verifies that the nested plain text appears in the response.
 */
@Test
public void testRFC822NestedAlternativeToText() throws Exception
{
    final byte[] emlBytes = readTestFile("nested.alternative.eml");

    final MvcResult response = sendRequest("eml", null, MIMETYPE_RFC822,
        "txt", MIMETYPE_TEXT_PLAIN, null, emlBytes);

    final String body = response.getResponse().getContentAsString();
    assertTrue("Content from eml transform didn't contain expected value. ",
        body.contains("nested alternative plain text"));
}
/**
 * Test transforming a valid eml with a html part containing html special characters to text.
 * The text output must not contain the raw HTML entity that is present in the html part
 * of the test file.
 */
@Test
public void testHtmlSpecialCharsToText() throws Exception
{
    // The literal entity text, not a whitespace character: ordinary text output legitimately
    // contains whitespace, so checking for whitespace could never pass.
    String notExpected = "&nbsp;";
    MvcResult result = sendRequest("eml",
        null,
        MIMETYPE_RFC822,
        "txt",
        MIMETYPE_TEXT_PLAIN,
        null,
        readTestFile("htmlChars.eml"));
    assertFalse("Content from eml transform still contained an unescaped html entity. ",
        result.getResponse().getContentAsString().contains(notExpected));
}
@Test
public void testHTMLtoString() throws Exception
{
@@ -139,7 +249,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
String partC = "</body></html>";
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
MvcResult result = sendText("html",
MvcResult result = sendRequest("html",
"UTF-8",
MIMETYPE_HTML,
"txt",
@@ -167,7 +277,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
throw new RuntimeException("Encoding not recognised", e);
}
MvcResult result = sendText("txt",
MvcResult result = sendRequest("txt",
"MacDingbat",
MIMETYPE_TEXT_PLAIN,
"txt",
@@ -186,7 +296,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
// Use empty content to create an empty source file
byte[] content = new byte[0];
MvcResult result = sendText("txt",
MvcResult result = sendRequest("txt",
"UTF-8",
MIMETYPE_TEXT_PLAIN,
"txt",
@@ -210,7 +320,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
sb.append("\nBart\n");
String expected = sb.toString();
MvcResult result = sendText("txt",
MvcResult result = sendRequest("txt",
"UTF-8",
MIMETYPE_TEXT_PLAIN,
"pdf",
@@ -234,77 +344,68 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
/**
 * Transforms the Apple iWork Pages test file to JPEG and checks that the response
 * carries some content.
 */
@Test
public void testAppleIWorksPages() throws Exception
{
imageBasedTransform("pages", MIMETYPE_IWORK_PAGES, MIMETYPE_IMAGE_JPEG, "jpeg");
// The source extension and mimetype must describe the Pages file that is actually sent;
// "numbers" / MIMETYPE_IWORK_NUMBERS here was a copy-paste slip from the Numbers test.
MvcResult result = sendRequest("pages", null, MIMETYPE_IWORK_PAGES,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("pages"));
assertTrue("Expected image content but content is empty.",result.getResponse().getContentLengthLong() > 0L);
}
/**
 * Transforms the Apple iWork Numbers test file to JPEG and checks that the response
 * reports a content length greater than zero.
 */
@Test
public void testAppleIWorksNumbers() throws Exception
{
imageBasedTransform("numbers", MIMETYPE_IWORK_NUMBERS, MIMETYPE_IMAGE_JPEG, "jpeg");
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("numbers"));
assertTrue("Expected image content but content is empty.",result.getResponse().getContentLengthLong() > 0L);
}
/**
 * Transforms the Apple iWork Keynote test file to JPEG and checks that the response
 * reports a content length greater than zero.
 */
@Test
public void testAppleIWorksKey() throws Exception
{
imageBasedTransform("key", MIMETYPE_IWORK_KEYNOTE, MIMETYPE_IMAGE_JPEG, "jpeg");
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("key"));
assertTrue("Expected image content but content is empty.",result.getResponse().getContentLengthLong() > 0L);
}
// TODO Doesn't wotk with java 11, enable when fixed
// @Test
// TODO Doesn't work with java 11, enable when fixed
/**
 * Transforms the docx test file to JPEG and checks that the response reports a
 * content length greater than zero. The {@code @Test} annotation is commented out
 * just above this method, so it is not picked up as a test.
 */
public void testOOXML() throws Exception
{
imageBasedTransform("docx", MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_IMAGE_JPEG, "jpeg");
MvcResult result = sendRequest("docx",null, MIMETYPE_OPENXML_WORDPROCESSING,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("docx"));
assertTrue("Expected image content but content is empty.",result.getResponse().getContentLengthLong() > 0L);
}
private void imageBasedTransform(String sourceExtension, String sourceMimetype,
String targetMimetype, String targetExtension) throws Exception
private MvcResult sendRequest(String sourceExtension,
String sourceEncoding,
String sourceMimetype,
String targetExtension,
String targetMimetype,
String targetEncoding,
byte[] content) throws Exception
{
MockMultipartFile sourceFilex = new MockMultipartFile("file",
"test_file." + sourceExtension, sourceMimetype, readTestFile(sourceExtension));
MockMultipartFile sourceFile = new MockMultipartFile("file", "test_file." + sourceExtension, sourceMimetype, content);
MockHttpServletRequestBuilder requestBuilder = super
.mockMvcRequest("/transform", sourceFilex)
.param("targetExtension", "jpeg")
.param("targetMimetype", targetMimetype)
.param("sourceMimetype",
sourceMimetype);
MockHttpServletRequestBuilder requestBuilder = super.mockMvcRequest("/transform", sourceFile)
.param("targetExtension", targetExtension)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
MvcResult result = mockMvc
.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''test_file." + targetExtension))
.andReturn();
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
if (sourceEncoding!=null)
{
requestBuilder.param("sourceEncoding", sourceEncoding);
}
if (targetEncoding!=null)
{
requestBuilder.param("targetEncoding", targetEncoding);
}
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition", "attachment; filename*= "
+(targetEncoding==null ? "UTF-8" : targetEncoding)+"''test_file." + targetExtension)).
andReturn();
return result;
}
/**
 * Posts {@code content} to {@code /transform} as a multipart file named
 * {@code test_file.<sourceExtension>} and returns the MVC result.
 * <p>
 * Both {@code sourceEncoding} and {@code targetEncoding} are always added as request
 * parameters. The response is expected to have status OK and a Content-Disposition
 * header naming {@code test_file.<targetExtension>} with {@code targetEncoding} as charset.
 *
 * @param sourceExtension extension used for the uploaded file name
 * @param sourceEncoding  value of the {@code sourceEncoding} request parameter
 * @param sourceMimetype  content type of the uploaded file and value of {@code sourceMimetype}
 * @param targetExtension value of {@code targetExtension}; also expected in the returned file name
 * @param targetMimetype  value of the {@code targetMimetype} request parameter
 * @param targetEncoding  value of {@code targetEncoding}; also expected in the Content-Disposition header
 * @param content         bytes of the source file
 * @return the result of performing the request
 * @throws Exception if building or performing the request, or one of the expectations, fails
 */
private MvcResult sendText(String sourceExtension,
String sourceEncoding,
String sourceMimetype,
String targetExtension,
String targetMimetype,
String targetEncoding,
byte[] content) throws Exception
{
MockMultipartFile sourceFilex = new MockMultipartFile("file",
"test_file." + sourceExtension, sourceMimetype, content);
MockHttpServletRequestBuilder requestBuilder = super
.mockMvcRequest("/transform", sourceFilex)
.param("targetExtension", targetExtension)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype)
.param("sourceEncoding", sourceEncoding)
.param("sourceMimetype", sourceMimetype);
return mockMvc
.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= " + targetEncoding + "''test_file." + targetExtension))
.andReturn();
}
private String clean(String text)
{

View File

@@ -46,6 +46,7 @@ import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OPENXML_WORD
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_PPT;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_RFC822;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_CSS;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_CSV;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_JAVASCRIPT;
@@ -111,7 +112,8 @@ public class MiscTransformsIT
testFile(MIMETYPE_DITA, "dita", "quickConcept.dita", false),
testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key", false),
testFile(MIMETYPE_IWORK_NUMBERS, "number", "quick.numbers", false),
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages", false)
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages", false),
testFile(MIMETYPE_RFC822, "eml", "quick.eml", false)
).collect(toMap(TestFileInfo::getMimeType, identity()));
private final String sourceMimetype;
@@ -143,7 +145,8 @@ public class MiscTransformsIT
SourceTarget.of("text/plain", "application/pdf"),
SourceTarget.of("text/csv", "application/pdf"),
SourceTarget.of("application/dita+xml", "application/pdf"),
SourceTarget.of("text/xml", "application/pdf")
SourceTarget.of("text/xml", "application/pdf"),
SourceTarget.of("message/rfc822", "text/plain")
).collect(toSet());
}

View File

@@ -0,0 +1,30 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
alternative plain text
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">alternative html text</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
Parts from a multipart/alternative should represent the same content in different formats.
In this eml example the content differs, with the purpose of determining whether the right part was used in the transformation.

View File

@@ -0,0 +1,44 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
This is a multipart message in MIME format.
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
Mail with attachment content
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Mail with attachment content</div>
------=_NextPart_001_0001_01D06C6A.D04F3750--
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: text/plain;
name="alt.txt"
Content-Transfer-Encoding: quoted-printable
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
Content-Disposition: attachment;
filename="alt.txt"
File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--

View File

@@ -0,0 +1,10 @@
From: Nevin Nollop <nevin.nollop@alfresco.com>
To: Nevin Nollop <nevin.nollop@alfresco.com>
Cc: Nevin Nollop <nevinn@alfresco.com>
Message-ID: <20040604122322.GV1905@phoenix.home>
Date: Fri, 4 Jun 2004 14:23:22 +0200
Subject: The quick brown fox jumps over the lazy dog
Gym class featuring a brown fox and lazy dog
The quick brown fox jumps over the lazy dog

View File

@@ -0,0 +1,28 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
html special characters
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--

View File

@@ -0,0 +1,41 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/related;
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
type="multipart/alternative"
This is a multi-part message in MIME format.
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
nested alternative plain text
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">nested alternative html text</div>
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: image/jpeg; name="image001.jpg"
Content-Transfer-Encoding: base64
Content-ID: <image001.jpg@01D146F0.63006280>
image
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--

View File

@@ -0,0 +1,31 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: The quick brown fox jumps over the lazy dog
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
This is a multipart message in MIME format.
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
El rápido zorro marrón salta sobre el perro perezoso
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
perezoso&nbsp;<br></div>
------=_NextPart_000_0009_01D06BC5.14D754D0--