mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-09-17 14:21:18 +00:00
REPO-4331: Add remaining core transformers (#45)
* REPO-4331: Add remaining core transformers * HtmlParserContentTransformer * AppleIWorksContentTransformer * StringExtractingContentTransformer * TextToPdfContentTransformer * OOXMLThumbnailContentTransformer
This commit is contained in:
@@ -0,0 +1,294 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.alfresco.transformer.transformers.AppleIWorksContentTransformer;
|
||||
import org.alfresco.transformer.transformers.HtmlParserContentTransformer;
|
||||
import org.alfresco.transformer.transformers.SelectingTransformer;
|
||||
import org.alfresco.transformer.transformers.StringExtractingContentTransformer;
|
||||
import org.alfresco.transformer.transformers.TextToPdfContentTransformer;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
import org.springframework.test.web.servlet.MvcResult;
|
||||
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_KEYNOTE;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_NUMBERS;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IWORK_PAGES;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_PDF;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@WebMvcTest(MiscController.class)
|
||||
@Import({SelectingTransformer.class})
|
||||
public class MiscControllerTest extends AbstractTransformerControllerTest
|
||||
{
|
||||
|
||||
@Autowired
|
||||
private MiscController controller;
|
||||
|
||||
private String sourceEncoding = "UTF-8";
|
||||
private String targetEncoding = "UTF-8";
|
||||
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
@Before
|
||||
public void before() throws Exception
|
||||
{
|
||||
sourceMimetype = MIMETYPE_HTML;
|
||||
sourceExtension = "html";
|
||||
targetExtension = "txt";
|
||||
expectedOptions = null;
|
||||
expectedSourceSuffix = null;
|
||||
expectedSourceFileBytes = readTestFile(sourceExtension);
|
||||
expectedTargetFileBytes = readTestFile(targetExtension);
|
||||
//expectedTargetFileBytes = null;
|
||||
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, expectedSourceFileBytes);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mockTransformCommand(String sourceExtension, String targetExtension, String sourceMimetype, boolean readTargetFileBytes) throws IOException
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractTransformerController getController()
|
||||
{
|
||||
return controller;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest)
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
// Add extra required parameters to the request.
|
||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
||||
{
|
||||
return super.mockMvcRequest(url, sourceFile, params)
|
||||
.param("targetEncoding", targetEncoding)
|
||||
.param("sourceEncoding", sourceEncoding)
|
||||
.param("targetMimetype", targetMimetype)
|
||||
.param("sourceMimetype", sourceMimetype);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void noTargetFileTest()
|
||||
{
|
||||
// Ignore the test in super class as the Misc transforms are real rather than mocked up.
|
||||
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHTMLtoString() throws Exception
|
||||
{
|
||||
final String NEWLINE = System.getProperty ("line.separator");
|
||||
final String TITLE = "Testing!";
|
||||
final String TEXT_P1 = "This is some text in English";
|
||||
final String TEXT_P2 = "This is more text in English";
|
||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
||||
String partC = "</body></html>";
|
||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
||||
|
||||
MvcResult result = sendText("html",
|
||||
"UTF-8",
|
||||
MIMETYPE_HTML,
|
||||
"txt",
|
||||
MIMETYPE_TEXT_PLAIN,
|
||||
"UTF-8",
|
||||
expected.getBytes());
|
||||
|
||||
String contentResult = new String(result.getResponse().getContentAsByteArray(), targetEncoding);
|
||||
assertTrue("The content did not include \""+expected, contentResult.contains(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringtoString() throws Exception
|
||||
{
|
||||
String expected = null;
|
||||
byte[] content = null;
|
||||
try
|
||||
{
|
||||
content = "azAz10!<21>$%^&*()\t\r\n".getBytes("UTF-8");
|
||||
expected = new String(content, "MacDingbat");
|
||||
}
|
||||
catch (UnsupportedEncodingException e)
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Encoding not recognised", e);
|
||||
}
|
||||
|
||||
MvcResult result = sendText("txt",
|
||||
"MacDingbat",
|
||||
MIMETYPE_TEXT_PLAIN,
|
||||
"txt",
|
||||
MIMETYPE_TEXT_PLAIN,
|
||||
"UTF-8",
|
||||
content);
|
||||
|
||||
String contentResult = new String(result.getResponse().getContentAsByteArray(), targetEncoding);
|
||||
assertTrue("The content did not include \""+expected, contentResult.contains(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void textToPdf() throws Exception
|
||||
{
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String expected = null;
|
||||
for (int i=1; i<=5; i++)
|
||||
{
|
||||
sb.append(i);
|
||||
sb.append(" I must not talk in class or feed my homework to my cat.\n");
|
||||
}
|
||||
sb.append("\nBart\n");
|
||||
expected = sb.toString();
|
||||
|
||||
MvcResult result = sendText("txt",
|
||||
"UTF-8",
|
||||
MIMETYPE_TEXT_PLAIN,
|
||||
"pdf",
|
||||
MIMETYPE_PDF,
|
||||
"UTF-8",
|
||||
expected.getBytes());
|
||||
|
||||
// Read back in the PDF and check it
|
||||
PDDocument doc = PDDocument.load(result.getResponse().getContentAsByteArray());
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
StringWriter textWriter = new StringWriter();
|
||||
textStripper.writeText(doc, textWriter);
|
||||
doc.close();
|
||||
|
||||
expected = clean(expected);
|
||||
String actual = clean(textWriter.toString());
|
||||
|
||||
assertEquals("The content did not match.", expected, actual);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppleIWorksPages() throws Exception
|
||||
{
|
||||
imageBasedTransform("pages", MIMETYPE_IWORK_PAGES, MIMETYPE_IMAGE_JPEG, "jpeg");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppleIWorksNumbers() throws Exception
|
||||
{
|
||||
imageBasedTransform("numbers", MIMETYPE_IWORK_NUMBERS, MIMETYPE_IMAGE_JPEG, "jpeg");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppleIWorksKey() throws Exception
|
||||
{
|
||||
imageBasedTransform("key", MIMETYPE_IWORK_KEYNOTE, MIMETYPE_IMAGE_JPEG, "jpeg");
|
||||
}
|
||||
|
||||
// TODO Doesn't wotk with java 11, enable when fixed
|
||||
// @Test
|
||||
public void testOOXML() throws Exception
|
||||
{
|
||||
imageBasedTransform("docx", MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_IMAGE_JPEG, "jpeg");
|
||||
}
|
||||
|
||||
private void imageBasedTransform(String sourceExtension, String sourceMimetype, String targetMimetype, String targetExtension) throws Exception
|
||||
{
|
||||
MockMultipartFile sourceFilex = new MockMultipartFile("file", "test_file." + sourceExtension, sourceMimetype, readTestFile(sourceExtension));
|
||||
|
||||
MockHttpServletRequestBuilder requestBuilder = super.mockMvcRequest("/transform", sourceFilex)
|
||||
.param("targetExtension", "jpeg")
|
||||
.param("targetMimetype", targetMimetype)
|
||||
.param("sourceMimetype", sourceMimetype);
|
||||
|
||||
MvcResult result = mockMvc.perform(requestBuilder)
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''test_file." + targetExtension))
|
||||
.andReturn();
|
||||
assertTrue("Expected image content but content is empty.",result.getResponse().getContentLengthLong() > 0L);
|
||||
}
|
||||
|
||||
|
||||
private MvcResult sendText(String sourceExtension,
|
||||
String sourceEncoding,
|
||||
String sourceMimetype,
|
||||
String targetExtension,
|
||||
String targetMimetype,
|
||||
String targetEncoding,
|
||||
byte[] content) throws Exception
|
||||
{
|
||||
MockMultipartFile sourceFilex = new MockMultipartFile("file", "test_file." + sourceExtension, sourceMimetype, content);
|
||||
|
||||
MockHttpServletRequestBuilder requestBuilder = super.mockMvcRequest("/transform", sourceFilex)
|
||||
.param("targetExtension", targetExtension)
|
||||
.param("targetEncoding", targetEncoding)
|
||||
.param("targetMimetype", targetMimetype)
|
||||
.param("sourceEncoding", sourceEncoding)
|
||||
.param("sourceMimetype", sourceMimetype);
|
||||
|
||||
|
||||
MvcResult result = mockMvc.perform(requestBuilder)
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition", "attachment; filename*= "+targetEncoding+"''test_file." + targetExtension)).
|
||||
andReturn();
|
||||
return result;
|
||||
}
|
||||
|
||||
private String clean(String text)
|
||||
{
|
||||
text = text.replaceAll("\\s+\\r", "");
|
||||
text = text.replaceAll("\\s+\\n", "");
|
||||
text = text.replaceAll("\\r", "");
|
||||
text = text.replaceAll("\\n", "");
|
||||
return text;
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_HTML)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L).build();
|
||||
}
|
||||
}
|
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
|
||||
{
|
||||
@Override
|
||||
protected String getTransformerName()
|
||||
{
|
||||
return "Miscellaneous Transformers";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getSourceExtension()
|
||||
{
|
||||
return "html";
|
||||
}
|
||||
}
|
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.Mock;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.nio.file.Files;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
|
||||
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.TARGET_ENCODING;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@Import(HtmlParserContentTransformer.class)
|
||||
public class HtmlParserContentTransformerTest
|
||||
{
|
||||
@Autowired
|
||||
HtmlParserContentTransformer transformer;
|
||||
|
||||
/**
|
||||
* Checks that we correctly handle text in different encodings,
|
||||
* no matter if the encoding is specified on the Content Property
|
||||
* or in a meta tag within the HTML itself. (ALF-10466)
|
||||
*
|
||||
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
|
||||
* so we must be careful when checking the returned text
|
||||
*/
|
||||
@Test
|
||||
public void testEncodingHandling() throws Exception
|
||||
{
|
||||
final String NEWLINE = System.getProperty ("line.separator");
|
||||
final String TITLE = "Testing!";
|
||||
final String TEXT_P1 = "This is some text in English";
|
||||
final String TEXT_P2 = "This is more text in English";
|
||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
||||
String partC = "</body></html>";
|
||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
||||
|
||||
File tmpS = null;
|
||||
File tmpD = null;
|
||||
|
||||
try
|
||||
{
|
||||
// Content set to ISO 8859-1
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA+partB+partC, "ISO-8859-1");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(tmpS, tmpD, parameters);
|
||||
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Content set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA+partB+partC, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||
transformer.transform(tmpS, tmpD, parameters);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
|
||||
// Content set to UTF-16
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA+partB+partC, "UTF-16");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-16");
|
||||
transformer.transform(tmpS, tmpD, parameters);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Note - since HTML Parser 2.0 META tags specifying the
|
||||
// document encoding will ONLY be respected if the original
|
||||
// content type was set to ISO-8859-1.
|
||||
//
|
||||
// This means there is now only one test which we can perform
|
||||
// to ensure that this now-limited overriding of the encoding
|
||||
// takes effect.
|
||||
|
||||
// Content set to ISO 8859-1, meta set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
String str = partA+
|
||||
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
|
||||
partB+partC;
|
||||
|
||||
writeToFile(tmpS, str, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(tmpS, tmpD, parameters);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
|
||||
// Note - we can't test UTF-16 with only a meta encoding,
|
||||
// because without that the parser won't know about the
|
||||
// 2 byte format so won't be able to identify the meta tag
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (tmpS != null && tmpS.exists()) tmpS.delete();
|
||||
if (tmpD != null && tmpD.exists()) tmpD.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private void writeToFile(File file, String content, String encoding) throws Exception
|
||||
{
|
||||
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
||||
{
|
||||
ow.append(content);
|
||||
}
|
||||
}
|
||||
|
||||
private String readFromFile(File file, String encoding) throws Exception
|
||||
{
|
||||
String content = "wrong content";
|
||||
content = new String(Files.readAllBytes(file.toPath()), encoding);
|
||||
return content;
|
||||
}
|
||||
}
|
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@Import(TextToPdfContentTransformer.class)
|
||||
public class TextToPdfContentTransformerTest
|
||||
{
|
||||
@Autowired
|
||||
TextToPdfContentTransformer transformer;
|
||||
|
||||
@Before
|
||||
public void setUp()
|
||||
{
|
||||
transformer.setStandardFont("Times-Roman");
|
||||
transformer.setFontSize(20);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUnlimitedPages() throws Exception
|
||||
{
|
||||
transformTextAndCheckPageLength(-1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLimitedTo1Page() throws Exception
|
||||
{
|
||||
transformTextAndCheckPageLength(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLimitedTo2Pages() throws Exception
|
||||
{
|
||||
transformTextAndCheckPageLength(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLimitedTo50Pages() throws Exception
|
||||
{
|
||||
transformTextAndCheckPageLength(50);
|
||||
}
|
||||
|
||||
private void transformTextAndCheckPageLength(int pageLimit) throws Exception
|
||||
{
|
||||
int pageLength = 32;
|
||||
int lines = (pageLength+10) * ((pageLimit > 0) ? pageLimit : 1);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String checkText = null;
|
||||
int cutoff = pageLimit * pageLength;
|
||||
for (int i=1; i<=lines; i++)
|
||||
{
|
||||
sb.append(i);
|
||||
sb.append(" I must not talk in class or feed my homework to my cat.\n");
|
||||
if (i == cutoff)
|
||||
checkText = sb.toString();
|
||||
}
|
||||
sb.append("\nBart\n");
|
||||
String text = sb.toString();
|
||||
checkText = (checkText == null) ? clean(text) : clean(checkText);
|
||||
transformTextAndCheck(text, "UTF-8", checkText, String.valueOf(pageLimit));
|
||||
}
|
||||
|
||||
private void transformTextAndCheck(String text, String encoding, String checkText, String pageLimit) throws Exception
|
||||
{
|
||||
// Get a reader for the text
|
||||
File sourceFile = File.createTempFile("AlfrescoTestSource_", ".txt");
|
||||
writeToFile(sourceFile,text, encoding);
|
||||
|
||||
|
||||
// And a temp writer
|
||||
File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf");
|
||||
|
||||
// Transform to PDF
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(PAGE_LIMIT, pageLimit);
|
||||
transformer.transform(sourceFile, targetFile, parameters);
|
||||
|
||||
// Read back in the PDF and check it
|
||||
PDDocument doc = PDDocument.load(targetFile);
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
StringWriter textWriter = new StringWriter();
|
||||
textStripper.writeText(doc, textWriter);
|
||||
doc.close();
|
||||
|
||||
String roundTrip = clean(textWriter.toString());
|
||||
|
||||
assertEquals(
|
||||
"Incorrect text in PDF when starting from text in " + encoding,
|
||||
checkText, roundTrip
|
||||
);
|
||||
|
||||
sourceFile.delete();
|
||||
targetFile.delete();
|
||||
}
|
||||
|
||||
private String clean(String text)
|
||||
{
|
||||
text = text.replaceAll("\\s+\\r", "");
|
||||
text = text.replaceAll("\\s+\\n", "");
|
||||
text = text.replaceAll("\\r", "");
|
||||
text = text.replaceAll("\\n", "");
|
||||
return text;
|
||||
}
|
||||
|
||||
private void writeToFile(File file, String content, String encoding) throws Exception
|
||||
{
|
||||
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
||||
{
|
||||
ow.append(content);
|
||||
}
|
||||
}
|
||||
}
|
BIN
alfresco-docker-transform-misc/src/test/resources/quick.docx
Normal file
BIN
alfresco-docker-transform-misc/src/test/resources/quick.docx
Normal file
Binary file not shown.
BIN
alfresco-docker-transform-misc/src/test/resources/quick.key
Normal file
BIN
alfresco-docker-transform-misc/src/test/resources/quick.key
Normal file
Binary file not shown.
BIN
alfresco-docker-transform-misc/src/test/resources/quick.numbers
Normal file
BIN
alfresco-docker-transform-misc/src/test/resources/quick.numbers
Normal file
Binary file not shown.
BIN
alfresco-docker-transform-misc/src/test/resources/quick.pages
Normal file
BIN
alfresco-docker-transform-misc/src/test/resources/quick.pages
Normal file
Binary file not shown.
18
alfresco-docker-transform-misc/src/test/resources/quick.txt
Normal file
18
alfresco-docker-transform-misc/src/test/resources/quick.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user