[MNT-23960] Added new options (pdfFont, pdfFontSize) to textToPdf transformer (#885)

* [MNT-23960] Added options (pdfFont, pdfFontSize) and NotoSans fonts to textToPdf transformer

* [MNT-23960] Added 'MISC_PDFBOX_DEFAULT_FONT' and 'transform.core.misc.pdfbox.defaultFont' configuration to core-aio and misc T-Engines

* [MNT-23960] Added NotoSans fonts to core-aio and misc T-Engine images

* [MNT-23960] Improved logging: added messages, using placeholders

* [MNT-23960] Added DEFAULT_FONT constant (NotoSans-Regular)

* [MNT-23960] Splitted getFont(PDDocument, String) code into 3 methods. Added Javadoc.

* [MNT-23960] Return TransformCheckResult on transformTextAndCheck methods. Added assertion to testUTF8WithBOM test.
This commit is contained in:
tiagosalvado10
2024-02-06 16:26:25 +00:00
committed by GitHub
parent bb5d86135d
commit b9bcc3c9d2
16 changed files with 555 additions and 52 deletions

View File

@@ -26,10 +26,14 @@
*/
package org.alfresco.transform.misc.transformers;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT;
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE;
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
@@ -43,14 +47,19 @@ import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;;
public class TextToPdfContentTransformerTest
{
TextToPdfContentTransformer transformer = new TextToPdfContentTransformer();
private static final String TEXT_WITH_A_BREVE = "Gămbardella, Matthew, Corets, Evă";
private static final String TEXT_WITHOUT_A_BREVE = "Gambardella, Matthew, Corets, Eva";
@BeforeEach
public void setUp()
{
@@ -137,7 +146,8 @@ public class TextToPdfContentTransformerTest
@Test
public void testUTF8WithBOM() throws Exception
{
transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d");
TransformCheckResult result = transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d");
assertEquals(result.getUsedFont(), "Times-Roman");
}
@Test
@@ -146,6 +156,78 @@ public class TextToPdfContentTransformerTest
transformTextAndCheck("UTF-8", null, false, "31 20 49 20 6d 75 73 74");
}
/**
* Test if a different font can be chosen to perform the transformation
*
* @throws Exception
*/
@Test
public void testMNT23960_TimesBold_WithoutBreve() throws Exception
{
File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt");
String encoding = "UTF-8";
writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null);
Map<String, String> parameters = new HashMap<>();
parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());
parameters.put(PDF_FONT_SIZE, "30");
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
parameters, false);
assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName());
assertNull(result.getErrorMessage());
}
/**
* Test if the default font is used when the chosen one is not found
*
* @throws Exception
*/
@Test
public void testMNT23960_InexistentFont_WithoutBreve() throws Exception
{
File sourceFile = File.createTempFile("TMP_MyDummyFont", ".txt");
String encoding = "UTF-8";
writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null);
Map<String, String> parameters = new HashMap<>();
parameters.put(PDF_FONT, "MyDummyFont");
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
parameters, false);
assertEquals(result.getUsedFont(), PDType1Font.TIMES_ROMAN.getName());
assertNull(result.getErrorMessage());
}
/**
* Test if a different font can be chosen to perform the transformation with breve character. This test
* transformation should fail as Times-Bold font doesn't handle the breve character
*
* @throws Exception
*/
@Test
public void testMNT23960_TimesBold_WithBreve() throws Exception
{
File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt");
String encoding = "UTF-8";
writeToFile(sourceFile, TEXT_WITH_A_BREVE, encoding, null, null);
Map<String, String> parameters = new HashMap<>();
parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITH_A_BREVE, String.valueOf(-1), true,
parameters, true);
assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName());
assertNotNull(result.getErrorMessage());
assertTrue(result.getErrorMessage().contains(PDType1Font.TIMES_BOLD.getName()));
}
/**
* @param encoding to be used to read the source file
* @param bigEndian indicates that the file should contain big endian characters, so typically the first byte of
@@ -155,18 +237,18 @@ public class TextToPdfContentTransformerTest
* @param expectedByteOrder The first few bytes of the source file so we can check the test data has been
* correctly created.
*/
protected void transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom,
protected TransformCheckResult transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom,
String expectedByteOrder) throws Exception
{
transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder);
return transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder);
}
protected void transformTextAndCheckPageLength(int pageLimit) throws Exception
protected TransformCheckResult transformTextAndCheckPageLength(int pageLimit) throws Exception
{
transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null);
return transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null);
}
private void transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom,
private TransformCheckResult transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom,
String expectedByteOrder) throws Exception
{
StringBuilder sb = new StringBuilder();
@@ -177,7 +259,7 @@ public class TextToPdfContentTransformerTest
writeToFile(sourceFile, text, encoding, bigEndian, validBom);
checkFileBytes(sourceFile, expectedByteOrder);
transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit));
return transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit));
}
private String createTestText(int pageLimit, StringBuilder sb)
@@ -203,9 +285,17 @@ public class TextToPdfContentTransformerTest
return checkText;
}
private void transformTextAndCheck(File sourceFile, String encoding, String checkText,
private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
String pageLimit) throws Exception
{
return transformTextAndCheck(sourceFile, encoding, checkText, pageLimit, true, null, false);
}
private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
String pageLimit, boolean clean, Map<String, String> extraParameters, boolean shouldFail) throws Exception
{
TransformCheckResult result = new TransformCheckResult();
// And a temp writer
File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf");
@@ -213,24 +303,47 @@ public class TextToPdfContentTransformerTest
Map<String, String> parameters = new HashMap<>();
parameters.put(PAGE_LIMIT, pageLimit);
parameters.put(SOURCE_ENCODING, encoding);
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null);
if (extraParameters != null)
{
parameters.putAll(extraParameters);
}
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);
PDFTextStripper textStripper = new PDFTextStripper();
StringWriter textWriter = new StringWriter();
textStripper.writeText(doc, textWriter);
doc.close();
boolean failed = false;
String roundTrip = clean(textWriter.toString());
try
{
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null);
}
catch (Exception e)
{
failed = true;
result.setErrorMessage(e.getMessage());
}
assertEquals(
checkText, roundTrip,
"Incorrect text in PDF when starting from text in " + encoding
);
result.setUsedFont(transformer.getUsedFont());
if (!failed)
{
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);
PDFTextStripper textStripper = new PDFTextStripper();
StringWriter textWriter = new StringWriter();
textStripper.writeText(doc, textWriter);
doc.close();
String roundTrip = clean(textWriter.toString());
assertEquals(checkText, roundTrip, "Incorrect text in PDF when starting from text in " + encoding);
}
else
{
assertTrue(shouldFail && failed);
}
sourceFile.delete();
targetFile.delete();
return result;
}
private String clean(String text)
@@ -367,4 +480,30 @@ public class TextToPdfContentTransformerTest
}
return sb.toString();
}
private static class TransformCheckResult
{
private String usedFont;
private String errorMessage;
public String getUsedFont()
{
return usedFont;
}
public void setUsedFont(String usedFont)
{
this.usedFont = usedFont;
}
public String getErrorMessage()
{
return errorMessage;
}
public void setErrorMessage(String errorMessage)
{
this.errorMessage = errorMessage;
}
}
}