[MNT-23960] Added new options (pdfFont, pdfFontSize) to textToPdf transformer (#885)

* [MNT-23960] Added options (pdfFont, pdfFontSize) and NotoSans fonts to textToPdf transformer

* [MNT-23960] Added 'MISC_PDFBOX_DEFAULT_FONT' and 'transform.core.misc.pdfBox.defaultFont' configuration to core-aio and misc T-Engines

* [MNT-23960] Added NotoSans fonts to core-aio and misc T-Engine images

* [MNT-23960] Improved logging: added messages, using placeholders

* [MNT-23960] Added DEFAULT_FONT constant (NotoSans-Regular)

* [MNT-23960] Split getFont(PDDocument, String) code into 3 methods. Added Javadoc.

* [MNT-23960] Return TransformCheckResult on transformTextAndCheck methods. Added assertion to testUTF8WithBOM test.
This commit is contained in:
tiagosalvado10 2024-02-06 16:26:25 +00:00 committed by GitHub
parent bb5d86135d
commit b9bcc3c9d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 555 additions and 52 deletions

View File

@ -39,6 +39,7 @@ The following externalized T-engines properties are available:
| ACTIVEMQ_PASSWORD | ActiveMQ Password. | admin | | ACTIVEMQ_PASSWORD | ActiveMQ Password. | admin |
| FILE_STORE_URL | T-Engine Port. | http://localhost:8099/alfresco/api/-default-/private/sfs/versions/1/file | | FILE_STORE_URL | T-Engine Port. | http://localhost:8099/alfresco/api/-default-/private/sfs/versions/1/file |
| TRANSFORM_ENGINE_REQUEST_QUEUE | T-Engine queue used for async requests. | org.alfresco.transform.engine.misc.acs | | TRANSFORM_ENGINE_REQUEST_QUEUE | T-Engine queue used for async requests. | org.alfresco.transform.engine.misc.acs |
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |
## Libreoffice ## Libreoffice
| Property | Description | Default value | | Property | Description | Default value |
@ -96,4 +97,5 @@ The following externalized T-engines properties are available:
| IMAGEMAGICK_DYN | Path to Imagemagick DYLD. | /usr/lib64/ImageMagick-7.0.10/lib | | IMAGEMAGICK_DYN | Path to Imagemagick DYLD. | /usr/lib64/ImageMagick-7.0.10/lib |
| IMAGEMAGICK_EXE | Path to Imagemagick EXE. | /usr/bin/convert | | IMAGEMAGICK_EXE | Path to Imagemagick EXE. | /usr/bin/convert |
| IMAGEMAGICK_CODERS | Path to Imagemagick custom coders. | | | IMAGEMAGICK_CODERS | Path to Imagemagick custom coders. | |
| IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. | | | IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. | |
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |

View File

@ -76,6 +76,7 @@ ADD target/generated-resources/licenses /licenses
ADD target/generated-resources/licenses.xml /licenses/ ADD target/generated-resources/licenses.xml /licenses/
ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/ ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/
COPY target/classes/licenses/3rd-party/ / COPY target/classes/licenses/3rd-party/ /
COPY target/classes/fonts/NotoSans /usr/local/share/fonts/NotoSans
RUN groupadd -g ${GROUPID} ${GROUPNAME} && \ RUN groupadd -g ${GROUPID} ${GROUPNAME} && \
useradd -u ${USERID} -G ${GROUPNAME} ${AIOUSERNAME} && \ useradd -u ${USERID} -G ${GROUPNAME} ${AIOUSERNAME} && \

View File

@ -24,3 +24,6 @@ transform:
exifTool: exifTool:
windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
misc:
pdfBox:
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}

View File

@ -71,7 +71,9 @@ public class AIOTikaTest extends TikaTest
"startPage", "startPage",
"targetEncoding", "targetEncoding",
"thumbnail", "thumbnail",
"width" "width",
"pdfFont",
"pdfFontSize"
), ),
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions())); getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
} }

View File

@ -19,6 +19,7 @@ ADD target/generated-resources/licenses /licenses
ADD target/generated-resources/licenses.xml /licenses/ ADD target/generated-resources/licenses.xml /licenses/
ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/ ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/
COPY target/classes/licenses/3rd-party/ / COPY target/classes/licenses/3rd-party/ /
COPY target/classes/fonts/NotoSans /usr/local/share/fonts/NotoSans
RUN groupadd -g ${GROUPID} ${GROUPNAME} && \ RUN groupadd -g ${GROUPID} ${GROUPNAME} && \
useradd -u ${USERID} -G ${GROUPNAME} ${MISCUSERNAME} && \ useradd -u ${USERID} -G ${GROUPNAME} ${MISCUSERNAME} && \

View File

@ -7,3 +7,4 @@
* commons-compress, PDFBox and poi-ooxml are from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the * commons-compress, PDFBox and poi-ooxml are from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the
[Apache 2.0.txt](src/main/resources/licenses/3rd-party/Apache%202.0.txt) [Apache 2.0.txt](src/main/resources/licenses/3rd-party/Apache%202.0.txt)
file placed in the root directory of the docker image. file placed in the root directory of the docker image.
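* NotoSans fonts are from The Noto Project Authors and are licensed under the SIL Open Font License, Version 1.1. See the license at https://openfontlicense.org/open-font-license-official-text/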

View File

@ -26,16 +26,10 @@
*/ */
package org.alfresco.transform.misc.transformers; package org.alfresco.transform.misc.transformers;
import org.alfresco.transform.base.TransformManager; import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor; import static org.alfresco.transform.common.RequestParamMap.PDF_FONT;
import org.apache.pdfbox.pdmodel.PDDocument; import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE;
import org.apache.pdfbox.pdmodel.PDPage; import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.BufferedReader; import java.io.BufferedReader;
@ -48,12 +42,31 @@ import java.io.InputStreamReader;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.io.Reader; import java.io.Reader;
import java.net.URI;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; import org.alfresco.transform.base.TransformManager;
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.util.autodetect.FontFileFinder;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.FontMappers;
import org.apache.pdfbox.pdmodel.font.FontMapping;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import jakarta.annotation.PostConstruct;
/** /**
* <p> * <p>
@ -77,20 +90,30 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
private static final byte EF = (byte) 0xEF; private static final byte EF = (byte) 0xEF;
private static final byte BB = (byte) 0xBB; private static final byte BB = (byte) 0xBB;
private static final byte BF = (byte) 0xBF; private static final byte BF = (byte) 0xBF;
private static final String DEFAULT_FONT = "NotoSans-Regular";
private static final int DEFAULT_FONT_SIZE = 10;
private final PagedTextToPDF transformer; private final PagedTextToPDF transformer;
@Value("${transform.core.misc.pdfBox.defaultFont:NotoSans-Regular}")
private String pdfBoxDefaultFont;
public TextToPdfContentTransformer() public TextToPdfContentTransformer()
{ {
transformer = new PagedTextToPDF(); transformer = new PagedTextToPDF();
} }
/**
 * Hands the configured default font name ({@code pdfBoxDefaultFont}) over to the wrapped
 * PDF transformer after this bean has been constructed.
 */
@PostConstruct
public void init()
{
    this.transformer.setDefaultFont(this.pdfBoxDefaultFont);
}
public void setStandardFont(String fontName) public void setStandardFont(String fontName)
{ {
try try
{ {
transformer.setFont(PagedTextToPDF.getStandardFont(fontName)); transformer.setFont(fontName);
} }
catch (Throwable e) catch (Throwable e)
{ {
@ -112,6 +135,11 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
} }
} }
/**
 * Reports the font name currently recorded by the wrapped PDF transformer.
 *
 * @return the value of the transformer's {@code getFontName()}
 */
public String getUsedFont()
{
    return this.transformer.getFontName();
}
@Override @Override
public String getTransformerName() public String getTransformerName()
{ {
@ -130,6 +158,25 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
{ {
pageLimit = parseInt(stringPageLimit, PAGE_LIMIT); pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
} }
String pdfFont = transformOptions.get(PDF_FONT);
if (pdfFont == null || pdfFont.isBlank())
{
pdfFont = pdfBoxDefaultFont;
}
String pdfFontSize = transformOptions.get(PDF_FONT_SIZE);
Integer fontSize = null;
if (pdfFontSize != null && !pdfFontSize.isBlank())
{
try
{
fontSize = parseInt(pdfFontSize, PDF_FONT_SIZE);
}
catch (Exception e)
{
fontSize = DEFAULT_FONT_SIZE;
logger.error("Error parsing font size {}, going to set it as {}", pdfFontSize, fontSize, e);
}
}
PDDocument pdf = null; PDDocument pdf = null;
try (InputStream is = new FileInputStream(sourceFile); try (InputStream is = new FileInputStream(sourceFile);
@ -138,7 +185,7 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
{ {
//TransformationOptionLimits limits = getLimits(reader, writer, options); //TransformationOptionLimits limits = getLimits(reader, writer, options);
//TransformationOptionPair pageLimits = limits.getPagesPair(); //TransformationOptionPair pageLimits = limits.getPagesPair();
pdf = transformer.createPDFFromText(ir, pageLimit); pdf = transformer.createPDFFromText(ir, pageLimit, pdfFont, fontSize);
pdf.save(os); pdf.save(os);
} }
finally finally
@ -231,22 +278,34 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
} }
//duplicating until here //duplicating until here
private String fontName = null;
private String defaultFont = null;
// The following code is based on the code in TextToPDF with the addition of // The following code is based on the code in TextToPDF with the addition of
// checks for page limits. // checks for page limits.
// The calling code must close the PDDocument once finished with it. // The calling code must close the PDDocument once finished with it.
public PDDocument createPDFFromText(Reader text, int pageLimit) public PDDocument createPDFFromText(Reader text, int pageLimit, String pdfFontName, Integer pdfFontSize)
throws IOException throws IOException
{ {
PDDocument doc = null; PDDocument doc = null;
int pageCount = 0; int pageCount = 0;
try try
{ {
doc = new PDDocument();
final PDFont font = getFont(doc, pdfFontName);
final int fontSize = pdfFontSize != null ? pdfFontSize : getFontSize();
fontName = font.getName();
logger.debug("Going to use font {} with size {}", fontName, fontSize);
final int margin = 40; final int margin = 40;
float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000; float height = font.getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
//calculate font height and increase by 5 percent. //calculate font height and increase by 5 percent.
height = height * getFontSize() * 1.05f; height = height * fontSize * 1.05f;
doc = new PDDocument();
BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text); BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
String nextLine; String nextLine;
PDPage page = new PDPage(); PDPage page = new PDPage();
@ -280,8 +339,8 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
{ {
String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex]; String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
lengthIfUsingNextWord = lengthIfUsingNextWord =
(getFont().getStringWidth( (font.getStringWidth(
lineWithNextWord) / 1000) * getFontSize(); lineWithNextWord) / 1000) * fontSize;
} }
} }
while (lineIndex < lineWords.length && while (lineIndex < lineWords.length &&
@ -304,7 +363,7 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
contentStream.close(); contentStream.close();
} }
contentStream = new PDPageContentStream(doc, page); contentStream = new PDPageContentStream(doc, page);
contentStream.setFont(getFont(), getFontSize()); contentStream.setFont(font, fontSize);
contentStream.beginText(); contentStream.beginText();
y = page.getMediaBox().getHeight() - margin + height; y = page.getMediaBox().getHeight() - margin + height;
contentStream.moveTextPositionByAmount(margin, y); contentStream.moveTextPositionByAmount(margin, y);
@ -344,6 +403,199 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
} }
return doc; return doc;
} }
/**
 * Selects a font by name using {@code getStandardFont}. When that lookup yields no font
 * the call is a no-op: neither the parent's font nor the recorded font name is changed.
 *
 * @param aFontName
 *            the name of the font to look up
 */
public void setFont(String aFontName)
{
    final PDType1Font standardFont = PagedTextToPDF.getStandardFont(aFontName);
    if (standardFont == null)
    {
        return;
    }

    super.setFont(standardFont);
    this.fontName = aFontName;
}
/**
 * Gets the font that will be used in document transformation using the following approaches,
 * in order, stopping at the first one that yields a font:
 * <ol>
 * <li>Standard font map
 * <li>Font Mappers
 * <li>File system fonts
 * <li>Transformer default font
 * <li>PdfBox default font
 * </ol>
 *
 * @param doc
 *            the document that will be transformed
 * @param fontName
 *            the font name that will be used in transformation; when {@code null} the name
 *            returned by {@link #getDefaultFont()} is used instead
 *
 * @return the font that was found
 */
private PDFont getFont(PDDocument doc, String fontName)
{
    if (fontName == null)
    {
        fontName = getDefaultFont();
    }

    // Try each lookup in turn; a later source is only consulted when the earlier ones found nothing
    PDFont font = getFromStandardFonts(fontName);
    if (font == null)
    {
        font = getFromFontMapper(fontName, doc);
    }
    if (font == null)
    {
        font = getFromFileSystem(fontName);
    }
    if (font != null)
    {
        return font;
    }

    // Nothing matched the requested name: retry once with the transformer default font.
    // The name comparison stops the recursion when the default itself cannot be resolved.
    if (defaultFont != null && !fontName.equals(defaultFont))
    {
        return getFont(doc, defaultFont);
    }

    // Last resort: whatever font the no-argument getFont() currently returns
    return getFont();
}
/**
 * Looks the given name up through {@code PagedTextToPDF.getStandardFont}.
 *
 * @param fontName
 *            the name of the font to look up
 *
 * @return the matching font, or null when the lookup returns none
 */
private PDFont getFromStandardFonts(String fontName)
{
    final PDFont standardFont = PagedTextToPDF.getStandardFont(fontName);
    return standardFont;
}
/**
 * Asks the {@link FontMappers} instance for a TrueType font with the given name and, when a
 * non-fallback mapping with a font is returned, loads that font into the document.
 *
 * @param fontName
 *            the name of the font to look up
 * @param doc
 *            the PDF document the font is loaded into
 *
 * @return the loaded font, or null when there is no usable mapping or loading fails
 */
private PDFont getFromFontMapper(String fontName, PDDocument doc)
{
    final FontMapping<TrueTypeFont> mapping = FontMappers.instance().getTrueTypeFont(fontName, null);

    // Fallback mappings are ignored so that only a match for the requested name is accepted
    if (mapping == null || mapping.getFont() == null || mapping.isFallback())
    {
        return null;
    }

    try
    {
        return PDType0Font.load(doc, mapping.getFont().getOriginalData());
    }
    catch (Exception e)
    {
        logger.error("Error loading font mapping {}", fontName, e);
        return null;
    }
}
/**
 * Gets the font from existing file system fonts. The candidate files are those returned by
 * {@link FontFileFinder#find()}; the first one whose path contains {@code fontName + ".ttf"}
 * and loads successfully is used.
 *
 * @param fontName
 *            the font name to obtain
 * @return the font object that has been found, otherwise null
 */
private PDFont getFromFileSystem(String fontName)
{
    PDFont font = null;
    String nameWithExtension = fontName + ".ttf";

    FontFileFinder fontFileFinder = new FontFileFinder();
    List<URI> uris = fontFileFinder.find();

    for (URI uri : uris)
    {
        if (!uri.getPath().contains(nameWithExtension))
        {
            continue;
        }

        // try-with-resources closes the stream on every path, including a failed load
        try (InputStream fontIS = new FileInputStream(new File(uri)))
        {
            // NOTE(review): the font is loaded against a throwaway PDDocument that is never
            // closed here, not against the document being produced - confirm this is intended.
            PDDocument documentMock = new PDDocument();
            font = PDType0Font.load(documentMock, fontIS);
        }
        catch (IOException ioe)
        {
            logger.error("Error loading font {} from filesystem", fontName, ioe);
        }

        // Stop at the first file that produced a font; otherwise keep trying other matches
        if (font != null)
        {
            break;
        }
    }

    return font;
}
/**
 * Returns the font name last recorded by this transformer, either by
 * {@code setFont(String)} or by a PDF creation.
 *
 * @return the recorded font name, possibly null if none has been recorded yet
 */
public String getFontName()
{
    return fontName;
}
/**
 * Returns the default font name, substituting
 * {@code TextToPdfContentTransformer.DEFAULT_FONT} when none is set or it is blank.
 *
 * @return a default font name
 */
public String getDefaultFont()
{
    final boolean unset = defaultFont == null || defaultFont.isBlank();
    return unset ? TextToPdfContentTransformer.DEFAULT_FONT : defaultFont;
}
/**
 * Stores the default font name. A null or blank name is replaced by
 * {@code TextToPdfContentTransformer.DEFAULT_FONT}.
 *
 * @param name
 *            the default font name to store
 */
public void setDefaultFont(String name)
{
    final boolean usable = name != null && !name.isBlank();
    this.defaultFont = usable ? name : TextToPdfContentTransformer.DEFAULT_FONT;
}
} }
private int parseInt(String s, String paramName) private int parseInt(String s, String paramName)

View File

@ -1,2 +1,7 @@
queue: queue:
engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.misc.acs} engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.misc.acs}
transform:
core:
misc:
pdfBox:
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}

View File

@ -0,0 +1,93 @@
Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic)
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
https://openfontlicense.org
-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.
The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).
"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.
"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.
5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are
not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

View File

@ -1,7 +1,9 @@
{ {
"transformOptions": { "transformOptions": {
"textToPdfOptions": [ "textToPdfOptions": [
{"value": {"name": "pageLimit"}} {"value": {"name": "pageLimit"}},
{"value": {"name": "pdfFont"}},
{"value": {"name": "pdfFontSize"}}
], ],
"stringOptions": [ "stringOptions": [
{"value": {"name": "targetEncoding"}} {"value": {"name": "targetEncoding"}}

View File

@ -26,10 +26,14 @@
*/ */
package org.alfresco.transform.misc.transformers; package org.alfresco.transform.misc.transformers;
import org.apache.pdfbox.pdmodel.PDDocument; import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
import org.apache.pdfbox.text.PDFTextStripper; import static org.alfresco.transform.common.RequestParamMap.PDF_FONT;
import org.junit.jupiter.api.BeforeEach; import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE;
import org.junit.jupiter.api.Test; import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
@ -43,14 +47,19 @@ import java.io.StringWriter;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; import org.apache.pdfbox.pdmodel.PDDocument;
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; import org.apache.pdfbox.pdmodel.font.PDType1Font;
import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;;
public class TextToPdfContentTransformerTest public class TextToPdfContentTransformerTest
{ {
TextToPdfContentTransformer transformer = new TextToPdfContentTransformer(); TextToPdfContentTransformer transformer = new TextToPdfContentTransformer();
private static final String TEXT_WITH_A_BREVE = "Gămbardella, Matthew, Corets, Evă";
private static final String TEXT_WITHOUT_A_BREVE = "Gambardella, Matthew, Corets, Eva";
@BeforeEach @BeforeEach
public void setUp() public void setUp()
{ {
@ -137,7 +146,8 @@ public class TextToPdfContentTransformerTest
@Test @Test
public void testUTF8WithBOM() throws Exception public void testUTF8WithBOM() throws Exception
{ {
transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d"); TransformCheckResult result = transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d");
assertEquals(result.getUsedFont(), "Times-Roman");
} }
@Test @Test
@ -146,6 +156,78 @@ public class TextToPdfContentTransformerTest
transformTextAndCheck("UTF-8", null, false, "31 20 49 20 6d 75 73 74"); transformTextAndCheck("UTF-8", null, false, "31 20 49 20 6d 75 73 74");
} }
/**
 * Test if a different font can be chosen to perform the transformation: the source text is
 * transformed with the {@code pdfFont} option set to Times-Bold and {@code pdfFontSize} set
 * to 30, and the font reported as used must be Times-Bold with no error recorded.
 *
 * @throws Exception
 *             if preparing the source file or running the transformation check fails
 */
@Test
public void testMNT23960_TimesBold_WithoutBreve() throws Exception
{
    File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt");
    String encoding = "UTF-8";

    writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null);

    Map<String, String> parameters = new HashMap<>();
    parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());
    parameters.put(PDF_FONT_SIZE, "30");

    TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
            parameters, false);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate
    assertEquals(PDType1Font.TIMES_BOLD.getName(), result.getUsedFont());
    assertNull(result.getErrorMessage());
}
/**
 * Test if the default font is used when the chosen one is not found: the transformation is
 * requested with the unknown font name "MyDummyFont" and the font reported as used must be
 * Times-Roman with no error recorded.
 *
 * @throws Exception
 *             if preparing the source file or running the transformation check fails
 */
@Test
public void testMNT23960_InexistentFont_WithoutBreve() throws Exception
{
    File sourceFile = File.createTempFile("TMP_MyDummyFont", ".txt");
    String encoding = "UTF-8";

    writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null);

    Map<String, String> parameters = new HashMap<>();
    parameters.put(PDF_FONT, "MyDummyFont");

    TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
            parameters, false);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate
    assertEquals(PDType1Font.TIMES_ROMAN.getName(), result.getUsedFont());
    assertNull(result.getErrorMessage());
}
/**
 * Test if a different font can be chosen to perform the transformation with breve character. This test
 * transformation should fail as Times-Bold font doesn't handle the breve character: the font
 * reported as used must still be Times-Bold and the recorded error message must mention it.
 *
 * @throws Exception
 *             if preparing the source file or running the transformation check fails
 */
@Test
public void testMNT23960_TimesBold_WithBreve() throws Exception
{
    File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt");
    String encoding = "UTF-8";

    writeToFile(sourceFile, TEXT_WITH_A_BREVE, encoding, null, null);

    Map<String, String> parameters = new HashMap<>();
    parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());

    TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITH_A_BREVE, String.valueOf(-1), true,
            parameters, true);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate
    assertEquals(PDType1Font.TIMES_BOLD.getName(), result.getUsedFont());
    assertNotNull(result.getErrorMessage());
    assertTrue(result.getErrorMessage().contains(PDType1Font.TIMES_BOLD.getName()));
}
/** /**
* @param encoding to be used to read the source file * @param encoding to be used to read the source file
* @param bigEndian indicates that the file should contain big endian characters, so typically the first byte of * @param bigEndian indicates that the file should contain big endian characters, so typically the first byte of
@ -155,18 +237,18 @@ public class TextToPdfContentTransformerTest
* @param expectedByteOrder The first few bytes of the source file so we can check the test data has been * @param expectedByteOrder The first few bytes of the source file so we can check the test data has been
* correctly created. * correctly created.
*/ */
protected void transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom, protected TransformCheckResult transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom,
String expectedByteOrder) throws Exception String expectedByteOrder) throws Exception
{ {
transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder); return transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder);
} }
protected void transformTextAndCheckPageLength(int pageLimit) throws Exception protected TransformCheckResult transformTextAndCheckPageLength(int pageLimit) throws Exception
{ {
transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null); return transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null);
} }
private void transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom, private TransformCheckResult transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom,
String expectedByteOrder) throws Exception String expectedByteOrder) throws Exception
{ {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -177,7 +259,7 @@ public class TextToPdfContentTransformerTest
writeToFile(sourceFile, text, encoding, bigEndian, validBom); writeToFile(sourceFile, text, encoding, bigEndian, validBom);
checkFileBytes(sourceFile, expectedByteOrder); checkFileBytes(sourceFile, expectedByteOrder);
transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit)); return transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit));
} }
private String createTestText(int pageLimit, StringBuilder sb) private String createTestText(int pageLimit, StringBuilder sb)
@ -203,9 +285,17 @@ public class TextToPdfContentTransformerTest
return checkText; return checkText;
} }
private void transformTextAndCheck(File sourceFile, String encoding, String checkText, private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
String pageLimit) throws Exception String pageLimit) throws Exception
{ {
return transformTextAndCheck(sourceFile, encoding, checkText, pageLimit, true, null, false);
}
private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
String pageLimit, boolean clean, Map<String, String> extraParameters, boolean shouldFail) throws Exception
{
TransformCheckResult result = new TransformCheckResult();
// And a temp writer // And a temp writer
File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf"); File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf");
@ -213,24 +303,47 @@ public class TextToPdfContentTransformerTest
Map<String, String> parameters = new HashMap<>(); Map<String, String> parameters = new HashMap<>();
parameters.put(PAGE_LIMIT, pageLimit); parameters.put(PAGE_LIMIT, pageLimit);
parameters.put(SOURCE_ENCODING, encoding); parameters.put(SOURCE_ENCODING, encoding);
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null); if (extraParameters != null)
{
parameters.putAll(extraParameters);
}
// Read back in the PDF and check it boolean failed = false;
PDDocument doc = PDDocument.load(targetFile);
PDFTextStripper textStripper = new PDFTextStripper();
StringWriter textWriter = new StringWriter();
textStripper.writeText(doc, textWriter);
doc.close();
String roundTrip = clean(textWriter.toString()); try
{
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null);
}
catch (Exception e)
{
failed = true;
result.setErrorMessage(e.getMessage());
}
assertEquals( result.setUsedFont(transformer.getUsedFont());
checkText, roundTrip,
"Incorrect text in PDF when starting from text in " + encoding if (!failed)
); {
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);
PDFTextStripper textStripper = new PDFTextStripper();
StringWriter textWriter = new StringWriter();
textStripper.writeText(doc, textWriter);
doc.close();
String roundTrip = clean(textWriter.toString());
assertEquals(checkText, roundTrip, "Incorrect text in PDF when starting from text in " + encoding);
}
else
{
assertTrue(shouldFail && failed);
}
sourceFile.delete(); sourceFile.delete();
targetFile.delete(); targetFile.delete();
return result;
} }
private String clean(String text) private String clean(String text)
@ -367,4 +480,30 @@ public class TextToPdfContentTransformerTest
} }
return sb.toString(); return sb.toString();
} }
/**
 * Mutable holder for the outcome of a transformation check: the font that was used and the
 * error message, if any.
 */
private static class TransformCheckResult
{
    /** Name of the font used; null until set. */
    private String usedFont;

    /** Error message recorded for the transformation; null until set. */
    private String errorMessage;

    /**
     * @return the recorded error message, or null when none has been set
     */
    public String getErrorMessage()
    {
        return this.errorMessage;
    }

    /**
     * @param errorMessage
     *            the error message to record
     */
    public void setErrorMessage(String errorMessage)
    {
        this.errorMessage = errorMessage;
    }

    /**
     * @return the recorded font name, or null when none has been set
     */
    public String getUsedFont()
    {
        return this.usedFont;
    }

    /**
     * @param usedFont
     *            the font name to record
     */
    public void setUsedFont(String usedFont)
    {
        this.usedFont = usedFont;
    }
}
} }

View File

@ -66,6 +66,8 @@ public interface RequestParamMap
String PAGE_LIMIT = "pageLimit"; String PAGE_LIMIT = "pageLimit";
String PDF_FORMAT = "pdfFormat"; String PDF_FORMAT = "pdfFormat";
String PDF_ORIENTATION = "pdfOrientation"; String PDF_ORIENTATION = "pdfOrientation";
String PDF_FONT = "pdfFont";
String PDF_FONT_SIZE = "pdfFontSize";
// Parameters interpreted by the TransformController // Parameters interpreted by the TransformController
String DIRECT_ACCESS_URL = "directAccessUrl"; String DIRECT_ACCESS_URL = "directAccessUrl";