diff --git a/docs/external-engine-configuration.md b/docs/external-engine-configuration.md index 33cde1e1..606c787f 100644 --- a/docs/external-engine-configuration.md +++ b/docs/external-engine-configuration.md @@ -39,6 +39,7 @@ The following externalized T-engines properties are available: | ACTIVEMQ_PASSWORD | ActiveMQ Password. | admin | | FILE_STORE_URL | T-Engine Port. | http://localhost:8099/alfresco/api/-default-/private/sfs/versions/1/file | | TRANSFORM_ENGINE_REQUEST_QUEUE | T-Engine queue used for async requests. | org.alfresco.transform.engine.misc.acs | +| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular | ## Libreoffice | Property | Description | Default value | @@ -96,4 +97,5 @@ The following externalized T-engines properties are available: | IMAGEMAGICK_DYN | Path to Imagemagick DYLD. | /usr/lib64/ImageMagick-7.0.10/lib | | IMAGEMAGICK_EXE | Path to Imagemagick EXE. | /usr/bin/convert | | IMAGEMAGICK_CODERS | Path to Imagemagick custom coders. | | -| IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. | | \ No newline at end of file +| IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. 
| | +| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular | \ No newline at end of file diff --git a/engines/aio/Dockerfile b/engines/aio/Dockerfile index df305972..47fef57c 100644 --- a/engines/aio/Dockerfile +++ b/engines/aio/Dockerfile @@ -76,6 +76,7 @@ ADD target/generated-resources/licenses /licenses ADD target/generated-resources/licenses.xml /licenses/ ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/ COPY target/classes/licenses/3rd-party/ / +COPY target/classes/fonts/NotoSans /usr/local/share/fonts/NotoSans RUN groupadd -g ${GROUPID} ${GROUPNAME} && \ useradd -u ${USERID} -G ${GROUPNAME} ${AIOUSERNAME} && \ diff --git a/engines/aio/src/main/resources/application-default.yaml b/engines/aio/src/main/resources/application-default.yaml index 51e31f8d..e6b06b77 100644 --- a/engines/aio/src/main/resources/application-default.yaml +++ b/engines/aio/src/main/resources/application-default.yaml @@ -24,3 +24,6 @@ transform: exifTool: windowsOS: 'exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}' + misc: + pdfBox: + defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular} \ No newline at end of file diff --git a/engines/aio/src/test/java/org/alfresco/transform/aio/AIOTikaTest.java b/engines/aio/src/test/java/org/alfresco/transform/aio/AIOTikaTest.java index a1007538..40b3068e 100644 --- a/engines/aio/src/test/java/org/alfresco/transform/aio/AIOTikaTest.java +++ b/engines/aio/src/test/java/org/alfresco/transform/aio/AIOTikaTest.java @@ -71,7 +71,9 @@ public class AIOTikaTest extends TikaTest "startPage", "targetEncoding", "thumbnail", - "width" + "width", + "pdfFont", + "pdfFontSize" ), getOptionNames(controller.transformConfig(0).getBody().getTransformOptions())); } diff --git a/engines/misc/Dockerfile b/engines/misc/Dockerfile index 863835b0..9085bc94 100644 --- a/engines/misc/Dockerfile +++ b/engines/misc/Dockerfile @@ -19,6 +19,7 @@ ADD 
target/generated-resources/licenses /licenses ADD target/generated-resources/licenses.xml /licenses/ ADD target/generated-sources/license/THIRD-PARTY.txt /licenses/ COPY target/classes/licenses/3rd-party/ / +COPY target/classes/fonts/NotoSans /usr/local/share/fonts/NotoSans RUN groupadd -g ${GROUPID} ${GROUPNAME} && \ useradd -u ${USERID} -G ${GROUPNAME} ${MISCUSERNAME} && \ diff --git a/engines/misc/LICENSES.md b/engines/misc/LICENSES.md index 3fe03d83..8c7aeeaf 100644 --- a/engines/misc/LICENSES.md +++ b/engines/misc/LICENSES.md @@ -7,3 +7,4 @@ * commons-compress, PDFBox and poi-ooxml are from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the [Apache 2.0.txt](src/main/resources/licenses/3rd-party/Apache%202.0.txt) file placed in the root directory of the docker image. +* NotoSans https://openfontlicense.org/open-font-license-official-text/ \ No newline at end of file diff --git a/engines/misc/src/main/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformer.java b/engines/misc/src/main/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformer.java index 77ad0321..c8643d0d 100644 --- a/engines/misc/src/main/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformer.java +++ b/engines/misc/src/main/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformer.java @@ -26,16 +26,10 @@ */ package org.alfresco.transform.misc.transformers; -import org.alfresco.transform.base.TransformManager; -import org.alfresco.transform.base.util.CustomTransformerFileAdaptor; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageContentStream; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.tools.TextToPDF; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Component; +import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; +import 
static org.alfresco.transform.common.RequestParamMap.PDF_FONT; +import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE; +import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; import java.io.BufferedOutputStream; import java.io.BufferedReader; @@ -48,12 +42,31 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PushbackInputStream; import java.io.Reader; +import java.net.URI; import java.nio.charset.Charset; import java.util.HashMap; +import java.util.List; import java.util.Map; -import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; -import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; +import org.alfresco.transform.base.TransformManager; +import org.alfresco.transform.base.util.CustomTransformerFileAdaptor; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.util.autodetect.FontFileFinder; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.FontMappers; +import org.apache.pdfbox.pdmodel.font.FontMapping; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.tools.TextToPDF; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import jakarta.annotation.PostConstruct; /** *

@@ -77,20 +90,30 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor private static final byte EF = (byte) 0xEF; private static final byte BB = (byte) 0xBB; private static final byte BF = (byte) 0xBF; - + private static final String DEFAULT_FONT = "NotoSans-Regular"; + private static final int DEFAULT_FONT_SIZE = 10; private final PagedTextToPDF transformer; + @Value("${transform.core.misc.pdfBox.defaultFont:NotoSans-Regular}") + private String pdfBoxDefaultFont; + public TextToPdfContentTransformer() { transformer = new PagedTextToPDF(); } + @PostConstruct + public void init() + { + transformer.setDefaultFont(pdfBoxDefaultFont); + } + public void setStandardFont(String fontName) { try { - transformer.setFont(PagedTextToPDF.getStandardFont(fontName)); + transformer.setFont(fontName); } catch (Throwable e) { @@ -112,6 +135,11 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor } } + public String getUsedFont() + { + return transformer.getFontName(); + } + @Override public String getTransformerName() { @@ -130,6 +158,25 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor { pageLimit = parseInt(stringPageLimit, PAGE_LIMIT); } + String pdfFont = transformOptions.get(PDF_FONT); + if (pdfFont == null || pdfFont.isBlank()) + { + pdfFont = pdfBoxDefaultFont; + } + String pdfFontSize = transformOptions.get(PDF_FONT_SIZE); + Integer fontSize = null; + if (pdfFontSize != null && !pdfFontSize.isBlank()) + { + try + { + fontSize = parseInt(pdfFontSize, PDF_FONT_SIZE); + } + catch (Exception e) + { + fontSize = DEFAULT_FONT_SIZE; + logger.error("Error parsing font size {}, going to set it as {}", pdfFontSize, fontSize, e); + } + } PDDocument pdf = null; try (InputStream is = new FileInputStream(sourceFile); @@ -138,7 +185,7 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor { //TransformationOptionLimits limits = getLimits(reader, writer, options); 
//TransformationOptionPair pageLimits = limits.getPagesPair(); - pdf = transformer.createPDFFromText(ir, pageLimit); + pdf = transformer.createPDFFromText(ir, pageLimit, pdfFont, fontSize); pdf.save(os); } finally @@ -231,22 +278,34 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor } //duplicating until here + private String fontName = null; + private String defaultFont = null; + // The following code is based on the code in TextToPDF with the addition of // checks for page limits. // The calling code must close the PDDocument once finished with it. - public PDDocument createPDFFromText(Reader text, int pageLimit) + public PDDocument createPDFFromText(Reader text, int pageLimit, String pdfFontName, Integer pdfFontSize) throws IOException { PDDocument doc = null; int pageCount = 0; try { + doc = new PDDocument(); + + final PDFont font = getFont(doc, pdfFontName); + final int fontSize = pdfFontSize != null ? pdfFontSize : getFontSize(); + + fontName = font.getName(); + + logger.debug("Going to use font {} with size {}", fontName, fontSize); + final int margin = 40; - float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000; + float height = font.getFontDescriptor().getFontBoundingBox().getHeight() / 1000; //calculate font height and increase by 5 percent. - height = height * getFontSize() * 1.05f; - doc = new PDDocument(); + height = height * fontSize * 1.05f; + BufferedReader data = (text instanceof BufferedReader) ? 
(BufferedReader) text : new BufferedReader(text); String nextLine; PDPage page = new PDPage(); @@ -280,8 +339,8 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor { String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex]; lengthIfUsingNextWord = - (getFont().getStringWidth( - lineWithNextWord) / 1000) * getFontSize(); + (font.getStringWidth( + lineWithNextWord) / 1000) * fontSize; } } while (lineIndex < lineWords.length && @@ -304,7 +363,7 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor contentStream.close(); } contentStream = new PDPageContentStream(doc, page); - contentStream.setFont(getFont(), getFontSize()); + contentStream.setFont(font, fontSize); contentStream.beginText(); y = page.getMediaBox().getHeight() - margin + height; contentStream.moveTextPositionByAmount(margin, y); @@ -344,6 +403,199 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor } return doc; } + + public void setFont(String aFontName) + { + PDType1Font font = PagedTextToPDF.getStandardFont(aFontName); + + if (font != null) + { + super.setFont(font); + this.fontName = aFontName; + } + } + + /** + * Gets the font that will be used in document transformation using the following approaches: + *

    <ol> + *
  <li>Standard font map + *
  <li>Font Mappers + *
  <li>File system fonts + *
  <li>Transformer default font + *
  <li>PdfBox default font + * </ol>
+ * + * @param doc + * the document that will be transformed + * @param fontName + * the font name that will be used in transformation + * + * @return the font that was found + */ + private PDFont getFont(PDDocument doc, String fontName) + { + if (fontName == null) + { + fontName = fontName != null ? fontName : getDefaultFont(); + } + + // First, it tries to get the font from PdfBox STANDARD_14 map + PDFont font = getFromStandardFonts(fontName); + + // If not found, tries to get the font from FontMappers + if (font == null) + { + font = getFromFontMapper(fontName, doc); + + // If still not found, tries to get the font from file system + if (font == null) + { + font = getFromFileSystem(fontName); + + // If font is still null: + // - it will recursively get the transformer default font + // - Otherwise, it will use the PdfBox default font (Helvetica) + if (font == null) + { + if (defaultFont != null && !fontName.equals(defaultFont)) + { + font = getFont(doc, defaultFont); + } + else + { + font = getFont(); + } + } + } + + } + + return font; + } + + /** + * Gets the font from PdfBox standard fonts map + * + * @param fontName + * the font name to obtain + * + * @return the font object that has been found, otherwise null + */ + private PDFont getFromStandardFonts(String fontName) + { + return PagedTextToPDF.getStandardFont(fontName); + } + + /** + * Gets the font from {@link FontMappers} instance + * + * @param fontName + * the font name to obtain + * @param doc + * the PDF document + * + * @return the font object that has been found, otherwise null + */ + private PDFont getFromFontMapper(String fontName, PDDocument doc) + { + PDFont font = null; + FontMapping mapping = FontMappers.instance().getTrueTypeFont(fontName, null); + + if (mapping != null && mapping.getFont() != null && !mapping.isFallback()) + { + try + { + font = PDType0Font.load(doc, mapping.getFont().getOriginalData()); + } + catch (Exception e) + { + logger.error("Error loading font mapping {}", fontName, 
e); + } + } + + return font; + } + + /** + * Gets the font from existing file system fonts + * + * @param fontName + * the font name to obtain + * @return the font object that has been found, otherwise null + */ + private PDFont getFromFileSystem(String fontName) + { + PDFont font = null; + String nameWithExtension = fontName + ".ttf"; + + FontFileFinder fontFileFinder = new FontFileFinder(); + List uris = fontFileFinder.find(); + + for (URI uri : uris) + { + if (uri.getPath().contains(nameWithExtension)) + { + InputStream fontIS = null; + try + { + fontIS = new FileInputStream(new File(uri)); + if (null != fontIS) + { + PDDocument documentMock = new PDDocument(); + font = PDType0Font.load(documentMock, fontIS); + break; + } + } + catch (IOException ioe) + { + logger.error("Error loading font {} from filesystem", fontName, ioe); + } + finally + { + if (fontIS != null) + { + try + { + fontIS.close(); + } + catch (Exception e) + { + logger.error("Error closing font inputstream", e); + } + } + } + } + } + + return font; + } + + public String getFontName() + { + return this.fontName; + } + + public String getDefaultFont() + { + if (defaultFont == null || defaultFont.isBlank()) + { + return TextToPdfContentTransformer.DEFAULT_FONT; + } + + return defaultFont; + } + + public void setDefaultFont(String name) + { + if (name == null || name.isBlank()) + { + defaultFont = TextToPdfContentTransformer.DEFAULT_FONT; + } + else + { + this.defaultFont = name; + } + } } private int parseInt(String s, String paramName) diff --git a/engines/misc/src/main/resources/application-default.yaml b/engines/misc/src/main/resources/application-default.yaml index 62e91ec6..54cc2404 100644 --- a/engines/misc/src/main/resources/application-default.yaml +++ b/engines/misc/src/main/resources/application-default.yaml @@ -1,2 +1,7 @@ queue: engineRequestQueue: ${TRANSFORM_ENGINE_REQUEST_QUEUE:org.alfresco.transform.engine.misc.acs} +transform: + core: + misc: + pdfBox: + defaultFont: 
${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular} \ No newline at end of file diff --git a/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Bold.ttf b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Bold.ttf new file mode 100644 index 00000000..d84248ed Binary files /dev/null and b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Bold.ttf differ diff --git a/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-BoldItalic.ttf b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-BoldItalic.ttf new file mode 100644 index 00000000..3a34c4c3 Binary files /dev/null and b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-BoldItalic.ttf differ diff --git a/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Italic.ttf b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Italic.ttf new file mode 100644 index 00000000..c40c3562 Binary files /dev/null and b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Italic.ttf differ diff --git a/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Regular.ttf b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Regular.ttf new file mode 100644 index 00000000..fa4cff50 Binary files /dev/null and b/engines/misc/src/main/resources/fonts/NotoSans/NotoSans-Regular.ttf differ diff --git a/engines/misc/src/main/resources/licenses/3rd-party/OFL.txt b/engines/misc/src/main/resources/licenses/3rd-party/OFL.txt new file mode 100644 index 00000000..09f020bb --- /dev/null +++ b/engines/misc/src/main/resources/licenses/3rd-party/OFL.txt @@ -0,0 +1,93 @@ +Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic) + +This Font Software is licensed under the SIL Open Font License, Version 1.1. 
+This license is copied below, and is also available with a FAQ at: +https://openfontlicense.org + + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting -- in part or in whole -- any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. 
+ +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. 
+ +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/engines/misc/src/main/resources/misc_engine_config.json b/engines/misc/src/main/resources/misc_engine_config.json index e32b1f90..0dbd327f 100644 --- a/engines/misc/src/main/resources/misc_engine_config.json +++ b/engines/misc/src/main/resources/misc_engine_config.json @@ -1,7 +1,9 @@ { "transformOptions": { "textToPdfOptions": [ - {"value": {"name": "pageLimit"}} + {"value": {"name": "pageLimit"}}, + {"value": {"name": "pdfFont"}}, + {"value": {"name": "pdfFontSize"}} ], "stringOptions": [ {"value": {"name": "targetEncoding"}} diff --git a/engines/misc/src/test/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformerTest.java b/engines/misc/src/test/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformerTest.java index a33c786e..a177a945 100644 --- a/engines/misc/src/test/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformerTest.java +++ b/engines/misc/src/test/java/org/alfresco/transform/misc/transformers/TextToPdfContentTransformerTest.java @@ -26,10 +26,14 @@ */ package org.alfresco.transform.misc.transformers; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.text.PDFTextStripper; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; +import static 
org.alfresco.transform.common.RequestParamMap.PDF_FONT; +import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE; +import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -43,14 +47,19 @@ import java.io.StringWriter; import java.util.HashMap; import java.util.Map; -import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT; -import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING; -import static org.junit.jupiter.api.Assertions.assertEquals; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.text.PDFTextStripper; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test;; public class TextToPdfContentTransformerTest { TextToPdfContentTransformer transformer = new TextToPdfContentTransformer(); + private static final String TEXT_WITH_A_BREVE = "Gămbardella, Matthew, Corets, Evă"; + private static final String TEXT_WITHOUT_A_BREVE = "Gambardella, Matthew, Corets, Eva"; + @BeforeEach public void setUp() { @@ -137,7 +146,8 @@ public class TextToPdfContentTransformerTest @Test public void testUTF8WithBOM() throws Exception { - transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d"); + TransformCheckResult result = transformTextAndCheck("UTF-8", null, true, "ef bb bf 31 20 49 20 6d"); + assertEquals(result.getUsedFont(), "Times-Roman"); } @Test @@ -146,6 +156,78 @@ public class TextToPdfContentTransformerTest transformTextAndCheck("UTF-8", null, false, "31 20 49 20 6d 75 73 74"); } + /** + * Test if a different font can be chosen to perform the transformation + * + * @throws 
Exception + */ + @Test + public void testMNT23960_TimesBold_WithoutBreve() throws Exception + { + File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt"); + String encoding = "UTF-8"; + + writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null); + + Map parameters = new HashMap<>(); + parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName()); + parameters.put(PDF_FONT_SIZE, "30"); + + TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true, + parameters, false); + + assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName()); + assertNull(result.getErrorMessage()); + } + + /** + * Test if the default font is used when the chosen one is not found + * + * @throws Exception + */ + @Test + public void testMNT23960_InexistentFont_WithoutBreve() throws Exception + { + File sourceFile = File.createTempFile("TMP_MyDummyFont", ".txt"); + String encoding = "UTF-8"; + + writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null); + + Map parameters = new HashMap<>(); + parameters.put(PDF_FONT, "MyDummyFont"); + + TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true, + parameters, false); + + assertEquals(result.getUsedFont(), PDType1Font.TIMES_ROMAN.getName()); + assertNull(result.getErrorMessage()); + } + + /** + * Test if a different font can be chosen to perform the transformation with breve character. 
This test + * transformation should fail as Times-Bold font doesn't handle the breve character + * + * @throws Exception + */ + @Test + public void testMNT23960_TimesBold_WithBreve() throws Exception + { + File sourceFile = File.createTempFile("TMP_Times-Bold", ".txt"); + String encoding = "UTF-8"; + + writeToFile(sourceFile, TEXT_WITH_A_BREVE, encoding, null, null); + + Map parameters = new HashMap<>(); + parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName()); + + TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITH_A_BREVE, String.valueOf(-1), true, + parameters, true); + + assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName()); + assertNotNull(result.getErrorMessage()); + assertTrue(result.getErrorMessage().contains(PDType1Font.TIMES_BOLD.getName())); + } + /** * @param encoding to be used to read the source file * @param bigEndian indicates that the file should contain big endian characters, so typically the first byte of @@ -155,18 +237,18 @@ public class TextToPdfContentTransformerTest * @param expectedByteOrder The first few bytes of the source file so we can check the test data has been * correctly created. 
*/ - protected void transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom, + protected TransformCheckResult transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom, String expectedByteOrder) throws Exception { - transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder); + return transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder); } - protected void transformTextAndCheckPageLength(int pageLimit) throws Exception + protected TransformCheckResult transformTextAndCheckPageLength(int pageLimit) throws Exception { - transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null); + return transformTextAndCheckImpl(pageLimit, "UTF-8", null, null, null); } - private void transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom, + private TransformCheckResult transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom, String expectedByteOrder) throws Exception { StringBuilder sb = new StringBuilder(); @@ -177,7 +259,7 @@ public class TextToPdfContentTransformerTest writeToFile(sourceFile, text, encoding, bigEndian, validBom); checkFileBytes(sourceFile, expectedByteOrder); - transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit)); + return transformTextAndCheck(sourceFile, encoding, checkText, String.valueOf(pageLimit)); } private String createTestText(int pageLimit, StringBuilder sb) @@ -203,9 +285,17 @@ public class TextToPdfContentTransformerTest return checkText; } - private void transformTextAndCheck(File sourceFile, String encoding, String checkText, + private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText, String pageLimit) throws Exception { + return transformTextAndCheck(sourceFile, encoding, checkText, pageLimit, true, null, false); + } + + private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String 
checkText, + String pageLimit, boolean clean, Map extraParameters, boolean shouldFail) throws Exception + { + TransformCheckResult result = new TransformCheckResult(); + // And a temp writer File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf"); @@ -213,24 +303,47 @@ public class TextToPdfContentTransformerTest Map parameters = new HashMap<>(); parameters.put(PAGE_LIMIT, pageLimit); parameters.put(SOURCE_ENCODING, encoding); - transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null); + if (extraParameters != null) + { + parameters.putAll(extraParameters); + } - // Read back in the PDF and check it - PDDocument doc = PDDocument.load(targetFile); - PDFTextStripper textStripper = new PDFTextStripper(); - StringWriter textWriter = new StringWriter(); - textStripper.writeText(doc, textWriter); - doc.close(); + boolean failed = false; - String roundTrip = clean(textWriter.toString()); + try + { + transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile, null); + } + catch (Exception e) + { + failed = true; + result.setErrorMessage(e.getMessage()); + } - assertEquals( - checkText, roundTrip, - "Incorrect text in PDF when starting from text in " + encoding - ); + result.setUsedFont(transformer.getUsedFont()); + + if (!failed) + { + // Read back in the PDF and check it + PDDocument doc = PDDocument.load(targetFile); + PDFTextStripper textStripper = new PDFTextStripper(); + StringWriter textWriter = new StringWriter(); + textStripper.writeText(doc, textWriter); + doc.close(); + + String roundTrip = clean(textWriter.toString()); + + assertEquals(checkText, roundTrip, "Incorrect text in PDF when starting from text in " + encoding); + } + else + { + assertTrue(shouldFail && failed); + } sourceFile.delete(); targetFile.delete(); + + return result; } private String clean(String text) @@ -367,4 +480,30 @@ public class TextToPdfContentTransformerTest } return sb.toString(); } + + private static 
class TransformCheckResult + { + private String usedFont; + private String errorMessage; + + public String getUsedFont() + { + return usedFont; + } + + public void setUsedFont(String usedFont) + { + this.usedFont = usedFont; + } + + public String getErrorMessage() + { + return errorMessage; + } + + public void setErrorMessage(String errorMessage) + { + this.errorMessage = errorMessage; + } + } } diff --git a/model/src/main/java/org/alfresco/transform/common/RequestParamMap.java b/model/src/main/java/org/alfresco/transform/common/RequestParamMap.java index 1647accc..eb13e483 100644 --- a/model/src/main/java/org/alfresco/transform/common/RequestParamMap.java +++ b/model/src/main/java/org/alfresco/transform/common/RequestParamMap.java @@ -66,6 +66,8 @@ public interface RequestParamMap String PAGE_LIMIT = "pageLimit"; String PDF_FORMAT = "pdfFormat"; String PDF_ORIENTATION = "pdfOrientation"; + String PDF_FONT = "pdfFont"; + String PDF_FONT_SIZE = "pdfFontSize"; // Parameters interpreted by the TransformController String DIRECT_ACCESS_URL = "directAccessUrl";