ATS-671: Split engines into fat & skinny modules (ATS-674) (#192)

Each transform engine project has been separated into 2 modules so that an executable and non-executable jar can be created. Modules have been renamed such that *docker* has been removed from the artifactIds and project names. Co-authored-by: Erik Knizat <erik.knizat@alfresco.com> Co-authored-by: David Edwards <david.edwards@alfresco.com>
2025-09-24 14:31:24 +00:00 · 2020-03-27 13:45:15 +00:00
parent 46b2e6df5b
commit 3bed6930bf
215 changed files with 539 additions and 157 deletions
--- a/alfresco-transform-misc/alfresco-transform-misc/LICENSES.md
+++ b/alfresco-transform-misc/alfresco-transform-misc/LICENSES.md
@@ -0,0 +1,9 @@
+### Licenses
+
+* htmlparser http://htmlparser.sourceforge.net/license.html
+* commons-compress   http://jakarta.apache.org/commons/
+* pdfbox-tools  http://pdfbox.apache.org/
+* poi-ooxml http://poi.apache.org/
+* commons-compress, pdfbox-tools and poi-ooxml are Apache projects licensed under the Apache License 2.0. See the license at http://www.apache.org/licenses/LICENSE-2.0 or the
+  [Apache 2.0.txt](https://github.com/Alfresco/acs-community-packaging/blob/master/distribution/src/main/resources/licenses/3rd-party/Apache%202.0.txt)
+  file placed in the root directory of the Docker image.
--- a/alfresco-transform-misc/alfresco-transform-misc/pom.xml
+++ b/alfresco-transform-misc/alfresco-transform-misc/pom.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>alfresco-transform-misc</artifactId>
+    <name>Alfresco Miscellaneous Transformers</name>
+    <packaging>jar</packaging>
+
+    <parent>
+        <artifactId>alfresco-transform-core</artifactId>
+        <groupId>org.alfresco</groupId>
+        <version>2.2.0-SNAPSHOT</version>
+        <relativePath>../../pom.xml</relativePath>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.alfresco</groupId>
+            <artifactId>alfresco-transformer-base</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.dom4j</groupId>
+            <artifactId>dom4j</artifactId>
+        </dependency>
+
+        <!-- HtmlParserContentTransformer -->
+        <dependency>
+            <groupId>org.htmlparser</groupId>
+            <artifactId>htmlparser</artifactId>
+            <version>2.1</version>
+        </dependency>
+
+        <!-- AppleIWorksContentTransformer -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+        </dependency>
+
+        <!-- TextToPdfContentTransformer -->
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox-tools</artifactId>
+            <version>2.0.19</version>
+        </dependency>
+
+        <!-- OOXMLThumbnailContentTransformer -->
+        <!-- Declared exactly once: Maven requires groupId:artifactId to be unique within <dependencies>.
+             The version comes from the dependency.poi.version property, which is not defined in this POM. -->
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>${dependency.poi.version}</version>
+        </dependency>
+
+        <!-- EMLTransformer -->
+        <dependency>
+            <groupId>com.sun.mail</groupId>
+            <artifactId>javax.mail</artifactId>
+            <version>1.6.2</version>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <!-- No versions or configuration are declared for these plugins in this POM; presumably both
+             are inherited from the parent POM (TODO confirm). -->
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>license-maven-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-failsafe-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+</project>
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/AppleIWorksContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/AppleIWorksContentTransformer.java
@@ -0,0 +1,116 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Produces thumbnails and previews for Apple iWorks files by extracting the preview that is already
+ * embedded in the file.
+ * The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
+ * support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
+ * assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
+ * newer one. Both formats have the same mimetype.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Neil Mc Erlean
+ * @author eknizat
+ * @since 4.0
+ */
+public class AppleIWorksContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        AppleIWorksContentTransformer.class);
+
+    // Apple's zip entry names for previews in iWorks have changed over time.
+    private static final List<String> PDF_PATHS = ImmutableList.of(
+        "QuickLook/Preview.pdf");  // iWorks 2008/9
+    private static final List<String> JPG_PATHS = ImmutableList.of(
+        "QuickLook/Thumbnail.jpg", // iWorks 2008/9
+        "preview.jpg");            // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
+    //                (225 x 173) preview-web.jpg
+    //                 (53 x  41) preview-micro.jpg
+
+    /**
+     * Copies the embedded preview out of the iWorks zip archive into {@code targetFile}.
+     *
+     * @param sourceFile     the iWorks file, read as a zip archive
+     * @param targetFile     the file that receives the preview; replaced if it already exists
+     * @param sourceMimetype mimetype of the source, used for logging and error messages only
+     * @param targetMimetype selects the entry names searched for: the JPEG names when it equals
+     *                       {@code MIMETYPE_IMAGE_JPEG}, the PDF names otherwise
+     * @param parameters     not used by this transformer
+     * @throws RuntimeException if the archive holds no matching entry, or if reading the archive or
+     *                          writing the target fails (the {@link IOException} is kept as the cause)
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters)
+    {
+        logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
+            sourceMimetype, targetMimetype);
+
+        // iWorks files are zip (or package) files. A non-zip source surfaces as an IOException
+        // (ZipException) and is reported by the catch block below.
+        try (ZipArchiveInputStream archive = new ZipArchiveInputStream(
+            new BufferedInputStream(new FileInputStream(sourceFile))))
+        {
+            final List<String> candidateNames;
+            if (MIMETYPE_IMAGE_JPEG.equals(targetMimetype))
+            {
+                candidateNames = JPG_PATHS;
+            }
+            else
+            {
+                candidateNames = PDF_PATHS;
+            }
+
+            if (!copyFirstMatchingEntry(archive, candidateNames, targetFile))
+            {
+                throw new RuntimeException(
+                    "The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
+            }
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(
+                "Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
+                e);
+        }
+    }
+
+    /**
+     * Walks the archive entries in order and copies the first one whose name is in
+     * {@code candidateNames} to {@code targetFile}.
+     *
+     * @return {@code true} if an entry was copied, {@code false} if the archive was exhausted first
+     */
+    private static boolean copyFirstMatchingEntry(final ZipArchiveInputStream archive,
+        final List<String> candidateNames, final File targetFile) throws IOException
+    {
+        for (ZipArchiveEntry current = archive.getNextZipEntry();
+             current != null;
+             current = archive.getNextZipEntry())
+        {
+            if (candidateNames.contains(current.getName()))
+            {
+                // The archive stream is positioned on the current entry, so copying it copies that entry.
+                Files.copy(archive, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+                return true;
+            }
+        }
+        return false;
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/EMLTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/EMLTransformer.java
@@ -0,0 +1,233 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Map;
+import java.util.Properties;
+
+import javax.mail.MessagingException;
+import javax.mail.Multipart;
+import javax.mail.Part;
+import javax.mail.Session;
+import javax.mail.internet.MimeMessage;
+
+import org.alfresco.transformer.fs.FileManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
+ * messages. Searches for all text content parts, and returns them. Any
+ * attachments are ignored. TIKA Note - could be replaced with the Tika email
+ * parser. Would require a recursing parser to be specified, but not the full
+ * Auto one (we don't want attachments), just one containing text and html
+ * related parsers.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ */
+public class EMLTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
+
+    // Name of the Content-Type parameter that carries a part's character set.
+    private static final String CHARSET = "charset";
+    // Encoding assumed for an HTML part whose Content-Type does not name a usable charset.
+    private static final String DEFAULT_ENCODING = "UTF-8";
+
+    /**
+     * Parses {@code sourceFile} as a MIME message and writes its text content to {@code targetFile}.
+     *
+     * @param sourceFile     the RFC822 message to read
+     * @param targetFile     the file that receives the extracted text
+     * @param sourceMimetype not used by this transformer
+     * @param targetMimetype not used by this transformer
+     * @param parameters     not used by this transformer
+     * @throws Exception if the message cannot be read or parsed, or the target cannot be written
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters) throws Exception
+    {
+        logger.debug("Performing RFC822 to text transform.");
+        // NOTE(review): FileWriter encodes with the platform default charset, so the bytes written
+        // depend on the JVM's environment. Left unchanged because consumers of the target file are
+        // not visible here - confirm whether an explicit charset should be used.
+        try (InputStream contentInputStream = new BufferedInputStream(
+            new FileInputStream(sourceFile));
+             Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
+        {
+            MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
+                contentInputStream);
+
+            final StringBuilder sb = new StringBuilder();
+            Object content = mimeMessage.getContent();
+            if (content instanceof Multipart)
+            {
+                processMultiPart((Multipart) content, sb);
+            }
+            else
+            {
+                sb.append(content.toString());
+            }
+            bufferedFileWriter.write(sb.toString());
+        }
+    }
+
+    /**
+     * Find "text" parts of message recursively and appends it to sb StringBuilder
+     *
+     * @param multipart Multipart to process
+     * @param sb        StringBuilder the extracted text is appended to
+     * @throws MessagingException if the message structure cannot be read
+     * @throws IOException        if part content cannot be read
+     */
+    private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
+        IOException
+    {
+        boolean isAlternativeMultipart = multipart.getContentType().contains(
+            MIMETYPE_MULTIPART_ALTERNATIVE);
+        if (isAlternativeMultipart)
+        {
+            processAlternativeMultipart(multipart, sb);
+        }
+        else
+        {
+            for (int i = 0, n = multipart.getCount(); i < n; i++)
+            {
+                Part part = multipart.getBodyPart(i);
+                // Fetch the content once; it is needed both for the type check and for recursion.
+                Object partContent = part.getContent();
+                if (partContent instanceof Multipart)
+                {
+                    processMultiPart((Multipart) partContent, sb);
+                }
+                else
+                {
+                    processPart(part, sb);
+                }
+            }
+        }
+    }
+
+    /**
+     * Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb.
+     * The first text/plain part wins and ends the search; otherwise the last HTML part seen is used.
+     * Nested multipart/alternative parts met along the way are processed (and appended) immediately.
+     *
+     * @param multipart the multipart/alternative container to inspect
+     * @param sb        StringBuilder the extracted text is appended to
+     * @throws IOException        if part content cannot be read
+     * @throws MessagingException if the message structure cannot be read
+     */
+    private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
+        IOException, MessagingException
+    {
+        Part partToUse = null;
+        for (int i = 0, n = multipart.getCount(); i < n; i++)
+        {
+            Part part = multipart.getBodyPart(i);
+            if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
+            {
+                partToUse = part;
+                break;
+            }
+            else if (part.getContentType().contains(MIMETYPE_HTML))
+            {
+                partToUse = part;
+            }
+            else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
+            {
+                Object partContent = part.getContent();
+                if (partContent instanceof Multipart)
+                {
+                    processAlternativeMultipart((Multipart) partContent, sb);
+                }
+            }
+        }
+        if (partToUse != null)
+        {
+            processPart(partToUse, sb);
+        }
+    }
+
+    /**
+     * Finds text on a given mail part. Accepted parts types are text/html and text/plain.
+     * Attachments are ignored. HTML parts are written to a temporary file and converted to
+     * text with {@link HtmlParserContentTransformer.EncodingAwareStringBean}.
+     *
+     * @param part the mail part to extract text from
+     * @param sb   StringBuilder the extracted text is appended to
+     * @throws IOException        if part content or the temporary file cannot be read or written
+     * @throws MessagingException if the part's headers cannot be read
+     */
+    private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
+    {
+        boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
+        if (isAttachment)
+        {
+            return;
+        }
+        if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
+        {
+            sb.append(part.getContent().toString());
+        }
+        else if (part.getContentType().contains(MIMETYPE_HTML))
+        {
+            String mailPartContent = part.getContent().toString();
+
+            //create a temporary html file with same mail part content and encoding
+            File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
+                ".html");
+            try
+            {
+                String encoding = getMailPartContentEncoding(part);
+                try (OutputStreamWriter osWriter = new OutputStreamWriter(
+                    new FileOutputStream(tempHtmlFile), encoding))
+                {
+                    osWriter.write(mailPartContent);
+                }
+
+                //transform html file's content to plain text
+                HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
+                extractor.setCollapse(false);
+                extractor.setLinks(false);
+                extractor.setReplaceNonBreakingSpaces(false);
+                extractor.setURL(tempHtmlFile, encoding);
+                sb.append(extractor.getStrings());
+            }
+            finally
+            {
+                // Remove the temporary file even when writing or extraction fails.
+                tempHtmlFile.delete();
+            }
+        }
+    }
+
+    /**
+     * Reads the charset parameter from the part's Content-Type header.
+     *
+     * @param part the mail part whose Content-Type is inspected
+     * @return the charset value with quotes and surrounding whitespace removed, or
+     * {@link #DEFAULT_ENCODING} when no non-empty charset value is present
+     * @throws MessagingException if the Content-Type header cannot be read
+     */
+    private String getMailPartContentEncoding(Part part) throws MessagingException
+    {
+        String encoding = DEFAULT_ENCODING;
+        String contentType = part.getContentType();
+        int startIndex = contentType.indexOf(CHARSET);
+        // Skip "charset" plus the '=' that follows it.
+        int valueIndex = startIndex + CHARSET.length() + 1;
+        if (startIndex > 0 && valueIndex <= contentType.length())
+        {
+            String value = contentType.substring(valueIndex);
+            // Further parameters may follow (e.g. "; format=flowed"); the charset value ends at ';'.
+            int endIndex = value.indexOf(';');
+            if (endIndex >= 0)
+            {
+                value = value.substring(0, endIndex);
+            }
+            value = value.replaceAll("\"", "").trim();
+            if (!value.isEmpty())
+            {
+                encoding = value;
+            }
+        }
+        return encoding;
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/HtmlParserContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/HtmlParserContentTransformer.java
@@ -0,0 +1,190 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Map;
+
+import org.htmlparser.Parser;
+import org.htmlparser.beans.StringBean;
+import org.htmlparser.util.ParserException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Content transformer which wraps the HTML Parser library for
+ * parsing HTML content.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * <p>
+ * Since HTML Parser was updated from v1.6 to v2.1, META tags
+ * defining an encoding for the content via http-equiv=Content-Type
+ * will ONLY be respected if the encoding of the content item
+ * itself is set to ISO-8859-1.
+ * </p>
+ *
+ * <p>
+ * Tika Note - could be converted to use the Tika HTML parser,
+ * but we'd potentially need a custom text handler to replicate
+ * the current settings around links and non-breaking spaces.
+ * </p>
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ * @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
+ * @see org.htmlparser.beans.StringBean
+ * @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
+ */
+public class HtmlParserContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        HtmlParserContentTransformer.class);
+
+    /**
+     * Extracts the text of the HTML in {@code sourceFile} and writes it to {@code targetFile}.
+     *
+     * @param sourceFile     the HTML file to read
+     * @param targetFile     the file that receives the extracted text
+     * @param sourceMimetype not used by this transformer
+     * @param targetMimetype not used by this transformer
+     * @param parameters     may contain {@code SOURCE_ENCODING}, the charset name handed to the
+     *                       HTML parser; a {@code null} map is treated as having no entries
+     * @throws IllegalArgumentException if the source encoding is not a legal charset name or is not
+     *                                  supported by the JVM
+     * @throws Exception                if the target file cannot be written
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters) throws Exception
+    {
+        final String sourceEncoding = parameters == null ? null : parameters.get(SOURCE_ENCODING);
+        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
+
+        logger.debug("Performing HTML to text transform with sourceEncoding={}", sourceEncoding);
+
+        // Create the extractor
+        EncodingAwareStringBean extractor = new EncodingAwareStringBean();
+        extractor.setCollapse(false);
+        extractor.setLinks(false);
+        extractor.setReplaceNonBreakingSpaces(false);
+        extractor.setURL(sourceFile, sourceEncoding);
+        // get the text
+        String text = extractor.getStrings();
+
+        // write it to the writer
+        // NOTE(review): OutputStreamWriter without a charset encodes with the platform default.
+        // Left unchanged because consumers of the target file are not visible here - confirm
+        // whether an explicit charset should be used.
+        try (Writer writer = new BufferedWriter(
+            new OutputStreamWriter(new FileOutputStream(targetFile))))
+        {
+            writer.write(text);
+        }
+    }
+
+    /**
+     * Validates an optional charset name.
+     *
+     * @param encoding      the charset name to check; {@code null} is accepted
+     * @param parameterName the parameter name used in the error message
+     * @throws IllegalArgumentException if {@code encoding} is not a legal charset name (the original
+     *                                  exception is kept as the cause) or is not supported by the JVM
+     */
+    private void checkEncodingParameter(String encoding, String parameterName)
+    {
+        try
+        {
+            if (encoding != null && !Charset.isSupported(encoding))
+            {
+                throw new IllegalArgumentException(
+                    parameterName + "=" + encoding + " is not supported by the JVM.");
+            }
+        }
+        catch (IllegalCharsetNameException e)
+        {
+            throw new IllegalArgumentException(
+                parameterName + "=" + encoding + " is not a valid encoding.", e);
+        }
+    }
+
+    /**
+     * <p>
+     * This code is based on a class of the same name, originally implemented in alfresco-repository.
+     * </p>
+     *
+     * A version of {@link StringBean} which allows control of the
+     * encoding in the underlying HTML Parser.
+     * Unfortunately, StringBean doesn't allow easy over-riding of
+     * this, so we have to duplicate some code to control this.
+     * This allows us to correctly handle HTML files where the encoding
+     * is specified against the content property (rather than in the
+     * HTML Head Meta), see ALF-10466 for details.
+     */
+    public static class EncodingAwareStringBean extends StringBean
+    {
+        private static final long serialVersionUID = -9033414360428669553L;
+
+        /**
+         * Sets the File to extract strings from, and the encoding
+         * it's in (if known to Alfresco). Does nothing when the file's absolute path equals the
+         * URL currently set on the bean.
+         *
+         * @param file     The File that text should be fetched from.
+         * @param encoding The encoding of the input; {@code null} leaves the parser's encoding untouched
+         */
+        public void setURL(File file, String encoding)
+        {
+            String previousURL = getURL();
+            String newURL = file.getAbsolutePath();
+
+            if (previousURL == null || !newURL.equals(previousURL))
+            {
+                try
+                {
+                    // Captured before the parser is re-pointed so it can be reported as the old
+                    // value in the connection property-change event below.
+                    URLConnection conn = getConnection();
+
+                    if (null == mParser)
+                    {
+                        mParser = new Parser(newURL);
+                    }
+                    else
+                    {
+                        mParser.setURL(newURL);
+                    }
+
+                    if (encoding != null)
+                    {
+                        mParser.setEncoding(encoding);
+                    }
+
+                    mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
+                        getURL());
+                    mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
+                        mParser.getConnection());
+                    setStrings();
+                }
+                catch (ParserException pe)
+                {
+                    // A parser failure is not rethrown; its string form is handed to updateStrings.
+                    updateStrings(pe.toString());
+                }
+            }
+        }
+
+        /**
+         * @return the encoding reported by the underlying parser
+         */
+        public String getEncoding()
+        {
+            return mParser.getEncoding();
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/OOXMLThumbnailContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/OOXMLThumbnailContentTransformer.java
@@ -0,0 +1,130 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.Map;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
+ * This transformer will only work for OOXML files where thumbnailing was enabled,
+ * which isn't on by default on Windows, but is more common on Mac.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Nick Burch
+ * @author eknizat
+ */
+public class OOXMLThumbnailContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        OOXMLThumbnailContentTransformer.class);
+
+    /**
+     * Copies the first thumbnail part of the OOXML package in {@code sourceFile} to {@code targetFile}.
+     *
+     * @param sourceFile     the OOXML file to open as an OPC package
+     * @param targetFile     the file that receives the thumbnail bytes; replaced if it already exists
+     * @param sourceMimetype used for logging only
+     * @param targetMimetype used for logging and in the "no thumbnail" error message
+     * @param parameters     not used by this transformer
+     * @throws RuntimeException if an {@link IOException} occurs (kept as the cause)
+     * @throws Exception        if the package has no thumbnail relationship, or if opening or
+     *                          reading the package fails with a non-I/O exception
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters) throws Exception
+    {
+        logger.debug("Performing OOXML to jpeg transform with sourceMimetype={} targetMimetype={}",
+            sourceMimetype, targetMimetype);
+
+        // NOTE(review): the POI documentation describes OPCPackage.open(String) as opening the
+        // package with read/write permission, and close() as saving a writable package. That may
+        // rewrite the source file - confirm and consider opening with read-only access.
+        try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
+        {
+
+            // Does it have a thumbnail?
+            PackageRelationshipCollection rels = pkg.getRelationshipsByType(
+                PackageRelationshipTypes.THUMBNAIL);
+            if (rels.size() > 0)
+            {
+                // Get the thumbnail part
+                PackageRelationship tRel = rels.getRelationship(0);
+                PackagePart tPart = pkg.getPart(tRel);
+
+                // Write it to the target; the stream is closed even if the copy fails.
+                try (InputStream tStream = tPart.getInputStream())
+                {
+                    Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+                }
+            }
+            else
+            {
+                logger.debug("No thumbnail present in file.");
+                throw new Exception(
+                    "No thumbnail present in file, unable to generate " + targetMimetype);
+            }
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException("Unable to transform file.", e);
+        }
+    }
+
+    /*
+    // TODO Add this back to engine_config.json when the transformer is fixed for java 11
+    {
+      "transformerName": "ooxmlThumbnail",
+      "supportedSourceAndTargetList": [
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12",                           "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12",                           "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation",  "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12",                 "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",     "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12",                    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template",      "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12",                     "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12",                        "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide",         "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12",                        "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",          "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",       "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12",                             "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12",                          "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12",                             "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12",                      "targetMediaType": "image/jpeg"}
+      ],
+      "transformOptions": [
+      ]
+    }
+     */
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectableTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectableTransformer.java
@@ -0,0 +1,52 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.File;
+import java.util.Map;
+
+/**
+ * Implemented by transformers used by {@link SelectingTransformer}.
+ * <p>
+ * {@link SelectingTransformer} looks an implementation up by name and delegates the
+ * transformation to its {@link #transform} method.
+ *
+ * @author eknizat
+ */
+public interface SelectableTransformer
+{
+    // Keys that implementations look up in the "parameters" map passed to transform(...).
+    String SOURCE_ENCODING = "sourceEncoding";
+    String TARGET_ENCODING = "targetEncoding";
+
+    /**
+     * Implementation of the actual transformation.
+     *
+     * @param sourceFile     file to transform from
+     * @param targetFile     file to transform to
+     * @param sourceMimetype mimetype of the source file
+     * @param targetMimetype mimetype of the target file
+     * @param parameters     transform options keyed by name, for example
+     *                       {@link #SOURCE_ENCODING} or {@link #TARGET_ENCODING}
+     * @throws Exception if the transformation fails; {@link SelectingTransformer} reports an
+     *                   {@link IllegalArgumentException} as a bad request and any other
+     *                   exception as an internal server error
+     */
+    void transform(File sourceFile, File targetFile, String sourceMimetype,
+        String targetMimetype, Map<String, String> parameters) throws Exception;
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectingTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectingTransformer.java
@@ -0,0 +1,123 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import static org.springframework.http.HttpStatus.BAD_REQUEST;
+import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
+
+import java.io.File;
+import java.util.Map;
+import java.util.StringJoiner;
+
+import org.alfresco.transform.exceptions.TransformException;
+import org.alfresco.transformer.logging.LogEntry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * The SelectingTransformer selects a registered {@link SelectableTransformer}
+ * and delegates the transformation to its implementation.
+ *
+ * @author eknizat
+ */
+public class SelectingTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(SelectingTransformer.class);
+
+    private final Map<String, SelectableTransformer> transformers = ImmutableMap
+        .<String, SelectableTransformer>builder()
+        .put("appleIWorks", new AppleIWorksContentTransformer())
+        .put("html", new HtmlParserContentTransformer())
+        .put("string", new StringExtractingContentTransformer())
+        .put("textToPdf", new TextToPdfContentTransformer())
+        .put("rfc822", new EMLTransformer())
+        .put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
+        .build();
+
+    /**
+     * Performs a transform using a transformer selected based on the provided sourceMimetype and targetMimetype
+     *
+     * @param transform      the name of the transformer
+     * @param sourceFile     File to transform from
+     * @param targetFile     File to transform to
+     * @param sourceMimetype Mimetype of the source file
+     * @throws TransformException if there was a problem internally
+     */
+    public void transform(String transform, File sourceFile, File targetFile, String sourceMimetype,
+        String targetMimetype, Map<String, String> parameters) throws TransformException
+    {
+        try
+        {
+            final SelectableTransformer transformer = transformers.get(transform);
+            logOptions(sourceFile, targetFile, parameters);
+            transformer.transform(sourceFile, targetFile, sourceMimetype, targetMimetype,
+                parameters);
+        }
+        catch (IllegalArgumentException e)
+        {
+            throw new TransformException(BAD_REQUEST.value(), getMessage(e));
+        }
+        catch (Exception e)
+        {
+            throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
+        }
+        if (!targetFile.exists())
+        {
+            throw new TransformException(INTERNAL_SERVER_ERROR.value(),
+                "Transformer failed to create an output file. Target file does not exist.");
+        }
+        if (sourceFile.length() > 0 && targetFile.length() == 0)
+        {
+            throw new TransformException(INTERNAL_SERVER_ERROR.value(),
+                "Transformer failed to create an output file. Target file is empty but source file was not empty.");
+        }
+    }
+
+    private static String getMessage(Exception e)
+    {
+        return e.getMessage() == null || e.getMessage().isEmpty() ? e.getClass().getSimpleName() : e.getMessage();
+    }
+
+    private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
+    {
+        StringJoiner sj = new StringJoiner(" ");
+        parameters.forEach((k, v) -> sj.add(
+            "--" + k + "=" + v)); // keeping the existing style used in other T-Engines
+        sj.add(getExtension(sourceFile));
+        sj.add(getExtension(targetFile));
+        LogEntry.setOptions(sj.toString());
+    }
+
+    private static String getExtension(File file)
+    {
+        final String name = file.getName();
+        int i = name.lastIndexOf('.');
+        return i == -1 ? "???" : name.substring(i + 1);
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
@@ -0,0 +1,155 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Converts any textual format to plain text.
+ * <p>
+ * The transformation is sensitive to the source and target string encodings.
+ *
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class StringExtractingContentTransformer implements SelectableTransformer
+{
+
+    private static final Log logger = LogFactory.getLog(StringExtractingContentTransformer.class);
+
+    /**
+     * Text to text conversions are done directly using the content reader and writer string
+     * manipulation methods.
+     * <p>
+     * Extraction of text from binary content attempts to take the possible character
+     * encoding into account.  The text produced from this will, if the encoding was correct,
+     * be unformatted but valid.
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters) throws Exception
+    {
+        String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        String targetEncoding = parameters.get(TARGET_ENCODING);
+
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
+                         + " targetEncoding=" + targetEncoding);
+        }
+
+        Reader charReader = null;
+        Writer charWriter = null;
+        try
+        {
+            // Build reader
+            if (sourceEncoding == null)
+            {
+                charReader = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(sourceFile)));
+            }
+            else
+            {
+                checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
+                charReader = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
+            }
+
+            // Build writer
+            if (targetEncoding == null)
+            {
+                charWriter = new BufferedWriter(
+                    new OutputStreamWriter(new FileOutputStream(targetFile)));
+            }
+            else
+            {
+                checkEncodingParameter(targetEncoding, TARGET_ENCODING);
+                charWriter = new BufferedWriter(
+                    new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
+            }
+
+            // copy from the one to the other
+            char[] buffer = new char[8192];
+            int readCount = 0;
+            while (readCount > -1)
+            {
+                // write the last read count number of bytes
+                charWriter.write(buffer, 0, readCount);
+                // fill the buffer again
+                readCount = charReader.read(buffer);
+            }
+        }
+        finally
+        {
+            if (charReader != null)
+            {
+                try { charReader.close(); } catch (Throwable e) { logger.error(e); }
+            }
+            if (charWriter != null)
+            {
+                try { charWriter.close(); } catch (Throwable e) { logger.error(e); }
+            }
+        }
+        // done
+    }
+
+    private void checkEncodingParameter(String encoding, String paramterName)
+    {
+        try
+        {
+            if (!Charset.isSupported(encoding))
+            {
+                throw new IllegalArgumentException(
+                    paramterName + "=" + encoding + " is not supported by the JVM.");
+            }
+        }
+        catch (IllegalCharsetNameException e)
+        {
+            throw new IllegalArgumentException(
+                paramterName + "=" + encoding + " is not a valid encoding.");
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
@@ -0,0 +1,323 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.tools.TextToPDF;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Converts plain text to PDF.
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class TextToPdfContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
+
+    /** Name of the transform option that limits the number of pages written to the PDF. */
+    public static final String PAGE_LIMIT = "pageLimit";
+
+    private final PagedTextToPDF transformer;
+
+    public TextToPdfContentTransformer()
+    {
+        transformer = new PagedTextToPDF();
+    }
+
+    /**
+     * Selects one of the standard 14 PDF fonts by its base font name, e.g. "Times-Roman".
+     *
+     * @param fontName base font name of the font to use
+     * @throws RuntimeException if the name is not one of the standard fonts or the font cannot
+     *                          be set
+     */
+    public void setStandardFont(String fontName)
+    {
+        try
+        {
+            final PDType1Font font = PagedTextToPDF.getStandardFont(fontName);
+            if (font == null)
+            {
+                // Fail here rather than install a null font that would only surface later as a
+                // NullPointerException while generating a PDF.
+                throw new IllegalArgumentException(
+                    "'" + fontName + "' is not one of the standard 14 PDF fonts");
+            }
+            transformer.setFont(font);
+        }
+        catch (Throwable e)
+        {
+            throw new RuntimeException(
+                "Unable to set Standard Font for PDF generation: " + fontName, e);
+        }
+    }
+
+    /**
+     * Sets the font size used for the generated text.
+     *
+     * @param fontSize the font size
+     * @throws RuntimeException if the font size cannot be set
+     */
+    public void setFontSize(int fontSize)
+    {
+        try
+        {
+            transformer.setFontSize(fontSize);
+        }
+        catch (Throwable e)
+        {
+            throw new RuntimeException(
+                "Unable to set Font Size for PDF generation: " + fontSize, e);
+        }
+    }
+
+    /**
+     * Reads the source file as text and writes it to the target file as a PDF.
+     *
+     * @param sourceFile     text file to read
+     * @param targetFile     PDF file to write
+     * @param sourceMimetype not used by this transformer
+     * @param targetMimetype not used by this transformer
+     * @param parameters     transform options; {@link #SOURCE_ENCODING} selects the charset
+     *                       used to read the source and {@link #PAGE_LIMIT} optionally limits
+     *                       the number of pages generated
+     * @throws IllegalArgumentException if {@link #PAGE_LIMIT} is present but not an integer
+     * @throws Exception                if reading the source or writing the PDF fails
+     */
+    @Override
+    public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
+        final String targetMimetype, final Map<String, String> parameters) throws Exception
+    {
+        String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        String stringPageLimit = parameters.get(PAGE_LIMIT);
+        // A limit that is not greater than zero means "no limit".
+        int pageLimit = -1;
+        if (stringPageLimit != null)
+        {
+            pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
+        }
+
+        PDDocument pdf = null;
+        try (InputStream is = new FileInputStream(sourceFile);
+             Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
+             OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
+        {
+            pdf = transformer.createPDFFromText(ir, pageLimit);
+            pdf.save(os);
+        }
+        finally
+        {
+            if (pdf != null)
+            {
+                // Best effort: the document has already been saved (or the failure is already
+                // propagating), so a problem while closing is only logged.
+                try
+                {
+                    pdf.close();
+                }
+                catch (Throwable e)
+                {
+                    logger.warn("Failed to close the generated PDF document", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Creates a reader for the given stream.
+     *
+     * @param is       the stream to read
+     * @param encoding the name of the charset to use, may be {@code null}
+     * @return a reader using the named charset, or the system default charset if no encoding
+     *         was given or the JVM does not understand it
+     */
+    protected InputStreamReader buildReader(InputStream is, String encoding)
+    {
+        // If they gave an encoding, try to use it
+        if (encoding != null)
+        {
+            Charset charset = null;
+            try
+            {
+                charset = Charset.forName(encoding);
+            }
+            catch (Exception e)
+            {
+                logger.warn("JVM doesn't understand encoding '{}' when transforming text to pdf",
+                    encoding);
+            }
+            if (charset != null)
+            {
+                logger.debug("Processing plain text in encoding {}", charset.displayName());
+                return new InputStreamReader(is, charset);
+            }
+        }
+
+        // Fall back on the system default
+        logger.debug("Processing plain text using system default encoding");
+        return new InputStreamReader(is);
+    }
+
+    /**
+     * Parses an integer transform option.
+     *
+     * @param s         the value to parse
+     * @param paramName the name of the option, used in the error message
+     * @return the parsed value
+     * @throws IllegalArgumentException if the value is not an integer
+     */
+    private int parseInt(String s, String paramName)
+    {
+        try
+        {
+            return Integer.parseInt(s);
+        }
+        catch (NumberFormatException e)
+        {
+            throw new IllegalArgumentException(paramName + " parameter must be an integer.", e);
+        }
+    }
+
+    /**
+     * A {@link TextToPDF} that can stop generating pages once a page limit has been reached.
+     */
+    private static class PagedTextToPDF extends TextToPDF
+    {
+        // REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
+        // before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
+        static PDType1Font getStandardFont(String name)
+        {
+            return STANDARD_14.get(name);
+        }
+
+        private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
+
+        static
+        {
+            STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
+            STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
+            STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
+            STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
+                PDType1Font.TIMES_BOLD_ITALIC);
+            STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
+            STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
+            STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
+                PDType1Font.HELVETICA_OBLIQUE);
+            STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
+                PDType1Font.HELVETICA_BOLD_OBLIQUE);
+            STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
+            STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
+            STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
+            STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
+                PDType1Font.COURIER_BOLD_OBLIQUE);
+            STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
+            STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
+        }
+        //duplicating until here
+
+        /**
+         * Creates a PDF document from the given text, wrapping lines at word boundaries and
+         * starting a new page whenever the current one is full.
+         * <p>
+         * The following code is based on the code in TextToPDF with the addition of
+         * checks for page limits.
+         * The calling code must close the PDDocument once finished with it.
+         *
+         * @param text      the text to render
+         * @param pageLimit the maximum number of pages to generate; the remaining text is
+         *                  silently dropped once the limit is reached. A value that is not
+         *                  greater than zero means there is no limit
+         * @return the generated document, which the caller must close
+         * @throws IOException if the text cannot be read or the document cannot be built; the
+         *                     partially built document is closed before the exception is
+         *                     rethrown
+         */
+        public PDDocument createPDFFromText(Reader text, int pageLimit)
+            throws IOException
+        {
+            PDDocument doc = null;
+            int pageCount = 0;
+            try
+            {
+                final int margin = 40;
+                float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
+
+                //calculate font height and increase by 5 percent.
+                height = height * getFontSize() * 1.05f;
+                doc = new PDDocument();
+                BufferedReader data = new BufferedReader(text);
+                String nextLine;
+                PDPage page = new PDPage();
+                PDPageContentStream contentStream = null;
+                // Starting below the margin forces a first page to be created for the first line.
+                float y = -1;
+                float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
+
+                // There is a special case of creating a PDF document from an empty string.
+                boolean textIsEmpty = true;
+
+                outer:
+                while ((nextLine = data.readLine()) != null)
+                {
+
+                    // The input text is nonEmpty. New pages will be created and added
+                    // to the PDF document as they are needed, depending on the length of
+                    // the text.
+                    textIsEmpty = false;
+
+                    String[] lineWords = nextLine.trim().split(" ");
+                    int lineIndex = 0;
+                    while (lineIndex < lineWords.length)
+                    {
+                        // Take as many words as fit into the printable width; always at least one.
+                        final StringBuilder nextLineToDraw = new StringBuilder();
+                        float lengthIfUsingNextWord = 0;
+                        do
+                        {
+                            nextLineToDraw.append(lineWords[lineIndex]);
+                            nextLineToDraw.append(" ");
+                            lineIndex++;
+                            if (lineIndex < lineWords.length)
+                            {
+                                String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
+                                lengthIfUsingNextWord =
+                                    (getFont().getStringWidth(
+                                        lineWithNextWord) / 1000) * getFontSize();
+                            }
+                        }
+                        while (lineIndex < lineWords.length &&
+                               lengthIfUsingNextWord < maxStringLength);
+                        if (y < margin)
+                        {
+                            // Stop quietly once the page limit is reached. The pages are only
+                            // counted when a limit is in force.
+                            if (pageLimit > 0 && (pageCount++ >= pageLimit))
+                            {
+                                break outer;
+                            }
+
+                            // We have crossed the end-of-page boundary and need to extend the
+                            // document by another page.
+                            page = new PDPage();
+                            doc.addPage(page);
+                            if (contentStream != null)
+                            {
+                                contentStream.endText();
+                                contentStream.close();
+                            }
+                            contentStream = new PDPageContentStream(doc, page);
+                            contentStream.setFont(getFont(), getFontSize());
+                            contentStream.beginText();
+                            y = page.getMediaBox().getHeight() - margin + height;
+                            contentStream.moveTextPositionByAmount(margin, y);
+                        }
+
+                        if (contentStream == null)
+                        {
+                            throw new IOException("Error:Expected non-null content stream.");
+                        }
+                        contentStream.moveTextPositionByAmount(0, -height);
+                        y -= height;
+                        contentStream.drawString(nextLineToDraw.toString());
+                    }
+                }
+
+                // If the input text was the empty string, then the above while loop will have short-circuited
+                // and we will not have added any PDPages to the document.
+                // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
+                if (textIsEmpty)
+                {
+                    doc.addPage(page);
+                }
+
+                if (contentStream != null)
+                {
+                    contentStream.endText();
+                    contentStream.close();
+                }
+            }
+            catch (IOException | RuntimeException e)
+            {
+                // The caller never receives the document on failure, so it has to be closed here.
+                // Runtime exceptions are covered as well, otherwise the document would leak.
+                if (doc != null)
+                {
+                    try
+                    {
+                        doc.close();
+                    }
+                    catch (IOException closeFailure)
+                    {
+                        // Keep the original failure as the one that is reported.
+                        e.addSuppressed(closeFailure);
+                    }
+                }
+                throw e;
+            }
+            return doc;
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
@@ -0,0 +1,162 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+public class HtmlParserContentTransformerTest
+{
+    private static final String SOURCE_MIMETYPE = "text/html";
+    private static final String TARGET_MIMETYPE = "text/plain";
+
+    HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
+
+    /**
+     * Verifies that text is extracted correctly from HTML in several encodings, whether the
+     * encoding is passed as the source encoding parameter or declared in a meta tag inside
+     * the HTML itself. (ALF-10466)
+     *
+     * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a system dependent
+     * new line, so the expected text is built with the platform line separator.
+     */
+    @Test
+    public void testEncodingHandling() throws Exception
+    {
+        final String eol = System.getProperty("line.separator");
+        final String title = "Testing!";
+        final String firstParagraph = "This is some text in English";
+        final String secondParagraph = "This is more text in English";
+        final String thirdParagraph = "C'est en Fran\u00e7ais et Espa\u00f1ol";
+
+        final String head = "<html><head><title>" + title + "</title></head>" + eol;
+        final String body = "<body><p>" + firstParagraph + "</p>" + eol +
+                            "<p>" + secondParagraph + "</p>" + eol +
+                            "<p>" + thirdParagraph + "</p>" + eol;
+        final String tail = "</body></html>";
+
+        final String expected =
+            title + eol + firstParagraph + eol + secondParagraph + eol + thirdParagraph + eol;
+        final String plainHtml = head + body + tail;
+
+        // The file is written in, and declared as, the same encoding.
+        assertExtractedText(expected, plainHtml, "ISO-8859-1", "ISO-8859-1");
+        assertExtractedText(expected, plainHtml, "UTF-8", "UTF-8");
+        assertExtractedText(expected, plainHtml, "UTF-16", "UTF-16");
+
+        // Note - since HTML Parser 2.0 META tags specifying the
+        // document encoding will ONLY be respected if the original
+        // content type was set to ISO-8859-1.
+        //
+        // This means there is now only one test which we can perform
+        // to ensure that this now-limited overriding of the encoding
+        // takes effect: the file is written as UTF-8 and says so in its meta tag,
+        // while the source encoding parameter claims ISO-8859-1.
+        final String htmlWithMeta = head +
+            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
+            body + tail;
+        assertExtractedText(expected, htmlWithMeta, "UTF-8", "ISO-8859-1");
+
+        // Note - we can't test UTF-16 with only a meta encoding,
+        //  because without that the parser won't know about the
+        //  2 byte format so won't be able to identify the meta tag
+    }
+
+    /**
+     * Writes the HTML to a temporary file, transforms it to text and checks the result.
+     * The temporary files are removed again whether or not the check succeeds.
+     *
+     * @param expected         the text the transformer is expected to produce
+     * @param html             the HTML to transform
+     * @param fileEncoding     the encoding used to write the HTML to the source file
+     * @param declaredEncoding the value passed to the transformer as the source encoding
+     */
+    private void assertExtractedText(String expected, String html, String fileEncoding,
+        String declaredEncoding) throws Exception
+    {
+        File source = null;
+        File target = null;
+        try
+        {
+            source = File.createTempFile("AlfrescoTestSource_", ".html");
+            writeToFile(source, html, fileEncoding);
+
+            target = File.createTempFile("AlfrescoTestTarget_", ".txt");
+
+            final Map<String, String> options = new HashMap<>();
+            options.put(SOURCE_ENCODING, declaredEncoding);
+            transformer.transform(source, target, SOURCE_MIMETYPE, TARGET_MIMETYPE, options);
+
+            assertEquals(expected, readFromFile(target, "UTF-8"));
+        }
+        finally
+        {
+            if (source != null && source.exists())
+            {
+                source.delete();
+            }
+            if (target != null && target.exists())
+            {
+                target.delete();
+            }
+        }
+    }
+
+    /** Writes the content to the file using the given encoding. */
+    private void writeToFile(File file, String content, String encoding) throws Exception
+    {
+        try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(file), encoding))
+        {
+            out.append(content);
+        }
+    }
+
+    /** Reads the whole file and decodes it using the given encoding. */
+    private String readFromFile(File file, final String encoding) throws Exception
+    {
+        final byte[] bytes = Files.readAllBytes(file.toPath());
+        return new String(bytes, encoding);
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/TextToPdfContentTransformerTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/TextToPdfContentTransformerTest.java
@@ -0,0 +1,148 @@
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2019 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TextToPdfContentTransformerTest
+{
+    TextToPdfContentTransformer transformer = new TextToPdfContentTransformer();
+
+    @Before
+    public void setUp()
+    {
+        transformer.setStandardFont("Times-Roman");
+        transformer.setFontSize(20);
+    }
+
+    @Test
+    public void testUnlimitedPages() throws Exception
+    {
+        transformTextAndCheckPageLength(-1);
+    }
+
+    @Test
+    public void testLimitedTo1Page() throws Exception
+    {
+        transformTextAndCheckPageLength(1);
+    }
+
+    @Test
+    public void testLimitedTo2Pages() throws Exception
+    {
+        transformTextAndCheckPageLength(2);
+    }
+
+    @Test
+    public void testLimitedTo50Pages() throws Exception
+    {
+        transformTextAndCheckPageLength(50);
+    }
+
+    private void transformTextAndCheckPageLength(int pageLimit) throws Exception
+    {
+        int pageLength = 32;
+        int lines = (pageLength + 10) * ((pageLimit > 0) ? pageLimit : 1);
+        StringBuilder sb = new StringBuilder();
+        String checkText = null;
+        int cutoff = pageLimit * pageLength;
+        for (int i = 1; i <= lines; i++)
+        {
+            sb.append(i);
+            sb.append(" I must not talk in class or feed my homework to my cat.\n");
+            if (i == cutoff)
+                checkText = sb.toString();
+        }
+        sb.append("\nBart\n");
+        String text = sb.toString();
+        checkText = (checkText == null) ? clean(text) : clean(checkText);
+        transformTextAndCheck(text, "UTF-8", checkText, String.valueOf(pageLimit));
+    }
+
+    private void transformTextAndCheck(String text, String encoding, String checkText,
+        String pageLimit) throws Exception
+    {
+        // Get a reader for the text
+        File sourceFile = File.createTempFile("AlfrescoTestSource_", ".txt");
+        writeToFile(sourceFile, text, encoding);
+
+        // And a temp writer
+        File targetFile = File.createTempFile("AlfrescoTestTarget_", ".pdf");
+
+        // Transform to PDF
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put(PAGE_LIMIT, pageLimit);
+        transformer.transform(sourceFile, targetFile, "text/plain", "application/pdf", parameters);
+
+        // Read back in the PDF and check it
+        PDDocument doc = PDDocument.load(targetFile);
+        PDFTextStripper textStripper = new PDFTextStripper();
+        StringWriter textWriter = new StringWriter();
+        textStripper.writeText(doc, textWriter);
+        doc.close();
+
+        String roundTrip = clean(textWriter.toString());
+
+        assertEquals(
+            "Incorrect text in PDF when starting from text in " + encoding,
+            checkText, roundTrip
+        );
+
+        sourceFile.delete();
+        targetFile.delete();
+    }
+
+    private String clean(String text)
+    {
+        text = text.replaceAll("\\s+\\r", "");
+        text = text.replaceAll("\\s+\\n", "");
+        text = text.replaceAll("\\r", "");
+        text = text.replaceAll("\\n", "");
+        return text;
+    }
+
+    private void writeToFile(File file, String content, String encoding) throws Exception
+    {
+        try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
+        {
+            ow.append(content);
+        }
+    }
+}