fix mixed-line-ending

2025-10-01 14:41:17 +00:00 · 2022-02-23 22:40:19 +01:00
parent 157e261dde
commit 4175ac34da
56 changed files with 5160 additions and 5160 deletions
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/main/java/org/alfresco/transformer/Application.java
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/main/java/org/alfresco/transformer/Application.java
@@ -1,77 +1,77 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer;
-
-import io.micrometer.core.instrument.MeterRegistry;
-import org.alfresco.transformer.transformers.SelectingTransformer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.boot.SpringApplication;
-import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
-import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
-import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
-import org.springframework.boot.context.event.ApplicationReadyEvent;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.event.EventListener;
-
-import java.util.Arrays;
-
-import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
-
-@SpringBootApplication
-@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
-public class Application
-{
-    private static final Logger logger = LoggerFactory.getLogger(Application.class);
-
-    @Value("${container.name}")
-    private String containerName;
-
-    @Bean
-    MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
-    {
-        return registry -> registry.config().commonTags("containerName", containerName);
-    }
-
-    public static void main(String[] args)
-    {
-        SpringApplication.run(Application.class, args);
-    }
-
-    @EventListener(ApplicationReadyEvent.class)
-    public void startup()
-    {
-        logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
-        Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
-        Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
-        logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
-
-        logger.info("Starting application components... Done");
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer;
+
+import io.micrometer.core.instrument.MeterRegistry;
+import org.alfresco.transformer.transformers.SelectingTransformer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
+import org.springframework.boot.context.event.ApplicationReadyEvent;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.event.EventListener;
+
+import java.util.Arrays;
+
+import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
+
+@SpringBootApplication
+@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
+public class Application
+{
+    private static final Logger logger = LoggerFactory.getLogger(Application.class);
+
+    @Value("${container.name}")
+    private String containerName;
+
+    @Bean
+    MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
+    {
+        return registry -> registry.config().commonTags("containerName", containerName);
+    }
+
+    public static void main(String[] args)
+    {
+        SpringApplication.run(Application.class, args);
+    }
+
+    @EventListener(ApplicationReadyEvent.class)
+    public void startup()
+    {
+        logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
+        Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
+        Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
+        logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
+
+        logger.info("Starting application components... Done");
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/main/java/org/alfresco/transformer/MiscController.java
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/main/java/org/alfresco/transformer/MiscController.java
@@ -1,89 +1,89 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer;
-
-import org.alfresco.transformer.probes.ProbeTestTransform;
-import org.alfresco.transformer.transformers.SelectingTransformer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.stereotype.Controller;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
-import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
-import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
-
-@Controller
-public class MiscController extends AbstractTransformerController
-{
-    private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
-
-    private SelectingTransformer transformer = new SelectingTransformer();
-
-    @Override
-    public String getTransformerName()
-    {
-        return "Miscellaneous Transformers";
-    }
-
-    @Override
-    public String version()
-    {
-        return getTransformerName() + " available";
-    }
-
-    @Override
-    public ProbeTestTransform getProbeTestTransform()
-    {
-        // HtmlParserContentTransformer html -> text
-        // See the Javadoc on this method and Probes.md for the choice of these values.
-        return new ProbeTestTransform(this, "quick.html", "quick.txt",
-            119, 30, 150, 1024,
-            60 * 2 + 1, 60 * 2)
-        {
-            @Override
-            protected void executeTransformCommand(File sourceFile, File targetFile)
-            {
-                Map<String, String> parameters = new HashMap<>();
-                parameters.put(SOURCE_ENCODING, "UTF-8");
-                transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
-            }
-        };
-    }
-
-    @Override
-    public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
-                                 Map<String, String> transformOptions, File sourceFile, File targetFile)
-    {
-        transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
-        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer;
+
+import org.alfresco.transformer.probes.ProbeTestTransform;
+import org.alfresco.transformer.transformers.SelectingTransformer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Controller;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
+import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
+import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
+
+@Controller
+public class MiscController extends AbstractTransformerController
+{
+    private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
+
+    private SelectingTransformer transformer = new SelectingTransformer();
+
+    @Override
+    public String getTransformerName()
+    {
+        return "Miscellaneous Transformers";
+    }
+
+    @Override
+    public String version()
+    {
+        return getTransformerName() + " available";
+    }
+
+    @Override
+    public ProbeTestTransform getProbeTestTransform()
+    {
+        // HtmlParserContentTransformer html -> text
+        // See the Javadoc on this method and Probes.md for the choice of these values.
+        return new ProbeTestTransform(this, "quick.html", "quick.txt",
+            119, 30, 150, 1024,
+            60 * 2 + 1, 60 * 2)
+        {
+            @Override
+            protected void executeTransformCommand(File sourceFile, File targetFile)
+            {
+                Map<String, String> parameters = new HashMap<>();
+                parameters.put(SOURCE_ENCODING, "UTF-8");
+                transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
+            }
+        };
+    }
+
+    @Override
+    public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
+                                 Map<String, String> transformOptions, File sourceFile, File targetFile)
+    {
+        transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
+        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscQueueTransformServiceIT.java
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscQueueTransformServiceIT.java
@@ -1,55 +1,55 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer;
-
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
-
-import java.util.UUID;
-
-import org.alfresco.transform.client.model.TransformRequest;
-import org.springframework.boot.test.context.SpringBootTest;
-
-@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
-                properties = {"activemq.url=nio://localhost:61616"})
-public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
-{
-    @Override
-    protected TransformRequest buildRequest()
-    {
-        return TransformRequest
-            .builder()
-            .withRequestId(UUID.randomUUID().toString())
-            .withSourceMediaType(MIMETYPE_HTML)
-            .withTargetMediaType(MIMETYPE_TEXT_PLAIN)
-            .withTargetExtension("txt")
-            .withSchema(1)
-            .withClientData("ACS")
-            .withSourceReference(UUID.randomUUID().toString())
-            .withSourceSize(32L).build();
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
+
+import java.util.UUID;
+
+import org.alfresco.transform.client.model.TransformRequest;
+import org.springframework.boot.test.context.SpringBootTest;
+
+@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
+                properties = {"activemq.url=nio://localhost:61616"})
+public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
+{
+    @Override
+    protected TransformRequest buildRequest()
+    {
+        return TransformRequest
+            .builder()
+            .withRequestId(UUID.randomUUID().toString())
+            .withSourceMediaType(MIMETYPE_HTML)
+            .withTargetMediaType(MIMETYPE_TEXT_PLAIN)
+            .withTargetExtension("txt")
+            .withSchema(1)
+            .withClientData("ACS")
+            .withSourceReference(UUID.randomUUID().toString())
+            .withSourceSize(32L).build();
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscTransformerHttpRequestTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscTransformerHttpRequestTest.java
@@ -1,48 +1,48 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer;
-
-import org.springframework.boot.test.context.SpringBootTest;
-
-/**
- * Tests MiscController with a server test harness.
- */
-@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
-public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
-{
-    @Override
-    protected String getTransformerName()
-    {
-        return "Miscellaneous Transformers";
-    }
-
-    @Override
-    protected String getSourceExtension()
-    {
-        return "html";
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer;
+
+import org.springframework.boot.test.context.SpringBootTest;
+
+/**
+ * Tests MiscController with a server test harness.
+ */
+@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
+{
+    @Override
+    protected String getTransformerName()
+    {
+        return "Miscellaneous Transformers";
+    }
+
+    @Override
+    protected String getSourceExtension()
+    {
+        return "html";
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.alternative.eml
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.alternative.eml
@@ -1,30 +1,30 @@
-MIME-Version: 1.0
-Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
-Date: Thu, 16 Aug 2012 16:13:29 +0100
-Delivered-To: jane.doe@alfresco.com
-Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
-Subject: Attachment test
-From: <john.doe@alfresco.com>
-To: <jane.doe@alfresco.com>
-Content-Type: multipart/alternative;
-	boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
-
-This is a multipart message in MIME format.
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0
-Content-Type: text/plain;
-	charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-alternative plain text
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0
-Content-Type: text/html;
-	charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-<div dir=3D"ltr">alternative html text</div>
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
-Parts form an multipart/alternative should represent the same content in different formats
-In this eml example the content differs with the purpose of determining if right part was used in transformation
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
+Subject: Attachment test
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/alternative;
+	boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
+
+This is a multipart message in MIME format.
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0
+Content-Type: text/plain;
+	charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+alternative plain text
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0
+Content-Type: text/html;
+	charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">alternative html text</div>
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0--
+Parts form an multipart/alternative should represent the same content in different formats
+In this eml example the content differs with the purpose of determining if right part was used in transformation
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.attachment.eml
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.attachment.eml
@@ -1,44 +1,44 @@
-MIME-Version: 1.0
-Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
-Date: Thu, 16 Aug 2012 16:13:29 +0100
-Delivered-To: jane.doe@alfresco.com
-Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
-Subject: Attachment test
-From: <john.doe@alfresco.com>
-To: <jane.doe@alfresco.com>
-Content-Type: multipart/mixed;
-	boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
-
-This is a multipart message in MIME format.
-
------=_NextPart_000_0000_01D06C6A.D04F3750
-Content-Type: multipart/alternative;
-	boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
-
-
------=_NextPart_001_0001_01D06C6A.D04F3750
-Content-Type: text/plain;
-	charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-Mail with attachment content
-
------=_NextPart_001_0001_01D06C6A.D04F3750
-Content-Type: text/html;
-	charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-<div dir=3D"ltr">Mail with attachment content</div>
-
------=_NextPart_001_0001_01D06C6A.D04F3750--
-
------=_NextPart_000_0000_01D06C6A.D04F3750
-Content-Type: text/plain;
-	name="alt.txt"
-Content-Transfer-Encoding: quoted-printable
-Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
-Content-Disposition: attachment;
-	filename="alt.txt"
-
-File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
+Subject: Attachment test
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/mixed;
+	boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
+
+This is a multipart message in MIME format.
+
+------=_NextPart_000_0000_01D06C6A.D04F3750
+Content-Type: multipart/alternative;
+	boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
+
+
+------=_NextPart_001_0001_01D06C6A.D04F3750
+Content-Type: text/plain;
+	charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+Mail with attachment content
+
+------=_NextPart_001_0001_01D06C6A.D04F3750
+Content-Type: text/html;
+	charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">Mail with attachment content</div>
+
+------=_NextPart_001_0001_01D06C6A.D04F3750--
+
+------=_NextPart_000_0000_01D06C6A.D04F3750
+Content-Type: text/plain;
+	name="alt.txt"
+Content-Transfer-Encoding: quoted-printable
+Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
+Content-Disposition: attachment;
+	filename="alt.txt"
+
+File attachment content
+------=_NextPart_000_0000_01D06C6A.D04F3750--
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.htmlChars.eml
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.htmlChars.eml
@@ -1,28 +1,28 @@
-MIME-Version: 1.0
-Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
-Date: Thu, 16 Aug 2012 16:13:29 +0100
-Delivered-To: jane.doe@alfresco.com
-Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
-Subject: Attachment test
-From: <john.doe@alfresco.com>
-To: <jane.doe@alfresco.com>
-Content-Type: multipart/alternative;
-	boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
-
-This is a multipart message in MIME format.
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0
-Content-Type: text/plain;
-	charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-html special characters
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0
-Content-Type: text/html;
-	charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
-
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
+Subject: Attachment test
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/alternative;
+	boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
+
+This is a multipart message in MIME format.
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0
+Content-Type: text/plain;
+	charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+html special characters
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0
+Content-Type: text/html;
+	charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
+
+------=_NextPart_000_0005_01D06C6A.DBA98EC0--
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.key
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.key
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.nested.alternative.eml
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.nested.alternative.eml
@@ -1,41 +1,41 @@
-MIME-Version: 1.0
-Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
-Date: Thu, 16 Aug 2012 16:13:29 +0100
-Delivered-To: jane.doe@alfresco.com
-Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
-Subject: Attachment test
-From: <john.doe@alfresco.com>
-To: <jane.doe@alfresco.com>
-Content-Type: multipart/related;
- boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
- type="multipart/alternative"
-
-This is a multi-part message in MIME format.
-
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
-Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
-
-
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-nested alternative plain text
-
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
-Content-Type: text/html; charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-<div dir=3D"ltr">nested alternative html text</div>
-
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
-
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
-Content-Type: image/jpeg; name="image001.jpg"
-Content-Transfer-Encoding: base64
-Content-ID: <image001.jpg@01D146F0.63006280>
-
-image
-
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
-
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
+Subject: Attachment test
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/related;
+ boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
+ type="multipart/alternative"
+
+This is a multi-part message in MIME format.
+
+----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
+Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
+
+
+----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+nested alternative plain text
+
+----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
+Content-Type: text/html; charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">nested alternative html text</div>
+
+----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
+
+----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
+Content-Type: image/jpeg; name="image001.jpg"
+Content-Transfer-Encoding: base64
+Content-ID: <image001.jpg@01D146F0.63006280>
+
+image
+
+----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
+
--- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.spanish.eml
+++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/quick.spanish.eml
@@ -1,31 +1,31 @@
-MIME-Version: 1.0
-Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
-Date: Thu, 16 Aug 2012 16:13:29 +0100
-Delivered-To: jane.doe@alfresco.com
-Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
-Subject: The quick brown fox jumps over the lazy dog
-From: <john.doe@alfresco.com>
-To: <jane.doe@alfresco.com>
-Content-Type: multipart/alternative;
-	boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
-
-This is a multipart message in MIME format.
-
------=_NextPart_000_0009_01D06BC5.14D754D0
-Content-Type: text/plain;
-	charset="utf-8"
-Content-Transfer-Encoding: 8bit
-
-El rápido zorro marrón salta sobre el perro perezoso
-
-
------=_NextPart_000_0009_01D06BC5.14D754D0
-Content-Type: text/html;
-	charset="utf-8"
-Content-Transfer-Encoding: quoted-printable
-
-<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
-perezoso&nbsp;<br></div>
-
------=_NextPart_000_0009_01D06BC5.14D754D0--
-
+MIME-Version: 1.0
+Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
+Date: Thu, 16 Aug 2012 16:13:29 +0100
+Delivered-To: jane.doe@alfresco.com
+Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
+Subject: The quick brown fox jumps over the lazy dog
+From: <john.doe@alfresco.com>
+To: <jane.doe@alfresco.com>
+Content-Type: multipart/alternative;
+	boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
+
+This is a multipart message in MIME format.
+
+------=_NextPart_000_0009_01D06BC5.14D754D0
+Content-Type: text/plain;
+	charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+El rápido zorro marrón salta sobre el perro perezoso
+
+
+------=_NextPart_000_0009_01D06BC5.14D754D0
+Content-Type: text/html;
+	charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
+perezoso&nbsp;<br></div>
+
+------=_NextPart_000_0009_01D06BC5.14D754D0--
+
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/AppleIWorksContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/AppleIWorksContentTransformer.java
@@ -1,115 +1,115 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import com.google.common.collect.ImmutableList;
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.StandardCopyOption;
-import java.util.List;
-import java.util.Map;
-
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
-
-/**
- * Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
- * The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
- * support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
- * assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
- * newer one. Both formats have the same mimetype.
- *
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- *
- * @author Neil Mc Erlean
- * @author eknizat
- * @since 4.0
- */
-public class AppleIWorksContentTransformer implements SelectableTransformer
-{
-    private static final Logger logger = LoggerFactory.getLogger(
-        AppleIWorksContentTransformer.class);
-
-    // Apple's zip entry names for previews in iWorks have changed over time.
-    private static final List<String> PDF_PATHS = ImmutableList.of(
-        "QuickLook/Preview.pdf");  // iWorks 2008/9
-    private static final List<String> JPG_PATHS = ImmutableList.of(
-        "QuickLook/Thumbnail.jpg", // iWorks 2008/9
-        "preview.jpg");            // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
-    //                (225 x 173) preview-web.jpg
-    //                 (53 x  41) preview-micro.jpg
-
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile)
-    {
-        logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
-            sourceMimetype, targetMimetype);
-
-        // iWorks files are zip (or package) files.
-        // If it's not a zip file, the resultant ZipException will be caught as an IOException below.
-        try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
-            new BufferedInputStream(new FileInputStream(sourceFile))))
-        {
-            // Look through the zip file entries for the preview/thumbnail.
-            List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
-            ZipArchiveEntry entry;
-            boolean found = false;
-            while ((entry = iWorksZip.getNextZipEntry()) != null)
-            {
-                String name = entry.getName();
-                if (paths.contains(name))
-                {
-                    Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
-                    found = true;
-                    break;
-                }
-            }
-
-            if (!found)
-            {
-                throw new RuntimeException(
-                    "The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
-            }
-        }
-        catch (IOException e)
-        {
-            throw new RuntimeException(
-                "Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
-                e);
-        }
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+import java.util.Map;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
+
+/**
+ * Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
+ * The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
+ * support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
+ * assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
+ * newer one. Both formats have the same mimetype.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Neil Mc Erlean
+ * @author eknizat
+ * @since 4.0
+ */
+public class AppleIWorksContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        AppleIWorksContentTransformer.class);
+
+    // Apple's zip entry names for previews in iWorks have changed over time.
+    private static final List<String> PDF_PATHS = ImmutableList.of(
+        "QuickLook/Preview.pdf");  // iWorks 2008/9
+    private static final List<String> JPG_PATHS = ImmutableList.of(
+        "QuickLook/Thumbnail.jpg", // iWorks 2008/9
+        "preview.jpg");            // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
+    //                (225 x 173) preview-web.jpg
+    //                 (53 x  41) preview-micro.jpg
+
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile)
+    {
+        logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
+            sourceMimetype, targetMimetype);
+
+        // iWorks files are zip (or package) files.
+        // If it's not a zip file, the resultant ZipException will be caught as an IOException below.
+        try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
+            new BufferedInputStream(new FileInputStream(sourceFile))))
+        {
+            // Look through the zip file entries for the preview/thumbnail.
+            List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
+            ZipArchiveEntry entry;
+            boolean found = false;
+            while ((entry = iWorksZip.getNextZipEntry()) != null)
+            {
+                String name = entry.getName();
+                if (paths.contains(name))
+                {
+                    Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+                    found = true;
+                    break;
+                }
+            }
+
+            if (!found)
+            {
+                throw new RuntimeException(
+                    "The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
+            }
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(
+                "Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
+                e);
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/EMLTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/EMLTransformer.java
@@ -1,232 +1,232 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.alfresco.transformer.fs.FileManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.mail.MessagingException;
-import javax.mail.Multipart;
-import javax.mail.Part;
-import javax.mail.Session;
-import javax.mail.internet.MimeMessage;
-import java.io.BufferedInputStream;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.util.Map;
-import java.util.Properties;
-
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
-import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
-
-/**
- * Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
- * messages. Searches for all text content parts, and returns them. Any
- * attachments are ignored. TIKA Note - could be replaced with the Tika email
- * parser. Would require a recursing parser to be specified, but not the full
- * Auto one (we don't want attachments), just one containing text and html
- * related parsers.
- *
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- */
-public class EMLTransformer implements SelectableTransformer
-
-{
-    private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
-
-    private static final String CHARSET = "charset";
-    private static final String DEFAULT_ENCODING = "UTF-8";
-
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile) throws Exception
-    {
-        logger.debug("Performing RFC822 to text transform.");
-        // Use try with resource
-        try (InputStream contentInputStream = new BufferedInputStream(
-            new FileInputStream(sourceFile));
-             Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
-        {
-            MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
-                contentInputStream);
-
-            final StringBuilder sb = new StringBuilder();
-            Object content = mimeMessage.getContent();
-            if (content instanceof Multipart)
-            {
-                processMultiPart((Multipart) content, sb);
-            }
-            else
-            {
-                sb.append(content.toString());
-            }
-            bufferedFileWriter.write(sb.toString());
-        }
-    }
-
-    /**
-     * Find "text" parts of message recursively and appends it to sb StringBuilder
-     *
-     * @param multipart Multipart to process
-     * @param sb        StringBuilder
-     * @throws MessagingException
-     * @throws IOException
-     */
-    private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
-        IOException
-    {
-        boolean isAlternativeMultipart = multipart.getContentType().contains(
-            MIMETYPE_MULTIPART_ALTERNATIVE);
-        if (isAlternativeMultipart)
-        {
-            processAlternativeMultipart(multipart, sb);
-        }
-        else
-        {
-            for (int i = 0, n = multipart.getCount(); i < n; i++)
-            {
-                Part part = multipart.getBodyPart(i);
-                if (part.getContent() instanceof Multipart)
-                {
-                    processMultiPart((Multipart) part.getContent(), sb);
-                }
-                else
-                {
-                    processPart(part, sb);
-                }
-            }
-        }
-    }
-
-    /**
-     * Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
-     *
-     * @param multipart
-     * @param sb
-     * @throws IOException
-     * @throws MessagingException
-     */
-    private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
-        IOException, MessagingException
-    {
-        Part partToUse = null;
-        for (int i = 0, n = multipart.getCount(); i < n; i++)
-        {
-            Part part = multipart.getBodyPart(i);
-            if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
-            {
-                partToUse = part;
-                break;
-            }
-            else if (part.getContentType().contains(MIMETYPE_HTML))
-            {
-                partToUse = part;
-            }
-            else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
-            {
-                if (part.getContent() instanceof Multipart)
-                {
-                    processAlternativeMultipart((Multipart) part.getContent(), sb);
-                }
-            }
-        }
-        if (partToUse != null)
-        {
-            processPart(partToUse, sb);
-        }
-    }
-
-    /**
-     * Finds text on a given mail part. Accepted parts types are text/html and text/plain.
-     * Attachments are ignored
-     *
-     * @param part
-     * @param sb
-     * @throws IOException
-     * @throws MessagingException
-     */
-    private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
-    {
-        boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
-        if (isAttachment)
-        {
-            return;
-        }
-        if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
-        {
-            sb.append(part.getContent().toString());
-        }
-        else if (part.getContentType().contains(MIMETYPE_HTML))
-        {
-            String mailPartContent = part.getContent().toString();
-
-            //create a temporary html file with same mail part content and encoding
-            File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
-                ".html");
-            String encoding = getMailPartContentEncoding(part);
-            try (OutputStreamWriter osWriter = new OutputStreamWriter(
-                new FileOutputStream(tempHtmlFile), encoding))
-            {
-                osWriter.write(mailPartContent);
-            }
-
-            //transform html file's content to plain text
-            HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
-            extractor.setCollapse(false);
-            extractor.setLinks(false);
-            extractor.setReplaceNonBreakingSpaces(false);
-            extractor.setURL(tempHtmlFile, encoding);
-            sb.append(extractor.getStrings());
-
-            tempHtmlFile.delete();
-        }
-    }
-
-    private String getMailPartContentEncoding(Part part) throws MessagingException
-    {
-        String encoding = DEFAULT_ENCODING;
-        String contentType = part.getContentType();
-        int startIndex = contentType.indexOf(CHARSET);
-        if (startIndex > 0)
-        {
-            encoding = contentType.substring(startIndex + CHARSET.length() + 1)
-                                  .replaceAll("\"", "");
-        }
-        return encoding;
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.alfresco.transformer.fs.FileManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.mail.MessagingException;
+import javax.mail.Multipart;
+import javax.mail.Part;
+import javax.mail.Session;
+import javax.mail.internet.MimeMessage;
+import java.io.BufferedInputStream;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Map;
+import java.util.Properties;
+
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
+import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
+
+/**
+ * Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
+ * messages. Searches for all text content parts, and returns them. Any
+ * attachments are ignored. TIKA Note - could be replaced with the Tika email
+ * parser. Would require a recursing parser to be specified, but not the full
+ * Auto one (we don't want attachments), just one containing text and html
+ * related parsers.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ */
+public class EMLTransformer implements SelectableTransformer
+
+{
+    private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
+
+    private static final String CHARSET = "charset";
+    private static final String DEFAULT_ENCODING = "UTF-8";
+
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        logger.debug("Performing RFC822 to text transform.");
+        // Use try with resource
+        try (InputStream contentInputStream = new BufferedInputStream(
+            new FileInputStream(sourceFile));
+             Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
+        {
+            MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
+                contentInputStream);
+
+            final StringBuilder sb = new StringBuilder();
+            Object content = mimeMessage.getContent();
+            if (content instanceof Multipart)
+            {
+                processMultiPart((Multipart) content, sb);
+            }
+            else
+            {
+                sb.append(content.toString());
+            }
+            bufferedFileWriter.write(sb.toString());
+        }
+    }
+
+    /**
+     * Finds the "text" parts of the message recursively and appends them to the sb StringBuilder
+     *
+     * @param multipart Multipart to process
+     * @param sb        StringBuilder
+     * @throws MessagingException
+     * @throws IOException
+     */
+    private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
+        IOException
+    {
+        boolean isAlternativeMultipart = multipart.getContentType().contains(
+            MIMETYPE_MULTIPART_ALTERNATIVE);
+        if (isAlternativeMultipart)
+        {
+            processAlternativeMultipart(multipart, sb);
+        }
+        else
+        {
+            for (int i = 0, n = multipart.getCount(); i < n; i++)
+            {
+                Part part = multipart.getBodyPart(i);
+                if (part.getContent() instanceof Multipart)
+                {
+                    processMultiPart((Multipart) part.getContent(), sb);
+                }
+                else
+                {
+                    processPart(part, sb);
+                }
+            }
+        }
+    }
+
+    /**
+     * Finds the suitable part from a multipart/alternative and appends its text content to StringBuilder sb
+     *
+     * @param multipart
+     * @param sb
+     * @throws IOException
+     * @throws MessagingException
+     */
+    private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
+        IOException, MessagingException
+    {
+        Part partToUse = null;
+        for (int i = 0, n = multipart.getCount(); i < n; i++)
+        {
+            Part part = multipart.getBodyPart(i);
+            if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
+            {
+                partToUse = part;
+                break;
+            }
+            else if (part.getContentType().contains(MIMETYPE_HTML))
+            {
+                partToUse = part;
+            }
+            else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
+            {
+                if (part.getContent() instanceof Multipart)
+                {
+                    processAlternativeMultipart((Multipart) part.getContent(), sb);
+                }
+            }
+        }
+        if (partToUse != null)
+        {
+            processPart(partToUse, sb);
+        }
+    }
+
+    /**
+     * Finds text on a given mail part. Accepted part types are text/html and text/plain.
+     * Attachments are ignored.
+     *
+     * @param part
+     * @param sb
+     * @throws IOException
+     * @throws MessagingException
+     */
+    private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
+    {
+        boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
+        if (isAttachment)
+        {
+            return;
+        }
+        if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
+        {
+            sb.append(part.getContent().toString());
+        }
+        else if (part.getContentType().contains(MIMETYPE_HTML))
+        {
+            String mailPartContent = part.getContent().toString();
+
+            //create a temporary html file with same mail part content and encoding
+            File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
+                ".html");
+            String encoding = getMailPartContentEncoding(part);
+            try (OutputStreamWriter osWriter = new OutputStreamWriter(
+                new FileOutputStream(tempHtmlFile), encoding))
+            {
+                osWriter.write(mailPartContent);
+            }
+
+            //transform html file's content to plain text
+            HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
+            extractor.setCollapse(false);
+            extractor.setLinks(false);
+            extractor.setReplaceNonBreakingSpaces(false);
+            extractor.setURL(tempHtmlFile, encoding);
+            sb.append(extractor.getStrings());
+
+            tempHtmlFile.delete();
+        }
+    }
+
+    private String getMailPartContentEncoding(Part part) throws MessagingException
+    {
+        String encoding = DEFAULT_ENCODING;
+        String contentType = part.getContentType();
+        int startIndex = contentType.indexOf(CHARSET);
+        if (startIndex > 0)
+        {
+            encoding = contentType.substring(startIndex + CHARSET.length() + 1)
+                                  .replaceAll("\"", "");
+        }
+        return encoding;
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/HtmlParserContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/HtmlParserContentTransformer.java
@@ -1,192 +1,192 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.htmlparser.Parser;
-import org.htmlparser.beans.StringBean;
-import org.htmlparser.util.ParserException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-
-/**
- * Content transformer which wraps the HTML Parser library for
- * parsing HTML content.
- *
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- *
- * <p>
- * Since HTML Parser was updated from v1.6 to v2.1, META tags
- * defining an encoding for the content via http-equiv=Content-Type
- * will ONLY be respected if the encoding of the content item
- * itself is set to ISO-8859-1.
- * </p>
- *
- * <p>
- * Tika Note - could be converted to use the Tika HTML parser,
- * but we'd potentially need a custom text handler to replicate
- * the current settings around links and non-breaking spaces.
- * </p>
- *
- * @author Derek Hulley
- * @author eknizat
- * @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
- * @see org.htmlparser.beans.StringBean
- * @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
- */
-public class HtmlParserContentTransformer implements SelectableTransformer
-{
-    private static final Logger logger = LoggerFactory.getLogger(
-        HtmlParserContentTransformer.class);
-
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile) throws Exception
-    {
-        String sourceEncoding = parameters.get(SOURCE_ENCODING);
-        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
-
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
-        }
-
-        // Create the extractor
-        EncodingAwareStringBean extractor = new EncodingAwareStringBean();
-        extractor.setCollapse(false);
-        extractor.setLinks(false);
-        extractor.setReplaceNonBreakingSpaces(false);
-        extractor.setURL(sourceFile, sourceEncoding);
-        // get the text
-        String text = extractor.getStrings();
-
-        // write it to the writer
-        try (Writer writer = new BufferedWriter(
-            new OutputStreamWriter(new FileOutputStream(targetFile))))
-        {
-            writer.write(text);
-        }
-    }
-
-    private void checkEncodingParameter(String encoding, String parameterName)
-    {
-        try
-        {
-            if (encoding != null && !Charset.isSupported(encoding))
-            {
-                throw new IllegalArgumentException(
-                    parameterName + "=" + encoding + " is not supported by the JVM.");
-            }
-        }
-        catch (IllegalCharsetNameException e)
-        {
-            throw new IllegalArgumentException(
-                parameterName + "=" + encoding + " is not a valid encoding.");
-        }
-    }
-
-    /**
-     * <p>
-     * This code is based on a class of the same name, originally implemented in alfresco-repository.
-     * </p>
-     *
-     * A version of {@link StringBean} which allows control of the
-     * encoding in the underlying HTML Parser.
-     * Unfortunately, StringBean doesn't allow easy over-riding of
-     * this, so we have to duplicate some code to control this.
-     * This allows us to correctly handle HTML files where the encoding
-     * is specified against the content property (rather than in the
-     * HTML Head Meta), see ALF-10466 for details.
-     */
-    public static class EncodingAwareStringBean extends StringBean
-    {
-        private static final long serialVersionUID = -9033414360428669553L;
-
-        /**
-         * Sets the File to extract strings from, and the encoding
-         * it's in (if known to Alfresco)
-         *
-         * @param file     The File that text should be fetched from.
-         * @param encoding The encoding of the input
-         */
-        public void setURL(File file, String encoding)
-        {
-            String previousURL = getURL();
-            String newURL = file.getAbsolutePath();
-
-            if (previousURL == null || !newURL.equals(previousURL))
-            {
-                try
-                {
-                    URLConnection conn = getConnection();
-
-                    if (null == mParser)
-                    {
-                        mParser = new Parser(newURL);
-                    }
-                    else
-                    {
-                        mParser.setURL(newURL);
-                    }
-
-                    if (encoding != null)
-                    {
-                        mParser.setEncoding(encoding);
-                    }
-
-                    mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
-                        getURL());
-                    mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
-                        mParser.getConnection());
-                    setStrings();
-                }
-                catch (ParserException pe)
-                {
-                    updateStrings(pe.toString());
-                }
-            }
-        }
-
-        public String getEncoding()
-        {
-            return mParser.getEncoding();
-        }
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.htmlparser.Parser;
+import org.htmlparser.beans.StringBean;
+import org.htmlparser.util.ParserException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+
+/**
+ * Content transformer which wraps the HTML Parser library for
+ * parsing HTML content.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * <p>
+ * Since HTML Parser was updated from v1.6 to v2.1, META tags
+ * defining an encoding for the content via http-equiv=Content-Type
+ * will ONLY be respected if the encoding of the content item
+ * itself is set to ISO-8859-1.
+ * </p>
+ *
+ * <p>
+ * Tika Note - could be converted to use the Tika HTML parser,
+ * but we'd potentially need a custom text handler to replicate
+ * the current settings around links and non-breaking spaces.
+ * </p>
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ * @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
+ * @see org.htmlparser.beans.StringBean
+ * @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
+ */
+public class HtmlParserContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        HtmlParserContentTransformer.class);
+
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
+
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
+        }
+
+        // Create the extractor
+        EncodingAwareStringBean extractor = new EncodingAwareStringBean();
+        extractor.setCollapse(false);
+        extractor.setLinks(false);
+        extractor.setReplaceNonBreakingSpaces(false);
+        extractor.setURL(sourceFile, sourceEncoding);
+        // get the text
+        String text = extractor.getStrings();
+
+        // write it to the writer
+        try (Writer writer = new BufferedWriter(
+            new OutputStreamWriter(new FileOutputStream(targetFile))))
+        {
+            writer.write(text);
+        }
+    }
+
+    private void checkEncodingParameter(String encoding, String parameterName)
+    {
+        try
+        {
+            if (encoding != null && !Charset.isSupported(encoding))
+            {
+                throw new IllegalArgumentException(
+                    parameterName + "=" + encoding + " is not supported by the JVM.");
+            }
+        }
+        catch (IllegalCharsetNameException e)
+        {
+            throw new IllegalArgumentException(
+                parameterName + "=" + encoding + " is not a valid encoding.");
+        }
+    }
+
+    /**
+     * <p>
+     * This code is based on a class of the same name, originally implemented in alfresco-repository.
+     * </p>
+     *
+     * A version of {@link StringBean} which allows control of the
+     * encoding in the underlying HTML Parser.
+     * Unfortunately, StringBean doesn't allow easy over-riding of
+     * this, so we have to duplicate some code to control this.
+     * This allows us to correctly handle HTML files where the encoding
+     * is specified against the content property (rather than in the
+     * HTML Head Meta), see ALF-10466 for details.
+     */
+    public static class EncodingAwareStringBean extends StringBean
+    {
+        private static final long serialVersionUID = -9033414360428669553L;
+
+        /**
+         * Sets the File to extract strings from, and the encoding
+         * it's in (if known to Alfresco)
+         *
+         * @param file     The File that text should be fetched from.
+         * @param encoding The encoding of the input
+         */
+        public void setURL(File file, String encoding)
+        {
+            String previousURL = getURL();
+            String newURL = file.getAbsolutePath();
+
+            if (previousURL == null || !newURL.equals(previousURL))
+            {
+                try
+                {
+                    URLConnection conn = getConnection();
+
+                    if (null == mParser)
+                    {
+                        mParser = new Parser(newURL);
+                    }
+                    else
+                    {
+                        mParser.setURL(newURL);
+                    }
+
+                    if (encoding != null)
+                    {
+                        mParser.setEncoding(encoding);
+                    }
+
+                    mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
+                        getURL());
+                    mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
+                        mParser.getConnection());
+                    setStrings();
+                }
+                catch (ParserException pe)
+                {
+                    updateStrings(pe.toString());
+                }
+            }
+        }
+
+        public String getEncoding()
+        {
+            return mParser.getEncoding();
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/OOXMLThumbnailContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/OOXMLThumbnailContentTransformer.java
@@ -1,130 +1,130 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
-import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
-import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Files;
-import java.nio.file.StandardCopyOption;
-import java.util.Map;
-
-/**
- * Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
- * This transformer will only work for OOXML files where thumbnailing was enabled,
- * which isn't on by default on Windows, but is more common on Mac.
- *
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- *
- * @author Nick Burch
- * @author eknizat
- */
-public class OOXMLThumbnailContentTransformer implements SelectableTransformer
-{
-    private static final Logger logger = LoggerFactory.getLogger(
-        OOXMLThumbnailContentTransformer.class);
-
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile) throws Exception
-    {
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
-                         + " targetMimetype=" + targetMimetype);
-        }
-
-        try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
-        {
-
-            // Does it have a thumbnail?
-            PackageRelationshipCollection rels = pkg.getRelationshipsByType(
-                PackageRelationshipTypes.THUMBNAIL);
-            if (rels.size() > 0)
-            {
-                // Get the thumbnail part
-                PackageRelationship tRel = rels.getRelationship(0);
-                PackagePart tPart = pkg.getPart(tRel);
-
-                // Write it to the target
-                InputStream tStream = tPart.getInputStream();
-                Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
-                tStream.close();
-            }
-            else
-            {
-                logger.debug("No thumbnail present in file.");
-                throw new Exception(
-                    "No thumbnail present in file, unable to generate " + targetMimetype);
-            }
-        }
-        catch (IOException e)
-        {
-            throw new RuntimeException("Unable to transform file.", e);
-        }
-    }
-
-    /*
-    // TODO Add this back to engine_config.json when the transformer is fixed for java 11
-    {
-      "transformerName": "ooxmlThumbnail",
-      "supportedSourceAndTargetList": [
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",    "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12",                           "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",    "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12",                           "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation",  "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12",                 "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",     "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12",                    "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template",      "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12",                     "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12",                        "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide",         "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12",                        "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",          "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",       "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12",                             "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12",                          "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12",                             "targetMediaType": "image/jpeg"},
-        {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12",                      "targetMediaType": "image/jpeg"}
-      ],
-      "transformOptions": [
-      ]
-    }
-     */
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.Map;
+
+/**
+ * Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
+ * This transformer will only work for OOXML files where thumbnailing was enabled,
+ * which isn't on by default on Windows, but is more common on Mac.
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Nick Burch
+ * @author eknizat
+ */
+public class OOXMLThumbnailContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(
+        OOXMLThumbnailContentTransformer.class);
+
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
+                         + " targetMimetype=" + targetMimetype);
+        }
+
+        try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
+        {
+
+            // Does it have a thumbnail?
+            PackageRelationshipCollection rels = pkg.getRelationshipsByType(
+                PackageRelationshipTypes.THUMBNAIL);
+            if (rels.size() > 0)
+            {
+                // Get the thumbnail part
+                PackageRelationship tRel = rels.getRelationship(0);
+                PackagePart tPart = pkg.getPart(tRel);
+
+                // Write it to the target
+                InputStream tStream = tPart.getInputStream();
+                Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+                tStream.close();
+            }
+            else
+            {
+                logger.debug("No thumbnail present in file.");
+                throw new Exception(
+                    "No thumbnail present in file, unable to generate " + targetMimetype);
+            }
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException("Unable to transform file.", e);
+        }
+    }
+
+    /*
+    // TODO Add this back to engine_config.json when the transformer is fixed for java 11
+    {
+      "transformerName": "ooxmlThumbnail",
+      "supportedSourceAndTargetList": [
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12",                           "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12",                           "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation",  "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12",                 "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",     "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12",                    "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template",      "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12",                     "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12",                        "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide",         "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12",                        "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",          "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",       "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12",                             "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12",                          "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12",                             "targetMediaType": "image/jpeg"},
+        {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12",                      "targetMediaType": "image/jpeg"}
+      ],
+      "transformOptions": [
+      ]
+    }
+     */
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectableTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectableTransformer.java
@@ -1,53 +1,53 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import java.io.File;
-import java.util.Map;
-
-/**
- * Implemented by transformers used by {@link SelectingTransformer}.
- *
- * @author eknizat
- */
-public interface SelectableTransformer
-{
-    default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
-                   File sourceFile, File targetFile) throws Exception
-    {
-    }
-
-    default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
-                                 File sourceFile, File targetFile) throws Exception
-    {
-    }
-
-    default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
-                               File sourceFile, File targetFile) throws Exception
-    {
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import java.io.File;
+import java.util.Map;
+
+/**
+ * Implemented by transformers used by {@link SelectingTransformer}.
+ *
+ * @author eknizat
+ */
+public interface SelectableTransformer
+{
+    default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
+                   File sourceFile, File targetFile) throws Exception
+    {
+    }
+
+    default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
+                                 File sourceFile, File targetFile) throws Exception
+    {
+    }
+
+    default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
+                               File sourceFile, File targetFile) throws Exception
+    {
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectingTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/SelectingTransformer.java
@@ -1,114 +1,114 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import com.google.common.collect.ImmutableMap;
-import org.alfresco.transformer.executors.Transformer;
-import org.alfresco.transformer.logging.LogEntry;
-import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
-import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
-
-import java.io.File;
-import java.util.Map;
-import java.util.StringJoiner;
-
-import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
-
-/**
- * The SelectingTransformer selects a registered {@link SelectableTransformer}
- * and delegates the transformation to its implementation.
- *
- * @author eknizat
- */
-public class SelectingTransformer implements Transformer
-{
-    private static final String ID = "misc";
-
-    public static final String LICENCE =
-            "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
-            "Additional libraries used:\n" +
-            "* htmlparser http://htmlparser.sourceforge.net/license.html";
-
-    private final Map<String, SelectableTransformer> transformers = ImmutableMap
-        .<String, SelectableTransformer>builder()
-        .put("appleIWorks", new AppleIWorksContentTransformer())
-        .put("html", new HtmlParserContentTransformer())
-        .put("string", new StringExtractingContentTransformer())
-        .put("textToPdf", new TextToPdfContentTransformer())
-        .put("rfc822", new EMLTransformer())
-        .put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
-        .put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
-        .put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
-        .build();
-
-    @Override
-    public String getTransformerId()
-    {
-        return ID;
-    }
-
-    @Override
-    public void transform(String transformName, String sourceMimetype, String targetMimetype,
-                           Map<String, String> transformOptions,
-                           File sourceFile, File targetFile) throws Exception
-    {
-        final SelectableTransformer transformer = transformers.get(transformName);
-        logOptions(sourceFile, targetFile, transformOptions);
-        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
-    }
-
-    public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
-                                Map<String, String> transformOptions,
-                                File sourceFile, File targetFile) throws Exception
-    {
-        final SelectableTransformer transformer = transformers.get(transformName);
-        logOptions(sourceFile, targetFile, transformOptions);
-        transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
-    }
-
-    private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
-    {
-        StringJoiner sj = new StringJoiner(" ");
-        parameters.forEach((k, v) ->
-        {
-            if (!TRANSFORM_NAME_PARAMETER.equals(k))
-            {
-                sj.add("--" + k + "=" + v);
-            }
-        }); // keeping the existing style used in other T-Engines
-        sj.add(getExtension(sourceFile));
-        sj.add(getExtension(targetFile));
-        LogEntry.setOptions(sj.toString());
-    }
-
-    private static String getExtension(File file)
-    {
-        final String name = file.getName();
-        int i = name.lastIndexOf('.');
-        return i == -1 ? "???" : name.substring(i + 1);
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import com.google.common.collect.ImmutableMap;
+import org.alfresco.transformer.executors.Transformer;
+import org.alfresco.transformer.logging.LogEntry;
+import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
+import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
+
+import java.io.File;
+import java.util.Map;
+import java.util.StringJoiner;
+
+import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
+
+/**
+ * The SelectingTransformer selects a registered {@link SelectableTransformer}
+ * and delegates the transformation to its implementation.
+ *
+ * @author eknizat
+ */
+public class SelectingTransformer implements Transformer
+{
+    private static final String ID = "misc";
+
+    public static final String LICENCE =
+            "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
+            "Additional libraries used:\n" +
+            "* htmlparser http://htmlparser.sourceforge.net/license.html";
+
+    private final Map<String, SelectableTransformer> transformers = ImmutableMap
+        .<String, SelectableTransformer>builder()
+        .put("appleIWorks", new AppleIWorksContentTransformer())
+        .put("html", new HtmlParserContentTransformer())
+        .put("string", new StringExtractingContentTransformer())
+        .put("textToPdf", new TextToPdfContentTransformer())
+        .put("rfc822", new EMLTransformer())
+        .put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
+        .put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
+        .put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
+        .build();
+
+    @Override
+    public String getTransformerId()
+    {
+        return ID;
+    }
+
+    @Override
+    public void transform(String transformName, String sourceMimetype, String targetMimetype,
+                           Map<String, String> transformOptions,
+                           File sourceFile, File targetFile) throws Exception
+    {
+        final SelectableTransformer transformer = transformers.get(transformName);
+        logOptions(sourceFile, targetFile, transformOptions);
+        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
+    }
+
+    public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
+                                Map<String, String> transformOptions,
+                                File sourceFile, File targetFile) throws Exception
+    {
+        final SelectableTransformer transformer = transformers.get(transformName);
+        logOptions(sourceFile, targetFile, transformOptions);
+        transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
+    }
+
+    private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
+    {
+        StringJoiner sj = new StringJoiner(" ");
+        parameters.forEach((k, v) ->
+        {
+            if (!TRANSFORM_NAME_PARAMETER.equals(k))
+            {
+                sj.add("--" + k + "=" + v);
+            }
+        }); // keeping the existing style used in other T-Engines
+        sj.add(getExtension(sourceFile));
+        sj.add(getExtension(targetFile));
+        LogEntry.setOptions(sj.toString());
+    }
+
+    private static String getExtension(File file)
+    {
+        final String name = file.getName();
+        int i = name.lastIndexOf('.');
+        return i == -1 ? "???" : name.substring(i + 1);
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
@@ -1,158 +1,158 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
-
-/**
- * Converts any textual format to plain text.
- * <p>
- * The transformation is sensitive to the source and target string encodings.
- *
- *
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- *
- * @author Derek Hulley
- * @author eknizat
- */
-public class StringExtractingContentTransformer implements SelectableTransformer
-{
-
-    private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
-
-    /**
-     * Text to text conversions are done directly using the content reader and writer string
-     * manipulation methods.
-     * <p>
-     * Extraction of text from binary content attempts to take the possible character
-     * encoding into account.  The text produced from this will, if the encoding was correct,
-     * be unformatted but valid.
-     */
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile) throws Exception
-    {
-        String sourceEncoding = parameters.get(SOURCE_ENCODING);
-        String targetEncoding = parameters.get(TARGET_ENCODING);
-
-        if (logger.isDebugEnabled())
-        {
-            logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
-                         + " targetEncoding=" + targetEncoding);
-        }
-
-        Reader charReader = null;
-        Writer charWriter = null;
-        try
-        {
-            // Build reader
-            if (sourceEncoding == null)
-            {
-                charReader = new BufferedReader(
-                    new InputStreamReader(new FileInputStream(sourceFile)));
-            }
-            else
-            {
-                checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
-                charReader = new BufferedReader(
-                    new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
-            }
-
-            // Build writer
-            if (targetEncoding == null)
-            {
-                charWriter = new BufferedWriter(
-                    new OutputStreamWriter(new FileOutputStream(targetFile)));
-            }
-            else
-            {
-                checkEncodingParameter(targetEncoding, TARGET_ENCODING);
-                charWriter = new BufferedWriter(
-                    new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
-            }
-
-            // copy from the one to the other
-            char[] buffer = new char[8192];
-            int readCount = 0;
-            while (readCount > -1)
-            {
-                // write the last read count number of bytes
-                charWriter.write(buffer, 0, readCount);
-                // fill the buffer again
-                readCount = charReader.read(buffer);
-            }
-        }
-        finally
-        {
-            if (charReader != null)
-            {
-                try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
-            }
-            if (charWriter != null)
-            {
-                try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
-            }
-        }
-        // done
-    }
-
-    private void checkEncodingParameter(String encoding, String paramterName)
-    {
-        try
-        {
-            if (!Charset.isSupported(encoding))
-            {
-                throw new IllegalArgumentException(
-                    paramterName + "=" + encoding + " is not supported by the JVM.");
-            }
-        }
-        catch (IllegalCharsetNameException e)
-        {
-            throw new IllegalArgumentException(
-                paramterName + "=" + encoding + " is not a valid encoding.");
-        }
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
+
+/**
+ * Converts any textual format to plain text.
+ * <p>
+ * The transformation is sensitive to the source and target string encodings.
+ *
+ *
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class StringExtractingContentTransformer implements SelectableTransformer
+{
+
+    private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
+
+    /**
+     * Text to text conversions are done directly using the content reader and writer string
+     * manipulation methods.
+     * <p>
+     * Extraction of text from binary content attempts to take the possible character
+     * encoding into account.  The text produced from this will, if the encoding was correct,
+     * be unformatted but valid.
+     */
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        String targetEncoding = parameters.get(TARGET_ENCODING);
+
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
+                         + " targetEncoding=" + targetEncoding);
+        }
+
+        Reader charReader = null;
+        Writer charWriter = null;
+        try
+        {
+            // Build reader
+            if (sourceEncoding == null)
+            {
+                charReader = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(sourceFile)));
+            }
+            else
+            {
+                checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
+                charReader = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
+            }
+
+            // Build writer
+            if (targetEncoding == null)
+            {
+                charWriter = new BufferedWriter(
+                    new OutputStreamWriter(new FileOutputStream(targetFile)));
+            }
+            else
+            {
+                checkEncodingParameter(targetEncoding, TARGET_ENCODING);
+                charWriter = new BufferedWriter(
+                    new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
+            }
+
+            // copy from the one to the other
+            char[] buffer = new char[8192];
+            int readCount = 0;
+            while (readCount > -1)
+            {
+                // write the last read count number of bytes
+                charWriter.write(buffer, 0, readCount);
+                // fill the buffer again
+                readCount = charReader.read(buffer);
+            }
+        }
+        finally
+        {
+            if (charReader != null)
+            {
+                try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
+            }
+            if (charWriter != null)
+            {
+                try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
+            }
+        }
+        // done
+    }
+
+    private void checkEncodingParameter(String encoding, String paramterName)
+    {
+        try
+        {
+            if (!Charset.isSupported(encoding))
+            {
+                throw new IllegalArgumentException(
+                    paramterName + "=" + encoding + " is not supported by the JVM.");
+            }
+        }
+        catch (IllegalCharsetNameException e)
+        {
+            throw new IllegalArgumentException(
+                paramterName + "=" + encoding + " is not a valid encoding.");
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
@@ -1,448 +1,448 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.alfresco.transformer.util.RequestParamMap;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageContentStream;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.apache.pdfbox.tools.TextToPDF;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PushbackInputStream;
-import java.io.Reader;
-import java.nio.charset.Charset;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-
-/**
- * <p>
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- * </p>
- *
- * Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
- *
- * @author Derek Hulley
- * @author eknizat
- */
-public class TextToPdfContentTransformer implements SelectableTransformer
-{
-    private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
-
-    private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
-    private static final byte FE = (byte) 0xFE;
-    private static final byte FF = (byte) 0xFF;
-
-    public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
-
-    private final PagedTextToPDF transformer;
-
-    public TextToPdfContentTransformer()
-    {
-        transformer = new PagedTextToPDF();
-    }
-
-    public void setStandardFont(String fontName)
-    {
-        try
-        {
-            transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
-        }
-        catch (Throwable e)
-        {
-            throw new RuntimeException(
-                "Unable to set Standard Font for PDF generation: " + fontName, e);
-        }
-    }
-
-    public void setFontSize(int fontSize)
-    {
-        try
-        {
-            transformer.setFontSize(fontSize);
-        }
-        catch (Throwable e)
-        {
-            throw new RuntimeException(
-                "Unable to set Font Size for PDF generation: " + fontSize);
-        }
-    }
-
-    @Override
-    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
-                          final File sourceFile, final File targetFile) throws Exception
-    {
-        String sourceEncoding = parameters.get(SOURCE_ENCODING);
-        String stringPageLimit = parameters.get(PAGE_LIMIT);
-        int pageLimit = -1;
-        if (stringPageLimit != null)
-        {
-            pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
-        }
-
-        PDDocument pdf = null;
-        try (InputStream is = new FileInputStream(sourceFile);
-             Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
-             OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
-        {
-            //TransformationOptionLimits limits = getLimits(reader, writer, options);
-            //TransformationOptionPair pageLimits = limits.getPagesPair();
-            pdf = transformer.createPDFFromText(ir, pageLimit);
-            pdf.save(os);
-        }
-        finally
-        {
-            if (pdf != null)
-            {
-                try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
-            }
-        }
-    }
-
-    protected InputStreamReader buildReader(InputStream is, String encoding)
-    {
-        // If they gave an encoding, try to use it
-        if (encoding != null)
-        {
-            Charset charset = null;
-            try
-            {
-                charset = Charset.forName(encoding);
-            }
-            catch (Exception e)
-            {
-                logger.warn("JVM doesn't understand encoding '" + encoding +
-                            "' when transforming text to pdf");
-            }
-            if (charset != null)
-            {
-                // Handles the situation where there is a BOM even though the encoding indicates that normally
-                // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
-                // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
-                // in the first few character. XML files even when not in European languages tend to have more
-                // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
-                // Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
-                // other transformers do.
-                String name = charset.displayName();
-                if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
-                {
-                    logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
-                    charset = Charset.forName("UTF-16");
-                    is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
-                    {
-                        boolean bomRead;
-                        boolean switchByteOrder;
-                        boolean evenByte = true;
-
-                        @Override
-                        public int read(byte[] bytes, int off, int len) throws IOException
-                        {
-                            int i = 0;
-                            int b = 0;
-                            for (; i<len; i++)
-                            {
-                                b = read();
-                                if (b == -1)
-                                {
-                                    break;
-                                }
-                                bytes[off+i] = (byte)b;
-                            }
-                            return i == 0 && b == -1 ? -1 : i;
-                        }
-
-                        @Override
-                        public int read() throws IOException
-                        {
-                            if (!bomRead)
-                            {
-                                bomRead = true;
-                                boolean switchBom = false;
-                                byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
-                                int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
-                                int evenZeros = countZeros(bytes, 0);
-                                int oddZeros = countZeros(bytes, 1);
-                                if (evenZeros > oddZeros)
-                                {
-                                    if (bytes[0] == FF && bytes[1] == FE)
-                                    {
-                                        switchByteOrder = true;
-                                        switchBom = true;
-                                        logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
-                                    }
-                                    else
-                                    {
-                                        logger.debug("More even zero bytes, so normal read for big-endian");
-                                    }
-                                }
-                                else
-                                {
-                                    if (bytes[0] == FE && bytes[1] == FF)
-                                    {
-                                        switchBom = true;
-                                        logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
-                                    }
-                                    else
-                                    {
-                                        switchByteOrder = true;
-                                        logger.debug("More odd zero bytes, so switch bytes from little-endian");
-                                    }
-                                }
-
-                                if (switchBom)
-                                {
-                                    byte b = bytes[0];
-                                    bytes[0] = bytes[1];
-                                    bytes[1] = b;
-                                }
-
-                                for (int i = end-1; i>=0; i--)
-                                {
-                                    unread(bytes[i]);
-                                }
-                            }
-
-                            if (switchByteOrder)
-                            {
-                                if (evenByte)
-                                {
-                                    int b1 = super.read();
-                                    int b2 = super.read();
-                                    if (b1 != -1)
-                                    {
-                                        unread(b1);
-                                    }
-                                    if (b2 != -1)
-                                    {
-                                        unread(b2);
-                                    }
-                                }
-                                evenByte = !evenByte;
-                            }
-
-                            return super.read();
-                        }
-
-                        // Counts the number of even or odd 00 bytes
-                        private int countZeros(byte[] b, int offset)
-                        {
-                            int count = 0;
-                            for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
-                            {
-                                if (b[i] == 0)
-                                {
-                                    count++;
-                                }
-                            }
-                            return count;
-                        }
-                    };
-                }
-                logger.debug("Processing plain text in encoding " + name);
-                return new InputStreamReader(is, charset);
-            }
-        }
-
-        // Fall back on the system default
-        logger.debug("Processing plain text using system default encoding");
-        return new InputStreamReader(is);
-    }
-
-    private static class PagedTextToPDF extends TextToPDF
-    {
-        // REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
-        // before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
-        static PDType1Font getStandardFont(String name)
-        {
-            return STANDARD_14.get(name);
-        }
-
-        private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
-
-        static
-        {
-            STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
-            STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
-            STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
-            STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
-                PDType1Font.TIMES_BOLD_ITALIC);
-            STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
-            STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
-            STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
-                PDType1Font.HELVETICA_OBLIQUE);
-            STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
-                PDType1Font.HELVETICA_BOLD_OBLIQUE);
-            STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
-            STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
-            STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
-            STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
-                PDType1Font.COURIER_BOLD_OBLIQUE);
-            STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
-            STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
-        }
-        //duplicating until here
-
-        // The following code is based on the code in TextToPDF with the addition of
-        // checks for page limits.
-        // The calling code must close the PDDocument once finished with it.
-        public PDDocument createPDFFromText(Reader text, int pageLimit)
-            throws IOException
-        {
-            PDDocument doc = null;
-            int pageCount = 0;
-            try
-            {
-                final int margin = 40;
-                float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
-
-                //calculate font height and increase by 5 percent.
-                height = height * getFontSize() * 1.05f;
-                doc = new PDDocument();
-                BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
-                String nextLine;
-                PDPage page = new PDPage();
-                PDPageContentStream contentStream = null;
-                float y = -1;
-                float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
-
-                // There is a special case of creating a PDF document from an empty string.
-                boolean textIsEmpty = true;
-
-                outer:
-                while ((nextLine = data.readLine()) != null)
-                {
-                    // The input text is nonEmpty. New pages will be created and added
-                    // to the PDF document as they are needed, depending on the length of
-                    // the text.
-                    textIsEmpty = false;
-
-                    String[] lineWords = nextLine.trim().split(" ");
-                    int lineIndex = 0;
-                    while (lineIndex < lineWords.length)
-                    {
-                        final StringBuilder nextLineToDraw = new StringBuilder();
-                        float lengthIfUsingNextWord = 0;
-                        do
-                        {
-                            nextLineToDraw.append(lineWords[lineIndex]);
-                            nextLineToDraw.append(" ");
-                            lineIndex++;
-                            if (lineIndex < lineWords.length)
-                            {
-                                String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
-                                lengthIfUsingNextWord =
-                                    (getFont().getStringWidth(
-                                        lineWithNextWord) / 1000) * getFontSize();
-                            }
-                        }
-                        while (lineIndex < lineWords.length &&
-                               lengthIfUsingNextWord < maxStringLength);
-                        if (y < margin)
-                        {
-                            int test = pageCount + 1;
-                            if (pageLimit > 0 && (pageCount++ >= pageLimit))
-                            {
-                                break outer;
-                            }
-
-                            // We have crossed the end-of-page boundary and need to extend the
-                            // document by another page.
-                            page = new PDPage();
-                            doc.addPage(page);
-                            if (contentStream != null)
-                            {
-                                contentStream.endText();
-                                contentStream.close();
-                            }
-                            contentStream = new PDPageContentStream(doc, page);
-                            contentStream.setFont(getFont(), getFontSize());
-                            contentStream.beginText();
-                            y = page.getMediaBox().getHeight() - margin + height;
-                            contentStream.moveTextPositionByAmount(margin, y);
-                        }
-
-                        if (contentStream == null)
-                        {
-                            throw new IOException("Error:Expected non-null content stream.");
-                        }
-                        contentStream.moveTextPositionByAmount(0, -height);
-                        y -= height;
-                        contentStream.drawString(nextLineToDraw.toString());
-                    }
-                }
-
-                // If the input text was the empty string, then the above while loop will have short-circuited
-                // and we will not have added any PDPages to the document.
-                // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
-                if (textIsEmpty)
-                {
-                    doc.addPage(page);
-                }
-
-                if (contentStream != null)
-                {
-                    contentStream.endText();
-                    contentStream.close();
-                }
-            }
-            catch (IOException io)
-            {
-                if (doc != null)
-                {
-                    doc.close();
-                }
-                throw io;
-            }
-            return doc;
-        }
-    }
-
-    private int parseInt(String s, String paramName)
-    {
-        try
-        {
-            return Integer.valueOf(s);
-        }
-        catch (NumberFormatException e)
-        {
-            throw new IllegalArgumentException(paramName + " parameter must be an integer.");
-        }
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.alfresco.transformer.util.RequestParamMap;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.tools.TextToPDF;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PushbackInputStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+
+/**
+ * <p>
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ * </p>
+ *
+ * Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class TextToPdfContentTransformer implements SelectableTransformer
+{
+    private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
+
+    private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
+    private static final byte FE = (byte) 0xFE;
+    private static final byte FF = (byte) 0xFF;
+
+    public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
+
+    private final PagedTextToPDF transformer;
+
+    public TextToPdfContentTransformer()
+    {
+        transformer = new PagedTextToPDF();
+    }
+
+    public void setStandardFont(String fontName)
+    {
+        try
+        {
+            transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
+        }
+        catch (Throwable e)
+        {
+            throw new RuntimeException(
+                "Unable to set Standard Font for PDF generation: " + fontName, e);
+        }
+    }
+
+    public void setFontSize(int fontSize)
+    {
+        try
+        {
+            transformer.setFontSize(fontSize);
+        }
+        catch (Throwable e)
+        {
+            throw new RuntimeException(
+                "Unable to set Font Size for PDF generation: " + fontSize);
+        }
+    }
+
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        String stringPageLimit = parameters.get(PAGE_LIMIT);
+        int pageLimit = -1;
+        if (stringPageLimit != null)
+        {
+            pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
+        }
+
+        PDDocument pdf = null;
+        try (InputStream is = new FileInputStream(sourceFile);
+             Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
+             OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
+        {
+            //TransformationOptionLimits limits = getLimits(reader, writer, options);
+            //TransformationOptionPair pageLimits = limits.getPagesPair();
+            pdf = transformer.createPDFFromText(ir, pageLimit);
+            pdf.save(os);
+        }
+        finally
+        {
+            if (pdf != null)
+            {
+                try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
+            }
+        }
+    }
+
+    protected InputStreamReader buildReader(InputStream is, String encoding)
+    {
+        // If they gave an encoding, try to use it
+        if (encoding != null)
+        {
+            Charset charset = null;
+            try
+            {
+                charset = Charset.forName(encoding);
+            }
+            catch (Exception e)
+            {
+                logger.warn("JVM doesn't understand encoding '" + encoding +
+                            "' when transforming text to pdf");
+            }
+            if (charset != null)
+            {
+                // Handles the situation where there is a BOM even though the encoding indicates that normally
+                // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
+                // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
+                // in the first few character. XML files even when not in European languages tend to have more
+                // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
+                // Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
+                // other transformers do.
+                String name = charset.displayName();
+                if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
+                {
+                    logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
+                    charset = Charset.forName("UTF-16");
+                    is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
+                    {
+                        boolean bomRead;
+                        boolean switchByteOrder;
+                        boolean evenByte = true;
+
+                        @Override
+                        public int read(byte[] bytes, int off, int len) throws IOException
+                        {
+                            int i = 0;
+                            int b = 0;
+                            for (; i<len; i++)
+                            {
+                                b = read();
+                                if (b == -1)
+                                {
+                                    break;
+                                }
+                                bytes[off+i] = (byte)b;
+                            }
+                            return i == 0 && b == -1 ? -1 : i;
+                        }
+
+                        @Override
+                        public int read() throws IOException
+                        {
+                            if (!bomRead)
+                            {
+                                bomRead = true;
+                                boolean switchBom = false;
+                                byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
+                                int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
+                                int evenZeros = countZeros(bytes, 0);
+                                int oddZeros = countZeros(bytes, 1);
+                                if (evenZeros > oddZeros)
+                                {
+                                    if (bytes[0] == FF && bytes[1] == FE)
+                                    {
+                                        switchByteOrder = true;
+                                        switchBom = true;
+                                        logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
+                                    }
+                                    else
+                                    {
+                                        logger.debug("More even zero bytes, so normal read for big-endian");
+                                    }
+                                }
+                                else
+                                {
+                                    if (bytes[0] == FE && bytes[1] == FF)
+                                    {
+                                        switchBom = true;
+                                        logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
+                                    }
+                                    else
+                                    {
+                                        switchByteOrder = true;
+                                        logger.debug("More odd zero bytes, so switch bytes from little-endian");
+                                    }
+                                }
+
+                                if (switchBom)
+                                {
+                                    byte b = bytes[0];
+                                    bytes[0] = bytes[1];
+                                    bytes[1] = b;
+                                }
+
+                                for (int i = end-1; i>=0; i--)
+                                {
+                                    unread(bytes[i]);
+                                }
+                            }
+
+                            if (switchByteOrder)
+                            {
+                                if (evenByte)
+                                {
+                                    int b1 = super.read();
+                                    int b2 = super.read();
+                                    if (b1 != -1)
+                                    {
+                                        unread(b1);
+                                    }
+                                    if (b2 != -1)
+                                    {
+                                        unread(b2);
+                                    }
+                                }
+                                evenByte = !evenByte;
+                            }
+
+                            return super.read();
+                        }
+
+                        // Counts the number of even or odd 00 bytes
+                        private int countZeros(byte[] b, int offset)
+                        {
+                            int count = 0;
+                            for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
+                            {
+                                if (b[i] == 0)
+                                {
+                                    count++;
+                                }
+                            }
+                            return count;
+                        }
+                    };
+                }
+                logger.debug("Processing plain text in encoding " + name);
+                return new InputStreamReader(is, charset);
+            }
+        }
+
+        // Fall back on the system default
+        logger.debug("Processing plain text using system default encoding");
+        return new InputStreamReader(is);
+    }
+
+    private static class PagedTextToPDF extends TextToPDF
+    {
+        // REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
+        // before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
+        static PDType1Font getStandardFont(String name)
+        {
+            return STANDARD_14.get(name);
+        }
+
+        private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
+
+        static
+        {
+            STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
+            STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
+            STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
+            STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
+                PDType1Font.TIMES_BOLD_ITALIC);
+            STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
+            STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
+            STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
+                PDType1Font.HELVETICA_OBLIQUE);
+            STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
+                PDType1Font.HELVETICA_BOLD_OBLIQUE);
+            STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
+            STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
+            STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
+            STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
+                PDType1Font.COURIER_BOLD_OBLIQUE);
+            STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
+            STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
+        }
+        //duplicating until here
+
+        // The following code is based on the code in TextToPDF with the addition of
+        // checks for page limits.
+        // The calling code must close the PDDocument once finished with it.
+        public PDDocument createPDFFromText(Reader text, int pageLimit)
+            throws IOException
+        {
+            PDDocument doc = null;
+            int pageCount = 0;
+            try
+            {
+                final int margin = 40;
+                float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
+
+                //calculate font height and increase by 5 percent.
+                height = height * getFontSize() * 1.05f;
+                doc = new PDDocument();
+                BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
+                String nextLine;
+                PDPage page = new PDPage();
+                PDPageContentStream contentStream = null;
+                float y = -1;
+                float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
+
+                // There is a special case of creating a PDF document from an empty string.
+                boolean textIsEmpty = true;
+
+                outer:
+                while ((nextLine = data.readLine()) != null)
+                {
+                    // The input text is nonEmpty. New pages will be created and added
+                    // to the PDF document as they are needed, depending on the length of
+                    // the text.
+                    textIsEmpty = false;
+
+                    String[] lineWords = nextLine.trim().split(" ");
+                    int lineIndex = 0;
+                    while (lineIndex < lineWords.length)
+                    {
+                        final StringBuilder nextLineToDraw = new StringBuilder();
+                        float lengthIfUsingNextWord = 0;
+                        do
+                        {
+                            nextLineToDraw.append(lineWords[lineIndex]);
+                            nextLineToDraw.append(" ");
+                            lineIndex++;
+                            if (lineIndex < lineWords.length)
+                            {
+                                String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
+                                lengthIfUsingNextWord =
+                                    (getFont().getStringWidth(
+                                        lineWithNextWord) / 1000) * getFontSize();
+                            }
+                        }
+                        while (lineIndex < lineWords.length &&
+                               lengthIfUsingNextWord < maxStringLength);
+                        if (y < margin)
+                        {
+                            int test = pageCount + 1;
+                            if (pageLimit > 0 && (pageCount++ >= pageLimit))
+                            {
+                                break outer;
+                            }
+
+                            // We have crossed the end-of-page boundary and need to extend the
+                            // document by another page.
+                            page = new PDPage();
+                            doc.addPage(page);
+                            if (contentStream != null)
+                            {
+                                contentStream.endText();
+                                contentStream.close();
+                            }
+                            contentStream = new PDPageContentStream(doc, page);
+                            contentStream.setFont(getFont(), getFontSize());
+                            contentStream.beginText();
+                            y = page.getMediaBox().getHeight() - margin + height;
+                            contentStream.moveTextPositionByAmount(margin, y);
+                        }
+
+                        if (contentStream == null)
+                        {
+                            throw new IOException("Error:Expected non-null content stream.");
+                        }
+                        contentStream.moveTextPositionByAmount(0, -height);
+                        y -= height;
+                        contentStream.drawString(nextLineToDraw.toString());
+                    }
+                }
+
+                // If the input text was the empty string, then the above while loop will have short-circuited
+                // and we will not have added any PDPages to the document.
+                // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
+                if (textIsEmpty)
+                {
+                    doc.addPage(page);
+                }
+
+                if (contentStream != null)
+                {
+                    contentStream.endText();
+                    contentStream.close();
+                }
+            }
+            catch (IOException io)
+            {
+                if (doc != null)
+                {
+                    doc.close();
+                }
+                throw io;
+            }
+            return doc;
+        }
+    }
+
+    private int parseInt(String s, String paramName)
+    {
+        try
+        {
+            return Integer.valueOf(s);
+        }
+        catch (NumberFormatException e)
+        {
+            throw new IllegalArgumentException(paramName + " parameter must be an integer.");
+        }
+    }
+}
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties
@@ -1,12 +1,12 @@
-#
-# HtmlMetadataExtractor - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
+#
+# HtmlMetadataExtractor - default mapping
+#
+# author: Derek Hulley
+
+# Namespaces
+namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
+
+# Mappings
+author=cm:author
+title=cm:title
+description=cm:description
--- a/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
@@ -1,162 +1,162 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail.  Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.junit.jupiter.api.Test;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.nio.file.Files;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-public class HtmlParserContentTransformerTest
-{
-    private static final String SOURCE_MIMETYPE = "text/html";
-    private static final String TARGET_MIMETYPE = "text/plain";
-
-    HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
-
-    /**
-     * Checks that we correctly handle text in different encodings,
-     * no matter if the encoding is specified on the Content Property
-     * or in a meta tag within the HTML itself. (ALF-10466)
-     *
-     * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
-     * so we must be careful when checking the returned text
-     */
-    @Test
-    public void testEncodingHandling() throws Exception
-    {
-        final String NEWLINE = System.getProperty("line.separator");
-        final String TITLE = "Testing!";
-        final String TEXT_P1 = "This is some text in English";
-        final String TEXT_P2 = "This is more text in English";
-        final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
-        String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
-        String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
-                       "<p>" + TEXT_P2 + "</p>" + NEWLINE +
-                       "<p>" + TEXT_P3 + "</p>" + NEWLINE;
-        String partC = "</body></html>";
-        final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
-
-        File tmpS = null;
-        File tmpD = null;
-
-        try
-        {
-            // Content set to ISO 8859-1
-            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
-            writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
-
-            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-
-            Map<String, String> parameters = new HashMap<>();
-            parameters.put(SOURCE_ENCODING, "ISO-8859-1");
-            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
-
-            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
-            tmpS.delete();
-            tmpD.delete();
-
-            // Content set to UTF-8
-            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
-            writeToFile(tmpS, partA + partB + partC, "UTF-8");
-
-            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-            parameters = new HashMap<>();
-            parameters.put(SOURCE_ENCODING, "UTF-8");
-            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
-            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
-            tmpS.delete();
-            tmpD.delete();
-
-            // Content set to UTF-16
-            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
-            writeToFile(tmpS, partA + partB + partC, "UTF-16");
-
-            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-            parameters = new HashMap<>();
-            parameters.put(SOURCE_ENCODING, "UTF-16");
-            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
-            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
-            tmpS.delete();
-            tmpD.delete();
-
-            // Note - since HTML Parser 2.0 META tags specifying the
-            // document encoding will ONLY be respected if the original
-            // content type was set to ISO-8859-1.
-            //
-            // This means there is now only one test which we can perform
-            // to ensure that this now-limited overriding of the encoding
-            // takes effect.
-
-            // Content set to ISO 8859-1, meta set to UTF-8
-            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
-            String str = partA +
-                         "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
-                         partB + partC;
-
-            writeToFile(tmpS, str, "UTF-8");
-
-            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-
-            parameters = new HashMap<>();
-            parameters.put(SOURCE_ENCODING, "ISO-8859-1");
-            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
-            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
-            tmpS.delete();
-            tmpD.delete();
-
-            // Note - we can't test UTF-16 with only a meta encoding,
-            //  because without that the parser won't know about the
-            //  2 byte format so won't be able to identify the meta tag
-        }
-        finally
-        {
-            if (tmpS != null && tmpS.exists()) tmpS.delete();
-            if (tmpD != null && tmpD.exists()) tmpD.delete();
-        }
-    }
-
-    private void writeToFile(File file, String content, String encoding) throws Exception
-    {
-        try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
-        {
-            ow.append(content);
-        }
-    }
-
-    private String readFromFile(File file, final String encoding) throws Exception
-    {
-        return new String(Files.readAllBytes(file.toPath()), encoding);
-    }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail.  Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/** Tests HtmlParserContentTransformer's text/html to text/plain conversion across source encodings. */
+public class HtmlParserContentTransformerTest
+{
+    private static final String SOURCE_MIMETYPE = "text/html";
+    private static final String TARGET_MIMETYPE = "text/plain";
+
+    // Transformer under test, created directly through its no-arg constructor.
+    HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
+
+    /**
+     * Checks that HTML is converted to the same plain text whether the source encoding is supplied
+     * through the SOURCE_ENCODING transform option or by a meta tag within the HTML itself. (ALF-10466)
+     * Every scenario reads the target file back as UTF-8 and compares it with one shared expected string.
+     *
+     * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a system-dependent line
+     * separator, so the expected text is built with the platform separator rather than a fixed "\n".
+     */
+    @Test
+    public void testEncodingHandling() throws Exception
+    {
+        final String NEWLINE = System.getProperty("line.separator");
+        final String TITLE = "Testing!";
+        final String TEXT_P1 = "This is some text in English";
+        final String TEXT_P2 = "This is more text in English";
+        final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol"; // the only non-ASCII text in the fixture
+        String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
+        String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
+                       "<p>" + TEXT_P2 + "</p>" + NEWLINE +
+                       "<p>" + TEXT_P3 + "</p>" + NEWLINE;
+        String partC = "</body></html>";
+        final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
+
+        File tmpS = null;
+        File tmpD = null;
+
+        try
+        {
+            // Scenario 1: source written as ISO-8859-1 and declared as ISO-8859-1
+            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+            writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
+
+            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+
+            Map<String, String> parameters = new HashMap<>();
+            parameters.put(SOURCE_ENCODING, "ISO-8859-1");
+            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+
+            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+            tmpS.delete();
+            tmpD.delete();
+
+            // Scenario 2: source written as UTF-8 and declared as UTF-8
+            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+            writeToFile(tmpS, partA + partB + partC, "UTF-8");
+
+            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+            parameters = new HashMap<>();
+            parameters.put(SOURCE_ENCODING, "UTF-8");
+            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+            tmpS.delete();
+            tmpD.delete();
+
+            // Scenario 3: source written as UTF-16 and declared as UTF-16
+            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+            writeToFile(tmpS, partA + partB + partC, "UTF-16");
+
+            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+            parameters = new HashMap<>();
+            parameters.put(SOURCE_ENCODING, "UTF-16");
+            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+            tmpS.delete();
+            tmpD.delete();
+
+            // Note - since HTML Parser 2.0, META tags specifying the document encoding will ONLY be
+            // respected if the original content type was set to ISO-8859-1, so there is now only one
+            // test we can perform to ensure that this now-limited overriding of the encoding takes effect.
+
+            // Scenario 4: source declared as ISO-8859-1, but actually written as UTF-8 with a
+            // meta tag announcing UTF-8; the expected output is unchanged.
+            tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+            String str = partA +
+                         "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
+                         partB + partC;
+
+            writeToFile(tmpS, str, "UTF-8");
+
+            tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+
+            parameters = new HashMap<>();
+            parameters.put(SOURCE_ENCODING, "ISO-8859-1");
+            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+            assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+            tmpS.delete();
+            tmpD.delete();
+
+            // Note - we can't test UTF-16 with only a meta encoding, because the parser would not
+            // know about the 2-byte format and so could not identify the meta tag.
+        }
+        finally
+        {
+            if (tmpS != null && tmpS.exists()) tmpS.delete();
+            if (tmpD != null && tmpD.exists()) tmpD.delete();
+        }
+    }
+
+    /** Writes content to file using the named charset; try-with-resources closes the writer. */
+    private void writeToFile(File file, String content, String encoding) throws Exception
+    {
+        try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
+        {
+            ow.append(content);
+        }
+    }
+
+    /** Reads the entire file and decodes its bytes with the named charset. */
+    private String readFromFile(File file, final String encoding) throws Exception
+    {
+        return new String(Files.readAllBytes(file.toPath()), encoding);
+    }
+}