parameters)
+ {
+ StringJoiner sj = new StringJoiner(" ");
+ parameters.forEach((k, v) ->
+ {
+ if (!TRANSFORM_NAME_PARAMETER.equals(k))
+ {
+ sj.add("--" + k + "=" + v);
+ }
+ }); // keeping the existing style used in other T-Engines
+ sj.add(getExtension(sourceFile));
+ sj.add(getExtension(targetFile));
+ LogEntry.setOptions(sj.toString());
+ }
+
+    /**
+     * Returns the text after the last {@code '.'} in the file's name, or {@code "???"} when the
+     * name contains no dot.
+     *
+     * @param file file whose name is inspected
+     * @return the extension without the dot, or {@code "???"}
+     */
+    private static String getExtension(File file)
+    {
+        final String fileName = file.getName();
+        final int lastDot = fileName.lastIndexOf('.');
+        if (lastDot == -1)
+        {
+            return "???";
+        }
+        return fileName.substring(lastDot + 1);
+    }
+}
diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
index 0a1760a6..d5a0e88c 100644
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/StringExtractingContentTransformer.java
@@ -1,158 +1,158 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail. Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
-
-/**
- * Converts any textual format to plain text.
- *
- * The transformation is sensitive to the source and target string encodings.
- *
- *
- *
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- *
- *
- * @author Derek Hulley
- * @author eknizat
- */
-public class StringExtractingContentTransformer implements SelectableTransformer
-{
-
- private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
-
- /**
- * Text to text conversions are done directly using the content reader and writer string
- * manipulation methods.
- *
- * Extraction of text from binary content attempts to take the possible character
- * encoding into account. The text produced from this will, if the encoding was correct,
- * be unformatted but valid.
- */
- @Override
- public void transform(final String sourceMimetype, final String targetMimetype, final Map parameters,
- final File sourceFile, final File targetFile) throws Exception
- {
- String sourceEncoding = parameters.get(SOURCE_ENCODING);
- String targetEncoding = parameters.get(TARGET_ENCODING);
-
- if (logger.isDebugEnabled())
- {
- logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
- + " targetEncoding=" + targetEncoding);
- }
-
- Reader charReader = null;
- Writer charWriter = null;
- try
- {
- // Build reader
- if (sourceEncoding == null)
- {
- charReader = new BufferedReader(
- new InputStreamReader(new FileInputStream(sourceFile)));
- }
- else
- {
- checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
- charReader = new BufferedReader(
- new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
- }
-
- // Build writer
- if (targetEncoding == null)
- {
- charWriter = new BufferedWriter(
- new OutputStreamWriter(new FileOutputStream(targetFile)));
- }
- else
- {
- checkEncodingParameter(targetEncoding, TARGET_ENCODING);
- charWriter = new BufferedWriter(
- new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
- }
-
- // copy from the one to the other
- char[] buffer = new char[8192];
- int readCount = 0;
- while (readCount > -1)
- {
- // write the last read count number of bytes
- charWriter.write(buffer, 0, readCount);
- // fill the buffer again
- readCount = charReader.read(buffer);
- }
- }
- finally
- {
- if (charReader != null)
- {
- try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
- }
- if (charWriter != null)
- {
- try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
- }
- }
- // done
- }
-
- private void checkEncodingParameter(String encoding, String paramterName)
- {
- try
- {
- if (!Charset.isSupported(encoding))
- {
- throw new IllegalArgumentException(
- paramterName + "=" + encoding + " is not supported by the JVM.");
- }
- }
- catch (IllegalCharsetNameException e)
- {
- throw new IllegalArgumentException(
- paramterName + "=" + encoding + " is not a valid encoding.");
- }
- }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail. Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
+
+/**
+ * Converts any textual format to plain text.
+ *
+ * The transformation is sensitive to the source and target string encodings.
+ *
+ *
+ *
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ *
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class StringExtractingContentTransformer implements SelectableTransformer
+{
+
+    private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
+
+    /**
+     * Copies the text of {@code sourceFile} into {@code targetFile}, decoding it with the
+     * {@code SOURCE_ENCODING} parameter and encoding it with the {@code TARGET_ENCODING} parameter.
+     * When an encoding parameter is absent the JVM default charset is used for that side.
+     *
+     * Both encodings are validated before either file is opened. The streams are managed by
+     * try-with-resources so that a failed flush or close of the target is thrown to the caller
+     * rather than logged and ignored, which would report success for a truncated target file.
+     *
+     * @param sourceMimetype not read by this implementation
+     * @param targetMimetype not read by this implementation
+     * @param parameters transform options; only the two encoding entries are read
+     * @param sourceFile file the text is read from
+     * @param targetFile file the text is written to
+     * @throws IllegalArgumentException if an encoding is not a legal name or not supported by the JVM
+     * @throws Exception if opening, reading, writing or closing either file fails
+     */
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        final String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        final String targetEncoding = parameters.get(TARGET_ENCODING);
+
+        if (logger.isDebugEnabled())
+        {
+            logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
+                + " targetEncoding=" + targetEncoding);
+        }
+
+        // Resolve the charsets first so that a bad parameter fails before any file is touched.
+        final Charset sourceCharset = resolveCharset(sourceEncoding, SOURCE_ENCODING);
+        final Charset targetCharset = resolveCharset(targetEncoding, TARGET_ENCODING);
+
+        try (Reader charReader = new BufferedReader(
+                 new InputStreamReader(new FileInputStream(sourceFile), sourceCharset));
+             Writer charWriter = new BufferedWriter(
+                 new OutputStreamWriter(new FileOutputStream(targetFile), targetCharset)))
+        {
+            // copy from the one to the other
+            final char[] buffer = new char[8192];
+            int readCount;
+            // read() returns -1 once the end of the source has been reached
+            while ((readCount = charReader.read(buffer)) != -1)
+            {
+                charWriter.write(buffer, 0, readCount);
+            }
+        }
+    }
+
+    /**
+     * Resolves an optional encoding parameter to a charset.
+     *
+     * @param encoding charset name from the request, or {@code null} if none was supplied
+     * @param parameterName name of the request parameter, used in error messages
+     * @return the named charset, or the JVM default charset when {@code encoding} is {@code null}
+     * @throws IllegalArgumentException if the name is illegal or unsupported
+     */
+    private Charset resolveCharset(String encoding, String parameterName)
+    {
+        if (encoding == null)
+        {
+            return Charset.defaultCharset();
+        }
+        checkEncodingParameter(encoding, parameterName);
+        return Charset.forName(encoding);
+    }
+
+    /**
+     * Verifies that {@code encoding} names a charset this JVM can use.
+     *
+     * @param encoding charset name to verify; must not be {@code null}
+     * @param parameterName name of the request parameter, used in error messages
+     * @throws IllegalArgumentException if the name is illegal or unsupported
+     */
+    private void checkEncodingParameter(String encoding, String parameterName)
+    {
+        try
+        {
+            // isSupported itself throws IllegalCharsetNameException for a malformed name
+            if (!Charset.isSupported(encoding))
+            {
+                throw new IllegalArgumentException(
+                    parameterName + "=" + encoding + " is not supported by the JVM.");
+            }
+        }
+        catch (IllegalCharsetNameException e)
+        {
+            throw new IllegalArgumentException(
+                parameterName + "=" + encoding + " is not a valid encoding.", e);
+        }
+    }
+}
diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
index 253d1c88..35065929 100644
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/java/org/alfresco/transformer/transformers/TextToPdfContentTransformer.java
@@ -1,448 +1,448 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail. Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.alfresco.transformer.util.RequestParamMap;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageContentStream;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.apache.pdfbox.tools.TextToPDF;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PushbackInputStream;
-import java.io.Reader;
-import java.nio.charset.Charset;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-
-/**
- *
- * This code is based on a class of the same name originally implemented in alfresco-repository.
- *
- *
- * Makes use of the PDFBox library's TextToPDF
utility.
- *
- * @author Derek Hulley
- * @author eknizat
- */
-public class TextToPdfContentTransformer implements SelectableTransformer
-{
- private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
-
- private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
- private static final byte FE = (byte) 0xFE;
- private static final byte FF = (byte) 0xFF;
-
- public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
-
- private final PagedTextToPDF transformer;
-
- public TextToPdfContentTransformer()
- {
- transformer = new PagedTextToPDF();
- }
-
- public void setStandardFont(String fontName)
- {
- try
- {
- transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
- }
- catch (Throwable e)
- {
- throw new RuntimeException(
- "Unable to set Standard Font for PDF generation: " + fontName, e);
- }
- }
-
- public void setFontSize(int fontSize)
- {
- try
- {
- transformer.setFontSize(fontSize);
- }
- catch (Throwable e)
- {
- throw new RuntimeException(
- "Unable to set Font Size for PDF generation: " + fontSize);
- }
- }
-
- @Override
- public void transform(final String sourceMimetype, final String targetMimetype, final Map parameters,
- final File sourceFile, final File targetFile) throws Exception
- {
- String sourceEncoding = parameters.get(SOURCE_ENCODING);
- String stringPageLimit = parameters.get(PAGE_LIMIT);
- int pageLimit = -1;
- if (stringPageLimit != null)
- {
- pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
- }
-
- PDDocument pdf = null;
- try (InputStream is = new FileInputStream(sourceFile);
- Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
- OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
- {
- //TransformationOptionLimits limits = getLimits(reader, writer, options);
- //TransformationOptionPair pageLimits = limits.getPagesPair();
- pdf = transformer.createPDFFromText(ir, pageLimit);
- pdf.save(os);
- }
- finally
- {
- if (pdf != null)
- {
- try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
- }
- }
- }
-
- protected InputStreamReader buildReader(InputStream is, String encoding)
- {
- // If they gave an encoding, try to use it
- if (encoding != null)
- {
- Charset charset = null;
- try
- {
- charset = Charset.forName(encoding);
- }
- catch (Exception e)
- {
- logger.warn("JVM doesn't understand encoding '" + encoding +
- "' when transforming text to pdf");
- }
- if (charset != null)
- {
- // Handles the situation where there is a BOM even though the encoding indicates that normally
- // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
- // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
- // in the first few character. XML files even when not in European languages tend to have more
- // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
- // Think of: The normal Java decoder does not have this flexibility but
- // other transformers do.
- String name = charset.displayName();
- if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
- {
- logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
- charset = Charset.forName("UTF-16");
- is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
- {
- boolean bomRead;
- boolean switchByteOrder;
- boolean evenByte = true;
-
- @Override
- public int read(byte[] bytes, int off, int len) throws IOException
- {
- int i = 0;
- int b = 0;
- for (; i oddZeros)
- {
- if (bytes[0] == FF && bytes[1] == FE)
- {
- switchByteOrder = true;
- switchBom = true;
- logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
- }
- else
- {
- logger.debug("More even zero bytes, so normal read for big-endian");
- }
- }
- else
- {
- if (bytes[0] == FE && bytes[1] == FF)
- {
- switchBom = true;
- logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
- }
- else
- {
- switchByteOrder = true;
- logger.debug("More odd zero bytes, so switch bytes from little-endian");
- }
- }
-
- if (switchBom)
- {
- byte b = bytes[0];
- bytes[0] = bytes[1];
- bytes[1] = b;
- }
-
- for (int i = end-1; i>=0; i--)
- {
- unread(bytes[i]);
- }
- }
-
- if (switchByteOrder)
- {
- if (evenByte)
- {
- int b1 = super.read();
- int b2 = super.read();
- if (b1 != -1)
- {
- unread(b1);
- }
- if (b2 != -1)
- {
- unread(b2);
- }
- }
- evenByte = !evenByte;
- }
-
- return super.read();
- }
-
- // Counts the number of even or odd 00 bytes
- private int countZeros(byte[] b, int offset)
- {
- int count = 0;
- for (int i=offset; i STANDARD_14 = new HashMap<>();
-
- static
- {
- STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
- STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
- STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
- STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
- PDType1Font.TIMES_BOLD_ITALIC);
- STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
- STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
- STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
- PDType1Font.HELVETICA_OBLIQUE);
- STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
- PDType1Font.HELVETICA_BOLD_OBLIQUE);
- STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
- STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
- STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
- STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
- PDType1Font.COURIER_BOLD_OBLIQUE);
- STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
- STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
- }
- //duplicating until here
-
- // The following code is based on the code in TextToPDF with the addition of
- // checks for page limits.
- // The calling code must close the PDDocument once finished with it.
- public PDDocument createPDFFromText(Reader text, int pageLimit)
- throws IOException
- {
- PDDocument doc = null;
- int pageCount = 0;
- try
- {
- final int margin = 40;
- float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
-
- //calculate font height and increase by 5 percent.
- height = height * getFontSize() * 1.05f;
- doc = new PDDocument();
- BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
- String nextLine;
- PDPage page = new PDPage();
- PDPageContentStream contentStream = null;
- float y = -1;
- float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
-
- // There is a special case of creating a PDF document from an empty string.
- boolean textIsEmpty = true;
-
- outer:
- while ((nextLine = data.readLine()) != null)
- {
- // The input text is nonEmpty. New pages will be created and added
- // to the PDF document as they are needed, depending on the length of
- // the text.
- textIsEmpty = false;
-
- String[] lineWords = nextLine.trim().split(" ");
- int lineIndex = 0;
- while (lineIndex < lineWords.length)
- {
- final StringBuilder nextLineToDraw = new StringBuilder();
- float lengthIfUsingNextWord = 0;
- do
- {
- nextLineToDraw.append(lineWords[lineIndex]);
- nextLineToDraw.append(" ");
- lineIndex++;
- if (lineIndex < lineWords.length)
- {
- String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
- lengthIfUsingNextWord =
- (getFont().getStringWidth(
- lineWithNextWord) / 1000) * getFontSize();
- }
- }
- while (lineIndex < lineWords.length &&
- lengthIfUsingNextWord < maxStringLength);
- if (y < margin)
- {
- int test = pageCount + 1;
- if (pageLimit > 0 && (pageCount++ >= pageLimit))
- {
- break outer;
- }
-
- // We have crossed the end-of-page boundary and need to extend the
- // document by another page.
- page = new PDPage();
- doc.addPage(page);
- if (contentStream != null)
- {
- contentStream.endText();
- contentStream.close();
- }
- contentStream = new PDPageContentStream(doc, page);
- contentStream.setFont(getFont(), getFontSize());
- contentStream.beginText();
- y = page.getMediaBox().getHeight() - margin + height;
- contentStream.moveTextPositionByAmount(margin, y);
- }
-
- if (contentStream == null)
- {
- throw new IOException("Error:Expected non-null content stream.");
- }
- contentStream.moveTextPositionByAmount(0, -height);
- y -= height;
- contentStream.drawString(nextLineToDraw.toString());
- }
- }
-
- // If the input text was the empty string, then the above while loop will have short-circuited
- // and we will not have added any PDPages to the document.
- // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
- if (textIsEmpty)
- {
- doc.addPage(page);
- }
-
- if (contentStream != null)
- {
- contentStream.endText();
- contentStream.close();
- }
- }
- catch (IOException io)
- {
- if (doc != null)
- {
- doc.close();
- }
- throw io;
- }
- return doc;
- }
- }
-
- private int parseInt(String s, String paramName)
- {
- try
- {
- return Integer.valueOf(s);
- }
- catch (NumberFormatException e)
- {
- throw new IllegalArgumentException(paramName + " parameter must be an integer.");
- }
- }
-}
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail. Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.alfresco.transformer.util.RequestParamMap;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.tools.TextToPDF;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PushbackInputStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+
+/**
+ *
+ * This code is based on a class of the same name originally implemented in alfresco-repository.
+ *
+ *
+ * Makes use of the PDFBox library's TextToPDF
+ * utility.
+ *
+ * @author Derek Hulley
+ * @author eknizat
+ */
+public class TextToPdfContentTransformer implements SelectableTransformer
+{
+ private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
+
+ private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
+ private static final byte FE = (byte) 0xFE;
+ private static final byte FF = (byte) 0xFF;
+
+ public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
+
+ private final PagedTextToPDF transformer;
+
+    /**
+     * Creates a transformer backed by a new {@code PagedTextToPDF} instance.
+     */
+    public TextToPdfContentTransformer()
+    {
+        this.transformer = new PagedTextToPDF();
+    }
+
+    /**
+     * Looks up {@code fontName} through {@code PagedTextToPDF.getStandardFont} and sets the
+     * result as the font of the underlying transformer.
+     *
+     * @param fontName name of the font to look up
+     * @throws RuntimeException wrapping anything thrown while looking up or setting the font
+     */
+    public void setStandardFont(String fontName)
+    {
+        try
+        {
+            this.transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
+        }
+        catch (Throwable cause)
+        {
+            final String message = "Unable to set Standard Font for PDF generation: " + fontName;
+            throw new RuntimeException(message, cause);
+        }
+    }
+
+    /**
+     * Sets the font size of the underlying transformer.
+     *
+     * @param fontSize font size to pass to the transformer
+     * @throws RuntimeException wrapping anything thrown while setting the size; the original
+     *         failure is kept as the cause so it is not lost from the stack trace
+     */
+    public void setFontSize(int fontSize)
+    {
+        try
+        {
+            transformer.setFontSize(fontSize);
+        }
+        catch (Throwable e)
+        {
+            throw new RuntimeException(
+                "Unable to set Font Size for PDF generation: " + fontSize, e);
+        }
+    }
+
+    /**
+     * Renders the text in {@code sourceFile} as a PDF written to {@code targetFile}.
+     *
+     * The text is decoded through {@code buildReader} using the {@code SOURCE_ENCODING}
+     * parameter, and the optional {@code PAGE_LIMIT} parameter caps the number of pages.
+     *
+     * @param sourceMimetype not read by this implementation
+     * @param targetMimetype not read by this implementation
+     * @param parameters transform options; only the source encoding and page limit are read
+     * @param sourceFile text file to read
+     * @param targetFile PDF file to write
+     * @throws IllegalArgumentException if the page limit is present but not an integer
+     * @throws Exception if reading the text, building the PDF or saving it fails
+     */
+    @Override
+    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
+                          final File sourceFile, final File targetFile) throws Exception
+    {
+        final String sourceEncoding = parameters.get(SOURCE_ENCODING);
+        final String stringPageLimit = parameters.get(PAGE_LIMIT);
+        // -1 means "no limit": createPDFFromText only enforces limits greater than zero.
+        int pageLimit = -1;
+        if (stringPageLimit != null)
+        {
+            pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
+        }
+
+        PDDocument pdf = null;
+        try (InputStream is = new FileInputStream(sourceFile);
+             Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
+             OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
+        {
+            pdf = transformer.createPDFFromText(ir, pageLimit);
+            pdf.save(os);
+        }
+        finally
+        {
+            if (pdf != null)
+            {
+                try
+                {
+                    pdf.close();
+                }
+                catch (Throwable e)
+                {
+                    // Best effort: a close failure must not mask the outcome of the transform,
+                    // but it goes to the class logger rather than to stderr.
+                    logger.error("Failed to close PDDocument", e);
+                }
+            }
+        }
+    }
+
+ protected InputStreamReader buildReader(InputStream is, String encoding)
+ {
+ // If they gave an encoding, try to use it
+ if (encoding != null)
+ {
+ Charset charset = null;
+ try
+ {
+ charset = Charset.forName(encoding);
+ }
+ catch (Exception e)
+ {
+ logger.warn("JVM doesn't understand encoding '" + encoding +
+ "' when transforming text to pdf");
+ }
+ if (charset != null)
+ {
+ // Handles the situation where there is a BOM even though the encoding indicates that normally
+ // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
+ // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
+ // in the first few character. XML files even when not in European languages tend to have more
+ // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
+ // Think of: The normal Java decoder does not have this flexibility but
+ // other transformers do.
+ String name = charset.displayName();
+ if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
+ {
+ logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
+ charset = Charset.forName("UTF-16");
+ is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
+ {
+ boolean bomRead;
+ boolean switchByteOrder;
+ boolean evenByte = true;
+
+ @Override
+ public int read(byte[] bytes, int off, int len) throws IOException
+ {
+ int i = 0;
+ int b = 0;
+ for (; i oddZeros)
+ {
+ if (bytes[0] == FF && bytes[1] == FE)
+ {
+ switchByteOrder = true;
+ switchBom = true;
+ logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
+ }
+ else
+ {
+ logger.debug("More even zero bytes, so normal read for big-endian");
+ }
+ }
+ else
+ {
+ if (bytes[0] == FE && bytes[1] == FF)
+ {
+ switchBom = true;
+ logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
+ }
+ else
+ {
+ switchByteOrder = true;
+ logger.debug("More odd zero bytes, so switch bytes from little-endian");
+ }
+ }
+
+ if (switchBom)
+ {
+ byte b = bytes[0];
+ bytes[0] = bytes[1];
+ bytes[1] = b;
+ }
+
+ for (int i = end-1; i>=0; i--)
+ {
+ unread(bytes[i]);
+ }
+ }
+
+ if (switchByteOrder)
+ {
+ if (evenByte)
+ {
+ int b1 = super.read();
+ int b2 = super.read();
+ if (b1 != -1)
+ {
+ unread(b1);
+ }
+ if (b2 != -1)
+ {
+ unread(b2);
+ }
+ }
+ evenByte = !evenByte;
+ }
+
+ return super.read();
+ }
+
+ // Counts the number of even or odd 00 bytes
+ private int countZeros(byte[] b, int offset)
+ {
+ int count = 0;
+ for (int i=offset; i STANDARD_14 = new HashMap<>();
+
+        static
+        {
+            // Register each font under its own base font name, in the same order as before.
+            final PDType1Font[] standardFonts = {
+                PDType1Font.TIMES_ROMAN,
+                PDType1Font.TIMES_BOLD,
+                PDType1Font.TIMES_ITALIC,
+                PDType1Font.TIMES_BOLD_ITALIC,
+                PDType1Font.HELVETICA,
+                PDType1Font.HELVETICA_BOLD,
+                PDType1Font.HELVETICA_OBLIQUE,
+                PDType1Font.HELVETICA_BOLD_OBLIQUE,
+                PDType1Font.COURIER,
+                PDType1Font.COURIER_BOLD,
+                PDType1Font.COURIER_OBLIQUE,
+                PDType1Font.COURIER_BOLD_OBLIQUE,
+                PDType1Font.SYMBOL,
+                PDType1Font.ZAPF_DINGBATS
+            };
+            for (PDType1Font standardFont : standardFonts)
+            {
+                STANDARD_14.put(standardFont.getBaseFont(), standardFont);
+            }
+        }
+ //duplicating until here
+
+        /**
+         * Lays the given text out on PDF pages. Based on the code in TextToPDF with the addition
+         * of checks for page limits.
+         *
+         * Each input line is trimmed, split on single spaces and word-wrapped to the page width
+         * inside a 40 unit margin. Once {@code pageLimit} pages exist the remaining text is
+         * dropped. Input with no lines at all produces a document with a single blank page.
+         *
+         * @param text source of the text; wrapped in a {@link BufferedReader} if it is not one already
+         * @param pageLimit maximum number of pages to create; zero or less means no limit
+         * @return the new document; the calling code must close it once finished with it
+         * @throws IOException if reading the text or writing page content fails; the partially
+         *         built document is closed before the exception is rethrown
+         */
+        public PDDocument createPDFFromText(Reader text, int pageLimit)
+            throws IOException
+        {
+            PDDocument doc = null;
+            int pageCount = 0;
+            try
+            {
+                final int margin = 40;
+                float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
+
+                //calculate font height and increase by 5 percent.
+                height = height * getFontSize() * 1.05f;
+                doc = new PDDocument();
+                BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
+                String nextLine;
+                PDPage page = new PDPage();
+                PDPageContentStream contentStream = null;
+                // Starts below the margin so that the first drawn line opens the first page.
+                float y = -1;
+                float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
+
+                // There is a special case of creating a PDF document from an empty string.
+                boolean textIsEmpty = true;
+
+                outer:
+                while ((nextLine = data.readLine()) != null)
+                {
+                    // The input text is nonEmpty. New pages will be created and added
+                    // to the PDF document as they are needed, depending on the length of
+                    // the text.
+                    textIsEmpty = false;
+
+                    String[] lineWords = nextLine.trim().split(" ");
+                    int lineIndex = 0;
+                    while (lineIndex < lineWords.length)
+                    {
+                        final StringBuilder nextLineToDraw = new StringBuilder();
+                        float lengthIfUsingNextWord = 0;
+                        // Always take at least one word, then keep adding words while they fit.
+                        do
+                        {
+                            nextLineToDraw.append(lineWords[lineIndex]);
+                            nextLineToDraw.append(" ");
+                            lineIndex++;
+                            if (lineIndex < lineWords.length)
+                            {
+                                String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
+                                lengthIfUsingNextWord =
+                                    (getFont().getStringWidth(
+                                        lineWithNextWord) / 1000) * getFontSize();
+                            }
+                        }
+                        while (lineIndex < lineWords.length &&
+                               lengthIfUsingNextWord < maxStringLength);
+                        if (y < margin)
+                        {
+                            // pageCount is only advanced while a positive limit is being enforced.
+                            if (pageLimit > 0 && (pageCount++ >= pageLimit))
+                            {
+                                break outer;
+                            }
+
+                            // We have crossed the end-of-page boundary and need to extend the
+                            // document by another page.
+                            page = new PDPage();
+                            doc.addPage(page);
+                            if (contentStream != null)
+                            {
+                                contentStream.endText();
+                                contentStream.close();
+                            }
+                            contentStream = new PDPageContentStream(doc, page);
+                            contentStream.setFont(getFont(), getFontSize());
+                            contentStream.beginText();
+                            y = page.getMediaBox().getHeight() - margin + height;
+                            contentStream.newLineAtOffset(margin, y);
+                        }
+
+                        if (contentStream == null)
+                        {
+                            throw new IOException("Error:Expected non-null content stream.");
+                        }
+                        contentStream.newLineAtOffset(0, -height);
+                        y -= height;
+                        contentStream.showText(nextLineToDraw.toString());
+                    }
+                }
+
+                // If the input text was the empty string, then the above while loop will have short-circuited
+                // and we will not have added any PDPages to the document.
+                // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
+                if (textIsEmpty)
+                {
+                    doc.addPage(page);
+                }
+
+                if (contentStream != null)
+                {
+                    contentStream.endText();
+                    contentStream.close();
+                }
+            }
+            catch (IOException | RuntimeException e)
+            {
+                // The caller never receives the document on failure, so close it here; this also
+                // covers unchecked failures, which previously left the document open.
+                if (doc != null)
+                {
+                    doc.close();
+                }
+                throw e;
+            }
+            return doc;
+        }
+ }
+
+    /**
+     * Parses a request parameter value as a decimal integer.
+     *
+     * @param s text to parse
+     * @param paramName name of the request parameter, used in the error message
+     * @return the parsed value
+     * @throws IllegalArgumentException if {@code s} is not a valid integer; the underlying
+     *         {@link NumberFormatException} is kept as the cause
+     */
+    private int parseInt(String s, String paramName)
+    {
+        try
+        {
+            // parseInt yields the primitive directly; valueOf would box and then unbox.
+            return Integer.parseInt(s);
+        }
+        catch (NumberFormatException e)
+        {
+            throw new IllegalArgumentException(paramName + " parameter must be an integer.", e);
+        }
+    }
+}
diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties
index 98a0fc20..5b7ee308 100644
--- a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/HtmlMetadataExtractor_metadata_extract.properties
@@ -1,12 +1,12 @@
-#
-# HtmlMetadataExtractor - default mapping
-#
-# author: Derek Hulley
-
-# Namespaces
-namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
-
-# Mappings
-author=cm:author
-title=cm:title
-description=cm:description
+#
+# HtmlMetadataExtractor - default mapping
+#
+# author: Derek Hulley
+
+# Namespaces
+namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
+
+# Mappings
+author=cm:author
+title=cm:title
+description=cm:description
diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java b/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
index 775a4408..4eecb8a7 100644
--- a/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
+++ b/alfresco-transform-misc/alfresco-transform-misc/src/test/java/org/alfresco/transformer/transformers/HtmlParserContentTransformerTest.java
@@ -1,162 +1,162 @@
-/*
- * #%L
- * Alfresco Transform Core
- * %%
- * Copyright (C) 2005 - 2022 Alfresco Software Limited
- * %%
- * This file is part of the Alfresco software.
- * -
- * If the software was purchased under a paid Alfresco license, the terms of
- * the paid license agreement will prevail. Otherwise, the software is
- * provided under the following open source license terms:
- * -
- * Alfresco is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * -
- * Alfresco is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- * -
- * You should have received a copy of the GNU Lesser General Public License
- * along with Alfresco. If not, see .
- * #L%
- */
-package org.alfresco.transformer.transformers;
-
-import org.junit.jupiter.api.Test;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.nio.file.Files;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-public class HtmlParserContentTransformerTest
-{
- private static final String SOURCE_MIMETYPE = "text/html";
- private static final String TARGET_MIMETYPE = "text/plain";
-
- HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
-
- /**
- * Checks that we correctly handle text in different encodings,
- * no matter if the encoding is specified on the Content Property
- * or in a meta tag within the HTML itself. (ALF-10466)
- *
- * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
- * so we must be careful when checking the returned text
- */
- @Test
- public void testEncodingHandling() throws Exception
- {
- final String NEWLINE = System.getProperty("line.separator");
- final String TITLE = "Testing!";
- final String TEXT_P1 = "This is some text in English";
- final String TEXT_P2 = "This is more text in English";
- final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
- String partA = "" + TITLE + "" + NEWLINE;
- String partB = "" + TEXT_P1 + "
" + NEWLINE +
- "" + TEXT_P2 + "
" + NEWLINE +
- "" + TEXT_P3 + "
" + NEWLINE;
- String partC = "";
- final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
-
- File tmpS = null;
- File tmpD = null;
-
- try
- {
- // Content set to ISO 8859-1
- tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
- writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
-
- tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-
- Map parameters = new HashMap<>();
- parameters.put(SOURCE_ENCODING, "ISO-8859-1");
- transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
-
- assertEquals(expected, readFromFile(tmpD, "UTF-8"));
- tmpS.delete();
- tmpD.delete();
-
- // Content set to UTF-8
- tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
- writeToFile(tmpS, partA + partB + partC, "UTF-8");
-
- tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
- parameters = new HashMap<>();
- parameters.put(SOURCE_ENCODING, "UTF-8");
- transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
- assertEquals(expected, readFromFile(tmpD, "UTF-8"));
- tmpS.delete();
- tmpD.delete();
-
- // Content set to UTF-16
- tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
- writeToFile(tmpS, partA + partB + partC, "UTF-16");
-
- tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
- parameters = new HashMap<>();
- parameters.put(SOURCE_ENCODING, "UTF-16");
- transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
- assertEquals(expected, readFromFile(tmpD, "UTF-8"));
- tmpS.delete();
- tmpD.delete();
-
- // Note - since HTML Parser 2.0 META tags specifying the
- // document encoding will ONLY be respected if the original
- // content type was set to ISO-8859-1.
- //
- // This means there is now only one test which we can perform
- // to ensure that this now-limited overriding of the encoding
- // takes effect.
-
- // Content set to ISO 8859-1, meta set to UTF-8
- tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
- String str = partA +
- "" +
- partB + partC;
-
- writeToFile(tmpS, str, "UTF-8");
-
- tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
-
- parameters = new HashMap<>();
- parameters.put(SOURCE_ENCODING, "ISO-8859-1");
- transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
- assertEquals(expected, readFromFile(tmpD, "UTF-8"));
- tmpS.delete();
- tmpD.delete();
-
- // Note - we can't test UTF-16 with only a meta encoding,
- // because without that the parser won't know about the
- // 2 byte format so won't be able to identify the meta tag
- }
- finally
- {
- if (tmpS != null && tmpS.exists()) tmpS.delete();
- if (tmpD != null && tmpD.exists()) tmpD.delete();
- }
- }
-
- private void writeToFile(File file, String content, String encoding) throws Exception
- {
- try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
- {
- ow.append(content);
- }
- }
-
- private String readFromFile(File file, final String encoding) throws Exception
- {
- return new String(Files.readAllBytes(file.toPath()), encoding);
- }
-}
\ No newline at end of file
+/*
+ * #%L
+ * Alfresco Transform Core
+ * %%
+ * Copyright (C) 2005 - 2022 Alfresco Software Limited
+ * %%
+ * This file is part of the Alfresco software.
+ * -
+ * If the software was purchased under a paid Alfresco license, the terms of
+ * the paid license agreement will prevail. Otherwise, the software is
+ * provided under the following open source license terms:
+ * -
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * -
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ * -
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see .
+ * #L%
+ */
+package org.alfresco.transformer.transformers;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class HtmlParserContentTransformerTest
+{
+ private static final String SOURCE_MIMETYPE = "text/html";
+ private static final String TARGET_MIMETYPE = "text/plain";
+
+ HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
+
+ /**
+ * Checks that we correctly handle text in different encodings,
+ * no matter if the encoding is specified on the Content Property
+ * or in a meta tag within the HTML itself. (ALF-10466)
+ *
+ * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
+ * so we must be careful when checking the returned text
+ */
+ @Test
+ public void testEncodingHandling() throws Exception
+ {
+ final String NEWLINE = System.getProperty("line.separator");
+ final String TITLE = "Testing!";
+ final String TEXT_P1 = "This is some text in English";
+ final String TEXT_P2 = "This is more text in English";
+ final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
+ String partA = "" + TITLE + "" + NEWLINE;
+ String partB = "" + TEXT_P1 + "
" + NEWLINE +
+ "" + TEXT_P2 + "
" + NEWLINE +
+ "" + TEXT_P3 + "
" + NEWLINE;
+ String partC = "";
+ final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
+
+ File tmpS = null;
+ File tmpD = null;
+
+ try
+ {
+ // Content set to ISO 8859-1
+ tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+ writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
+
+ tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+
+ Map parameters = new HashMap<>();
+ parameters.put(SOURCE_ENCODING, "ISO-8859-1");
+ transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+
+ assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+ tmpS.delete();
+ tmpD.delete();
+
+ // Content set to UTF-8
+ tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+ writeToFile(tmpS, partA + partB + partC, "UTF-8");
+
+ tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+ parameters = new HashMap<>();
+ parameters.put(SOURCE_ENCODING, "UTF-8");
+ transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+ assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+ tmpS.delete();
+ tmpD.delete();
+
+ // Content set to UTF-16
+ tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+ writeToFile(tmpS, partA + partB + partC, "UTF-16");
+
+ tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+ parameters = new HashMap<>();
+ parameters.put(SOURCE_ENCODING, "UTF-16");
+ transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+ assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+ tmpS.delete();
+ tmpD.delete();
+
+ // Note - since HTML Parser 2.0 META tags specifying the
+ // document encoding will ONLY be respected if the original
+ // content type was set to ISO-8859-1.
+ //
+ // This means there is now only one test which we can perform
+ // to ensure that this now-limited overriding of the encoding
+ // takes effect.
+
+ // Content set to ISO 8859-1, meta set to UTF-8
+ tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
+ String str = partA +
+ "" +
+ partB + partC;
+
+ writeToFile(tmpS, str, "UTF-8");
+
+ tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
+
+ parameters = new HashMap<>();
+ parameters.put(SOURCE_ENCODING, "ISO-8859-1");
+ transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
+ assertEquals(expected, readFromFile(tmpD, "UTF-8"));
+ tmpS.delete();
+ tmpD.delete();
+
+ // Note - we can't test UTF-16 with only a meta encoding,
+ // because without that the parser won't know about the
+ // 2 byte format so won't be able to identify the meta tag
+ }
+ finally
+ {
+ if (tmpS != null && tmpS.exists()) tmpS.delete();
+ if (tmpD != null && tmpD.exists()) tmpD.delete();
+ }
+ }
+
+ private void writeToFile(File file, String content, String encoding) throws Exception
+ {
+ try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
+ {
+ ow.append(content);
+ }
+ }
+
+ private String readFromFile(File file, final String encoding) throws Exception
+ {
+ return new String(Files.readAllBytes(file.toPath()), encoding);
+ }
+}
diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/main/resources/licenses/3rd-party/Perl-Artistic-license.txt b/alfresco-transform-tika/alfresco-transform-tika-boot/src/main/resources/licenses/3rd-party/Perl-Artistic-license.txt
index 699679e8..4ff3f423 100644
--- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/main/resources/licenses/3rd-party/Perl-Artistic-license.txt
+++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/main/resources/licenses/3rd-party/Perl-Artistic-license.txt
@@ -1,127 +1,127 @@
- The "Artistic License"
-
- Preamble
-
-The intent of this document is to state the conditions under which a
-Package may be copied, such that the Copyright Holder maintains some
-semblance of artistic control over the development of the package,
-while giving the users of the package the right to use and distribute
-the Package in a more-or-less customary fashion, plus the right to make
-reasonable modifications.
-
-Definitions:
-
- "Package" refers to the collection of files distributed by the
- Copyright Holder, and derivatives of that collection of files
- created through textual modification.
-
- "Standard Version" refers to such a Package if it has not been
- modified, or has been modified in accordance with the wishes
- of the Copyright Holder as specified below.
-
- "Copyright Holder" is whoever is named in the copyright or
- copyrights for the package.
-
- "You" is you, if you're thinking about copying or distributing
- this Package.
-
- "Reasonable copying fee" is whatever you can justify on the
- basis of media cost, duplication charges, time of people involved,
- and so on. (You will not be required to justify it to the
- Copyright Holder, but only to the computing community at large
- as a market that must bear the fee.)
-
- "Freely Available" means that no fee is charged for the item
- itself, though there may be fees involved in handling the item.
- It also means that recipients of the item may redistribute it
- under the same conditions they received it.
-
-1. You may make and give away verbatim copies of the source form of the
-Standard Version of this Package without restriction, provided that you
-duplicate all of the original copyright notices and associated disclaimers.
-
-2. You may apply bug fixes, portability fixes and other modifications
-derived from the Public Domain or from the Copyright Holder. A Package
-modified in such a way shall still be considered the Standard Version.
-
-3. You may otherwise modify your copy of this Package in any way, provided
-that you insert a prominent notice in each changed file stating how and
-when you changed that file, and provided that you do at least ONE of the
-following:
-
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or
- an equivalent medium, or placing the modifications on a major archive
- site such as uunet.uu.net, or by allowing the Copyright Holder to include
- your modifications in the Standard Version of the Package.
-
- b) use the modified Package only within your corporation or organization.
-
- c) rename any non-standard executables so the names do not conflict
- with standard executables, which must also be provided, and provide
- a separate manual page for each non-standard executable that clearly
- documents how it differs from the Standard Version.
-
- d) make other distribution arrangements with the Copyright Holder.
-
-4. You may distribute the programs of this Package in object code or
-executable form, provided that you do at least ONE of the following:
-
- a) distribute a Standard Version of the executables and library files,
- together with instructions (in the manual page or equivalent) on where
- to get the Standard Version.
-
- b) accompany the distribution with the machine-readable source of
- the Package with your modifications.
-
- c) give non-standard executables non-standard names, and clearly
- document the differences in manual pages (or equivalent), together
- with instructions on where to get the Standard Version.
-
- d) make other distribution arrangements with the Copyright Holder.
-
-5. You may charge a reasonable copying fee for any distribution of this
-Package. You may charge any fee you choose for support of this
-Package. You may not charge a fee for this Package itself. However,
-you may distribute this Package in aggregate with other (possibly
-commercial) programs as part of a larger (possibly commercial) software
-distribution provided that you do not advertise this Package as a
-product of your own. You may embed this Package's interpreter within
-an executable of yours (by linking); this shall be construed as a mere
-form of aggregation, provided that the complete Standard Version of the
-interpreter is so embedded.
-
-6. The scripts and library files supplied as input to or produced as
-output from the programs of this Package do not automatically fall
-under the copyright of this Package, but belong to whoever generated
-them, and may be sold commercially, and may be aggregated with this
-Package. If such scripts or library files are aggregated with this
-Package via the so-called "undump" or "unexec" methods of producing a
-binary executable image, then distribution of such an image shall
-neither be construed as a distribution of this Package nor shall it
-fall under the restrictions of Paragraphs 3 and 4, provided that you do
-not represent such an executable image as a Standard Version of this
-Package.
-
-7. C subroutines (or comparably compiled subroutines in other
-languages) supplied by you and linked into this Package in order to
-emulate subroutines and variables of the language defined by this
-Package shall not be considered part of this Package, but are the
-equivalent of input as in Paragraph 6, provided these subroutines do
-not change the language in any way that would cause it to fail the
-regression tests for the language.
-
-8. Aggregation of this Package with a commercial distribution is always
-permitted provided that the use of this Package is embedded; that is,
-when no overt attempt is made to make this Package's interfaces visible
-to the end user of the commercial distribution. Such use shall not be
-construed as a distribution of this Package.
-
-9. The name of the Copyright Holder may not be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
-IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
-WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-
- The End
\ No newline at end of file
+ The "Artistic License"
+
+ Preamble
+
+The intent of this document is to state the conditions under which a
+Package may be copied, such that the Copyright Holder maintains some
+semblance of artistic control over the development of the package,
+while giving the users of the package the right to use and distribute
+the Package in a more-or-less customary fashion, plus the right to make
+reasonable modifications.
+
+Definitions:
+
+ "Package" refers to the collection of files distributed by the
+ Copyright Holder, and derivatives of that collection of files
+ created through textual modification.
+
+ "Standard Version" refers to such a Package if it has not been
+ modified, or has been modified in accordance with the wishes
+ of the Copyright Holder as specified below.
+
+ "Copyright Holder" is whoever is named in the copyright or
+ copyrights for the package.
+
+ "You" is you, if you're thinking about copying or distributing
+ this Package.
+
+ "Reasonable copying fee" is whatever you can justify on the
+ basis of media cost, duplication charges, time of people involved,
+ and so on. (You will not be required to justify it to the
+ Copyright Holder, but only to the computing community at large
+ as a market that must bear the fee.)
+
+ "Freely Available" means that no fee is charged for the item
+ itself, though there may be fees involved in handling the item.
+ It also means that recipients of the item may redistribute it
+ under the same conditions they received it.
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications
+derived from the Public Domain or from the Copyright Holder. A Package
+modified in such a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided
+that you insert a prominent notice in each changed file stating how and
+when you changed that file, and provided that you do at least ONE of the
+following:
+
+ a) place your modifications in the Public Domain or otherwise make them
+ Freely Available, such as by posting said modifications to Usenet or
+ an equivalent medium, or placing the modifications on a major archive
+ site such as uunet.uu.net, or by allowing the Copyright Holder to include
+ your modifications in the Standard Version of the Package.
+
+ b) use the modified Package only within your corporation or organization.
+
+ c) rename any non-standard executables so the names do not conflict
+ with standard executables, which must also be provided, and provide
+ a separate manual page for each non-standard executable that clearly
+ documents how it differs from the Standard Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or
+executable form, provided that you do at least ONE of the following:
+
+ a) distribute a Standard Version of the executables and library files,
+ together with instructions (in the manual page or equivalent) on where
+ to get the Standard Version.
+
+ b) accompany the distribution with the machine-readable source of
+ the Package with your modifications.
+
+ c) give non-standard executables non-standard names, and clearly
+ document the differences in manual pages (or equivalent), together
+ with instructions on where to get the Standard Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package. You may charge any fee you choose for support of this
+Package. You may not charge a fee for this Package itself. However,
+you may distribute this Package in aggregate with other (possibly
+commercial) programs as part of a larger (possibly commercial) software
+distribution provided that you do not advertise this Package as a
+product of your own. You may embed this Package's interpreter within
+an executable of yours (by linking); this shall be construed as a mere
+form of aggregation, provided that the complete Standard Version of the
+interpreter is so embedded.
+
+6. The scripts and library files supplied as input to or produced as
+output from the programs of this Package do not automatically fall
+under the copyright of this Package, but belong to whoever generated
+them, and may be sold commercially, and may be aggregated with this
+Package. If such scripts or library files are aggregated with this
+Package via the so-called "undump" or "unexec" methods of producing a
+binary executable image, then distribution of such an image shall
+neither be construed as a distribution of this Package nor shall it
+fall under the restrictions of Paragraphs 3 and 4, provided that you do
+not represent such an executable image as a Standard Version of this
+Package.
+
+7. C subroutines (or comparably compiled subroutines in other
+languages) supplied by you and linked into this Package in order to
+emulate subroutines and variables of the language defined by this
+Package shall not be considered part of this Package, but are the
+equivalent of input as in Paragraph 6, provided these subroutines do
+not change the language in any way that would cause it to fail the
+regression tests for the language.
+
+8. Aggregation of this Package with a commercial distribution is always
+permitted provided that the use of this Package is embedded; that is,
+when no overt attempt is made to make this Package's interfaces visible
+to the end user of the commercial distribution. Such use shall not be
+construed as a distribution of this Package.
+
+9. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+ The End
diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.key b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.key
index 372c688cdd97a09659bb94354b5a58e16ca299f2..bbce2007476001ace0e7992d2a10b7fdc78873cd 100644
GIT binary patch
delta 10537
zcmc(F2UJx@xBr=$bKx=|9i%rAD<~?cC?W{9*gF=)f(6AG3-*8-HFoqyj$N^0FQ|vu
zqNo_`T|ZmwQHZfcC5rHV`ur2@II=2cY~ANbZ|E&a#!FN?*I?7?_!JJg_sdzOXze
zsB$%CBkjE0jVt^dosEa2saF*@XiTjVfqGXJe`vZ~WhqT)STz#}UcEaJy@K5=2&pe|R-|8FCHCL>G@YP|!$R_#%!H`bnk4mIiw
zBb6(|S_DX?@#t6=W@rvCSi=i8W-pZfF(&U(I#Y?%sJb=HTCiA*$VZbo>L%Q)H9S=
z-U*X?$xd1K9nV;?1636n-cn?CQj`y}Q-}H7(OZTrwPKFbms+V)gjGpGCndZ@q4rs_
zQ)xRE(vB$$|9)DErxGQkL{Zk@DO(B2R+P=s^fAHTD7`y;qbPaOWRc$7G_<$7C)9hg
znCdEYeL;K*n&%Wf!W+(Huqrh4}HVHJ=NEj
z%rER$1+{bkKB!as$J$9{R)|!L8BBKXo21c65pG82%~lxZD9CAPdN^SW2JbO3
z4ff>RniK@yKfPj|hTJekC%BlJr?p6vooqW>uOGw?_z0VzKM*%uMbu$x_+>pTYI+5TPiihIuQB$v!>7dT7>dD
ztgM`Z?{1JI)=Up3xG;k;bM$hqe*Y&0?EI`psQ1in5iUJF*49pto^^g1bT+c$B3tLE
zV6mViGU$1wb5}&E^z17=jrPWD+Z`UtN{{Pa&SyQPv!gWCo>LnN2Ab<-a}WX3^PGyN
z-{%R$`pl?0VohtMo+I}haX!AoC7E+`z7^T=DC1wU@>+M~
zNZkb;nWc;`mGO?~zwofIk!p!+(DD$arjNTc^~mgrj=7m#QQK>SQIFTKhsD%vp@h2b
zaGRGaJ3RAzmn%)@w9R(XK@kBPyZ|bJX~yDd=umFSIMg$i{0RdmFCD_|<@`n!L#FL%
z6ns2v|KtJ>9CpiJqwU3taQ->ztE|4_AUnDEu>+BV)%2)*^Cn=j&8k^dc2XW;u}8N1
zuh;sZtbl)C>*cD-M>g1n8`ruakb5n_tI71;0s7t=dnTDs0?`ncv(P
z8}xZE@WWoPm|3r8);=3N6zMb9u@07v?G0vokJ;e}H^pXSGtXh0aw)a;Z^8MHLG$0Mr|{(MElFTQ_1=-_aO#%8sheGGtU;rPhFNEdoNr6Rrs5knKsCvt`F7;UuGP
z*o+FU6>KoZt#Zabv)dDCltWQ@o(12a^pF#NMhOsoMgq7Zv|7U--
z|9fLEyo*aws!eUB*!C{2IQtg}q|IOZ{;9v7ObI{un+gsg`Ao+S)6To}r$eZ_9oZ#i
z%!r1rJ>BQQSx!f*(oWgvxVIDftz#kUm~ykUlhP8Y_W;+6<9*P2;dmTs*Aw}u@0>_R
z-8{c{GubJ@UWCsy{^wCOOg-A})E%QT<$3+jXA~PSJs9za-v8ezW%@mTx(jqIVdo;v
zsrav3)XI|;5SzS{v80>+a<-T?U>kON**lhANmDo?rH|GtTmHl*ZK_L#3%hLG=Ksu67D~iIDrC3U!#J
zPH`%IDgXZK`x571!LW)_v97vW*8Jy2|CHI3u9lzwR<4#x9ovR7r}n8zEKs_pSci0B
z39Dp>aEH=W^7G$@RT7)qi|8%N8jon1^X=(S_&jIlxh@8P<&}$+)c?5J)KaQ-f}4g(
zQ_!`F;Iz6{(OZ^&Ef0S!?T@(G8K3#<$_5YDSo=xR$FaQY5NVosEi~uIwOk8m8F!~S
zwB+R7ok+@}?%|Nm@w@*6@%BDgN4)Nj`hht7s1^~g9xnnh;|ZhS&wTb4bz0#!)KJ(2
zbyyJ=T~4>6Thv_VMIwkrFKnsfx|b!WvtCU_UGX(!a|XZugGNdCBbcu5bBdEtzb>Yo
zZ|I*3QJ4MM7IoVX%^|e-gOS!-R*5HwLnW1ocv6b%rfFCi9lvwRmN2;}h%G~%$1X9c
zEn@5>lX@@C=<=j*5zHB#v?B(V0y4ycMS!ghwq%%(R%XfCf!of=j*zf}WEZIQfi=S}
zu0`0es^BKs;5-L8wH(83S1&HdT#2-^V|zdzw_^^Nhknh8^~FGXeOGpqnw{O)Hb9;m
zYYMKFJL?IqKEQ(=7Vh%uv;jb=^3)yKxNN@V-$I5r(nsVNHvbZ$yQ+SsP-E}&~O
zhOp}e&DcPic4#a1J59{54NIo+X18O5NW~xR;Yz8U=)k@QwCTu(0QPreUV!TzSu=?G
zc47-jba!X=3&@3C80EFrzZ=5~f@T*_gX@v;ESB_7>CR%2{MyOhtS+FeH>(4vlgRo4
zv_yteUq7A5c9MpPec4!$Z~L-^FiHD9$voFfmeZL!&n?)+;A3x>1(4#G2A_Mm(k2bLv0$v
z+JpQ!hV`J%UB_WcQmZna)d0kgXD!jVZahmNS;t9iAjt8P%n7Q`Py3v(chVW%VD-qU
zte+*svSzUsfTCFpuOzz9Y?eup+B=VZO?FhwK&A>=4XLt#{Qxpfn}P2|iiEEI5H3&Ud8{kO8IB)onb^B^OR>|!qockN@lh~F=l
zm7^(svmfH~-XYeNr0N~{oB@-Lve6WvH^X9=)OVa^PjQD^{PtFlB&n@D&ypcyb%9~`
z)N5Q|+00tThwcBdG5sk##2vvbytg)I0hGFXVd24AqHg%Ug15&G{yq0siuv
zERp0s-DaK1!jAV?4oJ)U%$>UVKVaSzv#7@`hQy})3TFZLJY_A3|AqQ2u7CZKIZ*55
zSImx_w%{#$M0DzVq!#e!V%CzhIDKRdXt1`W%opU>rOc0-Gs@T!!m5mSCQRpiJPq>E
zz&8=S-jdI@0EXJ~5YSqp9fuF}({|jvFAQ_yH^6P?%(2L|ADwwFV1x@#hs+xnZcVDn
z+;}Q=P4(ck%wBl#%B0!Ni${_{U3@v#x}N9D=`hi|`txX#UJ}4dK`ySq<4MWuAU+=J
zSx>3XX@AzIgm6yc(`sGgv?#tAs8x;TR{&+vd>CL@499zjZfL^Inf-;ozYHgKJiWqKds5R6{?s}@5ne-Q`@S4+fcp1+IV}}E
zsXw1g>|c}kNx+T)oN}e#KpsHc>4W%skktosdIQ$7lQ}Jg8_7JM>{yk;Cjz{NaeUU%
zx(?%EXq-EYuO=%(zTwrWPmd8Ct6Hx+lJ^y!()Meo;$ObH`dff;xBH8Ssc~;u|Dy$B
z_#<1X?MdY^5PYA?-%!7zv3#hMmu}wLoj0*4M{0e>!6d1DH;&W3GHyJlRA&?TS`uDA
zk<*^>VIpOZmNbb+p>h2rei@DWxHP_jEb^OTwrt~3e0qaa-ZWkb+`?(xQrOG1Cac=y
z`d0IIkJ?b}@e634$y2G**x9@<;jOv6En&3`KAL)GE#M15)>+8k(HLL&{3+KTEavT*
zQNEp?eShd)sWn)N+>_dcr5sM^x^{};{lX%QJ-Vo%Tbv%=j^8eI`EBpC8S|R-ob4DvV={Rx)
zGg)+ke+zKV=Z^s&@_7)z|0KsQp+%qMeh}?@k~bkIZupr;!^hgwQ+zHU^)$yBqis5E
zUe^I<_$<=5?JUm*bU9~s+xm0nb*g2b=T)HL(s^EiG`+jP>CUJvyTmKm$l}e7YL+yd
zkv}q8>gO&Y9nrG+3N1n{`YN9T`Eysf6GivIb?!l}pKkE0jxs7iUzL!%^w&~{XT$pJ
z-RX*J%RjJ~q~78te@^SN?p?kLhs^`@N9U|ST3tF36+YmupTlpFyhQ_HOux2!^hJbgp*)_!uK!Z>*
z6Fp9Z3K!~etCr{ns8d_?2K-Q4^h0BDZE=7a_k@XQAS1#t8<0NrL_LsQ>xo>Dx_yM$
zMs|M@%0jN;kdD!Qj27+DVjUxnQpdARL`Q&stiV>UWycCSIRfIuQ);wsCSC$cnhESt
zT3~Z=!A>^F-|sW?Uhwc|KiT~@TI!;OXn>KlsILTG8nleBL@E?mwh}YQj`^)cZBlfs
zjo1ZpbX&2Kgq=H>r)}&gwu7s86h2t1+O>G{iSE>0(DzWib`Nm|xUZL(ODg6iAVN4A
zdkgcvIXqDe2lsxWSOD(GKF|a*pr2R+IMGkgihj~h^dU{nlf-&z{e6I#3K%g^V6|yw
z1I?jM93*BUqx8Zdg1(MwRa4+XsXa^)v`r=q6HU>$ewfIn#x}zRy#s9+E|S2t`9>6z
ze#1ynMALdTS}d`V)3zPF?mHkpXj!1tGgHM!>bG#Lz?#&bjunB_q1<>e1ReBo6GcO$
z@0VeE#Pz}7BPG%4*i3;vS#z8vXzN-qOJqPaV78#2d$dXOFgL0FGEeLSH*>zIOI^-o
zi2Z<>3(TqBd!cXzmAViKDRpTQv+z-UamR%fCSAULIMUH=-iz%GuD@>o5xd(DVl6~k
zFG8TvV|1pt0J5njrqBv6SSG>%Wy>gP`kUn*n!B=bGd6GFMbUqw|jf@3v5NP}C=(4~Hm$^`gUOQm=SK
z+$U9*$3zb&nY6iRMA+KG1xsQb`wtbxQ{G|X$HZpH`yCg%$&$+_%!@={bW&U)^OH}D
z2n$@t8u?;3JT3f5=FAyUpSE3#O9C6MHsO-^1Q>VOysccmEP9X~HLi+STls@wzMJ2i
zd#Uce(<7xe`Ks9$Td$gzT+lT^XRAK{x^Ty0_%bEFypNUB4iyO-ecnS+g(i~wScF2j
z@Uc0;^^9K;cd!RMqb)!Up(ncSf%?z8~`D3i9`2$vMbn`xQcASck$Cr;7QaImgVwK{~_gXi3?t^pd
zjW`JXrEf$n3U+9*p!eGS#i9$q;ZJi;?fp~Kg{k`UkK%Xo;q5ZoO!lhkK%#~Ubq3%M
zp}vDmk*X%r7TU^6mH6py;E9MH$sKynb$6FqU#ZeAm?zatK)khj33AnK)P|&cu&vsN
zv}HS}DIi@PRr62=S2e>&77u>a?A@GQ^YhXyCRWoVlxY?T{^F)Xa#4l73RlaaCz&
zN-D4Vkwd4utH%H>J=C{=#-1u|viY8BQ*vuLZ?!z2qPI%xz}-hJpol&6Rn0HhKlrPo
z(JBMf)_`sS>N!BA3Th!_90OH)FVibkROzCxO|Gociqb2q6~Mh!S)Iqq%l92lo%KK1
zp~V|ruYRV!-b0
zr8ke*V6_UqgYPK8t7HvzKiOS9)ZDT@R5jnsGwP@pC|rH&tKR{h)K_ECuUdp!iMSmb
zspkO|qSSP7*F~xH@kbvWt=bSyiBap}gZAX*({b=OQE!8_JXWnouDjY)y#d(XOr-C-e0=Fqmv7@(DcaUoD_A0FveOw3i
z2n98wvr5+uy+;?-e4*IaRhxbt)PmtQQcs0;XzAqo>
zm~q_Js?oZY&I1EW>wSE9ukDeaq<%DB9R-D2cn=JOJ-LTE3F_^8s-I{W+XVGBAxl*4
zNy@R0igQ3e-A8RiY}rpO5BKN+1Jw#-UDLtpuf+diZT&y%w1FPAkqfTJja2FPlYf<^
z)A+XbXq?&2N#oTBl2|=KrM*?VHA(#uoir-P
zq?@`kz{<^es&6g)&@H`{9O
z?3Ev+eWQa+nIZKtvygz5WvyRWqq|Jm9=suRd+4>$C*MqIZ=uYUHQU=NKeo1rEFFB`
zD+0g#>tc>-ep~N6_j6JO%~#KgG}-73Q|>%(t~`WYPaKSjca<~hc^@mI@(<~zT$c9E
zD|}$%*5FslD%e;gjmCWMTDJMtRuRf$8KE4N9h9Ah2uvXt&9^ayG8lOQIKc$~?m4zvf
zWSFwk!36_6g|gEq#WGI03#q-3cn`QUW}WA#Xr&0qX_qrM!pPr%;ZfEff7Tlw&ednVQBT@ncw|
z@(eRR2>b&ACsCe3U@{b(0_wm=;AP=?qfAn^15dzXuQ5n2p8s;CQi|t`vL-x60cZ~2
zxWQO^<%#t3DX>Ab?0qtf2x(1ts^+e5m3z>Uwk3MIvcL6qWr4q=vO3sN$#w{Unt3P(
zQO+QS+o9qn=6M~ojv;?%o#Da0Fu}#=Yb$s;(j3Fz&{P1CD=6go^C;xkr!oa@vNv1z
zK(;eS>k=$8q1={f_Rwo@PV7T4tdQ+hrr`IMH*iND$|tlvm61N9jmk$no`c1EPPMp=Wh2W1+{a+Gx_8&H12#CD+HZU0Hi>ejWecDyl|w=xrL
zCD6SaZM5)jp)5l=fbs!`Z%D(#1Nm5)4U0ZPFdu?CczJj}hvzjYmq9Q7i>8IO33M@N
z3eaBQLg0EJ@$=XxK=oSyjsKSq||4nT+0t1cI3{9z;}ZW6puN#?vc&FS%2Wu{!6o4w2H6Qh1N2u
zxJJ>dXB##?#V5U;dBmB3}+_HnH5l`i#TZ&eHKC?}>A4}4Xo9G(42+;6lq><|vE
z_v#6ncd>ej?l_}L@ITVrfQHW|7Nv5qa7#@(t(~O3efD1-L
zIKa}%uo8_MtPB&GyWEM74R3cmYZ7<$w}#PcyA8S7X-j10`V_n_TO0O5?3J~lD(Uhn
zXRropm2C|xK<3*TJPi7MC&N$*MjKbdJ-qxZ+LKl%wPph)zVq(axiWB~jmuoAH*qs0
zlY;H;2D&!tE4&T#0m`F1_lIBvu|jicc{k=4frOZMMWCup0X^`a2+tRks%6da~c_ZC`~WL7&?Qj+{7?|
zCOtpakO25S*031d8F7a8uw45`bF)y57KRQqktwYV=68&u)`rE7|Le(kDbZl5cK&Db
z{;#BX*&RhyY&5@%YLuRmWN@OmOdeoxB@ZtiWEexUv>$4~Jyd%>)UX!Z1t|uDt;7%e
L{e~G7MUnpj+%aT8
delta 10511
zcmc(FcT^Thv;Oq-jJ$+KKu|J>m=I7@P*Fe;Osl&FP(cM06-9JS=!yZeD{DaCm02Sy
ziaCKgm{`opnsZvSt{B#|Dk|ddsn`AX^Sl4tdw%Dhb3JrFQ`Obg)z#J2J@dNsokR9J
zhs+WtwmpvJOyox@qZweI8mcg1EHgg<;#Z5&Dxi^#4HH6=vcerCsY6RU3t^11eS?N}
zc2i{tpSFJlqQF5VVqcX(kS+S$(n0bJiXhoQ$J1z5opzD_U!6Asj47_)qrTy~>A%Qk
zxt#z%(cRleI0U--aKl}X#;je{F?~NrVC>Hkv9dw*kqy?FOtL|nou#2r7`M5*qthAp
zrqFNY(U68|;aQ0wx@vF!kL6b3uqOYYcQmoSGBCe-tvbv~RCIMREcLXrH|!I}p0ypJ
zF|~Fu>aDe_Lesh0i^%T5b&MdGZ!8f#{TwTZ6gzI1WPBOyVqD<27R*rpG)V07uLh?Y
ze-H3SZB@VOKSmrQBaFfI-+~{|U?l3?1~bsXzu^#4xiqk?mynvLMmR9F4JK=e$y%8v
zTmEITGnMs_HU!y1I^Vg`8tQbQu^)&RjqlJZHCM<}ZA`=548Dx!olSe7wriF_`kT4c
z5+=tMkYguVNS`NF?)YE)TJUA4{JYKS(281zm7^BT_o6Ot-UYRL3vzXAix?X4D^D7W
z!VA%y-m*UGyp~wve6I*sGBGT&7wV;vzF-wcUNi$N?@+_cSp+4UORH+E5>h}&>$Yzl
zTZ|kzd|;nc>vh6-JBo&C7+n*xJ)?V(mfc<7q0Z~(jrwx82WZ&W{R49lR+ffiRVvr4
zW3z!-IN3R`vGSiL@=wHepgzt$+oMkEIT_NAdj=58HEvQb5w+ru-3fEH$49aVYR9vp
zBO5DrbF=VYWX|lSFEUpqbMvyeC@H8+qV^0CRo;>LcVyE1f1VcMsZ8=Olcbe+
zS}Xalm8A8;_#v*ULAZ4uCP@c`ao&n(;}7vxE>Q2n!hIF!p30?HB8n|T58JgE*q9yP
zOc7F^Fy4w!fT7J3+R7H<470kztZu_%W1m29kN5E)^GgzIqpsAqH|o^B5fz1$;V&dZ
zCbNj`?JkM@V|~YxSVaFzs9lmeI*Y1a-9l>TSWSMZEG`#S|M}}LjMI{m6|o$0uajs=
z>kf#=is#Qw4kHbx2VqpBnu2A{k54J*mcq8i^BP~O`~gFqD^w6(#W@)9$5AUN+I`Zp
zYl>0pLv!!7KlfZrxYxeCW-9(R*G!eJSoaNbzisOuQ8B#Mn98+<@kN>&dRUJwa20L(
zrHtw~a$sWbZ~LY83wAUxH?~xrCHXOIl)Ga`WANB<>98mN+V~pa{o5-R>4*(u^Ed}%
z>-4tiBFZ}2a`gbV$DLcP=w7@;{d+;fqa=7dga3c8W>nl-DX6`iwTdZud}eaz@n+KQ
ze=V+C=U_=H9^xiR#>Fc_jU6U>I)Amy2pTN&-(i7Owpr<5Tr#l+j6OIq!9tW*qQ4K7
zgwb!3hrOuX+DeX%wP-t~JYMyWSG~PdVN95mg^9nLWJ?>-d@6QV{;{c7$Qdi9`w^U;
z!I)_U^Dq4Po)+y?#(mWJKeY`KF3vd(;)F}XTLVspR9|3iALhsBm4(!JR`b*aUM^hv
z2p5BmVXgHRXK9)9Wmo%?E*AE7!dQP+11RWksu#^dW-&g^uWo!c%Ydi1vo@e!Ir|w#
z&YaVT*$MX`F2hg9bUr8Yug*0mJMLxvM`T`#MVti9>%z=LOu2}$L;v}O
z+)5~Am1`{Vm+HAY31e(lPju9?x}mP94MaUg3%3*D_1a1SmA5#~DG;4YT;3K4<4J9O
zMPVy(uUuDvLSUT!%QSSbS~v#v^o8$X;KW6PWE(Lzw7UPa?V)~$0(Vb3?ktQSm%KvT
zpG$+}FM(fc`2|}M<>1NogbY+Bh8-~_0;5<_&qqW__p#waI{x?jJ!ys4f9!W3ao-AA
zxOx369I;oAOOH)ap*TqAK>X*{w^>PhyVXh?rV3{Lz{BX?)5$n8r#qM@b7HZ$*2yBj
z&T3PpKc5?7C+fZ4G$O3}>pw<^`Mpja<&!9kUTbz@)>qd=)cBGMJdq22VV298rF*Wk
zWZ^#Bt|7LK^%Z7)n^nYFZEU#~$vkge0Y%lW4LBb19X6)Ws!iVHg?jZS%G2jIbthJx
zEzMD7{ZvC_wms5Jo(m!<
zpBCh4KAnIr+c!etvF)>Hz-av#2={_OmM)~g@|)uS?61N9Z0z|rarqf#)j%q>zKM&>
z?s>?B|4RGC*opf*$xjCgDc>&qbsy?(McXVy`0!?xdpga5kL>pQP<{+KnCS26!p
zOuAMcCACAa-NmKiP;a!JJ`{=C;qVdEHx4JGZha)awTOzd;XyME|2m+CDEm7W|89^b
zKWp;kWMciN`&sSk?9W5wUC_(F$Lg%CZ(lEt6@p@AB|w`g-H0$!q38PaPJW#
z`wjOTkutDvKhF^($E5VLvr~n^uOSv$wYT#SRvlTatMhc>ee`pBSoP1S{FwUeylX{6
zhP$1GVYzdPFm^ib>m`)sm1?}eO1q;9u+2~=S1NxY{`u?k0;k{cA=QOs>9bta`*(VO
z#H`Afi!Xo6mJ6w&bu*D)zqlsz7L^BE`gdh<*`jlhZF#o%@|T+}kj|dB*dVQRZXxo=
z{}uqB=WjXHRRvVIaF*8S%EeY@LTTvNDo_}!U#bqyH{=E(KN9sau<8aP*zwils|okzb~
zmUPii}eQ>s$Ycq(!|`qWyv(&%#LgT
zsd&{1t`yqA&g@4(`z~w{pr8wL1zhaHT0_(=ip?j{t0H5MnLqPom)(5a4f#Iark0-FLq#>;j`vK(ZK5RZr(x&$_
z&GkY*Qy268EX`63PTu+PM3-inkwTl(pQS=tNn)4CGuM+@HpsDqSToX9kiyo0Oc)HW
z0zVC5k%TRWGJi~88$OcZ&Z$>QWkwom&1lvMbsz
z8%(k`<5?2OAI6&kRG*#xC19^lWOQ@Yn@(YgW)REHU~K^<84Rx?y6aCYi&iRs4*Q<$
zsFsOH<)RKkWghzra>M7dr{KpMSu!n8vBr*&o%5Ek41nuW_84Hcj3QYZw~So}bkAl{
zfKA!#4tmU8&WcHM>`InNJ)h(J8!^RW@i}kr|x%#t!0)XCUEzzvV(VTx(NODSr$tc41Fe_
zqTXT8<;Sv~aF@F?&F&hT0oI~xEP+Hm-eA#WT&LSCAEbJRIZ+RfyUdN2E93zSC$Wit
zz)!%uN30$3ztX%y)(5{}w$z&bl2s&s&3(h}6Fu@Bq6&DVl(i!*wjWtj8mvP(^8h)x
zoYAqZ&-u(264qjJG~onUPNPBItMWRcbIjzQDgf(R%l@FXxQa5SuOF`{n^HnbCHV@t
zk@hn7x2D_6SS4+!gFF#3FCAn{QuWDE9z|V8I?J@fo;l04NONUZIfM*~@{qC3^&$_M
z&JjJjs@#I4H7~gwWL7mfhLpUhA*W$0>w|n{O3nI2e_1B+DRpHE2fd`8T$AK%8_IOz
z=>wX`w3a)Y%0Up<8imNi0qa9#92vTIs7!Y|eSDa_9;nr7AzuJ|Y9S8+q=d_O>(F^i
z*%Z@Xx%WPHfcqKC+VX#8o2yqv7?9VYuypP1n?
z_Ol*1Lhi#|g!T88N^gC0@u`;~&*}NSQIW4%-~Gq=(EHXx%Nr$!L-5Tg`8D+``9V&x
z5a+I4&pR;gNxsnH$G{|^O&%jtMj4$ZQ>1@RlXFNoXPivgqI4VukCr%I4nt$kc=pXZ4%|FVcsMDyQ
zJ4(f#Bk~=xf#g3RdN=^9+7JR
zJdVmp5?a_%*%P7(N9C5}#8t=S7Vxq5uvnfA82Ouw(?whJn`v8no{%$0--eU&T0qn(
zliPAmnYO8xds?my4QEcv)kxD{XJoo9Y75WFHLXPH`p|l1&1M`K(L(4a&LJGpvi1UP
zLM`l~JPYzCF3Oc?b#GsmovHQX75So_2#eFR;|do2QEpo@r1y@km2qLYip?bS*4N}`
zv@Zj1%Gn?<-jwgd|Jv^R$ZI0t0W;Fa)3)r80nfpqntF6BF4g3W0<8NhLs&un=@(yf#ZQn;Z24s~o`5s#J
zkDp}vZmMoHOJ`0Wk2Mb$PS%fc;Wvrw?#2^9?sDT}
z$=*OuK9}&Y7mwp^qET$&?^Sw@^$0oS8Zz_f*$;)bz21!V*4{h}E64d?gIA_R*v*II
zqK(@@9ZqLRS6@zhQ|sW*DYZG&<>53=U5-7YEvU=k9c^Ykz6H=EfX_sag8|%udR(o~
zy94Ss;PHU@4R|6N-!|ZTs4*{)PXpOF2(tm{+L$*68Qqu{fYhyn`6jaatEGG_YdD_6
zHN6GzgckE~zMne&){=Jtctr5&fZPa9Cx>Swe?*Put@#VUht?c9O7m*N&r}pmkL-4z
zdE0Mj$*&ckrV5?4w-4t8$JpO)b@M^*)h8VZ$OInf6KRl9Pu4rM#6TT
zP1CN>g>MFTM;Gpny{cV^A)n~BvAE&`1A6cigylW?Y|=3+4oifiF`k@0
zd>**NdP5aR&qTh`OUzsJK0fBtqzT?vuim^`8t3Pf`2Eut8(e2P6>K{ncrxyT(C;Vm
z-lV)$Kfap!J?+n@0EQ-UY((u-k|_$}2Jo2(HvQ2cPM=D(T7%(Tq1_qGDTDSL!dsy+
zX9!T6<9IVf_SY+TU)Ga;M6{yQ{+S%UEl-1^D@Jxt${>14=
z9&PL#%uQ&&&*3}4{dq2LL|sl~^4)+L^Y}$avExRbTV}ic?opBRW_B-rbtvi0DaV&v
zknH9oc7<**@(g@&|E0_PYU9t}C=9W4ob!Bh)61_qeMBPsndd;{n+1po=o^v6&wz~3
z_+&~3M;G%zz^BEu!1~K2JeVfqo6TLx3vE}>T(#^K`~qO)N`4THjdOS$b=B5zI@XV_
z;fHA!vo~-bK*0u%mtCE044%p
z`66mvrklnaQD9oPKMQy@G}`Ur8!(PuxR*B}q0)U6>v~C{iPWnV@jIkSJ;-}h68+Xc
z86KGPc;3PYyS^#Bbn;tF{2*TsdG|v+k1RQR*tDN?<57Nr%pdR@53V5eX`voSoWJp^
zBvX8XH=zuzoa4yJ+L&|v6JYdtQwBPFp7$U-{4VkcYw@!>*U@vtZRu5PC5_Ra@90F93xm;zj%^9QmY)b}l+Y@#13;TLK4
zUnK}l&U(dj?8US{>%OUd$>8lT#qu7HZ~
zO(B&3o;QMv^(P(Ib|!)PTdIIb*FXxl0)l(Q16D4rAuQ=OE9fHuy`8$hUwLh1LA
zi_(fYi4*TyzZw6%Z_1v`i4BT64lDJDUmaV&FimLH+!WdeRos>1v|@KW6w_Py{HjVS
zlo-5}4uGy+$|*qgYRY5C*mx`S@}^g>uFxximR?Jt9krvDQVrazwUjxmig?$#_+-_+
zo!h?Fb>9*P^<_ihAWNnwR_T%>9`}bRBnK^C_-sWzC7PbxdPbOTA_C$
z$2N*Jacj0SIs9rnWrAWaY