mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-07-31 17:38:33 +00:00
ACS-9835-Improve code quality in alfresco-transform-core (#1116)
This commit is contained in:
@@ -26,17 +26,8 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc.metadataExtractors;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
|
||||
|
||||
import javax.swing.text.ChangedCharSetException;
|
||||
import javax.swing.text.MutableAttributeSet;
|
||||
import javax.swing.text.html.HTML;
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
import javax.swing.text.html.parser.ParserDelegator;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
@@ -46,13 +37,23 @@ import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import javax.swing.text.ChangedCharSetException;
|
||||
import javax.swing.text.MutableAttributeSet;
|
||||
import javax.swing.text.html.HTML;
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
import javax.swing.text.html.parser.ParserDelegator;
|
||||
|
||||
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
|
||||
|
||||
/**
|
||||
* Metadata extractor for HTML and XHTML.
|
||||
*
|
||||
* Configuration: (see HtmlMetadataExtractor_metadata_extract.properties and misc_engine_config.json)
|
||||
* Configuration: (see HtmlMetadataExtractor_metadata_extract.properties and misc_engine_config.json)
|
||||
*
|
||||
* <pre>
|
||||
* <b>author:</b> -- cm:author
|
||||
@@ -73,7 +74,7 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_TITLE = "title";
|
||||
private static final String KEY_DESCRIPTION= "description";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
|
||||
public HtmlMetadataExtractor()
|
||||
{
|
||||
@@ -105,8 +106,7 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
// so cannot use the input stream provided, as it will get closed.
|
||||
final File sourceFile = transformManager.createSourceFile();
|
||||
|
||||
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
|
||||
{
|
||||
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {
|
||||
StringBuffer title = null;
|
||||
boolean inHead = false;
|
||||
|
||||
@@ -177,8 +177,7 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
}
|
||||
|
||||
public void handleError(String errorMsg, int pos)
|
||||
{
|
||||
}
|
||||
{}
|
||||
};
|
||||
|
||||
String charsetGuess = "UTF-8";
|
||||
|
@@ -26,17 +26,8 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc.metadataExtractors;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
|
||||
|
||||
import jakarta.mail.Header;
|
||||
import jakarta.mail.internet.InternetAddress;
|
||||
import jakarta.mail.internet.MimeMessage;
|
||||
import jakarta.mail.internet.MimeMessage.RecipientType;
|
||||
import jakarta.mail.internet.MimeUtility;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
@@ -46,13 +37,23 @@ import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import jakarta.mail.Header;
|
||||
import jakarta.mail.internet.InternetAddress;
|
||||
import jakarta.mail.internet.MimeMessage;
|
||||
import jakarta.mail.internet.MimeMessage.RecipientType;
|
||||
import jakarta.mail.internet.MimeUtility;
|
||||
|
||||
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
|
||||
|
||||
/**
|
||||
* Metadata extractor for RFC822 mime emails.
|
||||
*
|
||||
* Configuration: (see RFC822MetadataExtractor_metadata_extract.properties and misc_engine_config.json)
|
||||
* Configuration: (see RFC822MetadataExtractor_metadata_extract.properties and misc_engine_config.json)
|
||||
*
|
||||
* <pre>
|
||||
* <b>messageFrom:</b> -- imap:messageFrom, cm:originator
|
||||
@@ -106,8 +107,7 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
if (mimeMessage != null)
|
||||
{
|
||||
/**
|
||||
* Extract RFC822 values that don't map directly to headers and need to be decoded.
|
||||
* Or those special fields that require some code to extract data
|
||||
* Extract RFC822 values that don't map directly to headers and need to be decoded, or those special fields that require some code to extract data.
|
||||
*/
|
||||
String tmp = InternetAddress.toString(mimeMessage.getFrom());
|
||||
tmp = tmp != null ? MimeUtility.decodeText(tmp) : null;
|
||||
@@ -126,18 +126,11 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
/**
|
||||
* Received field from RFC 822
|
||||
*
|
||||
* "Received" ":" ; one per relay
|
||||
* ["from" domain] ; sending host
|
||||
* ["by" domain] ; receiving host
|
||||
* ["via" atom] ; physical path
|
||||
* ("with" atom) ; link/mail protocol
|
||||
* ["id" msg-id] ; receiver msg id
|
||||
* ["for" addr-spec] ; initial form
|
||||
* ";" date-time ; time received
|
||||
* "Received" ":" ; one per relay ["from" domain] ; sending host ["by" domain] ; receiving host ["via" atom] ; physical path ("with" atom) ; link/mail protocol ["id" msg-id] ; receiver msg id ["for" addr-spec] ; initial form ";" date-time ; time received
|
||||
*/
|
||||
Date rxDate = mimeMessage.getReceivedDate();
|
||||
|
||||
if(rxDate != null)
|
||||
if (rxDate != null)
|
||||
{
|
||||
// The email implementation extracted the received date for us.
|
||||
putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
|
||||
@@ -146,12 +139,12 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
{
|
||||
// the email implementation did not parse the received date for us.
|
||||
String[] rx = mimeMessage.getHeader("received");
|
||||
if(rx != null && rx.length > 0)
|
||||
if (rx != null && rx.length > 0)
|
||||
{
|
||||
String lastReceived = rx[0];
|
||||
lastReceived = MimeUtility.unfold(lastReceived);
|
||||
int x = lastReceived.lastIndexOf(';');
|
||||
if(x > 0)
|
||||
if (x > 0)
|
||||
{
|
||||
String dateStr = lastReceived.substring(x + 1).trim();
|
||||
putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
|
||||
@@ -174,9 +167,7 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder
|
||||
putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract values from all header fields, including extension fields "X-"
|
||||
*/
|
||||
/* Extract values from all header fields, including extension fields "X-" */
|
||||
Set<String> keys = getExtractMapping().keySet();
|
||||
Enumeration<Header> headers = mimeMessage.getAllHeaders();
|
||||
while (headers.hasMoreElements())
|
||||
|
@@ -1,125 +1,122 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
|
||||
/**
|
||||
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
|
||||
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
|
||||
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
|
||||
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
|
||||
* newer one. Both formats have the same mimetype.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Neil Mc Erlean
|
||||
* @author eknizat
|
||||
* @since 4.0
|
||||
*/
|
||||
@Component
|
||||
public class AppleIWorksContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
AppleIWorksContentTransformer.class);
|
||||
|
||||
// Apple's zip entry names for previews in iWorks have changed over time.
|
||||
private static final List<String> PDF_PATHS = ImmutableList.of(
|
||||
"QuickLook/Preview.pdf"); // iWorks 2008/9
|
||||
private static final List<String> JPG_PATHS = ImmutableList.of(
|
||||
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
|
||||
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
|
||||
// (225 x 173) preview-web.jpg
|
||||
// (53 x 41) preview-micro.jpg
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "appleIWorks";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile, TransformManager transformManager)
|
||||
{
|
||||
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
|
||||
sourceMimetype, targetMimetype);
|
||||
|
||||
// iWorks files are zip (or package) files.
|
||||
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
|
||||
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
|
||||
new BufferedInputStream(new FileInputStream(sourceFile))))
|
||||
{
|
||||
// Look through the zip file entries for the preview/thumbnail.
|
||||
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
|
||||
ZipArchiveEntry entry;
|
||||
boolean found = false;
|
||||
while ((entry = iWorksZip.getNextZipEntry()) != null)
|
||||
{
|
||||
String name = entry.getName();
|
||||
if (paths.contains(name))
|
||||
{
|
||||
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
|
||||
/**
|
||||
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing. The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the newer one. Both formats have the same mimetype.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Neil Mc Erlean
|
||||
* @author eknizat
|
||||
* @since 4.0
|
||||
*/
|
||||
@Component
|
||||
public class AppleIWorksContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
AppleIWorksContentTransformer.class);
|
||||
|
||||
// Apple's zip entry names for previews in iWorks have changed over time.
|
||||
private static final List<String> PDF_PATHS = ImmutableList.of(
|
||||
"QuickLook/Preview.pdf"); // iWorks 2008/9
|
||||
private static final List<String> JPG_PATHS = ImmutableList.of(
|
||||
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
|
||||
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
|
||||
// (225 x 173) preview-web.jpg
|
||||
// (53 x 41) preview-micro.jpg
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "appleIWorks";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile, TransformManager transformManager)
|
||||
{
|
||||
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
|
||||
sourceMimetype, targetMimetype);
|
||||
|
||||
// iWorks files are zip (or package) files.
|
||||
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
|
||||
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
|
||||
new BufferedInputStream(new FileInputStream(sourceFile))))
|
||||
{
|
||||
// Look through the zip file entries for the preview/thumbnail.
|
||||
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
|
||||
ZipArchiveEntry entry;
|
||||
boolean found = false;
|
||||
while ((entry = iWorksZip.getNextZipEntry()) != null)
|
||||
{
|
||||
String name = entry.getName();
|
||||
if (paths.contains(name))
|
||||
{
|
||||
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,241 +1,237 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2023 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.fs.FileManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import jakarta.mail.MessagingException;
|
||||
import jakarta.mail.Multipart;
|
||||
import jakarta.mail.Part;
|
||||
import jakarta.mail.Session;
|
||||
import jakarta.mail.internet.MimeMessage;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
/**
|
||||
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
|
||||
* messages. Searches for all text content parts, and returns them. Any
|
||||
* attachments are ignored. TIKA Note - could be replaced with the Tika email
|
||||
* parser. Would require a recursing parser to be specified, but not the full
|
||||
* Auto one (we don't want attachments), just one containing text and html
|
||||
* related parsers.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*/
|
||||
@Component
|
||||
public class EMLTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
|
||||
|
||||
private static final String CHARSET = "charset";
|
||||
private static final String DEFAULT_ENCODING = "UTF-8";
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "rfc822";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
logger.debug("Performing RFC822 to text transform.");
|
||||
// Use try with resource
|
||||
try (InputStream contentInputStream = new BufferedInputStream(
|
||||
new FileInputStream(sourceFile));
|
||||
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
|
||||
{
|
||||
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
|
||||
contentInputStream);
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
Object content = mimeMessage.getContent();
|
||||
if (content instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) content, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.append(content.toString());
|
||||
}
|
||||
bufferedFileWriter.write(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find "text" parts of message recursively and appends it to sb StringBuilder
|
||||
*
|
||||
* @param multipart Multipart to process
|
||||
* @param sb StringBuilder
|
||||
* @throws MessagingException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
|
||||
IOException
|
||||
{
|
||||
boolean isAlternativeMultipart = multipart.getContentType().contains(
|
||||
MIMETYPE_MULTIPART_ALTERNATIVE);
|
||||
if (isAlternativeMultipart)
|
||||
{
|
||||
processAlternativeMultipart(multipart, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
processPart(part, sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
|
||||
*
|
||||
* @param multipart
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
|
||||
IOException, MessagingException
|
||||
{
|
||||
Part partToUse = null;
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
partToUse = part;
|
||||
break;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
partToUse = part;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
|
||||
{
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processAlternativeMultipart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (partToUse != null)
|
||||
{
|
||||
processPart(partToUse, sb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds text on a given mail part. Accepted parts types are text/html and text/plain.
|
||||
* Attachments are ignored
|
||||
*
|
||||
* @param part
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
|
||||
{
|
||||
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
|
||||
if (isAttachment)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
sb.append(part.getContent().toString());
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
String mailPartContent = part.getContent().toString();
|
||||
|
||||
//create a temporary html file with same mail part content and encoding
|
||||
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
|
||||
".html");
|
||||
String encoding = getMailPartContentEncoding(part);
|
||||
try (OutputStreamWriter osWriter = new OutputStreamWriter(
|
||||
new FileOutputStream(tempHtmlFile), encoding))
|
||||
{
|
||||
osWriter.write(mailPartContent);
|
||||
}
|
||||
|
||||
//transform html file's content to plain text
|
||||
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(tempHtmlFile, encoding);
|
||||
sb.append(extractor.getStrings());
|
||||
|
||||
tempHtmlFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private String getMailPartContentEncoding(Part part) throws MessagingException
|
||||
{
|
||||
String encoding = DEFAULT_ENCODING;
|
||||
String contentType = part.getContentType();
|
||||
int startIndex = contentType.indexOf(CHARSET);
|
||||
if (startIndex > 0)
|
||||
{
|
||||
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
|
||||
.replaceAll("\"", "");
|
||||
}
|
||||
return encoding;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2023 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import jakarta.mail.MessagingException;
|
||||
import jakarta.mail.Multipart;
|
||||
import jakarta.mail.Part;
|
||||
import jakarta.mail.Session;
|
||||
import jakarta.mail.internet.MimeMessage;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.fs.FileManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
|
||||
/**
|
||||
* Uses jakarta.mail.internet.MimeMessage to generate plain text versions of RFC822 email messages. Searches for all text content parts, and returns them. Any attachments are ignored. TIKA Note - could be replaced with the Tika email parser. Would require a recursing parser to be specified, but not the full Auto one (we don't want attachments), just one containing text and html related parsers.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*/
|
||||
@Component
|
||||
public class EMLTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
|
||||
|
||||
private static final String CHARSET = "charset";
|
||||
private static final String DEFAULT_ENCODING = "UTF-8";
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "rfc822";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
logger.debug("Performing RFC822 to text transform.");
|
||||
// Use try with resource
|
||||
try (InputStream contentInputStream = new BufferedInputStream(
|
||||
new FileInputStream(sourceFile));
|
||||
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
|
||||
{
|
||||
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
|
||||
contentInputStream);
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
Object content = mimeMessage.getContent();
|
||||
if (content instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) content, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.append(content.toString());
|
||||
}
|
||||
bufferedFileWriter.write(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find "text" parts of message recursively and appends it to sb StringBuilder
|
||||
*
|
||||
* @param multipart
|
||||
* Multipart to process
|
||||
* @param sb
|
||||
* StringBuilder
|
||||
* @throws MessagingException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
|
||||
IOException
|
||||
{
|
||||
boolean isAlternativeMultipart = multipart.getContentType().contains(
|
||||
MIMETYPE_MULTIPART_ALTERNATIVE);
|
||||
if (isAlternativeMultipart)
|
||||
{
|
||||
processAlternativeMultipart(multipart, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
processPart(part, sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
|
||||
*
|
||||
* @param multipart
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws IOException, MessagingException
|
||||
{
|
||||
Part partToUse = null;
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
partToUse = part;
|
||||
break;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
partToUse = part;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
|
||||
{
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processAlternativeMultipart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (partToUse != null)
|
||||
{
|
||||
processPart(partToUse, sb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds text on a given mail part. Accepted parts types are text/html and text/plain. Attachments are ignored
|
||||
*
|
||||
* @param part
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
|
||||
{
|
||||
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
|
||||
if (isAttachment)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
sb.append(part.getContent().toString());
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
String mailPartContent = part.getContent().toString();
|
||||
|
||||
// create a temporary html file with same mail part content and encoding
|
||||
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
|
||||
".html");
|
||||
String encoding = getMailPartContentEncoding(part);
|
||||
try (OutputStreamWriter osWriter = new OutputStreamWriter(
|
||||
new FileOutputStream(tempHtmlFile), encoding))
|
||||
{
|
||||
osWriter.write(mailPartContent);
|
||||
}
|
||||
|
||||
// transform html file's content to plain text
|
||||
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(tempHtmlFile, encoding);
|
||||
sb.append(extractor.getStrings());
|
||||
|
||||
tempHtmlFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private String getMailPartContentEncoding(Part part) throws MessagingException
|
||||
{
|
||||
String encoding = DEFAULT_ENCODING;
|
||||
String contentType = part.getContentType();
|
||||
int startIndex = contentType.indexOf(CHARSET);
|
||||
if (startIndex > 0)
|
||||
{
|
||||
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
|
||||
.replaceAll("\"", "");
|
||||
}
|
||||
return encoding;
|
||||
}
|
||||
}
|
||||
|
@@ -26,16 +26,14 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.apache.commons.imaging.formats.tiff.constants.TiffTagConstants.TIFF_TAG_XRESOLUTION;
|
||||
import static org.apache.commons.imaging.formats.tiff.constants.TiffTagConstants.TIFF_TAG_YRESOLUTION;
|
||||
|
||||
import static org.alfresco.transform.common.RequestParamMap.END_PAGE;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FORMAT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_ORIENTATION;
|
||||
import static org.alfresco.transform.common.RequestParamMap.START_PAGE;
|
||||
import static org.apache.commons.imaging.formats.tiff.constants.TiffTagConstants.TIFF_TAG_XRESOLUTION;
|
||||
import static org.apache.commons.imaging.formats.tiff.constants.TiffTagConstants.TIFF_TAG_YRESOLUTION;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.imageio.ImageReader;
|
||||
import javax.imageio.stream.ImageInputStream;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
@@ -44,9 +42,10 @@ import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.imageio.ImageReader;
|
||||
import javax.imageio.stream.ImageInputStream;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.apache.commons.imaging.Imaging;
|
||||
import org.apache.commons.imaging.ImagingException;
|
||||
import org.apache.commons.imaging.common.ImageMetadata;
|
||||
@@ -63,15 +62,11 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
|
||||
/**
|
||||
* Converts image files into PDF files. Transformer uses PDF Box to perform conversions.
|
||||
* During conversion image might be scaled down (keeping proportions) to match width or height of the PDF document.
|
||||
* If the image is smaller than PDF page size, the image will be placed in the top left-hand side of the PDF document page.
|
||||
* Transformer accepts the optional transform parameters below:
|
||||
* - startPage - page number of image (for multi-page images) from which transformer should start conversion. Default: first page of the image.
|
||||
* - endPage - page number of image (for multi-page images) up to which transformation should be performed. Default: last page of the image.
|
||||
* - pdfFormat - output PDF file format. Available formats: DEFAULT, A0, A1, A2, A3, A4, A5, A6, LETTER, LEGAL. Default: original image size.
|
||||
* - pdfOrientation - output PDF file orientation. Available options: DEFAULT, PORTRAIT, LANDSCAPE. Default: original image orientation.
|
||||
* Converts image files into PDF files. Transformer uses PDF Box to perform conversions. During conversion image might be scaled down (keeping proportions) to match width or height of the PDF document. If the image is smaller than PDF page size, the image will be placed in the top left-hand side of the PDF document page. Transformer accepts the optional transform parameters below: - startPage - page number of image (for multi-page images) from which transformer should start conversion. Default: first page of the image. - endPage - page number of image (for multi-page images) up to which transformation should be performed. Default: last page of the image. - pdfFormat - output PDF file format. Available formats: DEFAULT, A0, A1, A2, A3, A4, A5, A6, LETTER, LEGAL. Default: original image size. - pdfOrientation - output PDF file orientation. Available options: DEFAULT, PORTRAIT, LANDSCAPE. Default: original image orientation.
|
||||
*/
|
||||
@Component
|
||||
public class ImageToPdfTransformer implements CustomTransformerFileAdaptor
|
||||
@@ -95,13 +90,13 @@ public class ImageToPdfTransformer implements CustomTransformerFileAdaptor
|
||||
|
||||
@Override
|
||||
public void transform(
|
||||
String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File imageFile, File pdfFile, TransformManager transformManager
|
||||
) throws Exception {
|
||||
String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File imageFile, File pdfFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
try (
|
||||
ImageInputStream imageInputStream = ImageIO.createImageInputStream(imageFile);
|
||||
PDDocument pdfDocument = new PDDocument()
|
||||
) {
|
||||
ImageInputStream imageInputStream = ImageIO.createImageInputStream(imageFile);
|
||||
PDDocument pdfDocument = new PDDocument())
|
||||
{
|
||||
final Integer startPage = parseOptionIfPresent(transformOptions, START_PAGE, Integer.class).orElse(null);
|
||||
final Integer endPage = parseOptionIfPresent(transformOptions, END_PAGE, Integer.class).orElse(null);
|
||||
final String pdfFormat = parseOptionIfPresent(transformOptions, PDF_FORMAT, String.class).orElse(DEFAULT_PDF_FORMAT_STRING);
|
||||
@@ -142,7 +137,7 @@ public class ImageToPdfTransformer implements CustomTransformerFileAdaptor
|
||||
}
|
||||
|
||||
private void scaleAndDrawImage(final PDDocument pdfDocument, final BufferedImage bufferedImage, final String pdfFormat, final String pdfOrientation, final Map<String, Integer> resolution)
|
||||
throws IOException
|
||||
throws IOException
|
||||
{
|
||||
final PDImageXObject image = LosslessFactory.createFromImage(pdfDocument, bufferedImage);
|
||||
|
||||
@@ -150,10 +145,10 @@ public class ImageToPdfTransformer implements CustomTransformerFileAdaptor
|
||||
int imageHeight = image.getHeight();
|
||||
// if the image has a resolution which differs from pdfbox then adjust size in pixels according to pdfbox ppi
|
||||
if (resolution.get("X") > 0 && resolution.get("X") != PDFBOX_POINTS_PER_INCH &&
|
||||
resolution.get("Y") > 0 && resolution.get("Y") != PDFBOX_POINTS_PER_INCH)
|
||||
resolution.get("Y") > 0 && resolution.get("Y") != PDFBOX_POINTS_PER_INCH)
|
||||
{
|
||||
imageWidth = (int)(((float)imageWidth / resolution.get("X")) * PDFBOX_POINTS_PER_INCH);
|
||||
imageHeight = (int)(((float)imageHeight / resolution.get("Y")) * PDFBOX_POINTS_PER_INCH);
|
||||
imageWidth = (int) (((float) imageWidth / resolution.get("X")) * PDFBOX_POINTS_PER_INCH);
|
||||
imageHeight = (int) (((float) imageHeight / resolution.get("Y")) * PDFBOX_POINTS_PER_INCH);
|
||||
}
|
||||
|
||||
final PDPage pdfPage = new PDPage(resolvePdfFormat(pdfFormat, pdfOrientation, imageWidth, imageHeight));
|
||||
|
@@ -1,139 +1,112 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
|
||||
* This transformer will only work for OOXML files where thumbnailing was enabled,
|
||||
* which isn't on by default on Windows, but is more common on Mac.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Nick Burch
|
||||
* @author eknizat
|
||||
*/
|
||||
@Component
|
||||
public class OOXMLThumbnailContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
OOXMLThumbnailContentTransformer.class);
|
||||
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "ooXmlThumbnail";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
|
||||
+ " targetMimetype=" + targetMimetype);
|
||||
}
|
||||
|
||||
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
|
||||
{
|
||||
|
||||
// Does it have a thumbnail?
|
||||
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.THUMBNAIL);
|
||||
if (rels.size() > 0)
|
||||
{
|
||||
// Get the thumbnail part
|
||||
PackageRelationship tRel = rels.getRelationship(0);
|
||||
PackagePart tPart = pkg.getPart(tRel);
|
||||
|
||||
// Write it to the target
|
||||
InputStream tStream = tPart.getInputStream();
|
||||
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
tStream.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("No thumbnail present in file.");
|
||||
throw new Exception(
|
||||
"No thumbnail present in file, unable to generate " + targetMimetype);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to transform file.", e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO Add this back to engine_config.json when the transformer is fixed for java 11
|
||||
{
|
||||
"transformerName": "ooxmlThumbnail",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
|
||||
],
|
||||
"transformOptions": [
|
||||
]
|
||||
}
|
||||
*/
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
|
||||
/**
|
||||
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing. This transformer will only work for OOXML files where thumbnailing was enabled, which isn't on by default on Windows, but is more common on Mac.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Nick Burch
|
||||
* @author eknizat
|
||||
*/
|
||||
@Component
|
||||
public class OOXMLThumbnailContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
OOXMLThumbnailContentTransformer.class);
|
||||
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "ooXmlThumbnail";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
|
||||
+ " targetMimetype=" + targetMimetype);
|
||||
}
|
||||
|
||||
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
|
||||
{
|
||||
|
||||
// Does it have a thumbnail?
|
||||
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.THUMBNAIL);
|
||||
if (rels.size() > 0)
|
||||
{
|
||||
// Get the thumbnail part
|
||||
PackageRelationship tRel = rels.getRelationship(0);
|
||||
PackagePart tPart = pkg.getPart(tRel);
|
||||
|
||||
// Write it to the target
|
||||
InputStream tStream = tPart.getInputStream();
|
||||
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
tStream.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("No thumbnail present in file.");
|
||||
throw new Exception(
|
||||
"No thumbnail present in file, unable to generate " + targetMimetype);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to transform file.", e);
|
||||
}
|
||||
}
|
||||
|
||||
/* // TODO Add this back to engine_config.json when the transformer is fixed for java 11 { "transformerName": "ooxmlThumbnail", "supportedSourceAndTargetList": [ {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType":
|
||||
* "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"}, {"sourceMediaType":
|
||||
* "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"}, {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"} ], "transformOptions": [ ] } */
|
||||
}
|
||||
|
@@ -1,166 +1,178 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transform.common.RequestParamMap.TARGET_ENCODING;
|
||||
|
||||
/**
|
||||
* Converts any textual format to plain text.
|
||||
* <p>
|
||||
* The transformation is sensitive to the source and target string encodings.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
@Component
|
||||
public class StringExtractingContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
|
||||
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "string";
|
||||
}
|
||||
|
||||
/**
|
||||
* Text to text conversions are done directly using the content reader and writer string
|
||||
* manipulation methods.
|
||||
* <p>
|
||||
* Extraction of text from binary content attempts to take the possible character
|
||||
* encoding into account. The text produced from this will, if the encoding was correct,
|
||||
* be unformatted but valid.
|
||||
*/
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> transformOptions,
|
||||
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
||||
String targetEncoding = transformOptions.get(TARGET_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
|
||||
+ " targetEncoding=" + targetEncoding);
|
||||
}
|
||||
|
||||
Reader charReader = null;
|
||||
Writer charWriter = null;
|
||||
try
|
||||
{
|
||||
// Build reader
|
||||
if (sourceEncoding == null)
|
||||
{
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
|
||||
}
|
||||
|
||||
// Build writer
|
||||
if (targetEncoding == null)
|
||||
{
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
|
||||
}
|
||||
|
||||
// copy from the one to the other
|
||||
char[] buffer = new char[8192];
|
||||
int readCount = 0;
|
||||
while (readCount > -1)
|
||||
{
|
||||
// write the last read count number of bytes
|
||||
charWriter.write(buffer, 0, readCount);
|
||||
// fill the buffer again
|
||||
readCount = charReader.read(buffer);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (charReader != null)
|
||||
{
|
||||
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
|
||||
}
|
||||
if (charWriter != null)
|
||||
{
|
||||
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
|
||||
}
|
||||
}
|
||||
// done
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String paramterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transform.common.RequestParamMap.TARGET_ENCODING;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
|
||||
/**
|
||||
* Converts any textual format to plain text.
|
||||
* <p>
|
||||
* The transformation is sensitive to the source and target string encodings.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
@Component
|
||||
public class StringExtractingContentTransformer implements CustomTransformerFileAdaptor
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
|
||||
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "string";
|
||||
}
|
||||
|
||||
/**
|
||||
* Text to text conversions are done directly using the content reader and writer string manipulation methods.
|
||||
* <p>
|
||||
* Extraction of text from binary content attempts to take the possible character encoding into account. The text produced from this will, if the encoding was correct, be unformatted but valid.
|
||||
*/
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> transformOptions,
|
||||
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
||||
{
|
||||
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
||||
String targetEncoding = transformOptions.get(TARGET_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
|
||||
+ " targetEncoding=" + targetEncoding);
|
||||
}
|
||||
|
||||
Reader charReader = null;
|
||||
Writer charWriter = null;
|
||||
try
|
||||
{
|
||||
// Build reader
|
||||
if (sourceEncoding == null)
|
||||
{
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
|
||||
}
|
||||
|
||||
// Build writer
|
||||
if (targetEncoding == null)
|
||||
{
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
|
||||
}
|
||||
|
||||
// copy from the one to the other
|
||||
char[] buffer = new char[8192];
|
||||
int readCount = 0;
|
||||
while (readCount > -1)
|
||||
{
|
||||
// write the last read count number of bytes
|
||||
charWriter.write(buffer, 0, readCount);
|
||||
// fill the buffer again
|
||||
readCount = charReader.read(buffer);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (charReader != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
charReader.close();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
logger.error("Failed to close charReader", e);
|
||||
}
|
||||
}
|
||||
if (charWriter != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
charWriter.close();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
logger.error("Failed to close charWriter", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
// done
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String paramterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -29,9 +29,11 @@ package org.alfresco.transform.misc;
|
||||
|
||||
import org.alfresco.transform.base.LivenessReadinessProbeTest;
|
||||
|
||||
public class MiscLivenessReadinessProbeIT extends LivenessReadinessProbeTest {
|
||||
public class MiscLivenessReadinessProbeIT extends LivenessReadinessProbeTest
|
||||
{
|
||||
@Override
|
||||
protected ImagesForTests getImageForTest() {
|
||||
protected ImagesForTests getImageForTest()
|
||||
{
|
||||
return new ImagesForTests("alfresco-transform-misc", "text/plain", "text/plain", "original.txt");
|
||||
}
|
||||
}
|
||||
|
@@ -26,18 +26,19 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.base.clients.FileInfo.testFile;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_RFC822;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XHTML;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractsIT;
|
||||
import org.alfresco.transform.base.clients.FileInfo;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
import org.alfresco.transform.base.clients.FileInfo;
|
||||
import org.alfresco.transform.base.metadata.AbstractMetadataExtractsIT;
|
||||
|
||||
/**
|
||||
* Metadata integration tests in the Misc T-Engine.
|
||||
*
|
||||
@@ -53,7 +54,7 @@ public class MiscMetadataExtractsIT extends AbstractMetadataExtractsIT
|
||||
{
|
||||
super.testTransformation(fileInfo);
|
||||
}
|
||||
|
||||
|
||||
private static Stream<FileInfo> engineTransformations()
|
||||
{
|
||||
return Stream.of(
|
||||
@@ -66,7 +67,6 @@ public class MiscMetadataExtractsIT extends AbstractMetadataExtractsIT
|
||||
// Special test cases from the repo tests
|
||||
// ======================================
|
||||
testFile(MIMETYPE_RFC822, "eml", "quick.spanish.eml"),
|
||||
testFile(MIMETYPE_HTML, "html", "quick.japanese.html")
|
||||
);
|
||||
testFile(MIMETYPE_HTML, "html", "quick.japanese.html"));
|
||||
}
|
||||
}
|
||||
|
@@ -1,55 +1,55 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.alfresco.transform.base.messaging.AbstractQueueIT;
|
||||
|
||||
public class MiscQueueIT extends AbstractQueueIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_HTML)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L)
|
||||
.withInternalContextForTransformEngineTests()
|
||||
.build();
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transform.misc;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.base.messaging.AbstractQueueIT;
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
|
||||
public class MiscQueueIT extends AbstractQueueIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_HTML)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L)
|
||||
.withInternalContextForTransformEngineTests()
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
@@ -30,6 +30,11 @@ import static java.text.MessageFormat.format;
|
||||
import static java.util.function.Function.identity;
|
||||
import static java.util.stream.Collectors.toMap;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
|
||||
import static org.alfresco.transform.base.clients.FileInfo.testFile;
|
||||
import static org.alfresco.transform.base.clients.HttpClient.sendTRequest;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_DITA;
|
||||
@@ -57,23 +62,20 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_MEDIAWIKI;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_WORD;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.alfresco.transform.base.clients.FileInfo;
|
||||
import org.alfresco.transform.base.clients.SourceTarget;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import org.alfresco.transform.base.clients.FileInfo;
|
||||
import org.alfresco.transform.base.clients.SourceTarget;
|
||||
|
||||
/**
|
||||
* @author Cezar Leahu
|
||||
*/
|
||||
@@ -82,78 +84,76 @@ public class MiscTransformsIT
|
||||
private static final String ENGINE_URL = "http://localhost:8090";
|
||||
|
||||
private static final Map<String, FileInfo> TEST_FILES = Stream.of(
|
||||
testFile(MIMETYPE_IMAGE_GIF, "gif", "sample.gif"),
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "sample.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_PNG, "png", "sample.png"),
|
||||
testFile(MIMETYPE_IMAGE_TIFF, "tiff", "sample.tiff"),
|
||||
testFile(MIMETYPE_WORD, "doc", "quick.doc"),
|
||||
testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
|
||||
testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
|
||||
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
|
||||
testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
|
||||
testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
|
||||
testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
|
||||
testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
|
||||
testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick2.txt"),
|
||||
testFile(MIMETYPE_IMAGE_GIF, "gif", "sample.gif"),
|
||||
testFile(MIMETYPE_IMAGE_JPEG, "jpg", "sample.jpg"),
|
||||
testFile(MIMETYPE_IMAGE_PNG, "png", "sample.png"),
|
||||
testFile(MIMETYPE_IMAGE_TIFF, "tiff", "sample.tiff"),
|
||||
testFile(MIMETYPE_WORD, "doc", "quick.doc"),
|
||||
testFile(MIMETYPE_OPENXML_WORDPROCESSING, "docx", "quick.docx"),
|
||||
testFile(MIMETYPE_EXCEL, "xls", "quick.xls"),
|
||||
testFile(MIMETYPE_OPENXML_SPREADSHEET, "xlsx", "quick.xlsx"),
|
||||
testFile(MIMETYPE_PPT, "ppt", "quick.ppt"),
|
||||
testFile(MIMETYPE_OPENXML_PRESENTATION, "pptx", "quick.pptx"),
|
||||
testFile(MIMETYPE_OUTLOOK_MSG, "msg", "quick.msg"),
|
||||
testFile(MIMETYPE_PDF, "pdf", "quick.pdf"),
|
||||
testFile(MIMETYPE_TEXT_PLAIN, "txt", "quick2.txt"),
|
||||
|
||||
testFile("text/richtext", "rtf", "sample.rtf"),
|
||||
testFile("text/sgml", "sgml", "sample.sgml"),
|
||||
testFile("text/tab-separated-values", "tsv", "sample.tsv"),
|
||||
testFile("text/x-setext", "etx", "sample.etx"),
|
||||
testFile("text/x-java-source", "java", "Sample.java.txt"),
|
||||
testFile("text/x-jsp", "jsp", "sample.jsp.txt"),
|
||||
testFile("text/x-markdown", "md", "sample.md"),
|
||||
testFile("text/calendar", "ics", "sample.ics"),
|
||||
testFile("text/richtext", "rtf", "sample.rtf"),
|
||||
testFile("text/sgml", "sgml", "sample.sgml"),
|
||||
testFile("text/tab-separated-values", "tsv", "sample.tsv"),
|
||||
testFile("text/x-setext", "etx", "sample.etx"),
|
||||
testFile("text/x-java-source", "java", "Sample.java.txt"),
|
||||
testFile("text/x-jsp", "jsp", "sample.jsp.txt"),
|
||||
testFile("text/x-markdown", "md", "sample.md"),
|
||||
testFile("text/calendar", "ics", "sample.ics"),
|
||||
|
||||
testFile(MIMETYPE_TEXT_MEDIAWIKI, "mw", "sample.mw"),
|
||||
testFile(MIMETYPE_TEXT_CSS, "css", "style.css"),
|
||||
testFile(MIMETYPE_TEXT_CSV, "csv", "people.csv"),
|
||||
testFile(MIMETYPE_TEXT_JAVASCRIPT, "js", "script.js"),
|
||||
testFile(MIMETYPE_XML, "xml", "quick.xml"),
|
||||
testFile(MIMETYPE_HTML, "html", "quick.html"),
|
||||
testFile(MIMETYPE_JAVASCRIPT, "js", "script.js"),
|
||||
testFile(MIMETYPE_DITA, "dita", "quickConcept.dita"),
|
||||
testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
|
||||
testFile(MIMETYPE_IWORK_NUMBERS, "number", "quick.numbers"),
|
||||
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
|
||||
testFile(MIMETYPE_RFC822, "eml", "quick.eml")).collect(toMap(FileInfo::getMimeType, identity()));
|
||||
|
||||
testFile(MIMETYPE_TEXT_MEDIAWIKI, "mw", "sample.mw"),
|
||||
testFile(MIMETYPE_TEXT_CSS, "css", "style.css"),
|
||||
testFile(MIMETYPE_TEXT_CSV, "csv", "people.csv"),
|
||||
testFile(MIMETYPE_TEXT_JAVASCRIPT, "js", "script.js"),
|
||||
testFile(MIMETYPE_XML, "xml", "quick.xml"),
|
||||
testFile(MIMETYPE_HTML, "html", "quick.html"),
|
||||
testFile(MIMETYPE_JAVASCRIPT, "js", "script.js"),
|
||||
testFile(MIMETYPE_DITA, "dita", "quickConcept.dita"),
|
||||
testFile(MIMETYPE_IWORK_KEYNOTE, "key", "quick.key"),
|
||||
testFile(MIMETYPE_IWORK_NUMBERS, "number", "quick.numbers"),
|
||||
testFile(MIMETYPE_IWORK_PAGES, "pages", "quick.pages"),
|
||||
testFile(MIMETYPE_RFC822, "eml", "quick.eml")
|
||||
).collect(toMap(FileInfo::getMimeType, identity()));
|
||||
|
||||
public static Stream<SourceTarget> engineTransformations()
|
||||
{
|
||||
return Stream.of(
|
||||
SourceTarget.of("text/html", "text/plain"), //duplicate
|
||||
SourceTarget.of("text/html", "text/plain"), // duplicate
|
||||
|
||||
SourceTarget.of("text/plain", "text/plain"),
|
||||
SourceTarget.of("text/mediawiki", "text/plain"),
|
||||
SourceTarget.of("text/css", "text/plain"),
|
||||
SourceTarget.of("text/csv", "text/plain"),
|
||||
SourceTarget.of("text/xml", "text/plain"),
|
||||
SourceTarget.of("text/html", "text/plain"),
|
||||
SourceTarget.of("text/richtext", "text/plain"),
|
||||
SourceTarget.of("text/sgml", "text/plain"),
|
||||
SourceTarget.of("text/tab-separated-values", "text/plain"),
|
||||
SourceTarget.of("text/x-setext", "text/plain"),
|
||||
SourceTarget.of("text/x-java-source", "text/plain"),
|
||||
SourceTarget.of("text/x-jsp", "text/plain"),
|
||||
SourceTarget.of("text/x-markdown", "text/plain"),
|
||||
SourceTarget.of("text/calendar", "text/plain"),
|
||||
SourceTarget.of("application/x-javascript", "text/plain"),
|
||||
SourceTarget.of("application/dita+xml", "text/plain"),
|
||||
SourceTarget.of("text/plain", "text/plain"),
|
||||
SourceTarget.of("text/mediawiki", "text/plain"),
|
||||
SourceTarget.of("text/css", "text/plain"),
|
||||
SourceTarget.of("text/csv", "text/plain"),
|
||||
SourceTarget.of("text/xml", "text/plain"),
|
||||
SourceTarget.of("text/html", "text/plain"),
|
||||
SourceTarget.of("text/richtext", "text/plain"),
|
||||
SourceTarget.of("text/sgml", "text/plain"),
|
||||
SourceTarget.of("text/tab-separated-values", "text/plain"),
|
||||
SourceTarget.of("text/x-setext", "text/plain"),
|
||||
SourceTarget.of("text/x-java-source", "text/plain"),
|
||||
SourceTarget.of("text/x-jsp", "text/plain"),
|
||||
SourceTarget.of("text/x-markdown", "text/plain"),
|
||||
SourceTarget.of("text/calendar", "text/plain"),
|
||||
SourceTarget.of("application/x-javascript", "text/plain"),
|
||||
SourceTarget.of("application/dita+xml", "text/plain"),
|
||||
|
||||
SourceTarget.of("application/vnd.apple.keynote", "image/jpeg"),
|
||||
SourceTarget.of("application/vnd.apple.numbers", "image/jpeg"),
|
||||
SourceTarget.of("application/vnd.apple.pages", "image/jpeg"),
|
||||
SourceTarget.of("application/vnd.apple.keynote", "image/jpeg"),
|
||||
SourceTarget.of("application/vnd.apple.numbers", "image/jpeg"),
|
||||
SourceTarget.of("application/vnd.apple.pages", "image/jpeg"),
|
||||
|
||||
SourceTarget.of("text/plain", "application/pdf"),
|
||||
SourceTarget.of("text/csv", "application/pdf"),
|
||||
SourceTarget.of("application/dita+xml", "application/pdf"),
|
||||
SourceTarget.of("text/xml", "application/pdf"),
|
||||
SourceTarget.of("text/plain", "application/pdf"),
|
||||
SourceTarget.of("text/csv", "application/pdf"),
|
||||
SourceTarget.of("application/dita+xml", "application/pdf"),
|
||||
SourceTarget.of("text/xml", "application/pdf"),
|
||||
|
||||
SourceTarget.of(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF),
|
||||
SourceTarget.of(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF),
|
||||
|
||||
SourceTarget.of("message/rfc822", "text/plain")
|
||||
);
|
||||
SourceTarget.of("message/rfc822", "text/plain"));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -166,13 +166,13 @@ public class MiscTransformsIT
|
||||
final String targetExtension = TEST_FILES.get(targetMimetype).getExtension();
|
||||
|
||||
final String descriptor = format("Transform ({0}, {1} -> {2}, {3})",
|
||||
sourceFile, sourceMimetype, targetMimetype, targetExtension);
|
||||
sourceFile, sourceMimetype, targetMimetype, targetExtension);
|
||||
|
||||
try
|
||||
{
|
||||
// when
|
||||
final ResponseEntity<Resource> response = sendTRequest(ENGINE_URL, sourceFile,
|
||||
sourceMimetype, targetMimetype, targetExtension);
|
||||
sourceMimetype, targetMimetype, targetExtension);
|
||||
|
||||
assertEquals(OK, response.getStatusCode(), descriptor);
|
||||
if (MIMETYPE_PDF.equals(targetMimetype))
|
||||
|
@@ -26,6 +26,11 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.mockito.BDDMockito.then;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_GIF;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_PNG;
|
||||
@@ -35,12 +40,7 @@ import static org.alfresco.transform.common.RequestParamMap.END_PAGE;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FORMAT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_ORIENTATION;
|
||||
import static org.alfresco.transform.common.RequestParamMap.START_PAGE;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.mockito.BDDMockito.then;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
@@ -52,9 +52,8 @@ import java.util.function.BiFunction;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.misc.util.ArgumentsCartesianProduct;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@@ -66,6 +65,9 @@ import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.misc.util.ArgumentsCartesianProduct;
|
||||
|
||||
class ImageToPdfTransformerTest
|
||||
{
|
||||
private static final File sourceFile = loadFile("sample.gif");
|
||||
@@ -91,32 +93,30 @@ class ImageToPdfTransformerTest
|
||||
static Stream<ImageFile> imageFiles()
|
||||
{
|
||||
return Stream.of(
|
||||
ImageFile.of("sample.jpg", MIMETYPE_IMAGE_JPEG),
|
||||
ImageFile.of("sample.gif", MIMETYPE_IMAGE_GIF),
|
||||
ImageFile.of("sample.png", MIMETYPE_IMAGE_PNG)
|
||||
);
|
||||
ImageFile.of("sample.jpg", MIMETYPE_IMAGE_JPEG),
|
||||
ImageFile.of("sample.gif", MIMETYPE_IMAGE_GIF),
|
||||
ImageFile.of("sample.png", MIMETYPE_IMAGE_PNG));
|
||||
}
|
||||
|
||||
static Stream<TransformOptions> defaultTransformOptions()
|
||||
{
|
||||
return Stream.of(
|
||||
TransformOptions.none(),
|
||||
TransformOptions.of(0, null),
|
||||
TransformOptions.of(0, 0)
|
||||
);
|
||||
TransformOptions.none(),
|
||||
TransformOptions.of(0, null),
|
||||
TransformOptions.of(0, 0));
|
||||
}
|
||||
|
||||
static Stream<TransformOptions> tiffTransformOptions()
|
||||
{
|
||||
return Stream.of(
|
||||
TransformOptions.of(0, 0), // (startPage, endPage)
|
||||
TransformOptions.of(0, 1),
|
||||
TransformOptions.of(1, 1),
|
||||
TransformOptions.of(null, 0), // expected 1 page in target file
|
||||
TransformOptions.of(null, 1), // expected 2 pages in target file
|
||||
TransformOptions.of(0, null), // expected all pages in target file
|
||||
TransformOptions.of(1, null), // expected all except first page in target file
|
||||
TransformOptions.none() // expected all pages in target file
|
||||
TransformOptions.of(0, 0), // (startPage, endPage)
|
||||
TransformOptions.of(0, 1),
|
||||
TransformOptions.of(1, 1),
|
||||
TransformOptions.of(null, 0), // expected 1 page in target file
|
||||
TransformOptions.of(null, 1), // expected 2 pages in target file
|
||||
TransformOptions.of(0, null), // expected all pages in target file
|
||||
TransformOptions.of(1, null), // expected all except first page in target file
|
||||
TransformOptions.none() // expected all pages in target file
|
||||
);
|
||||
}
|
||||
|
||||
@@ -124,9 +124,8 @@ class ImageToPdfTransformerTest
|
||||
{
|
||||
ImageFile tiffImage = ImageFile.of("sample.tiff", MIMETYPE_IMAGE_TIFF, 6);
|
||||
return Stream.of(
|
||||
ArgumentsCartesianProduct.of(imageFiles(), defaultTransformOptions()),
|
||||
ArgumentsCartesianProduct.of(tiffImage, tiffTransformOptions())
|
||||
).flatMap(Function.identity());
|
||||
ArgumentsCartesianProduct.of(imageFiles(), defaultTransformOptions()),
|
||||
ArgumentsCartesianProduct.of(tiffImage, tiffTransformOptions())).flatMap(Function.identity());
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -157,10 +156,9 @@ class ImageToPdfTransformerTest
|
||||
static Stream<TransformOptions> improperTransformOptions()
|
||||
{
|
||||
return Stream.of(
|
||||
TransformOptions.of(1, 0),
|
||||
TransformOptions.of(-1, 0),
|
||||
TransformOptions.of(0, -1)
|
||||
);
|
||||
TransformOptions.of(1, 0),
|
||||
TransformOptions.of(-1, 0),
|
||||
TransformOptions.of(0, -1));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -168,8 +166,7 @@ class ImageToPdfTransformerTest
|
||||
void testTransformTiffToPdf_withImproperOptions(TransformOptions transformOptions)
|
||||
{
|
||||
// when
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager));
|
||||
assertThrows(IllegalArgumentException.class, () -> transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -179,8 +176,7 @@ class ImageToPdfTransformerTest
|
||||
transformOptions.put(START_PAGE, "a");
|
||||
|
||||
// when
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions, sourceFile, targetFile, transformManager));
|
||||
assertThrows(IllegalArgumentException.class, () -> transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions, sourceFile, targetFile, transformManager));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -190,57 +186,53 @@ class ImageToPdfTransformerTest
|
||||
transformOptions.put(END_PAGE, "z");
|
||||
|
||||
// when
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions, sourceFile, targetFile, transformManager));
|
||||
assertThrows(IllegalArgumentException.class, () -> transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions, sourceFile, targetFile, transformManager));
|
||||
}
|
||||
|
||||
/** Option and expected dimensions. */
|
||||
static Stream<Arguments> validPdfFormats()
|
||||
{
|
||||
return Stream.of(
|
||||
Arguments.of("DEFAULT", new PDRectangle(sourceFileWidth, sourceFileHeight)),
|
||||
Arguments.of("default", new PDRectangle(sourceFileWidth, sourceFileHeight)),
|
||||
Arguments.of("A0", PDRectangle.A0),
|
||||
Arguments.of("a0", PDRectangle.A0),
|
||||
Arguments.of("A1", PDRectangle.A1),
|
||||
Arguments.of("A2", PDRectangle.A2),
|
||||
Arguments.of("A3", PDRectangle.A3),
|
||||
Arguments.of("A4", PDRectangle.A4),
|
||||
Arguments.of("A5", PDRectangle.A5),
|
||||
Arguments.of("A6", PDRectangle.A6),
|
||||
Arguments.of("A6", PDRectangle.A6),
|
||||
Arguments.of("LETTER", PDRectangle.LETTER),
|
||||
Arguments.of("letter", PDRectangle.LETTER),
|
||||
Arguments.of("LEGAL", PDRectangle.LEGAL),
|
||||
Arguments.of("legal", PDRectangle.LEGAL)
|
||||
);
|
||||
Arguments.of("DEFAULT", new PDRectangle(sourceFileWidth, sourceFileHeight)),
|
||||
Arguments.of("default", new PDRectangle(sourceFileWidth, sourceFileHeight)),
|
||||
Arguments.of("A0", PDRectangle.A0),
|
||||
Arguments.of("a0", PDRectangle.A0),
|
||||
Arguments.of("A1", PDRectangle.A1),
|
||||
Arguments.of("A2", PDRectangle.A2),
|
||||
Arguments.of("A3", PDRectangle.A3),
|
||||
Arguments.of("A4", PDRectangle.A4),
|
||||
Arguments.of("A5", PDRectangle.A5),
|
||||
Arguments.of("A6", PDRectangle.A6),
|
||||
Arguments.of("A6", PDRectangle.A6),
|
||||
Arguments.of("LETTER", PDRectangle.LETTER),
|
||||
Arguments.of("letter", PDRectangle.LETTER),
|
||||
Arguments.of("LEGAL", PDRectangle.LEGAL),
|
||||
Arguments.of("legal", PDRectangle.LEGAL));
|
||||
}
|
||||
|
||||
/** Option and expected orientation. */
|
||||
static Stream<Arguments> validPdfOrientations()
|
||||
{
|
||||
return Stream.of(
|
||||
Arguments.of("DEFAULT", unchangedRectangle()),
|
||||
Arguments.of("default", unchangedRectangle()),
|
||||
Arguments.of("PORTRAIT", rectangleRotatedIf((width, height) -> width > height)),
|
||||
Arguments.of("portrait", rectangleRotatedIf((width, height) -> width > height)),
|
||||
Arguments.of("LANDSCAPE", rectangleRotatedIf((width, height) -> height > width)),
|
||||
Arguments.of("landscape", rectangleRotatedIf((width, height) -> height > width))
|
||||
);
|
||||
Arguments.of("DEFAULT", unchangedRectangle()),
|
||||
Arguments.of("default", unchangedRectangle()),
|
||||
Arguments.of("PORTRAIT", rectangleRotatedIf((width, height) -> width > height)),
|
||||
Arguments.of("portrait", rectangleRotatedIf((width, height) -> width > height)),
|
||||
Arguments.of("LANDSCAPE", rectangleRotatedIf((width, height) -> height > width)),
|
||||
Arguments.of("landscape", rectangleRotatedIf((width, height) -> height > width)));
|
||||
}
|
||||
|
||||
static Stream<Arguments> validPdfFormatsAndOrientations()
|
||||
{
|
||||
return ArgumentsCartesianProduct.ofArguments(
|
||||
validPdfFormats(),
|
||||
validPdfOrientations()
|
||||
);
|
||||
validPdfFormats(),
|
||||
validPdfOrientations());
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("validPdfFormatsAndOrientations")
|
||||
void testTransformImageToPDF_withVariousPdfFormatsAndOrientations(String pdfFormat, PDRectangle expectedPdfFormat,
|
||||
String pdfOrientation, BiFunction<Float, Float, PDRectangle> expectedPdfFormatRotator) throws Exception
|
||||
String pdfOrientation, BiFunction<Float, Float, PDRectangle> expectedPdfFormatRotator) throws Exception
|
||||
{
|
||||
TransformOptions transformOptions = TransformOptions.of(pdfFormat, pdfOrientation);
|
||||
|
||||
@@ -302,8 +294,7 @@ class ImageToPdfTransformerTest
|
||||
Arguments.of(ImageFile.of("612x792-300.tif", MIMETYPE_IMAGE_TIFF), 146.0f, 190.0f),
|
||||
Arguments.of(ImageFile.of("765x990-50.tif", MIMETYPE_IMAGE_TIFF), 1101.0f, 1425.0f),
|
||||
Arguments.of(ImageFile.of("765x990-72.tif", MIMETYPE_IMAGE_TIFF), 765.0f, 990.0f),
|
||||
Arguments.of(ImageFile.of("765x990-300.tif", MIMETYPE_IMAGE_TIFF), 183.0f, 237.0f)
|
||||
);
|
||||
Arguments.of(ImageFile.of("765x990-300.tif", MIMETYPE_IMAGE_TIFF), 183.0f, 237.0f));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -320,12 +311,12 @@ class ImageToPdfTransformerTest
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
{
|
||||
assertNotNull(actualPdfDocument);
|
||||
assertEquals(expectedWidth, actualPdfDocument.getPage(0).getMediaBox().getWidth(),"Pdf width");
|
||||
assertEquals(expectedHeight,actualPdfDocument.getPage(0).getMediaBox().getHeight(),"Pdf height");
|
||||
assertEquals(expectedWidth, actualPdfDocument.getPage(0).getMediaBox().getWidth(), "Pdf width");
|
||||
assertEquals(expectedHeight, actualPdfDocument.getPage(0).getMediaBox().getHeight(), "Pdf height");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------- Helper methods and classes -----------------------------------------------
|
||||
// ----------------------------------------------- Helper methods and classes -----------------------------------------------
|
||||
|
||||
private static BiFunction<Float, Float, PDRectangle> unchangedRectangle()
|
||||
{
|
||||
@@ -339,7 +330,7 @@ class ImageToPdfTransformerTest
|
||||
return PDRectangle::new;
|
||||
}
|
||||
|
||||
return (width, height) -> predicate.test(width, height)? new PDRectangle(height, width) : new PDRectangle(width, height);
|
||||
return (width, height) -> predicate.test(width, height) ? new PDRectangle(height, width) : new PDRectangle(width, height);
|
||||
}
|
||||
|
||||
private static File loadFile(String fileName)
|
||||
@@ -451,7 +442,8 @@ class ImageToPdfTransformerTest
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
static
|
||||
{
|
||||
try
|
||||
{
|
||||
BufferedImage image = ImageIO.read(sourceFile);
|
||||
@@ -463,4 +455,4 @@ class ImageToPdfTransformerTest
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -26,15 +26,16 @@
|
||||
*/
|
||||
package org.alfresco.transform.misc.transformers;
|
||||
|
||||
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE;
|
||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import static org.alfresco.transform.common.RequestParamMap.PAGE_LIMIT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT;
|
||||
import static org.alfresco.transform.common.RequestParamMap.PDF_FONT_SIZE;
|
||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
@@ -51,7 +52,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TextToPdfContentTransformerTest
|
||||
{
|
||||
@@ -95,7 +96,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test1UTF16BigEndianBomBigEndianChars() throws Exception
|
||||
{
|
||||
// 1. BOM indicates BE (fe then ff) + chars appear to be BE (as first byte read tends to be a zero)
|
||||
// Expected with UTF-16. Some systems use BE and other like Windows and Mac used LE
|
||||
// Expected with UTF-16. Some systems use BE and other like Windows and Mac used LE
|
||||
String expectedByteOrder = "fe ff 00 31 00 20 00 49";
|
||||
transformTextAndCheck("UTF-16", true, true, expectedByteOrder);
|
||||
transformTextAndCheck("UTF-16", true, true, expectedByteOrder);
|
||||
@@ -107,7 +108,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test2UTF16LittleEndianBomLittleEndianChars() throws Exception
|
||||
{
|
||||
// 2. BOM indicates LE (ff then fe) + chars appear to be LE (as second byte read tends to be a zero)
|
||||
// Expected with UTF-16. Some systems use BE and other like Windows and Mac used LE
|
||||
// Expected with UTF-16. Some systems use BE and other like Windows and Mac used LE
|
||||
transformTextAndCheck("UTF-16", false, true, "ff fe 31 00 20 00 49 00");
|
||||
}
|
||||
|
||||
@@ -115,7 +116,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test3UTF16NoBomBigEndianChars() throws Exception
|
||||
{
|
||||
// 3. No BOM + chars appear to be BE (as first byte read tends to be a zero)
|
||||
// Expected with UTF-16BE
|
||||
// Expected with UTF-16BE
|
||||
transformTextAndCheck("UTF-16", true, null, "00 31 00 20 00 49");
|
||||
}
|
||||
|
||||
@@ -123,7 +124,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test4UTF16NoBomLittleEndianChars() throws Exception
|
||||
{
|
||||
// 4. No BOM + chars appear to be LE (as second byte read tends to be a zero)
|
||||
// Expected with UTF-16LE
|
||||
// Expected with UTF-16LE
|
||||
transformTextAndCheck("UTF-16", false, null, "31 00 20 00 49 00");
|
||||
}
|
||||
|
||||
@@ -131,7 +132,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test5UTF16BigEndianBomLittleEndianChars() throws Exception
|
||||
{
|
||||
// 5. BOM indicates BE (fe then ff) + chars appear to be LE (as second byte read tends to be a zero)
|
||||
// SOMETHING IS WRONG, BUT USE LE!!!!
|
||||
// SOMETHING IS WRONG, BUT USE LE!!!!
|
||||
transformTextAndCheck("UTF-16", false, false, "fe ff 31 00 20 00 49 00");
|
||||
}
|
||||
|
||||
@@ -139,7 +140,7 @@ public class TextToPdfContentTransformerTest
|
||||
public void test6UTF16LittleEndianBomBigEndianChars() throws Exception
|
||||
{
|
||||
// 6. BOM indicates LE (ff then fe) + chars appear to be BE (as first byte read tends to be a zero)
|
||||
// SOMETHING IS WRONG, BUT USE BE!!!!
|
||||
// SOMETHING IS WRONG, BUT USE BE!!!!
|
||||
transformTextAndCheck("UTF-16", true, false, "ff fe 00 31 00 20 00 49");
|
||||
}
|
||||
|
||||
@@ -204,8 +205,7 @@ public class TextToPdfContentTransformerTest
|
||||
}
|
||||
|
||||
/**
|
||||
* Test if a different font can be chosen to perform the transformation with breve character. This test
|
||||
* transformation should fail as Times-Bold font doesn't handle the breve character
|
||||
* Test if a different font can be chosen to perform the transformation with breve character. This test transformation should fail as Times-Bold font doesn't handle the breve character
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@@ -229,16 +229,17 @@ public class TextToPdfContentTransformerTest
|
||||
}
|
||||
|
||||
/**
|
||||
* @param encoding to be used to read the source file
|
||||
* @param bigEndian indicates that the file should contain big endian characters, so typically the first byte of
|
||||
* each char is a zero when using English.
|
||||
* @param validBom if not null, the BOM is included. If true it is the one matching bigEndian. If false it is the
|
||||
* opposite byte order, which really is an error, but we try to recover from it.
|
||||
* @param expectedByteOrder The first few bytes of the source file so we can check the test data has been
|
||||
* correctly created.
|
||||
* @param encoding
|
||||
* to be used to read the source file
|
||||
* @param bigEndian
|
||||
* indicates that the file should contain big endian characters, so typically the first byte of each char is a zero when using English.
|
||||
* @param validBom
|
||||
* if not null, the BOM is included. If true it is the one matching bigEndian. If false it is the opposite byte order, which really is an error, but we try to recover from it.
|
||||
* @param expectedByteOrder
|
||||
* The first few bytes of the source file so we can check the test data has been correctly created.
|
||||
*/
|
||||
protected TransformCheckResult transformTextAndCheck(String encoding, Boolean bigEndian, Boolean validBom,
|
||||
String expectedByteOrder) throws Exception
|
||||
String expectedByteOrder) throws Exception
|
||||
{
|
||||
return transformTextAndCheckImpl(-1, encoding, bigEndian, validBom, expectedByteOrder);
|
||||
}
|
||||
@@ -249,7 +250,7 @@ public class TextToPdfContentTransformerTest
|
||||
}
|
||||
|
||||
private TransformCheckResult transformTextAndCheckImpl(int pageLimit, String encoding, Boolean bigEndian, Boolean validBom,
|
||||
String expectedByteOrder) throws Exception
|
||||
String expectedByteOrder) throws Exception
|
||||
{
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String checkText = createTestText(pageLimit, sb);
|
||||
@@ -286,13 +287,13 @@ public class TextToPdfContentTransformerTest
|
||||
}
|
||||
|
||||
private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
|
||||
String pageLimit) throws Exception
|
||||
String pageLimit) throws Exception
|
||||
{
|
||||
return transformTextAndCheck(sourceFile, encoding, checkText, pageLimit, true, null, false);
|
||||
}
|
||||
|
||||
private TransformCheckResult transformTextAndCheck(File sourceFile, String encoding, String checkText,
|
||||
String pageLimit, boolean clean, Map<String, String> extraParameters, boolean shouldFail) throws Exception
|
||||
String pageLimit, boolean clean, Map<String, String> extraParameters, boolean shouldFail) throws Exception
|
||||
{
|
||||
TransformCheckResult result = new TransformCheckResult();
|
||||
|
||||
@@ -384,7 +385,7 @@ public class TextToPdfContentTransformerTest
|
||||
boolean firstRead = true;
|
||||
byte[] bytes = new byte[8192];
|
||||
try (InputStream is = new BufferedInputStream(new FileInputStream(file));
|
||||
OutputStream os = new BufferedOutputStream(new FileOutputStream(originalFile)))
|
||||
OutputStream os = new BufferedOutputStream(new FileOutputStream(originalFile)))
|
||||
{
|
||||
int l;
|
||||
int off;
|
||||
@@ -398,7 +399,7 @@ public class TextToPdfContentTransformerTest
|
||||
if (firstRead)
|
||||
{
|
||||
firstRead = false;
|
||||
boolean actualEndianBytes = bytes[0] == (byte)0xfe; // if true [1] would also be 0xff
|
||||
boolean actualEndianBytes = bytes[0] == (byte) 0xfe; // if true [1] would also be 0xff
|
||||
switchBytes = actualEndianBytes != bigEndian;
|
||||
if (validBom == null)
|
||||
{
|
||||
@@ -419,14 +420,14 @@ public class TextToPdfContentTransformerTest
|
||||
if (switchBytes)
|
||||
{
|
||||
// Reverse the byte order of characters including the BOM.
|
||||
for (int i=0; i<l; i+=2)
|
||||
for (int i = 0; i < l; i += 2)
|
||||
{
|
||||
byte aByte = bytes[i];
|
||||
bytes[i] = bytes[i+1];
|
||||
bytes[i+1] = aByte;
|
||||
bytes[i] = bytes[i + 1];
|
||||
bytes[i + 1] = aByte;
|
||||
}
|
||||
}
|
||||
os.write(bytes, off, len-off);
|
||||
os.write(bytes, off, len - off);
|
||||
}
|
||||
} while (l != -1);
|
||||
}
|
||||
@@ -456,11 +457,11 @@ public class TextToPdfContentTransformerTest
|
||||
hexString = hexString.replaceAll(" *", "");
|
||||
int len = hexString.length() / 2;
|
||||
byte[] bytes = new byte[len];
|
||||
for (int j=0, i=0; i<len; i++)
|
||||
for (int j = 0, i = 0; i < len; i++)
|
||||
{
|
||||
int firstDigit = Character.digit(hexString.charAt(j++), 16);
|
||||
int secondDigit = Character.digit(hexString.charAt(j++), 16);
|
||||
bytes[i] = (byte)((firstDigit << 4) + secondDigit);
|
||||
bytes[i] = (byte) ((firstDigit << 4) + secondDigit);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
@@ -469,7 +470,7 @@ public class TextToPdfContentTransformerTest
|
||||
{
|
||||
StringBuffer sb = new StringBuffer();
|
||||
int len = bytes.length;
|
||||
for (int i=0; i<len; i++)
|
||||
for (int i = 0; i < len; i++)
|
||||
{
|
||||
if (sb.length() > 0)
|
||||
{
|
||||
|
@@ -43,8 +43,7 @@ import org.junit.jupiter.params.provider.Arguments;
|
||||
public class ArgumentsCartesianProduct
|
||||
{
|
||||
/**
|
||||
* Creates arguments cartesian product of fixed object and a stream of objects.
|
||||
* Example: a ✕ {x,y,z} = {a,x}, {a,y}, {a,z}
|
||||
* Creates arguments cartesian product of fixed object and a stream of objects. Example: a ✕ {x,y,z} = {a,x}, {a,y}, {a,z}
|
||||
*/
|
||||
public static Stream<Arguments> of(final Object fixedFirstArgument, final Stream<?> secondArguments)
|
||||
{
|
||||
@@ -52,8 +51,7 @@ public class ArgumentsCartesianProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates arguments cartesian product of a stream of objects and fixed object.
|
||||
* Example: {a,b,c} ✕ y ✕ z = {a,y,z}, {b,y,z}, {c,y,z}
|
||||
* Creates arguments cartesian product of a stream of objects and fixed object. Example: {a,b,c} ✕ y ✕ z = {a,y,z}, {b,y,z}, {c,y,z}
|
||||
*/
|
||||
public static Stream<Arguments> of(final Stream<?> firstArguments, final Object... otherFixedArguments)
|
||||
{
|
||||
@@ -61,8 +59,7 @@ public class ArgumentsCartesianProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates arguments cartesian product of two streams of objects.
|
||||
* Example: {a,b} ✕ {y,z} = {a,y}, {a,z}, {b,y}, {b,z}
|
||||
* Creates arguments cartesian product of two streams of objects. Example: {a,b} ✕ {y,z} = {a,y}, {a,z}, {b,y}, {b,z}
|
||||
*/
|
||||
public static Stream<Arguments> of(final Stream<?> firstArguments, final Stream<?> secondArguments)
|
||||
{
|
||||
@@ -70,8 +67,7 @@ public class ArgumentsCartesianProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates arguments cartesian product of multiple streams of objects.
|
||||
* Example: {a,b} ✕ {k,l,m} ✕ ... ✕ {y,z} = {a,k,...,y}, {a,k,...,z}, {a,l,...,y}, ..., {b,m,...,z}
|
||||
* Creates arguments cartesian product of multiple streams of objects. Example: {a,b} ✕ {k,l,m} ✕ ... ✕ {y,z} = {a,k,...,y}, {a,k,...,z}, {a,l,...,y}, ..., {b,m,...,z}
|
||||
*/
|
||||
public static Stream<Arguments> of(final Stream<?>... argumentsStreams)
|
||||
{
|
||||
@@ -79,18 +75,17 @@ public class ArgumentsCartesianProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates arguments cartesian product of multiple streams of arguments.
|
||||
* Example: {a,b} ✕ {k,l,m} ✕ ... ✕ {y,z} = {a,k,...,y}, {a,k,...,z}, {a,l,...,y}, ..., {b,m,...,z}
|
||||
* Creates arguments cartesian product of multiple streams of arguments. Example: {a,b} ✕ {k,l,m} ✕ ... ✕ {y,z} = {a,k,...,y}, {a,k,...,z}, {a,l,...,y}, ..., {b,m,...,z}
|
||||
*/
|
||||
@SafeVarargs
|
||||
@SuppressWarnings("unchecked")
|
||||
public static Stream<Arguments> ofArguments(final Stream<Arguments>... argumentsStreams)
|
||||
{
|
||||
return cartesianProductOf(argumentsStreams)
|
||||
.map(argumentsStream -> (Stream<Arguments>) argumentsStream)
|
||||
.map(argumentsStream -> Arguments.of(argumentsStream
|
||||
.flatMap(arguments -> Arrays.stream(arguments.get()))
|
||||
.toArray()));
|
||||
.map(argumentsStream -> (Stream<Arguments>) argumentsStream)
|
||||
.map(argumentsStream -> Arguments.of(argumentsStream
|
||||
.flatMap(arguments -> Arrays.stream(arguments.get()))
|
||||
.toArray()));
|
||||
}
|
||||
|
||||
private static Stream<Stream<?>> cartesianProductOf(final Stream<?>... streams)
|
||||
@@ -101,17 +96,17 @@ public class ArgumentsCartesianProduct
|
||||
}
|
||||
|
||||
return Stream.of(streams)
|
||||
.filter(Objects::nonNull)
|
||||
.map(stream -> stream.map(Collections::<Object>singletonList))
|
||||
.reduce((result, nextElements) -> {
|
||||
final List<List<Object>> nextElementsCopy = nextElements.collect(Collectors.toList());
|
||||
return result.flatMap(resultPortion -> nextElementsCopy.stream().map(nextElementsPortion -> {
|
||||
final List<Object> extendedResultPortion = new ArrayList<>();
|
||||
extendedResultPortion.addAll(resultPortion);
|
||||
extendedResultPortion.addAll(nextElementsPortion);
|
||||
return extendedResultPortion;
|
||||
}));
|
||||
}).orElse(Stream.empty())
|
||||
.map(Collection::stream);
|
||||
.filter(Objects::nonNull)
|
||||
.map(stream -> stream.map(Collections::<Object> singletonList))
|
||||
.reduce((result, nextElements) -> {
|
||||
final List<List<Object>> nextElementsCopy = nextElements.collect(Collectors.toList());
|
||||
return result.flatMap(resultPortion -> nextElementsCopy.stream().map(nextElementsPortion -> {
|
||||
final List<Object> extendedResultPortion = new ArrayList<>();
|
||||
extendedResultPortion.addAll(resultPortion);
|
||||
extendedResultPortion.addAll(nextElementsPortion);
|
||||
return extendedResultPortion;
|
||||
}));
|
||||
}).orElse(Stream.empty())
|
||||
.map(Collection::stream);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user