REPO-4334 Move metadata extraction into T-Engines (#247)

* Metadata extract code added to T-Engines
* Required a refactor of duplicate code to avoid 3x more duplication:
        - try catches used to return exit codes
        - calls to java libraries or commands to external processes
        - building of transform options in controllers, adaptors
* integration tests based on current extracts performed in the repo
* included extract code for libreoffice, and embed code even though not used out of the box any more. There may well be custom extracts using them that move to T-Engines
* removal of unused imports
* minor autoOrient / allowEnlargement bug fixes that were not included in Paddington on the T-Engine side.
This commit is contained in:
Alan Davis
2020-06-11 20:20:22 +01:00
committed by GitHub
parent ca394440bb
commit 06109dee75
158 changed files with 10288 additions and 1454 deletions

View File

@@ -26,6 +26,7 @@
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,8 +40,6 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import io.micrometer.core.instrument.MeterRegistry;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,34 +26,20 @@
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.transformers.HtmlParserContentTransformer.SOURCE_ENCODING;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.transformers.HtmlParserContentTransformer.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@Controller
public class MiscController extends AbstractTransformerController
@@ -88,71 +74,16 @@ public class MiscController extends AbstractTransformerController
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform("html", sourceFile, targetFile, MIMETYPE_HTML,
MIMETYPE_TEXT_PLAIN, parameters);
transform("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout)
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
if (logger.isDebugEnabled())
{
logger.debug(
"Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
}
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
transformer.transform(transform, sourceFile, targetFile, sourceMimetype, targetMimetype,
transformOptions);
}
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam("targetMimetype") String targetMimetype,
@RequestParam(value = "targetEncoding", required = false) String targetEncoding,
@RequestParam("sourceMimetype") String sourceMimetype,
@RequestParam(value = "sourceEncoding", required = false) String sourceEncoding,
@RequestParam(value = "pageLimit", required = false) String pageLimit,
@RequestParam(value = "testDelay", required = false) Long testDelay)
{
if (logger.isDebugEnabled())
{
logger.debug(
"Processing request with: sourceMimetype '{}', sourceEncoding '{}', " +
"targetMimetype '{}', targetExtension '{}', targetEncoding '{}', pageLimit '{}'",
sourceMimetype, sourceEncoding, targetMimetype, targetExtension, targetEncoding,
pageLimit);
}
final String targetFilename = createTargetFileName(
sourceMultipartFile.getOriginalFilename(), targetExtension);
getProbeTestTransform().incrementTransformerCount();
final File sourceFile = createSourceFile(request, sourceMultipartFile);
final File targetFile = createTargetFile(request, targetFilename);
final Map<String, String> transformOptions = createTransformOptions(
"sourceEncoding", sourceEncoding,
"targetEncoding", targetEncoding,
"pageLimit", pageLimit);
final String transform = getTransformerName(sourceFile, sourceMimetype, targetMimetype,
transformOptions);
transformer.transform(transform, sourceFile, targetFile, sourceMimetype, targetMimetype,
transformOptions);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -0,0 +1,72 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.List;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_RFC822;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transformer.TestFileInfo.testFile;
/**
 * Integration tests for the metadata extractors of the Misc T-Engine.
 *
 * Each {@link TestFileInfo} returned by {@link #engineTransformations()} is handed to the
 * constructor by the JUnit {@link Parameterized} runner and forwarded to the superclass.
 *
 * @author adavis
 */
@RunWith(Parameterized.class)
public class MiscMetadataExtractsIT extends AbstractMetadataExtractsIT
{
    public MiscMetadataExtractsIT(TestFileInfo testFileInfo)
    {
        super(testFileInfo);
    }

    /**
     * Supplies the parameter set for this test class.
     *
     * @return one {@link TestFileInfo} per source file whose metadata is to be extracted.
     */
    @Parameterized.Parameters
    public static List<TestFileInfo> engineTransformations()
    {
        final Stream<TestFileInfo> testFiles = Stream.of(
            // HtmlMetadataExtractor
            testFile(MIMETYPE_HTML, "html", "quick.html"),
            testFile(MIMETYPE_XHTML, "xhtml", "quick.xhtml.alf"), // avoid the license header check on xhtml

            // RFC822MetadataExtractor
            testFile(MIMETYPE_RFC822, "eml", "quick.eml"),

            // Special test cases from the repo tests
            // ======================================
            testFile(MIMETYPE_RFC822, "eml", "quick.spanish.eml"),
            testFile(MIMETYPE_HTML, "html", "quick.japanese.html"));

        return testFiles.collect(toList());
    }
}

View File

@@ -69,6 +69,23 @@
],
"transformOptions": [
]
},
{
"transformerName": "HtmlMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "text/html", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "RFC822MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -1,5 +1,5 @@
From: Nevin Nollop <nevin.nollop@alfresco.com>
To: Nevin Nollop <nevin.nollop@alfresco.com>
To: Nevin Nollop <nevin.nollop@gmail.com>
Cc: Nevin Nollop <nevinn@alfresco.com>
Message-ID: <20040604122322.GV1905@phoenix.home>
Date: Fri, 4 Jun 2004 14:23:22 +0200

View File

@@ -0,0 +1,15 @@
{
"{http://www.alfresco.org/model/content/1.0}addressee" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/content/1.0}addressees" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/imap/1.0}dateSent" : 1086351802000,
"{http://www.alfresco.org/model/imap/1.0}messageTo" : "Nevin Nollop <nevin.nollop@gmail.com>",
"{http://www.alfresco.org/model/imap/1.0}messageId" : "<20040604122322.GV1905@phoenix.home>",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageCc" : "Nevin Nollop <nevinn@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}sentdate" : 1086351802000,
"{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageFrom" : "Nevin Nollop <nevin.nollop@alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}originator" : "Nevin Nollop <nevin.nollop@alfresco.com>"
}

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,12 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
<title><EFBFBD>m<EFBFBD>F<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʂ<EFBFBD><EFBFBD>Y<EFBFBD>t<EFBFBD><EFBFBD><EFBFBD>܂<EFBFBD><EFBFBD>̂ŁA<EFBFBD>m<EFBFBD>F<EFBFBD><EFBFBD><EFBFBD>Ă<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD></title>
</head>
<body>
</body>
</html>

View File

@@ -0,0 +1,3 @@
{
"{http://www.alfresco.org/model/content/1.0}title" : "確認した結果を添付しますので、確認してください"
}

View File

@@ -0,0 +1,16 @@
{
"{http://www.alfresco.org/model/imap/1.0}dateReceived" : "Thu, 16 Aug 2012 08:13:29 -0700 (PDT)",
"{http://www.alfresco.org/model/content/1.0}addressee" : "jane.doe@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}description" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/content/1.0}addressees" : null,
"{http://www.alfresco.org/model/imap/1.0}dateSent" : 1345130009000,
"{http://www.alfresco.org/model/imap/1.0}messageTo" : "jane.doe@alfresco.com",
"{http://www.alfresco.org/model/imap/1.0}messageId" : "<CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>",
"{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageSubject" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageCc" : null,
"{http://www.alfresco.org/model/content/1.0}sentdate" : 1345130009000,
"{http://www.alfresco.org/model/content/1.0}subjectline" : "The quick brown fox jumps over the lazy dog",
"{http://www.alfresco.org/model/imap/1.0}messageFrom" : "john.doe@alfresco.com",
"{http://www.alfresco.org/model/content/1.0}originator" : "john.doe@alfresco.com"
}

View File

@@ -0,0 +1,17 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=windows-1252"/>
<title>The quick brown fox jumps over the lazy dog</title>
<meta name="author" content="Nevin Nollop"/>
<meta name="keywords" content="Pangram, fox, dog"/>
<meta name="description" content="Gym class featuring a brown fox and lazy dog"/>
</head>
<body lang="EN-US">
The quick brown fox jumps over the lazy dog
</body>
</html>

View File

@@ -0,0 +1,5 @@
{
"{http://www.alfresco.org/model/content/1.0}author": "Nevin Nollop",
"{http://www.alfresco.org/model/content/1.0}description": "Gym class featuring a brown fox and lazy dog",
"{http://www.alfresco.org/model/content/1.0}title": "The quick brown fox jumps over the lazy dog"
}

View File

@@ -0,0 +1,203 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.swing.text.ChangedCharSetException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
/**
 * Metadata extractor for HTML and XHTML.
 *
 * Configuration: (see HtmlMetadataExtractor_metadata_extract.properties and misc_engine_config.json)
 *
 * <pre>
 * <b>author:</b> -- cm:author
 * <b>title:</b> -- cm:title
 * <b>description:</b> -- cm:description
 * </pre>
 *
 * Based on HtmlMetadataExtracter from the content repository.
 *
 * @author Jesper Steen Møller
 * @author Derek Hulley
 * @author adavis
 */
public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class);

    private static final String KEY_AUTHOR = "author";
    private static final String KEY_TITLE = "title";
    private static final String KEY_DESCRIPTION= "description";

    /** Marker that precedes the charset name in a content-type style charset specification. */
    private static final String CHARSET_PREFIX = "charset=";

    /** Upper bound on the number of parse attempts made while chasing charset changes. */
    private static final int MAX_TRIES = 3;

    public HtmlMetadataExtractor()
    {
        super(logger);
    }

    /**
     * Extracts the raw metadata from the source file and passes it, together with the
     * target file, to {@code mapMetadataAndWrite}.
     *
     * @throws Exception if the extraction or the write fails.
     */
    @Override
    public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
        mapMetadataAndWrite(targetFile, metadata);
    }

    /**
     * Parses the source file as HTML and collects the title plus the author and description
     * meta tags as raw properties.
     *
     * The first attempt reads the file as UTF-8. If the parser reports a different charset
     * (a {@link ChangedCharSetException}), the file is re-read using the reported charset and,
     * on that retry, the parser is told to ignore further charset declarations.
     *
     * @param sourceMimetype   not used by this implementation.
     * @param transformOptions not used by this implementation.
     * @param sourceFile       the HTML or XHTML file to read.
     * @return the raw properties keyed by "author", "title" and "description".
     * @throws Exception if the file cannot be read or the reported charset is not supported.
     */
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
                                                     File sourceFile) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
        {
            // Non-null only while the parser is between <title> and </title> inside <head>.
            // The parser drives the callback from a single thread, so no synchronised buffer is needed.
            StringBuilder title = null;
            boolean inHead = false;

            @Override
            public void handleText(char[] data, int pos)
            {
                if (title != null)
                {
                    title.append(data);
                }
            }

            @Override
            public void handleComment(char[] data, int pos)
            {
                // Perhaps sniff for Office 9+ metadata in here?
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
            {
                if (HTML.Tag.HEAD.equals(t))
                {
                    inHead = true;
                }
                else if (HTML.Tag.TITLE.equals(t) && inHead)
                {
                    title = new StringBuilder();
                }
                else
                {
                    // A meta tag may be reported as a start tag rather than a simple tag.
                    handleSimpleTag(t, a, pos);
                }
            }

            @Override
            public void handleEndTag(HTML.Tag t, int pos)
            {
                if (HTML.Tag.HEAD.equals(t))
                {
                    inHead = false;
                }
                else if (HTML.Tag.TITLE.equals(t) && title != null)
                {
                    putRawValue(KEY_TITLE, title.toString(), rawProperties);
                    title = null;
                }
            }

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
            {
                if (HTML.Tag.META.equals(t))
                {
                    Object nameO = a.getAttribute(HTML.Attribute.NAME);
                    Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
                    if (nameO == null || valueO == null)
                    {
                        return;
                    }

                    String name = nameO.toString();

                    if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
                        || name.equalsIgnoreCase("dc.creator"))
                    {
                        putRawValue(KEY_AUTHOR, valueO.toString(), rawProperties);
                    }
                    else if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
                    {
                        putRawValue(KEY_DESCRIPTION, valueO.toString(), rawProperties);
                    }
                }
            }

            @Override
            public void handleError(String errorMsg, int pos)
            {
                // Parse errors are deliberately ignored.
            }
        };

        String charsetGuess = "UTF-8";
        int tries = 0;
        while (tries < MAX_TRIES)
        {
            // Discard anything collected by a previous attempt that used the wrong charset.
            rawProperties.clear();
            try (InputStream cis = new FileInputStream(sourceFile);
                 Reader r = new InputStreamReader(cis, charsetGuess))
            {
                HTMLEditorKit.Parser parser = new ParserDelegator();
                // On a retry, ignore charset declarations so the parser does not ask to switch again.
                parser.parse(r, callback, tries > 0);
                break;
            }
            catch (ChangedCharSetException ccse)
            {
                tries++;
                charsetGuess = ccse.getCharSetSpec();
                // The spec may be a full content-type value ("text/html; charset=X"), a bare
                // "charset=X", or just the charset name; strip everything up to the name.
                int begin = charsetGuess.indexOf(CHARSET_PREFIX);
                if (begin >= 0)
                {
                    charsetGuess = charsetGuess.substring(begin + CHARSET_PREFIX.length());
                }
            }
        }

        return rawProperties;
    }
}

View File

@@ -0,0 +1,196 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.Header;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMessage.RecipientType;
import javax.mail.internet.MimeUtility;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Metadata extractor for RFC822 mime emails.
 *
 * Configuration: (see the {@code _metadata_extract.properties} file for this extractor, presumably
 * RFC822MetadataExtractor_metadata_extract.properties, and misc_engine_config.json)
 *
 * <pre>
 * <b>messageFrom:</b> -- imap:messageFrom, cm:originator
 * <b>messageTo:</b> -- imap:messageTo
 * <b>messageCc:</b> -- imap:messageCc
 * <b>messageSubject:</b> -- imap:messageSubject, cm:title, cm:description, cm:subjectline
 * <b>messageSent:</b> -- imap:dateSent, cm:sentdate
 * <b>messageReceived:</b> -- imap:dateReceived
 * <b>All {@link Header#getName() header names}:</b>
 * <b>Thread-Index:</b> -- imap:threadIndex
 * <b>Message-ID:</b> -- imap:messageId
 * </pre>
 *
 * @author Derek Hulley
 * @author adavis
 */
public class RFC822MetadataExtractor extends AbstractMetadataExtractor implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class);

    protected static final String KEY_MESSAGE_FROM = "messageFrom";
    protected static final String KEY_MESSAGE_TO = "messageTo";
    protected static final String KEY_MESSAGE_CC = "messageCc";
    protected static final String KEY_MESSAGE_SUBJECT = "messageSubject";
    protected static final String KEY_MESSAGE_SENT = "messageSent";
    protected static final String KEY_MESSAGE_RECEIVED = "messageReceived";

    public RFC822MetadataExtractor()
    {
        super(logger);
    }

    /**
     * Extracts the raw metadata from the source file and passes it, together with the
     * target file, to {@code mapMetadataAndWrite}.
     *
     * @throws Exception if the extraction or the write fails.
     */
    @Override
    public void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile);
        mapMetadataAndWrite(targetFile, metadata);
    }

    /**
     * Reads the source file as a {@link MimeMessage} and collects the sender, recipients,
     * sent and received dates, subject, and every header whose name is a key of the
     * extract mapping.
     *
     * @param sourceMimetype   not used by this implementation.
     * @param transformOptions not used by this implementation.
     * @param sourceFile       the email file to read.
     * @return the raw properties keyed by the {@code KEY_MESSAGE_*} constants and by header name.
     * @throws Exception if the file cannot be read or parsed, or a value cannot be decoded.
     */
    @Override
    public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions,
                                                     File sourceFile) throws Exception
    {
        final Map<String, Serializable> rawProperties = new HashMap<>();

        try (InputStream is = new FileInputStream(sourceFile))
        {
            MimeMessage mimeMessage = new MimeMessage(null, is);

            /*
             * Extract RFC822 values that do not map directly to headers and need to be decoded,
             * or special fields that require some code to extract data.
             */
            putRawValue(KEY_MESSAGE_FROM,
                decodeText(InternetAddress.toString(mimeMessage.getFrom())), rawProperties);

            putRawValue(KEY_MESSAGE_TO,
                decodeText(InternetAddress.toString(mimeMessage.getRecipients(RecipientType.TO))), rawProperties);

            putRawValue(KEY_MESSAGE_CC,
                decodeText(InternetAddress.toString(mimeMessage.getRecipients(RecipientType.CC))), rawProperties);

            putRawValue(KEY_MESSAGE_SENT, mimeMessage.getSentDate(), rawProperties);

            /*
             * Received field from RFC 822
             *
             * "Received" ":" ; one per relay
             * ["from" domain] ; sending host
             * ["by" domain] ; receiving host
             * ["via" atom] ; physical path
             * ("with" atom) ; link/mail protocol
             * ["id" msg-id] ; receiver msg id
             * ["for" addr-spec] ; initial form
             * ";" date-time ; time received
             */
            Date rxDate = mimeMessage.getReceivedDate();
            if (rxDate != null)
            {
                // The email implementation extracted the received date for us.
                putRawValue(KEY_MESSAGE_RECEIVED, rxDate, rawProperties);
            }
            else
            {
                // The email implementation did not parse the received date for us, so take the
                // text after the last ';' of the first "received" header as an unparsed string.
                String[] rx = mimeMessage.getHeader("received");
                if (rx != null && rx.length > 0)
                {
                    String lastReceived = MimeUtility.unfold(rx[0]);
                    int x = lastReceived.lastIndexOf(';');
                    if (x > 0)
                    {
                        String dateStr = lastReceived.substring(x + 1).trim();
                        putRawValue(KEY_MESSAGE_RECEIVED, dateStr, rawProperties);
                    }
                }
            }

            String[] subj = mimeMessage.getHeader("Subject");
            if (subj != null && subj.length > 0)
            {
                String decodedSubject = subj[0];
                try
                {
                    decodedSubject = MimeUtility.decodeText(decodedSubject);
                }
                catch (UnsupportedEncodingException e)
                {
                    // Best effort: fall back to the undecoded subject.
                    logger.warn(e.toString());
                }
                putRawValue(KEY_MESSAGE_SUBJECT, decodedSubject, rawProperties);
            }

            /*
             * Extract values from all header fields, including extension fields "X-"
             */
            Set<String> keys = getExtractMapping().keySet();
            @SuppressWarnings("unchecked")
            Enumeration<Header> headers = mimeMessage.getAllHeaders();
            while (headers.hasMoreElements())
            {
                Header header = headers.nextElement();
                if (keys.contains(header.getName()))
                {
                    putRawValue(header.getName(), decodeText(header.getValue()), rawProperties);
                }
            }
        }

        return rawProperties;
    }

    /**
     * Null-safe wrapper around {@link MimeUtility#decodeText(String)}.
     *
     * @param text the possibly encoded header text, may be {@code null}.
     * @return the decoded text, or {@code null} if {@code text} was {@code null}.
     * @throws UnsupportedEncodingException if the text names a charset that cannot be decoded.
     */
    private static String decodeText(String text) throws UnsupportedEncodingException
    {
        return text != null ? MimeUtility.decodeText(text) : null;
    }
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,7 +26,11 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
@@ -37,12 +41,7 @@ import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableList;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
@@ -74,8 +73,8 @@ public class AppleIWorksContentTransformer implements SelectableTransformer
// (53 x 41) preview-micro.jpg
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters)
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile)
{
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
sourceMimetype, targetMimetype);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,10 +26,15 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
@@ -43,15 +48,9 @@ import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
@@ -74,8 +73,8 @@ public class EMLTransformer implements SelectableTransformer
private static final String DEFAULT_ENCODING = "UTF-8";
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
logger.debug("Performing RFC822 to text transform.");
// Use try with resource

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,12 @@
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,12 +42,6 @@ import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
@@ -75,8 +75,8 @@ public class HtmlParserContentTransformer implements SelectableTransformer
HtmlParserContentTransformer.class);
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,13 +26,6 @@
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
@@ -41,6 +34,13 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
* This transformer will only work for OOXML files where thumbnailing was enabled,
@@ -59,8 +59,8 @@ public class OOXMLThumbnailContentTransformer implements SelectableTransformer
OOXMLThumbnailContentTransformer.class);
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
if (logger.isDebugEnabled())
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -39,14 +39,18 @@ public interface SelectableTransformer
String SOURCE_ENCODING = "sourceEncoding";
String TARGET_ENCODING = "targetEncoding";
/**
* Implementation of the actual transformation.
*
* @param sourceFile
* @param targetFile
* @param parameters
* @throws Exception
*/
void transform(File sourceFile, File targetFile, String sourceMimetype,
String targetMimetype, Map<String, String> parameters) throws Exception;
/**
 * Transforms the source file into the target file.
 * <p>
 * The default implementation does nothing; implementations override the
 * operations they support.
 *
 * @param sourceMimetype mimetype of the source file
 * @param targetMimetype mimetype of the target file
 * @param parameters     transform options keyed by name, e.g. {@link #SOURCE_ENCODING}
 * @param sourceFile     file to transform from
 * @param targetFile     file to transform to
 * @throws Exception if the transformation fails
 */
default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
File sourceFile, File targetFile) throws Exception
{
}
/**
 * Extracts metadata from the source file.
 * <p>
 * The default implementation does nothing; implementations override the
 * operations they support.
 * NOTE(review): presumably the extracted metadata is written to
 * {@code targetFile} - confirm against the concrete extractors.
 *
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions options keyed by name
 * @param sourceFile       file to extract metadata from
 * @param targetFile       target file of the extraction
 * @throws Exception if the extraction fails
 */
default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
/**
 * Embeds metadata into a file.
 * <p>
 * The default implementation does nothing; implementations override the
 * operations they support.
 * NOTE(review): presumably the metadata to embed is supplied via
 * {@code transformOptions} and the result is written to {@code targetFile} -
 * confirm against the concrete embedders.
 *
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions options keyed by name
 * @param sourceFile       source file of the embed operation
 * @param targetFile       target file of the embed operation
 * @throws Exception if the embedding fails
 */
default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
}

View File

@@ -26,19 +26,17 @@
*/
package org.alfresco.transformer.transformers;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* The SelectingTransformer selects a registered {@link SelectableTransformer}
@@ -46,9 +44,9 @@ import com.google.common.collect.ImmutableMap;
*
* @author eknizat
*/
public class SelectingTransformer
public class SelectingTransformer implements Transformer
{
private static final Logger logger = LoggerFactory.getLogger(SelectingTransformer.class);
private static final String ID = "misc";
public static final String LICENCE =
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
@@ -63,57 +61,45 @@ public class SelectingTransformer
.put("textToPdf", new TextToPdfContentTransformer())
.put("rfc822", new EMLTransformer())
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
.build();
/**
* Performs a transform using a transformer selected based on the provided sourceMimetype and targetMimetype
*
* @param transform the name of the transformer
* @param sourceFile File to transform from
* @param targetFile File to transform to
* @param sourceMimetype Mimetype of the source file
* @throws TransformException if there was a problem internally
*/
public void transform(String transform, File sourceFile, File targetFile, String sourceMimetype,
String targetMimetype, Map<String, String> parameters) throws TransformException
@Override
public String getTransformerId()
{
try
{
final SelectableTransformer transformer = transformers.get(transform);
logOptions(sourceFile, targetFile, parameters);
transformer.transform(sourceFile, targetFile, sourceMimetype, targetMimetype,
parameters);
}
catch (IllegalArgumentException e)
{
throw new TransformException(BAD_REQUEST.value(), getMessage(e));
}
catch (Exception e)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), getMessage(e));
}
if (!targetFile.exists())
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file. Target file does not exist.");
}
if (sourceFile.length() > 0 && targetFile.length() == 0)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(),
"Transformer failed to create an output file. Target file is empty but source file was not empty.");
}
return ID;
}
private static String getMessage(Exception e)
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
return e.getMessage() == null || e.getMessage().isEmpty() ? e.getClass().getSimpleName() : e.getMessage();
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
/**
 * Extracts metadata using the registered {@link SelectableTransformer} with the given name.
 *
 * @param transformName    name under which the transformer is registered
 * @param sourceMimetype   mimetype of the source file
 * @param targetMimetype   mimetype of the target file
 * @param transformOptions options passed to the extractor and recorded in the log entry
 * @param sourceFile       file to extract metadata from
 * @param targetFile       file handed to the extractor as its target
 * @throws IllegalArgumentException if no transformer is registered under {@code transformName}
 * @throws Exception if the selected extractor fails
 */
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions,
        File sourceFile, File targetFile) throws Exception
{
    final SelectableTransformer transformer = transformers.get(transformName);
    logOptions(sourceFile, targetFile, transformOptions);
    if (transformer == null)
    {
        // An unregistered name would otherwise surface as a message-less NullPointerException.
        throw new IllegalArgumentException("Unknown transformer name: " + transformName);
    }
    transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
{
StringJoiner sj = new StringJoiner(" ");
parameters.forEach((k, v) -> sj.add(
"--" + k + "=" + v)); // keeping the existing style used in other T-Engines
parameters.forEach((k, v) ->
{
if (!TRANSFORM_NAME_PARAMETER.equals(k))
{
sj.add("--" + k + "=" + v);
}
}); // keeping the existing style used in other T-Engines
sj.add(getExtension(sourceFile));
sj.add(getExtension(targetFile));
LogEntry.setOptions(sj.toString());

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,9 @@
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
@@ -39,9 +42,6 @@ import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Converts any textual format to plain text.
* <p>
@@ -58,7 +58,7 @@ import org.apache.commons.logging.LogFactory;
public class StringExtractingContentTransformer implements SelectableTransformer
{
private static final Log logger = LogFactory.getLog(StringExtractingContentTransformer.class);
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
/**
* Text to text conversions are done directly using the content reader and writer string
@@ -69,8 +69,8 @@ public class StringExtractingContentTransformer implements SelectableTransformer
* be unformatted but valid.
*/
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String targetEncoding = parameters.get(TARGET_ENCODING);
@@ -126,11 +126,11 @@ public class StringExtractingContentTransformer implements SelectableTransformer
{
if (charReader != null)
{
try { charReader.close(); } catch (Throwable e) { logger.error(e); }
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
}
if (charWriter != null)
{
try { charWriter.close(); } catch (Throwable e) { logger.error(e); }
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
}
}
// done

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,6 +26,15 @@
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
@@ -40,14 +49,6 @@ import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
@@ -62,7 +63,7 @@ public class TextToPdfContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
public static final String PAGE_LIMIT = "pageLimit";
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
private final PagedTextToPDF transformer;
@@ -98,8 +99,8 @@ public class TextToPdfContentTransformer implements SelectableTransformer
}
@Override
public void transform(final File sourceFile, final File targetFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> parameters) throws Exception
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String stringPageLimit = parameters.get(PAGE_LIMIT);

View File

@@ -0,0 +1,12 @@
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -0,0 +1,22 @@
#
# RFC822MetadataExtractor - default mapping
#
# Namespaces
namespace.prefix.imap=http://www.alfresco.org/model/imap/1.0
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
#Default values that don't exactly match a header name
messageFrom=imap:messageFrom, cm:originator
messageTo=imap:messageTo, cm:addressee
messageCc=imap:messageCc, cm:addressees
messageSubject=imap:messageSubject, cm:title, cm:description, cm:subjectline
messageSent=imap:dateSent, cm:sentdate
messageReceived=imap:dateReceived
#Add here any values you want to extract.
# Use the header name as the key. The RHS is a list of the destination properties.
Thread-Index=imap:threadIndex
Message-ID=imap:messageId

View File

@@ -69,6 +69,23 @@
],
"transformOptions": [
]
},
{
"transformerName": "HtmlMetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "text/html", "targetMediaType": "alfresco-metadata-extract"},
{"sourceMediaType": "application/xhtml+xml", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
},
{
"transformerName": "RFC822MetadataExtractor",
"supportedSourceAndTargetList": [
{"sourceMediaType": "message/rfc822", "targetMediaType": "alfresco-metadata-extract"}
],
"transformOptions": [
]
}
]
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,8 +26,7 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,7 +35,8 @@ import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import static org.alfresco.transformer.transformers.StringExtractingContentTransformer.SOURCE_ENCODING;
import static org.junit.Assert.assertEquals;
public class HtmlParserContentTransformerTest
{
@@ -81,7 +81,7 @@ public class HtmlParserContentTransformerTest
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
@@ -94,7 +94,7 @@ public class HtmlParserContentTransformerTest
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -106,7 +106,7 @@ public class HtmlParserContentTransformerTest
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-16");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
@@ -131,7 +131,7 @@ public class HtmlParserContentTransformerTest
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(tmpS, tmpD, SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters);
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,8 +26,10 @@
*/
package org.alfresco.transformer.transformers;
import static org.alfresco.transformer.transformers.TextToPdfContentTransformer.PAGE_LIMIT;
import static org.junit.Assert.assertEquals;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,10 +38,8 @@ import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Before;
import org.junit.Test;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
import static org.junit.Assert.assertEquals;
public class TextToPdfContentTransformerTest
{
@@ -109,7 +109,7 @@ public class TextToPdfContentTransformerTest
// Transform to PDF
Map<String, String> parameters = new HashMap<>();
parameters.put(PAGE_LIMIT, pageLimit);
transformer.transform(sourceFile, targetFile, "text/plain", "application/pdf", parameters);
transformer.transform("text/plain", "application/pdf", parameters, sourceFile, targetFile);
// Read back in the PDF and check it
PDDocument doc = PDDocument.load(targetFile);