ATS-175 : T-Engine code cleanup

This commit is contained in:
Cezar.Leahu
2018-10-25 18:21:50 +03:00
parent ba8707c762
commit d85c03d362
42 changed files with 2042 additions and 1475 deletions

View File

@@ -11,15 +11,17 @@
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.context.annotation.Bean;
import io.micrometer.core.instrument.MeterRegistry;
@SpringBootApplication
@EnableAutoConfiguration(exclude={DataSourceAutoConfiguration.class})
public class Application
@@ -27,10 +29,18 @@ public class Application
@Value("${container.name}")
private String containerName;
@Bean MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
@Bean
public TikaJavaExecutor javaExecutor() throws Exception
{
return new TikaJavaExecutor();
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);

View File

@@ -12,21 +12,31 @@
package org.alfresco.transformer;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.Tika.PDF_BOX;
import static org.alfresco.transformer.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.Tika.TRANSFORM_NAMES;
import static org.alfresco.transformer.executors.Tika.INCLUDE_CONTENTS;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.alfresco.transformer.executors.Tika.TRANSFORM_NAMES;
import static org.alfresco.transformer.fs.FileManager.createAttachment;
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.logging.StandardMessages.ENTERPRISE_LICENCE;
import static org.alfresco.transformer.util.Util.stringToBoolean;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transformer.exceptions.TransformException;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.MediaType;
@@ -35,95 +45,87 @@ import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.SAXException;
/**
* Controller for the Docker based Tika transformers.
*
* Status Codes:
*
* 200 Success
* 400 Bad Request: Invalid target mimetype &lt;mimetype>
* 400 Bad Request: Request parameter &lt;name> is missing (missing mandatory parameter)
* 400 Bad Request: Request parameter &lt;name> is of the wrong type
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
* 400 Bad Request: The source filename was not supplied
* 500 Internal Server Error: (no message with low level IO problems)
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
* 500 Internal Server Error: Transformer version check exit code was not 0
* 500 Internal Server Error: Transformer version check failed to create any output
* 500 Internal Server Error: Could not read the target file
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
* 500 Internal Server Error: Filename encoding error
* 507 Insufficient Storage: Failed to store the source file
* 200 Success
* 400 Bad Request: Invalid target mimetype &lt;mimetype>
* 400 Bad Request: Request parameter &lt;name> is missing (missing mandatory parameter)
* 400 Bad Request: Request parameter &lt;name> is of the wrong type
* 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
* 400 Bad Request: The source filename was not supplied
* 500 Internal Server Error: (no message with low level IO problems)
* 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
* 500 Internal Server Error: Transformer version check exit code was not 0
* 500 Internal Server Error: Transformer version check failed to create any output
* 500 Internal Server Error: Could not read the target file
* 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
* 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
* 500 Internal Server Error: Filename encoding error
* 507 Insufficient Storage: Failed to store the source file
*/
@Controller
public class TikaController extends AbstractTransformerController
{
private Tika tika;
private static final Log logger = LogFactory.getLog(TikaController.class);
@Autowired
public TikaController() throws TikaException, IOException, SAXException
private TikaJavaExecutor javaExecutor;
@Autowired
public TikaController()
{
logger = LogFactory.getLog(TikaController.class);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logEnterpriseLicenseMessage();
Arrays.stream(ENTERPRISE_LICENCE.split("\\n")).forEach(logger::info);
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
tika = new Tika();
}
@Override
protected String getTransformerName()
public String getTransformerName()
{
return "Tika";
}
@Override
public void callTransform(String... args)
{
tika.transform(args);
}
@Override
protected String version()
public String version()
{
return "Tika available";
}
@Override
protected ProbeTestTransform getProbeTestTransform()
public ProbeTestTransform getProbeTestTransform()
{
// See the Javadoc on this method and Probes.md for the choice of these values.
// the livenessPercentage is a little large as Tika does tend to suffer from slow transforms that class with a gc.
return new ProbeTestTransform(this, "quick.pdf", "quick.txt",
60, 16, 400, 10240, 60*30+1, 60*15+20)
return new ProbeTestTransform(this, logger, "quick.pdf", "quick.txt",
60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
TikaController.this.callTransform(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE+MIMETYPE_TEXT_PLAIN, TARGET_ENCODING+"UTF-8");
javaExecutor.call(sourceFile, targetFile, PDF_BOX,
TARGET_MIMETYPE + MIMETYPE_TEXT_PLAIN, TARGET_ENCODING + "UTF-8");
}
};
}
@PostMapping(value = "/transform", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam("targetMimetype") String targetMimetype,
@RequestParam("targetEncoding") String targetEncoding,
@RequestParam("file") MultipartFile sourceMultipartFile,
@RequestParam("targetExtension") String targetExtension,
@RequestParam("targetMimetype") String targetMimetype,
@RequestParam("targetEncoding") String targetEncoding,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay,
@RequestParam(value = "timeout", required = false) Long timeout,
@RequestParam(value = "testDelay", required = false) Long testDelay,
@RequestParam(value = "transform") String transform,
@RequestParam(value="includeContents", required = false) Boolean includeContents,
@RequestParam(value="notExtractBookmarksText", required = false) Boolean notExtractBookmarksText)
@RequestParam(value = "transform") String transform,
@RequestParam(value = "includeContents", required = false) Boolean includeContents,
@RequestParam(value = "notExtractBookmarksText", required = false) Boolean notExtractBookmarksText)
{
if (!TRANSFORM_NAMES.contains(transform))
{
@@ -131,6 +133,7 @@ public class TikaController extends AbstractTransformerController
}
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(), targetExtension);
getProbeTestTransform().incrementTransformerCount();
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion
@@ -138,16 +141,21 @@ public class TikaController extends AbstractTransformerController
// TODO Consider streaming the request and response rather than using temporary files
// https://www.logicbig.com/tutorials/spring-framework/spring-web-mvc/streaming-response-body.html
callTransform(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT: null,
TARGET_MIMETYPE+targetMimetype, TARGET_ENCODING+targetEncoding);
javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
return createAttachment(targetFilename, targetFile, testDelay);
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(200, "Success");
time += LogEntry.addDelay(testDelay);
getProbeTestTransform().recordTransformTime(time);
return body;
}
@Override
protected void processTransform(File sourceFile, File targetFile,
public void processTransform(File sourceFile, File targetFile,
Map<String, String> transformOptions, Long timeout)
{
@@ -157,9 +165,9 @@ public class TikaController extends AbstractTransformerController
String targetMimetype = transformOptions.get("targetMimetype");
String targetEncoding = transformOptions.get("targetEncoding");
callTransform(sourceFile, targetFile, transform,
javaExecutor.call(sourceFile, targetFile, transform,
includeContents != null && includeContents ? INCLUDE_CONTENTS : null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT: null,
notExtractBookmarksText != null && notExtractBookmarksText ? NOT_EXTRACT_BOOKMARKS_TEXT : null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + targetEncoding);
}
}

View File

@@ -9,7 +9,36 @@
* agreement is prohibited.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transformer.executors;
import static java.util.Arrays.asList;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_HTML;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_IMAGE_PNG;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_IMAGE_TIFF;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_TEXT_CSV;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_XML;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
@@ -30,19 +59,6 @@ import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import static org.alfresco.repo.content.MimetypeMap.*;
/**
* Stripped down command line Tika transformers. Not actually run as a separate process, but the code fits the patten
* used by transformers that do.
@@ -424,7 +440,7 @@ public class Tika
public static final String TIKA_AUTO = "TikaAuto";
public static final String TEXT_MINING = "TextMining";
public static final List<String> TRANSFORM_NAMES = Arrays.asList(
public static final List<String> TRANSFORM_NAMES = asList(
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
public static final String TARGET_MIMETYPE = "--targetMimetype=";
@@ -445,17 +461,17 @@ public class Tika
public static final String XML = "xml";
public static final String ZIP = "zip";
private Parser packageParser = new PackageParser();
private Parser pdfParser = new PDFParser();
private Parser officeParser = new OfficeParser();
private Parser autoDetectParser;
private Parser ooXmlParser = new OOXMLParser();
private Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
private PDFParserConfig pdfParserConfig = new PDFParserConfig();
private final Parser packageParser = new PackageParser();
private final Parser pdfParser = new PDFParser();
private final Parser officeParser = new OfficeParser();
private final Parser autoDetectParser;
private final Parser ooXmlParser = new OOXMLParser();
private final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
private final PDFParserConfig pdfParserConfig = new PDFParserConfig();
private DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
private List<String> disabledMediaTypes = Arrays.asList(new String[] {MIMETYPE_IMAGE_JPEG, MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG});
private final List<String> disabledMediaTypes = asList(MIMETYPE_IMAGE_JPEG, MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
@Override
public boolean select(Metadata metadata)
@@ -628,17 +644,14 @@ public class Tika
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
InputStream is = null;
OutputStream os = null;
Writer ow = null;
try
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
OutputStream os = new FileOutputStream(targetFilename);
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
{
is = new BufferedInputStream(new FileInputStream(sourceFilename));
os = new FileOutputStream(targetFilename);
ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding));
Metadata metadata = new Metadata();
ParseContext context = buildParseContext(documentSelector, includeContents, notExtractBookmarksText);
ParseContext context = buildParseContext(documentSelector, includeContents,
notExtractBookmarksText);
ContentHandler handler = getContentHandler(targetMimetype, ow);
parser.parse(is, handler, metadata, context);
@@ -647,24 +660,9 @@ public class Tika
{
throw new IllegalStateException(e.getMessage(), e);
}
finally
{
if (is != null)
{
try { is.close(); } catch (Throwable e) {}
}
if (os != null)
{
try { os.close(); } catch (Throwable e) {}
}
if (ow != null)
{
try { ow.close(); } catch (Throwable e) {}
}
}
}
protected ContentHandler getContentHandler(String targetMimetype, Writer output)
private ContentHandler getContentHandler(String targetMimetype, Writer output)
{
try
{
@@ -676,7 +674,7 @@ public class Tika
else
{
SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance();
TransformerHandler transformerHandler = null;
TransformerHandler transformerHandler;
transformerHandler = factory.newTransformerHandler();
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
transformerHandler.setResult(new StreamResult(output));
@@ -792,7 +790,8 @@ public class Tika
}
}
protected ParseContext buildParseContext(DocumentSelector documentSelector, Boolean includeContents, Boolean notExtractBookmarksText)
private ParseContext buildParseContext(DocumentSelector documentSelector,
Boolean includeContents, Boolean notExtractBookmarksText)
{
ParseContext context = new ParseContext();

View File

@@ -0,0 +1,92 @@
package org.alfresco.transformer.executors;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringJoiner;
import org.alfresco.transformer.exceptions.TransformException;
import org.alfresco.transformer.logging.LogEntry;
import org.apache.tika.exception.TikaException;
import org.springframework.beans.factory.annotation.Autowired;
import org.xml.sax.SAXException;
@Component
public class TikaJavaExecutor implements JavaExecutor
{
private final Tika tika;
@Autowired
public TikaJavaExecutor() throws TikaException, IOException, SAXException
{
tika = new Tika();
}
@Override
public void call(File sourceFile, File targetFile, String... args)
throws TransformException
{
args = buildArgs(sourceFile, targetFile, args);
try
{
tika.transform(args);
}
catch (IllegalArgumentException e)
{
throw new TransformException(400, getMessage(e));
}
catch (Exception e)
{
throw new TransformException(500, getMessage(e));
}
if (!targetFile.exists() || targetFile.length() == 0)
{
throw new TransformException(500, "Transformer failed to create an output file");
}
}
private static String getMessage(Exception e)
{
return e.getMessage() == null ? e.getClass().getSimpleName() : e.getMessage();
}
private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
{
ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
StringJoiner sj = new StringJoiner(" ");
for (String arg : args)
{
addArg(methodArgs, sj, arg);
}
addFileArg(methodArgs, sj, sourceFile);
addFileArg(methodArgs, sj, targetFile);
LogEntry.setOptions(sj.toString());
return methodArgs.toArray(new String[0]);
}
private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
{
if (arg != null)
{
sj.add(arg);
methodArgs.add(arg);
}
}
private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
{
if (arg != null)
{
String path = arg.getAbsolutePath();
int i = path.lastIndexOf('.');
String ext = i == -1 ? "???" : path.substring(i + 1);
sj.add(ext);
methodArgs.add(path);
}
}
}

View File

@@ -23,7 +23,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transformer.executors;
import java.io.IOException;
import java.io.InputStream;
@@ -43,7 +43,7 @@ import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
///////// THIS FILE IS A COPY OF THE CODE IN alfresco-repository /////////////
///////// THIS FILE WAS A COPY OF THE CODE IN alfresco-repository /////////////
/**
* <a href="http://tika.apache.org/Apache Tika">Apache Tika</a> assumes that
@@ -58,11 +58,11 @@ import org.xml.sax.SAXException;
* @author Nick Burch
*/
public class TikaOfficeDetectParser implements Parser {
private Parser ole2Parser = new OfficeParser();
private Parser ooxmlParser = new OOXMLParser();
private final Parser ole2Parser = new OfficeParser();
private final Parser ooxmlParser = new OOXMLParser();
public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
Set<MediaType> types = new HashSet<MediaType>();
Set<MediaType> types = new HashSet<>();
types.addAll(ole2Parser.getSupportedTypes(parseContext));
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
return types;

View File

@@ -37,41 +37,68 @@ import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_XHTML;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.repo.content.MimetypeMap.MIMETYPE_ZIP;
import static org.alfresco.transformer.Tika.ARCHIVE;
import static org.alfresco.transformer.Tika.CSV;
import static org.alfresco.transformer.Tika.DOC;
import static org.alfresco.transformer.Tika.DOCX;
import static org.alfresco.transformer.Tika.HTML;
import static org.alfresco.transformer.Tika.MSG;
import static org.alfresco.transformer.Tika.OUTLOOK_MSG;
import static org.alfresco.transformer.Tika.PDF;
import static org.alfresco.transformer.Tika.PDF_BOX;
import static org.alfresco.transformer.Tika.POI;
import static org.alfresco.transformer.Tika.POI_OFFICE;
import static org.alfresco.transformer.Tika.POI_OO_XML;
import static org.alfresco.transformer.Tika.PPTX;
import static org.alfresco.transformer.Tika.TEXT_MINING;
import static org.alfresco.transformer.Tika.TIKA_AUTO;
import static org.alfresco.transformer.Tika.TXT;
import static org.alfresco.transformer.Tika.XHTML;
import static org.alfresco.transformer.Tika.XML;
import static org.alfresco.transformer.Tika.XSLX;
import static org.alfresco.transformer.Tika.ZIP;
import static org.springframework.test.util.AssertionErrors.assertTrue;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
import static org.alfresco.transformer.executors.Tika.DOCX;
import static org.alfresco.transformer.executors.Tika.HTML;
import static org.alfresco.transformer.executors.Tika.MSG;
import static org.alfresco.transformer.executors.Tika.OUTLOOK_MSG;
import static org.alfresco.transformer.executors.Tika.PDF;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.POI;
import static org.alfresco.transformer.executors.Tika.POI_OFFICE;
import static org.alfresco.transformer.executors.Tika.POI_OO_XML;
import static org.alfresco.transformer.executors.Tika.PPTX;
import static org.alfresco.transformer.executors.Tika.TEXT_MINING;
import static org.alfresco.transformer.executors.Tika.TIKA_AUTO;
import static org.alfresco.transformer.executors.Tika.TXT;
import static org.alfresco.transformer.executors.Tika.XHTML;
import static org.alfresco.transformer.executors.Tika.XML;
import static org.alfresco.transformer.executors.Tika.XSLX;
import static org.alfresco.transformer.executors.Tika.ZIP;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.alfresco.util.exec.RuntimeExec;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.stubbing.Answer;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.SpyBean;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import org.springframework.util.StringUtils;
/**
* Test the TikaController without a server.
@@ -81,37 +108,126 @@ import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilde
@WebMvcTest(TikaController.class)
public class TikaControllerTest extends AbstractTransformerControllerTest
{
public static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
public static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
public static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
"The quick brown fox jumps over the lazy dogs";
public static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
"The quick brown fox jumps over the lazy dogs";
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
@Mock
private RuntimeExec.ExecutionResult mockExecutionResult;
@Mock
private RuntimeExec mockTransformCommand;
@Mock
private RuntimeExec mockCheckCommand;
@SpyBean
private TikaJavaExecutor javaExecutor;
@SpyBean
private TikaController controller;
String transform = PDF_BOX;
String targetEncoding = "UTF-8";
String targetMimetype = MIMETYPE_TEXT_PLAIN;
private String transform = PDF_BOX;
private String targetEncoding = "UTF-8";
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
@Before
public void before() throws Exception
public void before()
{
controller.setAlfrescoSharedFileStoreClient(alfrescoSharedFileStoreClient);
super.controller = controller;
sourceExtension = "pdf";
targetExtension = "txt";
}
@Override
protected void mockTransformCommand(String sourceExtension,
String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException
{
this.sourceExtension = sourceExtension;
this.targetExtension = targetExtension;
this.sourceMimetype = sourceMimetype;
expectedOptions = null;
expectedSourceSuffix = null;
expectedSourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = readTargetFileBytes ? readTestFile(targetExtension) : null;
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype,
expectedSourceFileBytes);
when(mockTransformCommand.execute(any(), anyLong())).thenAnswer(
(Answer<RuntimeExec.ExecutionResult>) invocation -> {
Map<String, String> actualProperties = invocation.getArgument(0);
assertEquals("There should be 3 properties", 3, actualProperties.size());
String actualOptions = actualProperties.get("options");
String actualSource = actualProperties.get("source");
String actualTarget = actualProperties.get("target");
String actualTargetExtension = StringUtils.getFilenameExtension(actualTarget);
assertNotNull(actualSource);
assertNotNull(actualTarget);
if (expectedSourceSuffix != null)
{
assertTrue(
"The source file \"" + actualSource + "\" should have ended in \"" + expectedSourceSuffix + "\"",
actualSource.endsWith(expectedSourceSuffix));
actualSource = actualSource.substring(0,
actualSource.length() - expectedSourceSuffix.length());
}
assertNotNull(actualOptions);
if (expectedOptions != null)
{
assertEquals("expectedOptions", expectedOptions, actualOptions);
}
Long actualTimeout = invocation.getArgument(1);
assertNotNull(actualTimeout);
if (expectedTimeout != null)
{
assertEquals("expectedTimeout", expectedTimeout, actualTimeout);
}
// Copy a test file into the target file location if it exists
int i = actualTarget.lastIndexOf('_');
if (i >= 0)
{
String testFilename = actualTarget.substring(i + 1);
File testFile = getTestFile(testFilename, false);
File targetFile = new File(actualTarget);
generateTargetFileFromResourceFile(actualTargetExtension, testFile,
targetFile);
}
// Check the supplied source file has not been changed.
byte[] actualSourceFileBytes = Files.readAllBytes(new File(actualSource).toPath());
assertTrue("Source file is not the same",
Arrays.equals(expectedSourceFileBytes, actualSourceFileBytes));
return mockExecutionResult;
});
when(mockExecutionResult.getExitValue()).thenReturn(0);
when(mockExecutionResult.getStdErr()).thenReturn("STDERROR");
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
}
@Override
protected AbstractTransformerController getController()
{
return controller;
}
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
{
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
super.mockTransformCommand(controller, sourceExtension, targetExtension, sourceMimetype, false);
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
this.transform = transform;
this.targetMimetype = targetMimetype;
@@ -141,7 +257,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void simpleTransformTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.simpleTransformTest();
}
@@ -149,21 +265,13 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void testDelayTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.testDelayTest();
}
@Test
@Override
public void badExitCodeTest() throws Exception
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a non zero exit code.
}
@Test
@Override
public void noTargetFileTest() throws Exception
public void noTargetFileTest()
{
// Ignore the test in super class as the Tika transforms are real rather than mocked up.
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
@@ -175,7 +283,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void dotDotSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.dotDotSourceFilenameTest();
}
@@ -183,7 +291,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void noExtensionSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noExtensionSourceFilenameTest();
}
@@ -191,7 +299,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void badSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.badSourceFilenameTest();
}
@@ -199,7 +307,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void blankSourceFilenameTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.blankSourceFilenameTest();
}
@@ -207,7 +315,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void noTargetExtensionTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.noTargetExtensionTest();
}
@@ -215,7 +323,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Override
public void calculateMaxTime() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
super.calculateMaxTime();
}
@@ -224,7 +332,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void badEncodingTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
targetEncoding = "rubbish";
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(500));
@@ -388,7 +496,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void pdfToTxtExtractBookmarksTest() throws Exception
{
super.mockTransformCommand(controller, PDF, TXT, MIMETYPE_PDF, true);
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param("notExtractBookmarksText", "true"))
.andExpect(status().is(200))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + targetExtension));
@@ -405,4 +513,54 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
transformRequest.getTransformRequestOptions().put("targetMimetype", MediaType.TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
}
@Test
public void testPojoTransform() throws Exception
{
// Files
String sourceFileRef = UUID.randomUUID().toString();
File sourceFile = getTestFile("quick." + sourceExtension, true);
String targetFileRef = UUID.randomUUID().toString();
// Transformation Request POJO
TransformRequest transformRequest = new TransformRequest();
transformRequest.setRequestId("1");
transformRequest.setSchema(1);
transformRequest.setClientData("Alfresco Digital Business Platform");
transformRequest.setTransformRequestOptions(new HashMap<>());
transformRequest.setSourceReference(sourceFileRef);
transformRequest.setSourceExtension(sourceExtension);
transformRequest.setSourceSize(sourceFile.length());
transformRequest.setTargetExtension(targetExtension);
// HTTP Request
HttpHeaders headers = new HttpHeaders();
headers.set(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
sourceFile), headers, HttpStatus.OK);
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
when(alfrescoSharedFileStoreClient.saveFile(any())).thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
when(mockExecutionResult.getExitValue()).thenReturn(0);
// Update the Transformation Request with any specific params before sending it
updateTransformRequestWithSpecificOptions(transformRequest);
// Serialize and call the transformer
String tr = objectMapper.writeValueAsString(transformRequest);
String transformationReplyAsString = mockMvc.perform(MockMvcRequestBuilders.post("/transform")
.header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
.header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE).content(tr))
.andExpect(status().is(HttpStatus.CREATED.value()))
.andReturn().getResponse().getContentAsString();
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString, TransformReply.class);
// Assert the reply
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
}
}