mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-14 17:58:27 +00:00
Save point: [skip ci]
* Tika extractors no longer use Files
This commit is contained in:
@@ -56,6 +56,7 @@ import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBo
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
@@ -187,17 +188,14 @@ public class TransformHandler
|
||||
}
|
||||
probeTestTransform.incrementTransformerCount();
|
||||
|
||||
// Obtain the source
|
||||
final String directUrl = requestParameters.getOrDefault(DIRECT_ACCESS_URL, "");
|
||||
InputStream inputStream = directUrl.isBlank()
|
||||
InputStream inputStream = new BufferedInputStream(directUrl.isBlank()
|
||||
? FileManager.getMultipartFileInputStream(sourceMultipartFile)
|
||||
: getDirectAccessUrlInputStream(directUrl);
|
||||
: getDirectAccessUrlInputStream(directUrl));
|
||||
long sourceSizeInBytes = -1L; // TODO pass in t-options or just ignore for http request as the repo will have checked.
|
||||
Map<String, String> transformOptions = getTransformOptions(requestParameters);
|
||||
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
|
||||
CustomTransformer customTransformer = getCustomTransformer(transformName);
|
||||
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
||||
String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set
|
||||
String reference = "e"+httpRequestCount.getAndIncrement();
|
||||
transformerDebug.pushTransform(reference, sourceMimetype, targetMimetype, sourceSizeInBytes, transformName);
|
||||
transformerDebug.logOptions(reference, requestParameters);
|
||||
@@ -254,72 +252,76 @@ public class TransformHandler
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
|
||||
String targetMimetype = request.getTargetMediaType();
|
||||
String sourceMimetype = request.getSourceMediaType();
|
||||
File targetFile = createTargetFile(null, sourceMimetype, targetMimetype);
|
||||
transformerDebug.pushTransform(request);
|
||||
|
||||
try
|
||||
{
|
||||
OutputStreamLengthRecorder outputStream =
|
||||
new OutputStreamLengthRecorder(new BufferedOutputStream(new FileOutputStream(targetFile)));
|
||||
String targetMimetype = request.getTargetMediaType();
|
||||
String sourceMimetype = request.getSourceMediaType();
|
||||
File targetFile = createTargetFile(null, sourceMimetype, targetMimetype);
|
||||
transformerDebug.pushTransform(request);
|
||||
|
||||
long sourceSizeInBytes = request.getSourceSize();
|
||||
Map<String, String> transformOptions = getTransformOptions(request.getTransformRequestOptions());
|
||||
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
||||
String targetEncoding = transformOptions.get(TARGET_ENCODING); // TODO not normally set
|
||||
transformerDebug.logOptions(request);
|
||||
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
|
||||
CustomTransformer customTransformer = getCustomTransformer(transformName);
|
||||
|
||||
TransformManagerImpl transformManager = TransformManagerImpl.builder()
|
||||
.withSourceMimetype(sourceMimetype)
|
||||
.withTargetMimetype(targetMimetype)
|
||||
.withInputStream(inputStream)
|
||||
.withOutputStream(outputStream)
|
||||
.withTargetFile(targetFile)
|
||||
.build();
|
||||
|
||||
customTransformer.transform(sourceMimetype, inputStream,
|
||||
targetMimetype, outputStream, transformOptions, transformManager);
|
||||
|
||||
transformManager.ifUsedCopyTargetFileToOutputStream();
|
||||
|
||||
reply.getInternalContext().setCurrentSourceSize(outputStream.getLength());
|
||||
|
||||
if (saveTargetFileInSharedFileStore(targetFile, reply) == false)
|
||||
try (OutputStreamLengthRecorder outputStream = new OutputStreamLengthRecorder(new BufferedOutputStream(
|
||||
new FileOutputStream(targetFile))))
|
||||
{
|
||||
long sourceSizeInBytes = request.getSourceSize();
|
||||
Map<String, String> transformOptions = getTransformOptions(request.getTransformRequestOptions());
|
||||
transformerDebug.logOptions(request);
|
||||
String transformName = getTransformerName(sourceSizeInBytes, sourceMimetype, targetMimetype, transformOptions);
|
||||
CustomTransformer customTransformer = getCustomTransformer(transformName);
|
||||
|
||||
TransformManagerImpl transformManager = TransformManagerImpl.builder()
|
||||
.withSourceMimetype(sourceMimetype)
|
||||
.withTargetMimetype(targetMimetype)
|
||||
.withInputStream(inputStream)
|
||||
.withOutputStream(outputStream)
|
||||
.withTargetFile(targetFile)
|
||||
.build();
|
||||
|
||||
customTransformer.transform(sourceMimetype, inputStream,
|
||||
targetMimetype, outputStream, transformOptions, transformManager);
|
||||
|
||||
transformManager.ifUsedCopyTargetFileToOutputStream();
|
||||
|
||||
reply.getInternalContext().setCurrentSourceSize(outputStream.getLength());
|
||||
|
||||
if (saveTargetFileInSharedFileStore(targetFile, reply) == false)
|
||||
{
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
|
||||
transformManager.deleteSourceFileIfExists();
|
||||
transformManager.deleteTargetFileIfExists();
|
||||
|
||||
probeTestTransform.recordTransformTime(System.currentTimeMillis()-start);
|
||||
transformerDebug.popTransform(reply);
|
||||
|
||||
logger.trace("Sending successful {}, timeout {} ms", reply, timeout);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
catch (TransformException e)
|
||||
{
|
||||
reply.setStatus(e.getStatusCode());
|
||||
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
|
||||
|
||||
transformManager.deleteSourceFileIfExists();
|
||||
transformManager.deleteTargetFileIfExists();
|
||||
transformerDebug.logFailure(reply);
|
||||
logger.trace("Failed to perform transform (TransformException), sending " + reply, e);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
reply.setStatus(INTERNAL_SERVER_ERROR.value());
|
||||
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
|
||||
|
||||
probeTestTransform.recordTransformTime(System.currentTimeMillis()-start);
|
||||
transformerDebug.popTransform(reply);
|
||||
|
||||
logger.trace("Sending successful {}, timeout {} ms", reply, timeout);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
transformerDebug.logFailure(reply);
|
||||
logger.trace("Failed to perform transform (Exception), sending " + reply, e);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
}
|
||||
catch (TransformException e)
|
||||
finally
|
||||
{
|
||||
reply.setStatus(e.getStatusCode());
|
||||
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
|
||||
|
||||
transformerDebug.logFailure(reply);
|
||||
logger.trace("Failed to perform transform (TransformException), sending " + reply, e);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
reply.setStatus(INTERNAL_SERVER_ERROR.value());
|
||||
reply.setErrorDetails(messageWithCause("Failed at processing transformation", e));
|
||||
|
||||
transformerDebug.logFailure(reply);
|
||||
logger.trace("Failed to perform transform (Exception), sending " + reply, e);
|
||||
return new ResponseEntity<>(reply, HttpStatus.valueOf(reply.getStatus()));
|
||||
closeInputStreamWithoutException(inputStream);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isTransformRequestValid(TransformRequest request, TransformReply reply)
|
||||
{
|
||||
final Errors errors = validateTransformRequest(request);
|
||||
@@ -412,9 +414,9 @@ public class TransformHandler
|
||||
InputStream inputStream = null;
|
||||
try
|
||||
{
|
||||
inputStream = directUrl.isBlank()
|
||||
inputStream = new BufferedInputStream(directUrl.isBlank()
|
||||
? getSharedFileStoreInputStream(request.getSourceReference())
|
||||
: getDirectAccessUrlInputStream(directUrl);
|
||||
: getDirectAccessUrlInputStream(directUrl));
|
||||
}
|
||||
catch (TransformException e)
|
||||
{
|
||||
@@ -574,4 +576,15 @@ public class TransformHandler
|
||||
}
|
||||
return customTransformer;
|
||||
}
|
||||
|
||||
private void closeInputStreamWithoutException(InputStream inputStream) {
|
||||
try
|
||||
{
|
||||
inputStream.close();
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -26,8 +26,9 @@
|
||||
*/
|
||||
package org.alfresco.transform.tika.transformers;
|
||||
|
||||
import org.alfresco.transform.base.CustomTransformer;
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.alfresco.transform.base.logging.LogEntry;
|
||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||
import org.alfresco.transform.common.RequestParamMap;
|
||||
import org.apache.tika.extractor.DocumentSelector;
|
||||
import org.apache.tika.parser.Parser;
|
||||
@@ -36,16 +37,18 @@ import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import static java.lang.Boolean.parseBoolean;
|
||||
|
||||
public abstract class GenericTikaTransformer implements CustomTransformerFileAdaptor
|
||||
public abstract class AbstractTikaTransformer implements CustomTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class);
|
||||
private static final Logger logger = LoggerFactory.getLogger(AbstractTikaTransformer.class);
|
||||
|
||||
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
|
||||
boolean notExtractBookmarksTextDefault;
|
||||
@@ -67,9 +70,9 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions, File sourceFile, File targetFile)
|
||||
throws Exception
|
||||
public void transform(String sourceMimetype, InputStream inputStream,
|
||||
String targetMimetype, OutputStream outputStream,
|
||||
Map<String, String> transformOptions, TransformManager transformManager) throws Exception
|
||||
{
|
||||
final boolean includeContents = parseBoolean(
|
||||
transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false"));
|
||||
@@ -80,56 +83,26 @@ public abstract class GenericTikaTransformer implements CustomTransformerFileAda
|
||||
{
|
||||
logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault);
|
||||
}
|
||||
String transformerName = getTransformerName();
|
||||
call(sourceFile, targetFile, transformerName,
|
||||
call(inputStream, outputStream,
|
||||
includeContents ? Tika.INCLUDE_CONTENTS : null,
|
||||
notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null,
|
||||
Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding);
|
||||
}
|
||||
|
||||
void call(File sourceFile, File targetFile, String... args)
|
||||
void call(InputStream inputStream, OutputStream outputStream, String... args)
|
||||
{
|
||||
Parser parser = getParser();
|
||||
DocumentSelector documentSelector = getDocumentSelector();
|
||||
args = buildArgs(sourceFile, targetFile, args);
|
||||
tika.transform(parser, documentSelector, args);
|
||||
logArgs(args);
|
||||
tika.transform(parser, documentSelector, inputStream, outputStream, args);
|
||||
}
|
||||
|
||||
private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
|
||||
private void logArgs(String[] args)
|
||||
{
|
||||
ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
|
||||
StringJoiner sj = new StringJoiner(" ");
|
||||
for (String arg : args)
|
||||
{
|
||||
addArg(methodArgs, sj, arg);
|
||||
}
|
||||
|
||||
addFileArg(methodArgs, sj, sourceFile);
|
||||
addFileArg(methodArgs, sj, targetFile);
|
||||
|
||||
Arrays.stream(args)
|
||||
.filter(Objects::nonNull)
|
||||
.forEach(arg -> sj.add(arg));
|
||||
LogEntry.setOptions(sj.toString());
|
||||
|
||||
return methodArgs.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
|
||||
{
|
||||
if (arg != null)
|
||||
{
|
||||
sj.add(arg);
|
||||
methodArgs.add(arg);
|
||||
}
|
||||
}
|
||||
|
||||
private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
|
||||
{
|
||||
if (arg != null)
|
||||
{
|
||||
String path = arg.getAbsolutePath();
|
||||
int i = path.lastIndexOf('.');
|
||||
String ext = i == -1 ? "???" : path.substring(i + 1);
|
||||
sj.add(ext);
|
||||
methodArgs.add(path);
|
||||
}
|
||||
}
|
||||
}
|
@@ -29,10 +29,8 @@ package org.alfresco.transform.tika.transformers;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
@Component
|
||||
public class ArchiveTransformer extends GenericTikaTransformer
|
||||
public class ArchiveTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class OOXMLTransformer extends GenericTikaTransformer
|
||||
public class OOXMLTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class OfficeTransformer extends GenericTikaTransformer
|
||||
public class OfficeTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class OutlookMsgTransformer extends GenericTikaTransformer
|
||||
public class OutlookMsgTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -31,7 +31,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class PdfBoxTransformer extends GenericTikaTransformer
|
||||
public class PdfBoxTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class PoiTransformer extends GenericTikaTransformer
|
||||
public class PoiTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class TextMiningTransformer extends GenericTikaTransformer
|
||||
public class TextMiningTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -65,6 +65,7 @@ import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||
@@ -158,72 +159,48 @@ public class Tika
|
||||
}
|
||||
|
||||
// Extracts parameters from args
|
||||
public void transform(Parser parser, DocumentSelector documentSelector, String[] args)
|
||||
void transform(Parser parser, DocumentSelector documentSelector, InputStream inputStream,
|
||||
OutputStream outputStream, String[] args)
|
||||
{
|
||||
String transform = null;
|
||||
String targetMimetype = null;
|
||||
String targetEncoding = null;
|
||||
String sourceFilename = null;
|
||||
String targetFilename = null;
|
||||
Boolean includeContents = null;
|
||||
Boolean notExtractBookmarksText = null;
|
||||
|
||||
for (String arg : args)
|
||||
{
|
||||
if (arg.startsWith("--"))
|
||||
if (Objects.isNull(arg))
|
||||
{
|
||||
if (INCLUDE_CONTENTS.startsWith(arg))
|
||||
{
|
||||
getValue(arg, false, includeContents, INCLUDE_CONTENTS);
|
||||
includeContents = true;
|
||||
}
|
||||
else if (arg.startsWith(TARGET_ENCODING))
|
||||
{
|
||||
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
|
||||
}
|
||||
else if (arg.startsWith(TARGET_MIMETYPE))
|
||||
{
|
||||
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
|
||||
}
|
||||
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
|
||||
{
|
||||
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
|
||||
notExtractBookmarksText = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
// ignore
|
||||
}
|
||||
else if (arg.startsWith(INCLUDE_CONTENTS))
|
||||
{
|
||||
getValue(arg, false, includeContents, INCLUDE_CONTENTS);
|
||||
includeContents = true;
|
||||
}
|
||||
else if (arg.startsWith(TARGET_ENCODING))
|
||||
{
|
||||
targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
|
||||
}
|
||||
else if (arg.startsWith(TARGET_MIMETYPE))
|
||||
{
|
||||
targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
|
||||
}
|
||||
else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT))
|
||||
{
|
||||
getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
|
||||
notExtractBookmarksText = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (transform == null)
|
||||
{
|
||||
transform = arg;
|
||||
}
|
||||
else if (sourceFilename == null)
|
||||
{
|
||||
sourceFilename = arg;
|
||||
}
|
||||
else if (targetFilename == null)
|
||||
{
|
||||
targetFilename = arg;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
}
|
||||
if (targetFilename == null)
|
||||
{
|
||||
throw new IllegalArgumentException("Missing arguments");
|
||||
}
|
||||
includeContents = includeContents == null ? false : includeContents;
|
||||
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
|
||||
|
||||
transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename,
|
||||
targetFilename, targetMimetype, targetEncoding);
|
||||
transform(parser, documentSelector, includeContents, notExtractBookmarksText, inputStream,
|
||||
outputStream, targetMimetype, targetEncoding);
|
||||
}
|
||||
|
||||
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
|
||||
@@ -247,20 +224,17 @@ public class Tika
|
||||
private void transform(Parser parser, DocumentSelector documentSelector,
|
||||
Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
InputStream inputStream,
|
||||
OutputStream outputStream, String targetMimetype, String targetEncoding)
|
||||
{
|
||||
|
||||
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
|
||||
OutputStream os = new FileOutputStream(targetFilename);
|
||||
Writer ow = new BufferedWriter(new OutputStreamWriter(os, targetEncoding)))
|
||||
try (Writer ow = new BufferedWriter(new OutputStreamWriter(outputStream, targetEncoding)))
|
||||
{
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = buildParseContext(documentSelector, includeContents,
|
||||
notExtractBookmarksText);
|
||||
ContentHandler handler = getContentHandler(targetMimetype, ow);
|
||||
|
||||
parser.parse(is, handler, metadata, context);
|
||||
parser.parse(inputStream, handler, metadata, context);
|
||||
}
|
||||
catch (SAXException | TikaException | IOException e)
|
||||
{
|
||||
|
@@ -30,7 +30,7 @@ import org.apache.tika.parser.Parser;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class TikaAutoTransformer extends GenericTikaTransformer
|
||||
public class TikaAutoTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
|
@@ -137,9 +137,6 @@ public class TikaControllerTest extends AbstractTransformControllerTest
|
||||
@Mock
|
||||
private RuntimeExec mockTransformCommand;
|
||||
|
||||
@Mock
|
||||
private RuntimeExec mockCheckCommand;
|
||||
|
||||
private String targetEncoding = "UTF-8";
|
||||
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
|
@@ -37,19 +37,21 @@ import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.transform.base.TransformManager;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class GenericTikaTransformerTest
|
||||
public class AbstractTikaTransformerTest
|
||||
{
|
||||
private static class TikaTestTransformer extends GenericTikaTransformer
|
||||
private static class TikaTestTransformer extends AbstractTikaTransformer
|
||||
{
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
@@ -66,75 +68,71 @@ public class GenericTikaTransformerTest
|
||||
@Test
|
||||
public void testNotExtractBookmarkTextDefault() throws Exception
|
||||
{
|
||||
GenericTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true));
|
||||
GenericTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false));
|
||||
AbstractTikaTransformer executorSpyDefaultTrue = spy(new TikaTestTransformer(true));
|
||||
AbstractTikaTransformer executorSpyDefaultFalse = spy(new TikaTestTransformer(false));
|
||||
|
||||
File mockSourceFile = mock(File.class);
|
||||
File mockTargetFile = mock(File.class);
|
||||
String transformName = "transformName";
|
||||
InputStream mockInputStream = mock(InputStream.class);
|
||||
OutputStream mockOutputStream = mock(OutputStream.class);
|
||||
TransformManager mockTransformManager = mock(TransformManager.class);
|
||||
String sourceMimetype = "sourceMimetype";
|
||||
String targetMimetype = "targetMimetype";
|
||||
String defaultEncoding = "UTF-8";
|
||||
|
||||
// no need to continue execution past here or check values as we're checking the correct params passed to this method later.
|
||||
lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any());
|
||||
lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any());
|
||||
lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any());
|
||||
lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any());
|
||||
|
||||
Map<String, String> transformOptions = new HashMap<>();
|
||||
|
||||
// use empty transformOptions to test defaults
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
|
||||
// when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null,
|
||||
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
// when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
// use transforms with notExtractBookmarksText set to true
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.put("notExtractBookmarksText", "true");
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions,
|
||||
mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
|
||||
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null,
|
||||
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null,
|
||||
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
// use transforms with notExtractBookmarksText set to false
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.replace("notExtractBookmarksText", "true", "false");
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
|
||||
// neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT, as the option was explicitly set to false
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
|
||||
|
||||
// useful set of pdfbox transformOptions just to be safe
|
||||
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
|
||||
transformOptions.put("targetEncoding", "anyEncoding");
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
|
||||
executorSpyDefaultTrue.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
executorSpyDefaultFalse.transform(sourceMimetype, mockInputStream, targetMimetype, mockOutputStream, transformOptions, mockTransformManager);
|
||||
|
||||
// neither call method should have NOT_EXTRACT_BOOKMARKS_TEXT (still set to false), but the encoding will change
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
verify(executorSpyDefaultTrue, times(1)).call(mockInputStream, mockOutputStream, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
|
||||
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
|
||||
verify(executorSpyDefaultFalse, times(1)).call(mockInputStream, mockOutputStream, null, null,
|
||||
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user