mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-21 18:08:37 +00:00
ATS-531 : Reformat code
This commit is contained in:
@@ -37,7 +37,7 @@ import org.springframework.context.annotation.Bean;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude={DataSourceAutoConfiguration.class})
|
||||
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
|
||||
public class Application
|
||||
{
|
||||
@Value("${container.name}")
|
||||
|
@@ -95,10 +95,13 @@ public class TikaController extends AbstractTransformerController
|
||||
@Autowired
|
||||
public TikaController()
|
||||
{
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
logger.info(
|
||||
"--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
|
||||
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
logger.info(
|
||||
"Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
|
||||
logger.info(
|
||||
"--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -149,7 +152,8 @@ public class TikaController extends AbstractTransformerController
|
||||
throw new TransformException(BAD_REQUEST.value(), "Invalid transform value");
|
||||
}
|
||||
|
||||
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(), targetExtension);
|
||||
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
|
||||
targetExtension);
|
||||
getProbeTestTransform().incrementTransformerCount();
|
||||
File sourceFile = createSourceFile(request, sourceMultipartFile);
|
||||
File targetFile = createTargetFile(request, targetFilename);
|
||||
|
@@ -456,25 +456,25 @@ public class Tika
|
||||
public static final String TEXT_MINING = "TextMining";
|
||||
|
||||
public static final List<String> TRANSFORM_NAMES = asList(
|
||||
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
|
||||
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
|
||||
|
||||
public static final String TARGET_MIMETYPE = "--targetMimetype=";
|
||||
public static final String TARGET_ENCODING = "--targetEncoding=";
|
||||
public static final String INCLUDE_CONTENTS = "--includeContents";
|
||||
public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";
|
||||
|
||||
public static final String CSV = "csv";
|
||||
public static final String DOC = "doc";
|
||||
public static final String DOCX = "docx";
|
||||
public static final String HTML = "html";
|
||||
public static final String MSG = "msg";
|
||||
public static final String PDF = "pdf";
|
||||
public static final String PPTX = "pptx";
|
||||
public static final String TXT = "txt";
|
||||
public static final String XHTML = "xhtml";
|
||||
public static final String XSLX = "xslx";
|
||||
public static final String XML = "xml";
|
||||
public static final String ZIP = "zip";
|
||||
public static final String CSV = "csv";
|
||||
public static final String DOC = "doc";
|
||||
public static final String DOCX = "docx";
|
||||
public static final String HTML = "html";
|
||||
public static final String MSG = "msg";
|
||||
public static final String PDF = "pdf";
|
||||
public static final String PPTX = "pptx";
|
||||
public static final String TXT = "txt";
|
||||
public static final String XHTML = "xhtml";
|
||||
public static final String XSLX = "xslx";
|
||||
public static final String XML = "xml";
|
||||
public static final String ZIP = "zip";
|
||||
|
||||
private final Parser packageParser = new PackageParser();
|
||||
private final Parser pdfParser = new PDFParser();
|
||||
@@ -486,7 +486,8 @@ public class Tika
|
||||
|
||||
private DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
|
||||
{
|
||||
private final List<String> disabledMediaTypes = asList(MIMETYPE_IMAGE_JPEG, MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
|
||||
private final List<String> disabledMediaTypes = asList(MIMETYPE_IMAGE_JPEG,
|
||||
MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
|
||||
|
||||
@Override
|
||||
public boolean select(Metadata metadata)
|
||||
@@ -518,16 +519,16 @@ public class Tika
|
||||
}
|
||||
catch (IllegalArgumentException e)
|
||||
{
|
||||
System.err.println("ERROR "+e.getMessage());
|
||||
System.err.println("ERROR " + e.getMessage());
|
||||
System.exit(-1);
|
||||
}
|
||||
catch (IllegalStateException | TikaException | IOException | SAXException e)
|
||||
{
|
||||
System.err.println("ERROR "+e.getMessage());
|
||||
System.err.println("ERROR " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(-2);
|
||||
}
|
||||
System.out.println("Finished in "+(System.currentTimeMillis()-start)+"ms");
|
||||
System.out.println("Finished in " + (System.currentTimeMillis() - start) + "ms");
|
||||
}
|
||||
|
||||
// Extracts parameters from args
|
||||
@@ -541,7 +542,7 @@ public class Tika
|
||||
Boolean includeContents = null;
|
||||
Boolean notExtractBookmarksText = null;
|
||||
|
||||
for (String arg: args)
|
||||
for (String arg : args)
|
||||
{
|
||||
if (arg.startsWith("--"))
|
||||
{
|
||||
@@ -565,7 +566,7 @@ public class Tika
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument "+arg);
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -584,7 +585,7 @@ public class Tika
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected argument "+arg);
|
||||
throw new IllegalArgumentException("Unexpected argument " + arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -593,71 +594,73 @@ public class Tika
|
||||
throw new IllegalArgumentException("Missing arguments");
|
||||
}
|
||||
includeContents = includeContents == null ? false : includeContents;
|
||||
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
|
||||
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
|
||||
|
||||
transform(transform, includeContents, notExtractBookmarksText, sourceFilename, targetFilename, targetMimetype, targetEncoding);
|
||||
transform(transform, includeContents, notExtractBookmarksText, sourceFilename,
|
||||
targetFilename, targetMimetype, targetEncoding);
|
||||
}
|
||||
|
||||
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
|
||||
{
|
||||
if (value != null)
|
||||
{
|
||||
throw new IllegalArgumentException("Duplicate "+optionName);
|
||||
throw new IllegalArgumentException("Duplicate " + optionName);
|
||||
}
|
||||
String stringValue = arg.substring(optionName.length()).trim();
|
||||
if (!valueExpected && stringValue.length() > 0)
|
||||
{
|
||||
throw new IllegalArgumentException("Unexpected value with "+optionName);
|
||||
throw new IllegalArgumentException("Unexpected value with " + optionName);
|
||||
}
|
||||
if (valueExpected && stringValue.length() == 0)
|
||||
{
|
||||
throw new IllegalArgumentException("Expected value with "+optionName);
|
||||
throw new IllegalArgumentException("Expected value with " + optionName);
|
||||
}
|
||||
return stringValue;
|
||||
}
|
||||
|
||||
// Adds transform specific values such as parser and documentSelector.
|
||||
private void transform(String transform, Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
{
|
||||
Parser parser = null;
|
||||
DocumentSelector documentSelector = null;
|
||||
|
||||
switch(transform)
|
||||
switch (transform)
|
||||
{
|
||||
case ARCHIVE:
|
||||
parser = packageParser;
|
||||
break;
|
||||
case OUTLOOK_MSG:
|
||||
case POI_OFFICE:
|
||||
case TEXT_MINING:
|
||||
parser = officeParser;
|
||||
break;
|
||||
case PDF_BOX:
|
||||
parser = pdfParser;
|
||||
documentSelector = pdfBoxEmbededDocumentSelector;
|
||||
break;
|
||||
case POI:
|
||||
parser = tikaOfficeDetectParser;
|
||||
break;
|
||||
case POI_OO_XML:
|
||||
parser = ooXmlParser;
|
||||
break;
|
||||
case TIKA_AUTO:
|
||||
parser = autoDetectParser;
|
||||
break;
|
||||
case ARCHIVE:
|
||||
parser = packageParser;
|
||||
break;
|
||||
case OUTLOOK_MSG:
|
||||
case POI_OFFICE:
|
||||
case TEXT_MINING:
|
||||
parser = officeParser;
|
||||
break;
|
||||
case PDF_BOX:
|
||||
parser = pdfParser;
|
||||
documentSelector = pdfBoxEmbededDocumentSelector;
|
||||
break;
|
||||
case POI:
|
||||
parser = tikaOfficeDetectParser;
|
||||
break;
|
||||
case POI_OO_XML:
|
||||
parser = ooXmlParser;
|
||||
break;
|
||||
case TIKA_AUTO:
|
||||
parser = autoDetectParser;
|
||||
break;
|
||||
}
|
||||
|
||||
transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename, targetFilename, targetMimetype, targetEncoding);
|
||||
transform(parser, documentSelector, includeContents, notExtractBookmarksText,
|
||||
sourceFilename, targetFilename, targetMimetype, targetEncoding);
|
||||
}
|
||||
|
||||
|
||||
private void transform(Parser parser, DocumentSelector documentSelector, Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
private void transform(Parser parser, DocumentSelector documentSelector,
|
||||
Boolean includeContents,
|
||||
Boolean notExtractBookmarksText,
|
||||
String sourceFilename,
|
||||
String targetFilename, String targetMimetype, String targetEncoding)
|
||||
{
|
||||
|
||||
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
|
||||
@@ -688,7 +691,7 @@ public class Tika
|
||||
}
|
||||
else
|
||||
{
|
||||
SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance();
|
||||
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
|
||||
TransformerHandler transformerHandler;
|
||||
transformerHandler = factory.newTransformerHandler();
|
||||
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
|
||||
@@ -725,42 +728,52 @@ public class Tika
|
||||
/**
|
||||
* A wrapper around the normal Tika BodyContentHandler for CSV encoding rather than tab separated.
|
||||
*/
|
||||
protected static class CsvContentHandler extends BodyContentHandler {
|
||||
private static final char[] comma = new char[]{ ',' };
|
||||
protected static class CsvContentHandler extends BodyContentHandler
|
||||
{
|
||||
private static final char[] comma = new char[]{','};
|
||||
private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");
|
||||
|
||||
private boolean inCell = false;
|
||||
private boolean needsComma = false;
|
||||
|
||||
protected CsvContentHandler(Writer output) {
|
||||
protected CsvContentHandler(Writer output)
|
||||
{
|
||||
super(output);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ignorableWhitespace(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
if(length == 1 && ch[0] == '\t') {
|
||||
throws SAXException
|
||||
{
|
||||
if (length == 1 && ch[0] == '\t')
|
||||
{
|
||||
// Ignore tabs, as they mess up the CSV output
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
super.ignorableWhitespace(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
if(inCell) {
|
||||
StringBuffer t = new StringBuffer(new String(ch,start,length));
|
||||
throws SAXException
|
||||
{
|
||||
if (inCell)
|
||||
{
|
||||
StringBuffer t = new StringBuffer(new String(ch, start, length));
|
||||
|
||||
// Quote if not all numbers
|
||||
if(all_nums.matcher(t).matches())
|
||||
if (all_nums.matcher(t).matches())
|
||||
{
|
||||
super.characters(ch, start, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i=t.length()-1; i>=0; i--) {
|
||||
if(t.charAt(i) == '\"') {
|
||||
for (int i = t.length() - 1; i >= 0; i--)
|
||||
{
|
||||
if (t.charAt(i) == '\"')
|
||||
{
|
||||
// Double up double quotes
|
||||
t.insert(i, '\"');
|
||||
i--;
|
||||
@@ -771,33 +784,45 @@ public class Tika
|
||||
char[] c = t.toString().toCharArray();
|
||||
super.characters(c, 0, c.length);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
super.characters(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String name,
|
||||
Attributes atts) throws SAXException {
|
||||
if(localName.equals("td")) {
|
||||
Attributes atts) throws SAXException
|
||||
{
|
||||
if (localName.equals("td"))
|
||||
{
|
||||
inCell = true;
|
||||
if(needsComma) {
|
||||
if (needsComma)
|
||||
{
|
||||
super.characters(comma, 0, 1);
|
||||
needsComma = true;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
super.startElement(uri, localName, name, atts);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String name)
|
||||
throws SAXException {
|
||||
if(localName.equals("td")) {
|
||||
throws SAXException
|
||||
{
|
||||
if (localName.equals("td"))
|
||||
{
|
||||
needsComma = true;
|
||||
inCell = false;
|
||||
} else {
|
||||
if(localName.equals("tr")) {
|
||||
}
|
||||
else
|
||||
{
|
||||
if (localName.equals("tr"))
|
||||
{
|
||||
needsComma = false;
|
||||
}
|
||||
super.endElement(uri, localName, name);
|
||||
@@ -830,5 +855,4 @@ public class Tika
|
||||
|
||||
return context;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -42,7 +42,7 @@ import org.springframework.stereotype.Component;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* JavaExecutor implementation for running TIKA transformations. It loads the
|
||||
* JavaExecutor implementation for running TIKA transformations. It loads the
|
||||
* transformation logic in the same JVM (check {@link Tika}).
|
||||
*/
|
||||
@Component
|
||||
|
@@ -48,21 +48,23 @@ import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* <a href="http://tika.apache.org/">Apache Tika</a> assumes that
|
||||
* you either know exactly what your content is, or that
|
||||
* you'll leave it to auto-detection.
|
||||
* you either know exactly what your content is, or that
|
||||
* you'll leave it to auto-detection.
|
||||
* Within Alfresco, we usually do know. However, from time
|
||||
* to time, we don't know if we have one of the old or one
|
||||
* of the new office files (eg .xls and .xlsx).
|
||||
* to time, we don't know if we have one of the old or one
|
||||
* of the new office files (eg .xls and .xlsx).
|
||||
* This class automatically selects the appropriate
|
||||
* old (OLE2) or new (OOXML) Tika parser as required.
|
||||
* old (OLE2) or new (OOXML) Tika parser as required.
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class TikaOfficeDetectParser implements Parser {
|
||||
public class TikaOfficeDetectParser implements Parser
|
||||
{
|
||||
private final Parser ole2Parser = new OfficeParser();
|
||||
private final Parser ooxmlParser = new OOXMLParser();
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
|
||||
public Set<MediaType> getSupportedTypes(ParseContext parseContext)
|
||||
{
|
||||
Set<MediaType> types = new HashSet<>();
|
||||
types.addAll(ole2Parser.getSupportedTypes(parseContext));
|
||||
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
|
||||
@@ -70,9 +72,9 @@ public class TikaOfficeDetectParser implements Parser {
|
||||
}
|
||||
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata,
|
||||
ParseContext parseContext) throws IOException, SAXException,
|
||||
TikaException
|
||||
ContentHandler handler, Metadata metadata,
|
||||
ParseContext parseContext) throws IOException, SAXException,
|
||||
TikaException
|
||||
{
|
||||
byte[] initial4 = new byte[4];
|
||||
InputStream wrapped;
|
||||
@@ -93,10 +95,10 @@ public class TikaOfficeDetectParser implements Parser {
|
||||
}
|
||||
|
||||
// Which is it?
|
||||
if(initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
|
||||
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
|
||||
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
|
||||
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
|
||||
if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
|
||||
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
|
||||
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
|
||||
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
|
||||
{
|
||||
ooxmlParser.parse(wrapped, handler, metadata, parseContext);
|
||||
}
|
||||
@@ -110,8 +112,8 @@ public class TikaOfficeDetectParser implements Parser {
|
||||
* @deprecated This method will be removed in Apache Tika 1.0.
|
||||
*/
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata)
|
||||
throws IOException, SAXException, TikaException
|
||||
ContentHandler handler, Metadata metadata)
|
||||
throws IOException, SAXException, TikaException
|
||||
{
|
||||
parse(stream, handler, metadata, new ParseContext());
|
||||
}
|
||||
|
@@ -112,7 +112,7 @@ import org.springframework.util.StringUtils;
|
||||
public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
{
|
||||
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
|
||||
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
|
||||
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
|
||||
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
|
||||
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
|
||||
"\n" +
|
||||
@@ -130,7 +130,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
|
||||
@SpyBean
|
||||
private TikaJavaExecutor javaExecutor;
|
||||
|
||||
|
||||
@SpyBean
|
||||
private TikaController controller;
|
||||
|
||||
@@ -226,34 +226,39 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
}
|
||||
|
||||
private void transform(String transform, String sourceExtension, String targetExtension,
|
||||
String sourceMimetype, String targetMimetype,
|
||||
Boolean includeContents, String expectedContentContains) throws Exception
|
||||
String sourceMimetype, String targetMimetype,
|
||||
Boolean includeContents, String expectedContentContains) throws Exception
|
||||
{
|
||||
// We don't use targetFileBytes as some of the transforms contain different date text based on the OS being used.
|
||||
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
|
||||
this.transform = transform;
|
||||
this.targetMimetype = targetMimetype;
|
||||
|
||||
System.out.println("Test "+transform+" "+ sourceExtension +" to "+targetExtension);
|
||||
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
|
||||
MockHttpServletRequestBuilder requestBuilder = includeContents == null
|
||||
? mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension)
|
||||
: mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension, "includeContents", includeContents.toString());
|
||||
? mockMvcRequest("/transform", sourceFile,
|
||||
"targetExtension", this.targetExtension)
|
||||
: mockMvcRequest("/transform", sourceFile,
|
||||
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
|
||||
MvcResult result = mockMvc.perform(requestBuilder)
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + this.targetExtension)).
|
||||
andReturn();
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition",
|
||||
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
|
||||
andReturn();
|
||||
String content = result.getResponse().getContentAsString();
|
||||
assertTrue("The content did not include \""+expectedContentContains, content.contains(expectedContentContains));
|
||||
assertTrue("The content did not include \"" + expectedContentContains,
|
||||
content.contains(expectedContentContains));
|
||||
}
|
||||
|
||||
@Override
|
||||
// Add extra required parameters to the request.
|
||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
|
||||
String... params)
|
||||
{
|
||||
return super.mockMvcRequest(url, sourceFile, params)
|
||||
.param("transform", transform)
|
||||
.param("targetEncoding", targetEncoding)
|
||||
.param("targetMimetype", targetMimetype);
|
||||
.param("transform", transform)
|
||||
.param("targetEncoding", targetEncoding)
|
||||
.param("targetMimetype", targetMimetype);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -337,8 +342,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
targetEncoding = "rubbish";
|
||||
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
|
||||
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
|
||||
mockMvc.perform(
|
||||
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
|
||||
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
|
||||
}
|
||||
|
||||
// --- Archive ---
|
||||
@@ -346,56 +352,56 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void zipToTextArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,false,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n");
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void zipToTextIncludeArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,true,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog" +
|
||||
"\n" +
|
||||
"\n");
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"The quick brown fox jumps over the lazy dog" +
|
||||
"\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void zipToTextExcludeArchiveTest() throws Exception
|
||||
{
|
||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
|
||||
false, "\n" +
|
||||
"folder/subfolder/quick.jpg\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.doc\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.txt\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.xml\n" +
|
||||
"\n");
|
||||
false, "\n" +
|
||||
"folder/subfolder/quick.jpg\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.doc\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.html\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.pdf\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.txt\n" +
|
||||
"\n" +
|
||||
"\n" +
|
||||
"quick.xml\n" +
|
||||
"\n");
|
||||
}
|
||||
|
||||
// --- OutlookMsg ---
|
||||
@@ -403,7 +409,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void msgToTxtOutlookMsgTest() throws Exception
|
||||
{
|
||||
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- PdfBox ---
|
||||
@@ -411,31 +418,36 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void pdfToTxtPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToCsvPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null, EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
|
||||
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToXmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToXhtmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null, EXPECTED_XHTML_CONTENT_CONTAINS);
|
||||
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pdfToHtmlPdfBoxTest() throws Exception
|
||||
{
|
||||
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
|
||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||
}
|
||||
|
||||
// --- Office ---
|
||||
@@ -443,13 +455,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void msgToTxtOfficeTest() throws Exception
|
||||
{
|
||||
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void docToTxtOfficeTest() throws Exception
|
||||
{
|
||||
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- Poi ---
|
||||
@@ -457,7 +471,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void xslxToCsvPoiTest() throws Exception
|
||||
{
|
||||
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null, EXPECTED_CSV_CONTENT_CONTAINS);
|
||||
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
|
||||
EXPECTED_CSV_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- OOXML ---
|
||||
@@ -465,13 +480,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void docxToTxtOoXmlTest() throws Exception
|
||||
{
|
||||
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void pptxToTxtOoXmlTest() throws Exception
|
||||
{
|
||||
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- TikaAuto ---
|
||||
@@ -479,13 +496,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void ppxtToTxtTikaAutoTest() throws Exception
|
||||
{
|
||||
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void doctToTxtTikaAutoTest() throws Exception
|
||||
{
|
||||
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
// --- TextMining ---
|
||||
@@ -493,16 +512,20 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
@Test
|
||||
public void docToTxtTextMiningTest() throws Exception
|
||||
{
|
||||
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void pdfToTxtExtractBookmarksTest() throws Exception
|
||||
{
|
||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param("notExtractBookmarksText", "true"))
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + targetExtension));
|
||||
mockMvc.perform(
|
||||
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
|
||||
"notExtractBookmarksText", "true"))
|
||||
.andExpect(status().is(OK.value()))
|
||||
.andExpect(header().string("Content-Disposition",
|
||||
"attachment; filename*= UTF-8''quick." + targetExtension));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -513,10 +536,11 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
transformRequest.setSourceMediaType(MediaType.APPLICATION_PDF_VALUE);
|
||||
transformRequest.setTargetMediaType(MediaType.TEXT_PLAIN_VALUE);
|
||||
transformRequest.getTransformRequestOptions().put("transform", "PdfBox");
|
||||
transformRequest.getTransformRequestOptions().put("targetMimetype", MediaType.TEXT_PLAIN_VALUE);
|
||||
transformRequest.getTransformRequestOptions().put("targetMimetype",
|
||||
MediaType.TEXT_PLAIN_VALUE);
|
||||
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPojoTransform() throws Exception
|
||||
{
|
||||
@@ -525,7 +549,6 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
File sourceFile = getTestFile("quick." + sourceExtension, true);
|
||||
String targetFileRef = UUID.randomUUID().toString();
|
||||
|
||||
|
||||
// Transformation Request POJO
|
||||
TransformRequest transformRequest = new TransformRequest();
|
||||
transformRequest.setRequestId("1");
|
||||
@@ -539,12 +562,14 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
|
||||
// HTTP Request
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.set(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
|
||||
headers.set(HttpHeaders.CONTENT_DISPOSITION,
|
||||
"attachment; filename=quick." + sourceExtension);
|
||||
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
|
||||
sourceFile), headers, OK);
|
||||
|
||||
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
|
||||
when(alfrescoSharedFileStoreClient.saveFile(any())).thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
|
||||
when(alfrescoSharedFileStoreClient.saveFile(any())).thenReturn(
|
||||
new FileRefResponse(new FileRefEntity(targetFileRef)));
|
||||
when(mockExecutionResult.getExitValue()).thenReturn(0);
|
||||
|
||||
// Update the Transformation Request with any specific params before sending it
|
||||
@@ -552,18 +577,21 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
|
||||
|
||||
// Serialize and call the transformer
|
||||
String tr = objectMapper.writeValueAsString(transformRequest);
|
||||
String transformationReplyAsString = mockMvc.perform(MockMvcRequestBuilders.post("/transform")
|
||||
.header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
|
||||
.header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE).content(tr))
|
||||
.andExpect(status().is(HttpStatus.CREATED.value()))
|
||||
String transformationReplyAsString = mockMvc.perform(
|
||||
MockMvcRequestBuilders.post("/transform")
|
||||
.header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
|
||||
.header(HttpHeaders.CONTENT_TYPE,
|
||||
MediaType.APPLICATION_JSON_VALUE).content(tr))
|
||||
.andExpect(
|
||||
status().is(HttpStatus.CREATED.value()))
|
||||
.andReturn().getResponse().getContentAsString();
|
||||
|
||||
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString, TransformReply.class);
|
||||
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
|
||||
TransformReply.class);
|
||||
|
||||
// Assert the reply
|
||||
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
|
||||
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
|
||||
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -48,7 +48,8 @@ public class TikaQueueTransformServiceIT extends AbstractQueueTransformServiceIT
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest.builder()
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_OPENXML_WORDPROCESSING)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
|
Reference in New Issue
Block a user