ATS-531 : Reformat code

This commit is contained in:
Cezar.Leahu
2019-08-14 22:21:06 +03:00
committed by CezarLeahu
parent 70ab0241dd
commit 485347729b
58 changed files with 1310 additions and 1074 deletions

View File

@@ -37,7 +37,7 @@ import org.springframework.context.annotation.Bean;
import io.micrometer.core.instrument.MeterRegistry;
@SpringBootApplication
@EnableAutoConfiguration(exclude={DataSourceAutoConfiguration.class})
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
@Value("${container.name}")

View File

@@ -95,10 +95,13 @@ public class TikaController extends AbstractTransformerController
@Autowired
public TikaController()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info(
"--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
logger.info("Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info(
"Tika is from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt");
logger.info(
"--------------------------------------------------------------------------------------------------------------------------------------------------------------");
}
@Override
@@ -149,7 +152,8 @@ public class TikaController extends AbstractTransformerController
throw new TransformException(BAD_REQUEST.value(), "Invalid transform value");
}
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(), targetExtension);
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
targetExtension);
getProbeTestTransform().incrementTransformerCount();
File sourceFile = createSourceFile(request, sourceMultipartFile);
File targetFile = createTargetFile(request, targetFilename);

View File

@@ -456,25 +456,25 @@ public class Tika
public static final String TEXT_MINING = "TextMining";
public static final List<String> TRANSFORM_NAMES = asList(
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);
public static final String TARGET_MIMETYPE = "--targetMimetype=";
public static final String TARGET_ENCODING = "--targetEncoding=";
public static final String INCLUDE_CONTENTS = "--includeContents";
public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";
public static final String CSV = "csv";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String HTML = "html";
public static final String MSG = "msg";
public static final String PDF = "pdf";
public static final String PPTX = "pptx";
public static final String TXT = "txt";
public static final String XHTML = "xhtml";
public static final String XSLX = "xslx";
public static final String XML = "xml";
public static final String ZIP = "zip";
public static final String CSV = "csv";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String HTML = "html";
public static final String MSG = "msg";
public static final String PDF = "pdf";
public static final String PPTX = "pptx";
public static final String TXT = "txt";
public static final String XHTML = "xhtml";
public static final String XSLX = "xslx";
public static final String XML = "xml";
public static final String ZIP = "zip";
private final Parser packageParser = new PackageParser();
private final Parser pdfParser = new PDFParser();
@@ -486,7 +486,8 @@ public class Tika
private DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector()
{
private final List<String> disabledMediaTypes = asList(MIMETYPE_IMAGE_JPEG, MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
private final List<String> disabledMediaTypes = asList(MIMETYPE_IMAGE_JPEG,
MIMETYPE_IMAGE_TIFF, MIMETYPE_IMAGE_PNG);
@Override
public boolean select(Metadata metadata)
@@ -518,16 +519,16 @@ public class Tika
}
catch (IllegalArgumentException e)
{
System.err.println("ERROR "+e.getMessage());
System.err.println("ERROR " + e.getMessage());
System.exit(-1);
}
catch (IllegalStateException | TikaException | IOException | SAXException e)
{
System.err.println("ERROR "+e.getMessage());
System.err.println("ERROR " + e.getMessage());
e.printStackTrace();
System.exit(-2);
}
System.out.println("Finished in "+(System.currentTimeMillis()-start)+"ms");
System.out.println("Finished in " + (System.currentTimeMillis() - start) + "ms");
}
// Extracts parameters from args
@@ -541,7 +542,7 @@ public class Tika
Boolean includeContents = null;
Boolean notExtractBookmarksText = null;
for (String arg: args)
for (String arg : args)
{
if (arg.startsWith("--"))
{
@@ -565,7 +566,7 @@ public class Tika
}
else
{
throw new IllegalArgumentException("Unexpected argument "+arg);
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
else
@@ -584,7 +585,7 @@ public class Tika
}
else
{
throw new IllegalArgumentException("Unexpected argument "+arg);
throw new IllegalArgumentException("Unexpected argument " + arg);
}
}
}
@@ -593,71 +594,73 @@ public class Tika
throw new IllegalArgumentException("Missing arguments");
}
includeContents = includeContents == null ? false : includeContents;
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
notExtractBookmarksText = notExtractBookmarksText == null ? false : notExtractBookmarksText;
transform(transform, includeContents, notExtractBookmarksText, sourceFilename, targetFilename, targetMimetype, targetEncoding);
transform(transform, includeContents, notExtractBookmarksText, sourceFilename,
targetFilename, targetMimetype, targetEncoding);
}
// Extracts the value part of a single "--option" style argument.
//
// arg           - the complete argument; everything after the first optionName.length()
//                 characters is taken as the value and trimmed. This method does not itself
//                 check that arg begins with optionName (presumably the caller has already
//                 matched the prefix - TODO confirm).
// valueExpected - true if the option must carry a non-empty value, false if it must not
//                 carry any value at all.
// value         - the value already recorded for this option; anything other than null means
//                 the option was supplied more than once.
// optionName    - the option prefix, also used in the error messages.
//
// Returns the trimmed value (an empty string when valueExpected is false).
// Throws IllegalArgumentException for a duplicate option, for a value on an option that
// takes none, or for a missing value on an option that needs one.
private String getValue(String arg, boolean valueExpected, Object value, String optionName)
{
if (value != null)
{
throw new IllegalArgumentException("Duplicate "+optionName);
throw new IllegalArgumentException("Duplicate " + optionName);
}
String stringValue = arg.substring(optionName.length()).trim();
if (!valueExpected && stringValue.length() > 0)
{
throw new IllegalArgumentException("Unexpected value with "+optionName);
throw new IllegalArgumentException("Unexpected value with " + optionName);
}
if (valueExpected && stringValue.length() == 0)
{
throw new IllegalArgumentException("Expected value with "+optionName);
throw new IllegalArgumentException("Expected value with " + optionName);
}
return stringValue;
}
// Adds transform specific values such as parser and documentSelector.
//
// Maps the transform name onto the parser field to use, then delegates to the overload that
// takes a Parser and a DocumentSelector. OUTLOOK_MSG, POI_OFFICE and TEXT_MINING all share
// officeParser. PDF_BOX is the only transform that also sets a document selector.
//
// NOTE(review): the switch has no default branch, so a transform name that matches none of
// the cases leaves parser (and documentSelector) null and still calls the overload. How the
// overload reacts to a null parser is not visible here - confirm.
private void transform(String transform, Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
Parser parser = null;
DocumentSelector documentSelector = null;
switch(transform)
switch (transform)
{
case ARCHIVE:
parser = packageParser;
break;
case OUTLOOK_MSG:
case POI_OFFICE:
case TEXT_MINING:
parser = officeParser;
break;
case PDF_BOX:
parser = pdfParser;
documentSelector = pdfBoxEmbededDocumentSelector;
break;
case POI:
parser = tikaOfficeDetectParser;
break;
case POI_OO_XML:
parser = ooXmlParser;
break;
case TIKA_AUTO:
parser = autoDetectParser;
break;
case ARCHIVE:
parser = packageParser;
break;
case OUTLOOK_MSG:
case POI_OFFICE:
case TEXT_MINING:
parser = officeParser;
break;
case PDF_BOX:
parser = pdfParser;
documentSelector = pdfBoxEmbededDocumentSelector;
break;
case POI:
parser = tikaOfficeDetectParser;
break;
case POI_OO_XML:
parser = ooXmlParser;
break;
case TIKA_AUTO:
parser = autoDetectParser;
break;
}
// All remaining arguments are passed through unchanged.
transform(parser, documentSelector, includeContents, notExtractBookmarksText, sourceFilename, targetFilename, targetMimetype, targetEncoding);
transform(parser, documentSelector, includeContents, notExtractBookmarksText,
sourceFilename, targetFilename, targetMimetype, targetEncoding);
}
private void transform(Parser parser, DocumentSelector documentSelector, Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
private void transform(Parser parser, DocumentSelector documentSelector,
Boolean includeContents,
Boolean notExtractBookmarksText,
String sourceFilename,
String targetFilename, String targetMimetype, String targetEncoding)
{
try (InputStream is = new BufferedInputStream(new FileInputStream(sourceFilename));
@@ -688,7 +691,7 @@ public class Tika
}
else
{
SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance();
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
TransformerHandler transformerHandler;
transformerHandler = factory.newTransformerHandler();
transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
@@ -725,42 +728,52 @@ public class Tika
/**
* A wrapper around the normal Tika BodyContentHandler that writes comma separated (CSV) rather than tab separated output.
*/
protected static class CsvContentHandler extends BodyContentHandler {
private static final char[] comma = new char[]{ ',' };
protected static class CsvContentHandler extends BodyContentHandler
{
private static final char[] comma = new char[]{','};
private static final Pattern all_nums = Pattern.compile("[\\d\\.\\-\\+]+");
private boolean inCell = false;
private boolean needsComma = false;
// Passes the given Writer straight to the BodyContentHandler constructor.
protected CsvContentHandler(Writer output) {
protected CsvContentHandler(Writer output)
{
super(output);
}
// Swallows a one-character whitespace event whose character is a tab; every other
// ignorable-whitespace event is forwarded to the superclass unchanged.
//
// NOTE(review): the test reads ch[0] rather than ch[start], so a lone tab is only recognised
// when it sits at index 0 of the buffer - confirm that callers always pass start == 0.
@Override
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException {
if(length == 1 && ch[0] == '\t') {
throws SAXException
{
if (length == 1 && ch[0] == '\t')
{
// Ignore tabs, as they mess up the CSV output
} else {
}
else
{
super.ignorableWhitespace(ch, start, length);
}
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
if(inCell) {
StringBuffer t = new StringBuffer(new String(ch,start,length));
throws SAXException
{
if (inCell)
{
StringBuffer t = new StringBuffer(new String(ch, start, length));
// Quote if not all numbers
if(all_nums.matcher(t).matches())
if (all_nums.matcher(t).matches())
{
super.characters(ch, start, length);
}
else
{
for(int i=t.length()-1; i>=0; i--) {
if(t.charAt(i) == '\"') {
for (int i = t.length() - 1; i >= 0; i--)
{
if (t.charAt(i) == '\"')
{
// Double up double quotes
t.insert(i, '\"');
i--;
@@ -771,33 +784,45 @@ public class Tika
char[] c = t.toString().toCharArray();
super.characters(c, 0, c.length);
}
} else {
}
else
{
super.characters(ch, start, length);
}
}
// Handles the start of an element. A "td" start is not forwarded to the superclass:
// it sets inCell and, when needsComma is set, first emits a single comma through
// super.characters. Every other element is forwarded to super.startElement unchanged.
@Override
public void startElement(String uri, String localName, String name,
Attributes atts) throws SAXException {
if(localName.equals("td")) {
Attributes atts) throws SAXException
{
if (localName.equals("td"))
{
inCell = true;
if(needsComma) {
if (needsComma)
{
super.characters(comma, 0, 1);
// NOTE(review): needsComma is already true inside this branch, so this assignment
// changes nothing - confirm whether a different value was intended.
needsComma = true;
}
} else {
}
else
{
super.startElement(uri, localName, name, atts);
}
}
@Override
public void endElement(String uri, String localName, String name)
throws SAXException {
if(localName.equals("td")) {
throws SAXException
{
if (localName.equals("td"))
{
needsComma = true;
inCell = false;
} else {
if(localName.equals("tr")) {
}
else
{
if (localName.equals("tr"))
{
needsComma = false;
}
super.endElement(uri, localName, name);
@@ -830,5 +855,4 @@ public class Tika
return context;
}
}

View File

@@ -42,7 +42,7 @@ import org.springframework.stereotype.Component;
import org.xml.sax.SAXException;
/**
* JavaExecutor implementation for running TIKA transformations. It loads the
* JavaExecutor implementation for running TIKA transformations. It loads the
* transformation logic in the same JVM (check {@link Tika}).
*/
@Component

View File

@@ -48,21 +48,23 @@ import org.xml.sax.SAXException;
/**
* <a href="http://tika.apache.org/">Apache Tika</a> assumes that
* you either know exactly what your content is, or that
* you'll leave it to auto-detection.
* you either know exactly what your content is, or that
* you'll leave it to auto-detection.
* Within Alfresco, we usually do know. However, from time
* to time, we don't know if we have one of the old or one
* of the new office files (eg .xls and .xlsx).
* to time, we don't know if we have one of the old or one
* of the new office files (eg .xls and .xlsx).
* This class automatically selects the appropriate
* old (OLE2) or new (OOXML) Tika parser as required.
* old (OLE2) or new (OOXML) Tika parser as required.
*
* @author Nick Burch
*/
public class TikaOfficeDetectParser implements Parser {
public class TikaOfficeDetectParser implements Parser
{
private final Parser ole2Parser = new OfficeParser();
private final Parser ooxmlParser = new OOXMLParser();
public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
public Set<MediaType> getSupportedTypes(ParseContext parseContext)
{
Set<MediaType> types = new HashSet<>();
types.addAll(ole2Parser.getSupportedTypes(parseContext));
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
@@ -70,9 +72,9 @@ public class TikaOfficeDetectParser implements Parser {
}
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata,
ParseContext parseContext) throws IOException, SAXException,
TikaException
ContentHandler handler, Metadata metadata,
ParseContext parseContext) throws IOException, SAXException,
TikaException
{
byte[] initial4 = new byte[4];
InputStream wrapped;
@@ -93,10 +95,10 @@ public class TikaOfficeDetectParser implements Parser {
}
// Which is it?
if(initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
if (initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
{
ooxmlParser.parse(wrapped, handler, metadata, parseContext);
}
@@ -110,8 +112,8 @@ public class TikaOfficeDetectParser implements Parser {
* @deprecated This method will be removed in Apache Tika 1.0.
*/
public void parse(InputStream stream,
ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException
ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException
{
// Delegates to the four-argument overload, supplying a freshly created ParseContext.
parse(stream, handler, metadata, new ParseContext());
}

View File

@@ -112,7 +112,7 @@ import org.springframework.util.StringUtils;
public class TikaControllerTest extends AbstractTransformerControllerTest
{
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
"\n" +
@@ -130,7 +130,7 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@SpyBean
private TikaJavaExecutor javaExecutor;
@SpyBean
private TikaController controller;
@@ -226,34 +226,39 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
}
// Shared driver for the per-transform tests below.
//
// Sets up the mocked transform command, stores the transform name and target mimetype in the
// fields that the mockMvcRequest override adds to every request, posts the source file to
// "/transform" and then checks three things: the status is OK, the Content-Disposition header
// names "quick.<targetExtension>", and the response body contains expectedContentContains.
//
// includeContents - when null the "includeContents" request parameter is left out entirely;
//                   otherwise its toString() value is sent.
//
// NOTE(review): the request and the header check use the this.targetExtension field rather
// than the targetExtension parameter; presumably mockTransformCommand sets that field -
// confirm.
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
{
// We don't use targetFileBytes as some of the transforms contain different date text based on the OS being used.
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
this.transform = transform;
this.targetMimetype = targetMimetype;
System.out.println("Test "+transform+" "+ sourceExtension +" to "+targetExtension);
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
MockHttpServletRequestBuilder requestBuilder = includeContents == null
? mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile, "targetExtension", this.targetExtension, "includeContents", includeContents.toString());
? mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension)
: mockMvcRequest("/transform", sourceFile,
"targetExtension", this.targetExtension, "includeContents", includeContents.toString());
MvcResult result = mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + this.targetExtension)).
andReturn();
String content = result.getResponse().getContentAsString();
// The check is a substring match, not an exact comparison of the whole body.
assertTrue("The content did not include \""+expectedContentContains, content.contains(expectedContentContains));
assertTrue("The content did not include \"" + expectedContentContains,
content.contains(expectedContentContains));
}
@Override
// Add extra required parameters to the request.
// Builds the request through the superclass, then appends the "transform", "targetEncoding"
// and "targetMimetype" parameters, taking their values from this test's fields of the same
// names at the moment the request is built.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("transform", transform)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype);
.param("transform", transform)
.param("targetEncoding", targetEncoding)
.param("targetMimetype", targetMimetype);
}
@Test
@@ -337,8 +342,9 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
targetEncoding = "rubbish";
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension))
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
}
// --- Archive ---
@@ -346,56 +352,56 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void zipToTextArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,false,
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n");
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n");
}
@Test
public void zipToTextIncludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,true,
"quick.html\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog\n" +
"\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog" +
"\n" +
"\n");
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
"quick.html\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog\n" +
"\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"The quick brown fox jumps over the lazy dog" +
"\n" +
"\n");
}
@Test
public void zipToTextExcludeArchiveTest() throws Exception
{
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
false, "\n" +
"folder/subfolder/quick.jpg\n" +
"\n" +
"\n" +
"quick.doc\n" +
"\n" +
"\n" +
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"quick.txt\n" +
"\n" +
"\n" +
"quick.xml\n" +
"\n");
false, "\n" +
"folder/subfolder/quick.jpg\n" +
"\n" +
"\n" +
"quick.doc\n" +
"\n" +
"\n" +
"quick.html\n" +
"\n" +
"\n" +
"quick.pdf\n" +
"\n" +
"\n" +
"quick.txt\n" +
"\n" +
"\n" +
"quick.xml\n" +
"\n");
}
// --- OutlookMsg ---
@@ -403,7 +409,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void msgToTxtOutlookMsgTest() throws Exception
{
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
// --- PdfBox ---
@@ -411,31 +418,36 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void pdfToTxtPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pdfToCsvPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null, EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
}
@Test
public void pdfToXmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
@Test
public void pdfToXhtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null, EXPECTED_XHTML_CONTENT_CONTAINS);
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS);
}
@Test
public void pdfToHtmlPdfBoxTest() throws Exception
{
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null, EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
}
// --- Office ---
@@ -443,13 +455,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void msgToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null, EXPECTED_MSG_CONTENT_CONTAINS);
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
@Test
public void docToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- Poi ---
@@ -457,7 +471,8 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void xslxToCsvPoiTest() throws Exception
{
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null, EXPECTED_CSV_CONTENT_CONTAINS);
transform(POI, XSLX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
EXPECTED_CSV_CONTENT_CONTAINS);
}
// --- OOXML ---
@@ -465,13 +480,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void docxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pptxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TikaAuto ---
@@ -479,13 +496,15 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void ppxtToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void doctToTxtTikaAutoTest() throws Exception
{
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// --- TextMining ---
@@ -493,16 +512,20 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void docToTxtTextMiningTest() throws Exception
{
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null, EXPECTED_TEXT_CONTENT_CONTAINS);
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
// Sends a PDF to TXT request with the extra "notExtractBookmarksText=true" parameter and
// checks that the call succeeds. Only the status and the Content-Disposition header are
// asserted; the response body is not inspected.
@Test
public void pdfToTxtExtractBookmarksTest() throws Exception
{
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
mockMvc.perform(mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param("notExtractBookmarksText", "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition", "attachment; filename*= UTF-8''quick." + targetExtension));
mockMvc.perform(
mockMvcRequest("/transform", sourceFile, "targetExtension", targetExtension).param(
"notExtractBookmarksText", "true"))
.andExpect(status().is(OK.value()))
.andExpect(header().string("Content-Disposition",
"attachment; filename*= UTF-8''quick." + targetExtension));
}
@Override
@@ -513,10 +536,11 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
transformRequest.setSourceMediaType(MediaType.APPLICATION_PDF_VALUE);
transformRequest.setTargetMediaType(MediaType.TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("transform", "PdfBox");
transformRequest.getTransformRequestOptions().put("targetMimetype", MediaType.TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("targetMimetype",
MediaType.TEXT_PLAIN_VALUE);
transformRequest.getTransformRequestOptions().put("targetEncoding", "UTF-8");
}
@Test
public void testPojoTransform() throws Exception
{
@@ -525,7 +549,6 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
File sourceFile = getTestFile("quick." + sourceExtension, true);
String targetFileRef = UUID.randomUUID().toString();
// Transformation Request POJO
TransformRequest transformRequest = new TransformRequest();
transformRequest.setRequestId("1");
@@ -539,12 +562,14 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
// HTTP Request
HttpHeaders headers = new HttpHeaders();
headers.set(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
headers.set(HttpHeaders.CONTENT_DISPOSITION,
"attachment; filename=quick." + sourceExtension);
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
sourceFile), headers, OK);
when(alfrescoSharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
when(alfrescoSharedFileStoreClient.saveFile(any())).thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
when(alfrescoSharedFileStoreClient.saveFile(any())).thenReturn(
new FileRefResponse(new FileRefEntity(targetFileRef)));
when(mockExecutionResult.getExitValue()).thenReturn(0);
// Update the Transformation Request with any specific params before sending it
@@ -552,18 +577,21 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
// Serialize and call the transformer
String tr = objectMapper.writeValueAsString(transformRequest);
String transformationReplyAsString = mockMvc.perform(MockMvcRequestBuilders.post("/transform")
.header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
.header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE).content(tr))
.andExpect(status().is(HttpStatus.CREATED.value()))
String transformationReplyAsString = mockMvc.perform(
MockMvcRequestBuilders.post("/transform")
.header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
.header(HttpHeaders.CONTENT_TYPE,
MediaType.APPLICATION_JSON_VALUE).content(tr))
.andExpect(
status().is(HttpStatus.CREATED.value()))
.andReturn().getResponse().getContentAsString();
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString, TransformReply.class);
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
TransformReply.class);
// Assert the reply
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
assertEquals(transformRequest.getClientData(), transformReply.getClientData());
assertEquals(transformRequest.getSchema(), transformReply.getSchema());
}
}

View File

@@ -48,7 +48,8 @@ public class TikaQueueTransformServiceIT extends AbstractQueueTransformServiceIT
@Override
protected TransformRequest buildRequest()
{
return TransformRequest.builder()
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_OPENXML_WORDPROCESSING)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)