ATS-762: T-Core Legacy Part 2 - Legacy Pipeline additions (was: review failing legacy transforms) (#262)

* ATS-762: Add Tika unit test for pdf to csv

* ATS-762: Fix indentation

* ATS-762: Added 3 tests for simple pipeline. msg > txt, txt > doc, txt > odt, txt > rtf

* ATS-762: Added tests for libreofficeToPdf pipeline

* ATS-762: Addressed Jan's comment about not using asterisk when importing modules

* ATS-762: Changed comment to pdf-->csv to address Jan's comment on the PR

* task/ATS-762_T: noticed the txt mime type was wrong so fixed it

Co-authored-by: kristian <kristian.dimitrov@alfresco.com>
This commit is contained in:
Ayman Harake 2020-06-19 18:03:56 +01:00 committed by GitHub
parent 38f7a8cc9c
commit 14e70b9785
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 51 additions and 3 deletions

View File

@ -53,11 +53,16 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORDPERFECT; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORDPERFECT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OUTLOOK_MSG; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_DITA; import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_DITA;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_SXI;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_SXC;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STW;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STI;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STC;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import static org.springframework.http.HttpStatus.OK; import static org.springframework.http.HttpStatus.OK;
@ -118,6 +123,10 @@ public class LibreOfficeTransformationIT
testFile(MIMETYPE_PDF,"pdf",null) testFile(MIMETYPE_PDF,"pdf",null)
); );
private static final Set<TestFileInfo> txtTarget = ImmutableSet.of(
testFile(MIMETYPE_TEXT_PLAIN,"txt",null)
);
private final String sourceFile; private final String sourceFile;
private final String targetExtension; private final String targetExtension;
private final String sourceMimetype; private final String sourceMimetype;
@ -146,6 +155,12 @@ public class LibreOfficeTransformationIT
testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO ,"ppsm" ,"quick.ppsm"), testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO ,"ppsm" ,"quick.ppsm"),
testFile(MIMETYPE_OUTLOOK_MSG ,"msg" ,"quick.msg"), testFile(MIMETYPE_OUTLOOK_MSG ,"msg" ,"quick.msg"),
testFile(MIMETYPE_DITA ,"dita" ,"quick.dita"), testFile(MIMETYPE_DITA ,"dita" ,"quick.dita"),
testFile(MIMETYPE_TEXT_PLAIN ,"txt" ,"quick.txt"),
testFile(MIMETYPE_STC ,"stc" ,"quick.stc"),
testFile(MIMETYPE_STI ,"sti" ,"quick.sti"),
testFile(MIMETYPE_STW ,"stw" ,"quick.stw"),
testFile(MIMETYPE_SXC ,"sxc" ,"quick.sxc"),
testFile(MIMETYPE_SXI ,"sxi" ,"quick.sxi"),
testFile(MIMETYPE_TSV ,"tsv" ,"sample.tsv") testFile(MIMETYPE_TSV ,"tsv" ,"sample.tsv")
).collect(toMap(TestFileInfo::getPath, identity())); ).collect(toMap(TestFileInfo::getPath, identity()));
@ -167,6 +182,7 @@ public class LibreOfficeTransformationIT
allTargets("quick.html", documentsTargets), allTargets("quick.html", documentsTargets),
allTargets("quick.odt", documentsTargets), allTargets("quick.odt", documentsTargets),
allTargets("quick.wpd", documentsTargets), allTargets("quick.wpd", documentsTargets),
allTargets("quick.txt", documentsTargets),
allTargets("sample.rtf", documentsTargets), allTargets("sample.rtf", documentsTargets),
allTargets("quick.odp", presentationTargets), allTargets("quick.odp", presentationTargets),
@ -188,7 +204,14 @@ public class LibreOfficeTransformationIT
allTargets("quick.dita", pdfTarget), allTargets("quick.dita", pdfTarget),
allTargets("quick.msg", pdfTarget), allTargets("quick.msg", pdfTarget),
allTargets("quick.ppsm", pdfTarget), allTargets("quick.ppsm", pdfTarget),
allTargets("quick.ppsx", pdfTarget) allTargets("quick.ppsx", pdfTarget),
allTargets("quick.stc", pdfTarget),
allTargets("quick.sti", pdfTarget),
allTargets("quick.stw", pdfTarget),
allTargets("quick.sxc", pdfTarget),
allTargets("quick.sxi", pdfTarget),
allTargets("quick.msg", txtTarget)
) )
.flatMap(identity()) .flatMap(identity())
.collect(toSet()); .collect(toSet());

View File

@ -0,0 +1,8 @@
The quick brown fox jumps over the lazy dog
Blank Page

View File

@ -72,7 +72,15 @@ public class TikaTransformationIT
{ {
sourceFile = entry.getLeft(); sourceFile = entry.getLeft();
targetExtension = entry.getMiddle(); targetExtension = entry.getMiddle();
targetMimetype = extensionMimetype.get(entry.getMiddle()); //Single test to cover pdf-->csv
if (sourceFile.contains("pdf") && targetExtension.contains("csv"))
{
targetMimetype = "text/csv";
}
else
{
targetMimetype = extensionMimetype.get(entry.getMiddle());
}
sourceMimetype = entry.getRight(); sourceMimetype = entry.getRight();
} }
@ -108,6 +116,9 @@ public class TikaTransformationIT
Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"), Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"),
Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers") Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers")
), ),
Stream.of(
Triple.of("quick.pdf", "csv", "application/pdf")
),
allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"), allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"),
allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"), allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"),
allTargets("quick.odt", "application/vnd.oasis.opendocument.text"), allTargets("quick.odt", "application/vnd.oasis.opendocument.text"),

View File

@ -181,4 +181,10 @@ public interface MimetypeMap
String MIMETYPE_VORBIS = "audio/vorbis"; String MIMETYPE_VORBIS = "audio/vorbis";
String MIMETYPE_FLAC = "audio/x-flac"; String MIMETYPE_FLAC = "audio/x-flac";
String MIMETYPE_ACP = "application/acp"; String MIMETYPE_ACP = "application/acp";
String MIMETYPE_STC = "application/vnd.sun.xml.calc.template";
String MIMETYPE_STI = "application/vnd.sun.xml.impress.template";
String MIMETYPE_STW = "application/vnd.sun.xml.writer.template";
String MIMETYPE_SXC = "application/vnd.sun.xml.calc";
String MIMETYPE_SXI = "application/vnd.sun.xml.impress";
} }