ATS-762: T-Core Legacy Part 2 - Legacy Pipeline additions (was: review failing legacy transforms) (#262)

* ATS-762: Add Tika unit test for pdf to csv

* ATS-762: Fix indentation

* ATS-762: Added 3 tests for simple pipeline: msg > txt, txt > doc, txt > odt, txt > rtf

* ATS-762: Added tests for libreofficeToPdf pipeline

* ATS-762: Addressed Jan's comment about not using asterisk when importing modules

* ATS-762: Changed comment to pdf-->csv to address Jan's comment on the PR

* task/ATS-762_T: noticed the txt mime type was wrong so fixed it

Co-authored-by: kristian <kristian.dimitrov@alfresco.com>
This commit is contained in:
Ayman Harake 2020-06-19 18:03:56 +01:00 committed by GitHub
parent 38f7a8cc9c
commit 14e70b9785
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 51 additions and 3 deletions

View File

@ -53,11 +53,16 @@ import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORD;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_WORDPERFECT;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_XML;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET_ADDIN_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_DITA;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_SXI;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_SXC;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STW;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STI;
import static org.alfresco.transformer.util.MimetypeMap.MIMETYPE_STC;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.springframework.http.HttpStatus.OK;
@ -118,6 +123,10 @@ public class LibreOfficeTransformationIT
testFile(MIMETYPE_PDF,"pdf",null)
);
// Target set holding a single entry: plain text (MIMETYPE_TEXT_PLAIN) with the "txt" extension.
// The third testFile argument is null for this target entry.
private static final Set<TestFileInfo> txtTarget = ImmutableSet.of(
testFile(MIMETYPE_TEXT_PLAIN,"txt",null)
);
private final String sourceFile;
private final String targetExtension;
private final String sourceMimetype;
@ -146,6 +155,12 @@ public class LibreOfficeTransformationIT
testFile(MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO ,"ppsm" ,"quick.ppsm"),
testFile(MIMETYPE_OUTLOOK_MSG ,"msg" ,"quick.msg"),
testFile(MIMETYPE_DITA ,"dita" ,"quick.dita"),
testFile(MIMETYPE_TEXT_PLAIN ,"txt" ,"quick.txt"),
testFile(MIMETYPE_STC ,"stc" ,"quick.stc"),
testFile(MIMETYPE_STI ,"sti" ,"quick.sti"),
testFile(MIMETYPE_STW ,"stw" ,"quick.stw"),
testFile(MIMETYPE_SXC ,"sxc" ,"quick.sxc"),
testFile(MIMETYPE_SXI ,"sxi" ,"quick.sxi"),
testFile(MIMETYPE_TSV ,"tsv" ,"sample.tsv")
).collect(toMap(TestFileInfo::getPath, identity()));
@ -167,6 +182,7 @@ public class LibreOfficeTransformationIT
allTargets("quick.html", documentsTargets),
allTargets("quick.odt", documentsTargets),
allTargets("quick.wpd", documentsTargets),
allTargets("quick.txt", documentsTargets),
allTargets("sample.rtf", documentsTargets),
allTargets("quick.odp", presentationTargets),
@ -188,7 +204,14 @@ public class LibreOfficeTransformationIT
allTargets("quick.dita", pdfTarget),
allTargets("quick.msg", pdfTarget),
allTargets("quick.ppsm", pdfTarget),
allTargets("quick.ppsx", pdfTarget)
allTargets("quick.ppsx", pdfTarget),
allTargets("quick.stc", pdfTarget),
allTargets("quick.sti", pdfTarget),
allTargets("quick.stw", pdfTarget),
allTargets("quick.sxc", pdfTarget),
allTargets("quick.sxi", pdfTarget),
allTargets("quick.msg", txtTarget)
)
.flatMap(identity())
.collect(toSet());

View File

@ -0,0 +1,8 @@
The quick brown fox jumps over the lazy dog
Blank Page

View File

@ -72,7 +72,15 @@ public class TikaTransformationIT
{
sourceFile = entry.getLeft();
targetExtension = entry.getMiddle();
//Single test to cover pdf-->csv
if (sourceFile.contains("pdf") && targetExtension.contains("csv"))
{
targetMimetype = "text/csv";
}
else
{
targetMimetype = extensionMimetype.get(entry.getMiddle());
}
sourceMimetype = entry.getRight();
}
@ -108,6 +116,9 @@ public class TikaTransformationIT
Triple.of("quick.numbers", "xhtml", "application/vnd.apple.numbers"),
Triple.of("quick.numbers", "xml", "application/vnd.apple.numbers")
),
Stream.of(
Triple.of("quick.pdf", "csv", "application/pdf")
),
allTargets("quick.odp", "application/vnd.oasis.opendocument.presentation"),
allTargets("quick.ods", "application/vnd.oasis.opendocument.spreadsheet"),
allTargets("quick.odt", "application/vnd.oasis.opendocument.text"),

View File

@ -181,4 +181,10 @@ public interface MimetypeMap
String MIMETYPE_VORBIS = "audio/vorbis";
String MIMETYPE_FLAC = "audio/x-flac";
String MIMETYPE_ACP = "application/acp";
String MIMETYPE_STC = "application/vnd.sun.xml.calc.template";
String MIMETYPE_STI = "application/vnd.sun.xml.impress.template";
String MIMETYPE_STW = "application/vnd.sun.xml.writer.template";
String MIMETYPE_SXC = "application/vnd.sun.xml.calc";
String MIMETYPE_SXI = "application/vnd.sun.xml.impress";
}