From b538b809d715b4d64b01f3de2540a0785fea718d Mon Sep 17 00:00:00 2001 From: Alan Davis Date: Mon, 1 Jun 2020 14:30:10 +0100 Subject: [PATCH] =?UTF-8?q?REPO-5188=20SourceEncoding=20should=20not=20be?= =?UTF-8?q?=20used=20to=20select=20transforms=20as=20i=E2=80=A6=20(#249)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * REPO-5188 SourceEncoding should not be used to select transforms as it is always provided to T-Engines * Fix test. Now that SourceEncoding is in the options but is not used to select the transformer, we need to add another option to force the use of the textToPdf transform rather than libreoffice. --- .../transformer/AIOTransformRegistryTest.java | 2 +- .../test/resources/misc_engine_config.json | 8 +---- .../transformer/MiscControllerTest.java | 25 ++++++++++++--- .../test/resources/misc_engine_config.json | 8 +---- .../main/resources/misc_engine_config.json | 12 ++----- .../AbstractTransformerController.java | 31 ++++++++++++++----- 6 files changed, 50 insertions(+), 36 deletions(-) diff --git a/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/java/org/alfresco/transformer/AIOTransformRegistryTest.java b/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/java/org/alfresco/transformer/AIOTransformRegistryTest.java index 2579e9e6..0524c99f 100644 --- a/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/java/org/alfresco/transformer/AIOTransformRegistryTest.java +++ b/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/java/org/alfresco/transformer/AIOTransformRegistryTest.java @@ -98,7 +98,7 @@ public class AIOTransformRegistryTest "Archive", "OutlookMsg", "PdfBox", "Office", "Poi", "OOXML", "TikaAuto", "TextMining"); List expectedTransformOptionNames = Arrays.asList("tikaOptions", "archiveOptions", "pdfboxOptions", - "textToPdfOptions", "stringOptions", "htmlOptions"); + "textToPdfOptions", "stringOptions"); TransformConfig miscConfig = loadConfig("misc_engine_config.json"); TransformConfig tikaConfig = loadConfig("tika_engine_config.json"); diff --git a/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/resources/misc_engine_config.json b/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/resources/misc_engine_config.json index 5948332f..b7f0384f 100644 --- a/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/resources/misc_engine_config.json +++ b/alfresco-transform-core-aio/alfresco-transform-core-aio/src/test/resources/misc_engine_config.json @@ -1,15 +1,10 @@ { "transformOptions": { "textToPdfOptions": [ - {"value": {"name": "pageLimit"}}, - {"value": {"name": "sourceEncoding"}} + {"value": {"name": "pageLimit"}} ], "stringOptions": [ - {"value": {"name": "sourceEncoding"}}, {"value": {"name": "targetEncoding"}} - ], - "htmlOptions": [ - {"value": {"name": "sourceEncoding"}} ] }, "transformers": [ @@ -19,7 +14,6 @@ {"sourceMediaType": "text/html", "targetMediaType": "text/plain"} ], "transformOptions": [ - "htmlOptions" ] }, { diff --git a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java index 08d7ad90..4a1f1313 100644 --- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java +++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/java/org/alfresco/transformer/MiscControllerTest.java @@ -149,6 +149,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("eml")); assertTrue("Content from eml transform didn't contain expected value. ", result.getResponse().getContentAsString().contains(expected)); @@ -167,6 +168,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("spanish.eml")); String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8); @@ -188,6 +190,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("attachment.eml")); assertTrue("Content from eml transform didn't contain expected value. ", result.getResponse().getContentAsString().contains(expected)); @@ -207,6 +210,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("alternative.eml")); assertTrue("Content from eml transform didn't contain expected value. ", result.getResponse().getContentAsString().contains(expected)); @@ -225,6 +229,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("nested.alternative.eml")); assertTrue("Content from eml transform didn't contain expected value. ", result.getResponse().getContentAsString().contains(expected)); @@ -243,6 +248,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, readTestFile("htmlChars.eml")); assertFalse(result.getResponse().getContentAsString().contains(expected)); } @@ -268,6 +274,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, null, + null, expected.getBytes()); String contentResult = new String(result.getResponse().getContentAsByteArray(), @@ -296,6 +303,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, "UTF-8", + null, content); String contentResult = new String(result.getResponse().getContentAsByteArray(), @@ -315,6 +323,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "txt", MIMETYPE_TEXT_PLAIN, "UTF-8", + null, content); assertEquals("Returned content should be empty for an empty source file", 0, @@ -339,6 +348,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest "pdf", MIMETYPE_PDF, null, + "1", expected.getBytes()); // Read back in the PDF and check it @@ -358,7 +368,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest public void testAppleIWorksPages() throws Exception { MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS, - "jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("pages")); + "jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("pages")); assertTrue("Expected image content but content is empty.", result.getResponse().getContentLengthLong() > 0L); } @@ -367,7 +377,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest public void testAppleIWorksNumbers() throws Exception { MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS, - "jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("numbers")); + "jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("numbers")); assertTrue("Expected image content but content is empty.", result.getResponse().getContentLengthLong() > 0L); } @@ -376,7 +386,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest public void testAppleIWorksKey() throws Exception { MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE, - "jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("key")); + "jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("key")); assertTrue("Expected image content but content is empty.", result.getResponse().getContentLengthLong() > 0L); } @@ -386,7 +396,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest public void testOOXML() throws Exception { MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING, - "jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("docx")); + "jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("docx")); assertTrue("Expected image content but content is empty.", result.getResponse().getContentLengthLong() > 0L); } @@ -397,6 +407,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest String targetExtension, String targetMimetype, String targetEncoding, + String pageLimit, byte[] content) throws Exception { final MockMultipartFile sourceFile = new MockMultipartFile("file", @@ -408,6 +419,8 @@ public class MiscControllerTest extends AbstractTransformerControllerTest .param("targetMimetype", targetMimetype) .param("sourceMimetype", sourceMimetype); + // SourceEncoding is available in the options but is not used to select the transformer as it is a known + // like the source mimetype. if (sourceEncoding != null) { requestBuilder.param("sourceEncoding", sourceEncoding); @@ -416,6 +429,10 @@ public class MiscControllerTest extends AbstractTransformerControllerTest { requestBuilder.param("targetEncoding", targetEncoding); } + if (pageLimit != null) + { + requestBuilder.param("pageLimit", pageLimit); + } return mockMvc.perform(requestBuilder) .andExpect(status().is(OK.value())) diff --git a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/misc_engine_config.json b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/misc_engine_config.json index 5948332f..b7f0384f 100644 --- a/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/misc_engine_config.json +++ b/alfresco-transform-misc/alfresco-transform-misc-boot/src/test/resources/misc_engine_config.json @@ -1,15 +1,10 @@ { "transformOptions": { "textToPdfOptions": [ - {"value": {"name": "pageLimit"}}, - {"value": {"name": "sourceEncoding"}} + {"value": {"name": "pageLimit"}} ], "stringOptions": [ - {"value": {"name": "sourceEncoding"}}, {"value": {"name": "targetEncoding"}} - ], - "htmlOptions": [ - {"value": {"name": "sourceEncoding"}} ] }, "transformers": [ @@ -19,7 +14,6 @@ {"sourceMediaType": "text/html", "targetMediaType": "text/plain"} ], "transformOptions": [ - "htmlOptions" ] }, { diff --git a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json index 5948332f..7edd1275 100644 --- a/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json +++ b/alfresco-transform-misc/alfresco-transform-misc/src/main/resources/misc_engine_config.json @@ -1,15 +1,10 @@ { "transformOptions": { "textToPdfOptions": [ - {"value": {"name": "pageLimit"}}, - {"value": {"name": "sourceEncoding"}} + {"value": {"name": "pageLimit"}} ], "stringOptions": [ - {"value": {"name": "sourceEncoding"}}, - {"value": {"name": "targetEncoding"}} - ], - "htmlOptions": [ - {"value": {"name": "sourceEncoding"}} + {"value": {"name": "targetEncoding"}} ] }, "transformers": [ @@ -19,8 +14,7 @@ {"sourceMediaType": "text/html", "targetMediaType": "text/plain"} ], "transformOptions": [ - "htmlOptions" - ] + ] }, { "transformerName": "string", diff --git a/alfresco-transformer-base/src/main/java/org/alfresco/transformer/AbstractTransformerController.java b/alfresco-transformer-base/src/main/java/org/alfresco/transformer/AbstractTransformerController.java index 862d50ae..a25f50e1 100644 --- a/alfresco-transformer-base/src/main/java/org/alfresco/transformer/AbstractTransformerController.java +++ b/alfresco-transformer-base/src/main/java/org/alfresco/transformer/AbstractTransformerController.java @@ -33,6 +33,7 @@ import static org.alfresco.transformer.fs.FileManager.createTargetFileName; import static org.alfresco.transformer.fs.FileManager.deleteFile; import static org.alfresco.transformer.fs.FileManager.getFilenameFromContentDisposition; import static org.alfresco.transformer.fs.FileManager.save; +import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING; import static org.springframework.http.HttpStatus.BAD_REQUEST; import static org.springframework.http.HttpStatus.CREATED; import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR; @@ -341,15 +342,29 @@ public abstract class AbstractTransformerController implements TransformControll protected String getTransformerName(final File sourceFile, final String sourceMimetype, final String targetMimetype, final Map transformOptions) { - final long sourceSizeInBytes = sourceFile.length(); - final String transformerName = transformRegistry.findTransformerName(sourceMimetype, - sourceSizeInBytes, targetMimetype, transformOptions, null); - if (transformerName == null) + // The transformOptions always contains sourceEncoding when sent to a T-Engine, even though it should not be + // used to select a transformer. Similar to source and target mimetypes and extensions, but these are not + // passed in transformOptions. + String sourceEncoding = transformOptions.remove(SOURCE_ENCODING); + try { - throw new TransformException(BAD_REQUEST.value(), - "No transforms were able to handle the request"); + final long sourceSizeInBytes = sourceFile.length(); + final String transformerName = transformRegistry.findTransformerName(sourceMimetype, + sourceSizeInBytes, targetMimetype, transformOptions, null); + if (transformerName == null) + { + throw new TransformException(BAD_REQUEST.value(), + "No transforms were able to handle the request"); + } + return transformerName; + } + finally + { + if (sourceEncoding != null) + { + transformOptions.put(SOURCE_ENCODING, sourceEncoding); + } } - return transformerName; } protected Map createTransformOptions(Object... namesAndValues) @@ -365,7 +380,7 @@ public abstract class AbstractTransformerController implements TransformControll { String name = namesAndValues[i].toString(); Object value = namesAndValues[i + 1]; - if (value != null) + if (value != null && (!(value instanceof String) || !((String)value).isBlank())) { transformOptions.put(name, value.toString()); }