REPO-5188 SourceEncoding should not be used to select transforms as i… (#249)

* REPO-5188 SourceEncoding should not be used to select transforms as it is always provided to T-Engines

* Fix test. Now that SourceEncoding is in the options but is not used to select the transformer, we need to add another
option to force the use of the textToPdf transform rather than libreoffice.
This commit is contained in:
Alan Davis 2020-06-01 14:30:10 +01:00 committed by GitHub
parent 163d17e9d3
commit b538b809d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 50 additions and 36 deletions

View File

@ -98,7 +98,7 @@ public class AIOTransformRegistryTest
"Archive", "OutlookMsg", "PdfBox", "Office", "Poi", "OOXML", "TikaAuto", "TextMining");
List<String> expectedTransformOptionNames = Arrays.asList("tikaOptions", "archiveOptions", "pdfboxOptions",
"textToPdfOptions", "stringOptions", "htmlOptions");
"textToPdfOptions", "stringOptions");
TransformConfig miscConfig = loadConfig("misc_engine_config.json");
TransformConfig tikaConfig = loadConfig("tika_engine_config.json");

View File

@ -1,15 +1,10 @@
{
"transformOptions": {
"textToPdfOptions": [
{"value": {"name": "pageLimit"}},
{"value": {"name": "sourceEncoding"}}
{"value": {"name": "pageLimit"}}
],
"stringOptions": [
{"value": {"name": "sourceEncoding"}},
{"value": {"name": "targetEncoding"}}
],
"htmlOptions": [
{"value": {"name": "sourceEncoding"}}
]
},
"transformers": [
@ -19,7 +14,6 @@
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
],
"transformOptions": [
"htmlOptions"
]
},
{

View File

@ -149,6 +149,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -167,6 +168,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("spanish.eml"));
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
@ -188,6 +190,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("attachment.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -207,6 +210,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("alternative.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -225,6 +229,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("nested.alternative.eml"));
assertTrue("Content from eml transform didn't contain expected value. ",
result.getResponse().getContentAsString().contains(expected));
@ -243,6 +248,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
readTestFile("htmlChars.eml"));
assertFalse(result.getResponse().getContentAsString().contains(expected));
}
@ -268,6 +274,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
null,
null,
expected.getBytes());
String contentResult = new String(result.getResponse().getContentAsByteArray(),
@ -296,6 +303,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
"UTF-8",
null,
content);
String contentResult = new String(result.getResponse().getContentAsByteArray(),
@ -315,6 +323,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"txt",
MIMETYPE_TEXT_PLAIN,
"UTF-8",
null,
content);
assertEquals("Returned content should be empty for an empty source file", 0,
@ -339,6 +348,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
"pdf",
MIMETYPE_PDF,
null,
"1",
expected.getBytes());
// Read back in the PDF and check it
@ -358,7 +368,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksPages() throws Exception
{
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("pages"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("pages"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -367,7 +377,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksNumbers() throws Exception
{
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("numbers"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("numbers"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -376,7 +386,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testAppleIWorksKey() throws Exception
{
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("key"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("key"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -386,7 +396,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
public void testOOXML() throws Exception
{
MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
"jpeg", MIMETYPE_IMAGE_JPEG, null, readTestFile("docx"));
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, readTestFile("docx"));
assertTrue("Expected image content but content is empty.",
result.getResponse().getContentLengthLong() > 0L);
}
@ -397,6 +407,7 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
String targetExtension,
String targetMimetype,
String targetEncoding,
String pageLimit,
byte[] content) throws Exception
{
final MockMultipartFile sourceFile = new MockMultipartFile("file",
@ -408,6 +419,8 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
// SourceEncoding is available in the options but is not used to select the transformer as it is a known
// like the source mimetype.
if (sourceEncoding != null)
{
requestBuilder.param("sourceEncoding", sourceEncoding);
@ -416,6 +429,10 @@ public class MiscControllerTest extends AbstractTransformerControllerTest
{
requestBuilder.param("targetEncoding", targetEncoding);
}
if (pageLimit != null)
{
requestBuilder.param("pageLimit", pageLimit);
}
return mockMvc.perform(requestBuilder)
.andExpect(status().is(OK.value()))

View File

@ -1,15 +1,10 @@
{
"transformOptions": {
"textToPdfOptions": [
{"value": {"name": "pageLimit"}},
{"value": {"name": "sourceEncoding"}}
{"value": {"name": "pageLimit"}}
],
"stringOptions": [
{"value": {"name": "sourceEncoding"}},
{"value": {"name": "targetEncoding"}}
],
"htmlOptions": [
{"value": {"name": "sourceEncoding"}}
]
},
"transformers": [
@ -19,7 +14,6 @@
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
],
"transformOptions": [
"htmlOptions"
]
},
{

View File

@ -1,15 +1,10 @@
{
"transformOptions": {
"textToPdfOptions": [
{"value": {"name": "pageLimit"}},
{"value": {"name": "sourceEncoding"}}
{"value": {"name": "pageLimit"}}
],
"stringOptions": [
{"value": {"name": "sourceEncoding"}},
{"value": {"name": "targetEncoding"}}
],
"htmlOptions": [
{"value": {"name": "sourceEncoding"}}
{"value": {"name": "targetEncoding"}}
]
},
"transformers": [
@ -19,8 +14,7 @@
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
],
"transformOptions": [
"htmlOptions"
]
]
},
{
"transformerName": "string",

View File

@ -33,6 +33,7 @@ import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
import static org.alfresco.transformer.fs.FileManager.deleteFile;
import static org.alfresco.transformer.fs.FileManager.getFilenameFromContentDisposition;
import static org.alfresco.transformer.fs.FileManager.save;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.springframework.http.HttpStatus.BAD_REQUEST;
import static org.springframework.http.HttpStatus.CREATED;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
@ -341,15 +342,29 @@ public abstract class AbstractTransformerController implements TransformControll
protected String getTransformerName(final File sourceFile, final String sourceMimetype,
final String targetMimetype, final Map<String, String> transformOptions)
{
final long sourceSizeInBytes = sourceFile.length();
final String transformerName = transformRegistry.findTransformerName(sourceMimetype,
sourceSizeInBytes, targetMimetype, transformOptions, null);
if (transformerName == null)
// The transformOptions always contains sourceEncoding when sent to a T-Engine, even though it should not be
// used to select a transformer. Similar to source and target mimetypes and extensions, but these are not
// passed in transformOptions.
String sourceEncoding = transformOptions.remove(SOURCE_ENCODING);
try
{
throw new TransformException(BAD_REQUEST.value(),
"No transforms were able to handle the request");
final long sourceSizeInBytes = sourceFile.length();
final String transformerName = transformRegistry.findTransformerName(sourceMimetype,
sourceSizeInBytes, targetMimetype, transformOptions, null);
if (transformerName == null)
{
throw new TransformException(BAD_REQUEST.value(),
"No transforms were able to handle the request");
}
return transformerName;
}
finally
{
if (sourceEncoding != null)
{
transformOptions.put(SOURCE_ENCODING, sourceEncoding);
}
}
return transformerName;
}
protected Map<String, String> createTransformOptions(Object... namesAndValues)
@ -365,7 +380,7 @@ public abstract class AbstractTransformerController implements TransformControll
{
String name = namesAndValues[i].toString();
Object value = namesAndValues[i + 1];
if (value != null)
if (value != null && (!(value instanceof String) || !((String)value).isBlank()))
{
transformOptions.put(name, value.toString());
}