ACS-3476 Checking the size of intermediate results in a pipeline IS NOT the root cause of the ai-rendition test failures.

Adding extra error messages to the t-config checking, for the case where a pipeline
specifies source and target mimetypes that cannot be provided by the step transformers,
so that it will be clearer that the pipeline t-config is wrong.

In the case of the AI rendition tests the AI-transform t-config has a pipeline that
uses libreoffice as a step transformer, to transform some source to text/plain before
asking AWS_AI to process it into the final mimetype. However, libreoffice does not convert to text/plain. What is happening is that the request was still being sent to the
all-in-one t-engine that contains libreoffice, and it worked out that it should be using
the tika transformer. As a result the pipeline works by accident. The size check that
was commented out (uncommented now) was just finding out that libreoffice was unable to
do the transform and was reporting it.

officeToComprehendPiiEntityTypesViaText is the pipeline with the error.
This commit is contained in:
alandavis
2022-09-11 19:55:45 +01:00
parent 31d04467ea
commit 7ca8a483ad
4 changed files with 271 additions and 33 deletions

View File

@@ -360,19 +360,15 @@ public class TransformRegistry extends AbstractTransformRegistry
/**
 * Checks whether the named transformer supports the given source/target media type pair
 * for a source of the given size.
 *
 * NOTE(review): this text looks like a diff hunk rendered without +/- markers, so both the
 * old body (the {@code return true;} stub followed by the commented-out chain) and the new
 * body (the uncommented {@code Optional} chain) appear here. Confirm against the committed
 * file which statements are actually present.
 *
 * @param transformerName name handed to {@code getTransformer}
 * @param sourceMediaType compared with {@code equals} against each supported entry's source media type
 * @param sourceSize      compared against the matching entry's {@code getMaxSourceSizeBytes()}
 * @param targetMediaType compared with {@code equals} against each supported entry's target media type
 * @return for the {@code Optional} chain: {@code true} only when {@code getTransformer} returns
 *         non-null, an entry matching both media types exists, and that entry's maximum is
 *         {@code -1} or at least {@code sourceSize}; {@code false} in every other case.
 *         The stub variant returns {@code true} unconditionally.
 */
public boolean checkSourceSize(String transformerName, String sourceMediaType, Long sourceSize, String targetMediaType)
{
//TODO issue mentioned in ACS-3476,
//commenting out changes to code due to issues with libreoffice blocking a release
// Stub: accepts every request without consulting the transformer's supported list.
return true;
// return Optional.ofNullable(getTransformer(transformerName)).
// map(transformer -> transformer.getSupportedSourceAndTargetList().stream().
// filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) &&
// supported.getTargetMediaType().equals(targetMediaType)).
// findFirst().
// map(supported -> supported.getMaxSourceSizeBytes() == -1 ||
// supported.getMaxSourceSizeBytes() >= sourceSize).
// orElse(false)).
// orElse(false);
// A null result from getTransformer falls through to the outer orElse(false).
return Optional.ofNullable(getTransformer(transformerName)).
map(transformer -> transformer.getSupportedSourceAndTargetList().stream().
// Keep only entries whose source AND target media types both match the request.
filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) &&
supported.getTargetMediaType().equals(targetMediaType)).
// Only the first matching entry is consulted.
findFirst().
// A maximum of -1 always passes (presumably "no limit" - confirm); otherwise the source must fit.
map(supported -> supported.getMaxSourceSizeBytes() == -1 ||
supported.getMaxSourceSizeBytes() >= sourceSize).
// No matching source/target entry: report false.
orElse(false)).
orElse(false);
}
public String getEngineName(String transformerName)

View File

@@ -266,10 +266,8 @@ public class TransformRegistryTest
assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_WORD, Long.MAX_VALUE, MIMETYPE_PDF));
assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12345L, MIMETYPE_PDF));
//TODO issue mentioned in ACS-3476,
//commenting out changes to code due to issues with libreoffice blocking a release
// assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF));
// assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF));
// assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF));
}
}