ACS-3476 Checking the size of intermediate results in a pipeline IS NOT the root cause of the ai-rendition test failures.

Adding extra error messages to the t-config checking, for the case where a pipeline
specifies source and target mimetypes that cannot be provided by the step transformers,
so that it will be clearer that the pipeline t-config is wrong.

In the case of the AI rendition tests the AI-transform t-config has a pipeline that
uses libreoffice as a step transformer, to transform some source to text/plain before
asking AWS_AI to process it into the final mimetype. However libreoffice does not convert to text/plain. What is happening is that the request was still being sent to the
all-in-one t-engine that contains libreoffice, and it workout that it should be using
the tika transformer. As a result the pipeline works by accident. The size check that
was commented out (uncommented now) was just finding out that libreoffice was unable to
do the transform and was reporting it.

officeToComprehendPiiEntityTypesViaText is the pipeline with the error.
This commit is contained in:
alandavis
2022-09-11 19:55:45 +01:00
parent 31d04467ea
commit 7ca8a483ad
4 changed files with 271 additions and 33 deletions

View File

@@ -360,19 +360,15 @@ public class TransformRegistry extends AbstractTransformRegistry
public boolean checkSourceSize(String transformerName, String sourceMediaType, Long sourceSize, String targetMediaType)
{
//TODO issue mentioned in ACS-3476,
//commenting out changes to code due to issues with libreoffice blocking a release
return true;
// return Optional.ofNullable(getTransformer(transformerName)).
// map(transformer -> transformer.getSupportedSourceAndTargetList().stream().
// filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) &&
// supported.getTargetMediaType().equals(targetMediaType)).
// findFirst().
// map(supported -> supported.getMaxSourceSizeBytes() == -1 ||
// supported.getMaxSourceSizeBytes() >= sourceSize).
// orElse(false)).
// orElse(false);
return Optional.ofNullable(getTransformer(transformerName)).
map(transformer -> transformer.getSupportedSourceAndTargetList().stream().
filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) &&
supported.getTargetMediaType().equals(targetMediaType)).
findFirst().
map(supported -> supported.getMaxSourceSizeBytes() == -1 ||
supported.getMaxSourceSizeBytes() >= sourceSize).
orElse(false)).
orElse(false);
}
public String getEngineName(String transformerName)

View File

@@ -266,10 +266,8 @@ public class TransformRegistryTest
assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_WORD, Long.MAX_VALUE, MIMETYPE_PDF));
assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12345L, MIMETYPE_PDF));
//TODO issue mentioned in ACS-3476,
//commenting out changes to code due to issues with libreoffice blocking a release
// assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF));
// assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF));
// assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF));
assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF));
}
}

View File

@@ -40,6 +40,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.Collectors;
import static java.util.stream.Collectors.toSet;
@@ -264,6 +266,7 @@ public class CombinedTransformConfig
sortTransformers(registry);
applyDefaults();
addWildcardSupportedSourceAndTarget(registry);
removePipelinesWithUnsupportedTransforms(registry);
setCoreVersionOnCombinedMultiStepTransformers();
}
@@ -312,7 +315,7 @@ public class CombinedTransformConfig
if (isPipeline && isFailover)
{
throw new IllegalArgumentException("Transformer " + transformerName(name) +
" cannot have pipeline and failover sections. Read from " + readFrom);
" cannot have both pipeline and failover sections. Read from " + readFrom);
}
// Remove transforms as they may override each other or be invalid
@@ -816,6 +819,78 @@ public class CombinedTransformConfig
return transformOptions;
}
/**
* Removes pipeline transformers if the step transformers cannot be chained together via the intermediary types
* to produce the set of claimed source and target mimetypes.
* @param registry used to log messages
*/
private void removePipelinesWithUnsupportedTransforms(AbstractTransformRegistry registry)
{
Map<String, Transformer> transformersByName = combinedTransformers.stream()
.map(Origin::get)
.collect(Collectors.toMap(Transformer::getTransformerName, Function.identity()));
for (int i=0; i<combinedTransformers.size(); i++)
{
try
{
Origin<Transformer> transformAndItsOrigin = combinedTransformers.get(i);
Transformer transformer = transformAndItsOrigin.get();
String readFrom = transformAndItsOrigin.getReadFrom();
String name = transformer.getTransformerName();
List<TransformStep> pipeline = transformer.getTransformerPipeline();
boolean isPipeline = pipeline != null && !pipeline.isEmpty();
if (isPipeline)
{
if (transformer.getSupportedSourceAndTargetList().isEmpty())
{
throw new IllegalStateException("Transformer " + transformerName(name) +
" has no supported source and target mimetypes, so will be ignored. Read from " + readFrom);
}
List<String> unsupported = new ArrayList<>();
transformer.getSupportedSourceAndTargetList()
.forEach(supportedSourceAndTarget -> {
String nextSource = supportedSourceAndTarget.getSourceMediaType();
for (TransformStep step : pipeline)
{
String source = nextSource;
String target = step.getTargetMediaType() == null
? supportedSourceAndTarget.getTargetMediaType()
: step.getTargetMediaType();
if (transformersByName.get(step.getTransformerName()).getSupportedSourceAndTargetList().stream()
.noneMatch(stepsSupportedSourceAndTarget -> source.equals(stepsSupportedSourceAndTarget.getSourceMediaType())
&& target.equals(stepsSupportedSourceAndTarget.getTargetMediaType())))
{
unsupported.add(supportedSourceAndTarget.getSourceMediaType()+"->"+supportedSourceAndTarget.getTargetMediaType());
break;
}
nextSource = target;
}
});
if (!unsupported.isEmpty())
{
throw new IllegalStateException("Pipeline transformer " + transformerName(name) +
" steps do not support ("+
unsupported.stream()
.sorted()
.collect(Collectors.joining(", "))+
") so will be ignored");
}
}
}
catch (IllegalStateException e)
{
String msg = e.getMessage();
registry.logError(msg);
combinedTransformers.remove(i--);
}
}
}
private void setCoreVersionOnCombinedMultiStepTransformers()
{
setCoreVersionOnMultiStepTransformers(combinedTransformOptions, combinedTransformers.stream()

View File

@@ -41,7 +41,6 @@ import static java.util.Collections.emptySet;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Test the CombinedTransformConfig, extended by both T-Router and ACS repository.
@@ -87,14 +86,14 @@ public class CombinedTransformConfigTest
.build();
// Not static as pipelines gets modified.
private static final Transformer PIPELINE1_2C3 = Transformer.builder().withTransformerName("1")
private final Transformer PIPELINE1_2C3 = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("3", null)))
.build();
// Not static as pipelines gets modified.
private static final Transformer PIPELINE1_2C3_COPY = Transformer.builder().withTransformerName("1")
private final Transformer PIPELINE1_2C3_COPY = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("3", null)))
@@ -349,7 +348,7 @@ public class CombinedTransformConfigTest
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
String expected = "Transformer \"2\" cannot have pipeline and failover sections. Read from readFromB";
String expected = "Transformer \"2\" cannot have both pipeline and failover sections. Read from readFromB";
assertEquals(1, registry.errorMessages.size());
assertEquals(expected, registry.errorMessages.get(0));
}
@@ -566,7 +565,7 @@ public class CombinedTransformConfigTest
@Test
public void testInvalidIndexToRemoveBeforeCurrent()
{
// indexToRemove is is before the current i value, so we are removing an overridden transform
// indexToRemove is before the current i value, so we are removing an overridden transform
// Code throws an IllegalArgumentException and i is simply decremented to ignore the current value
final TransformConfig tEngineTransformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
@@ -681,10 +680,11 @@ public class CombinedTransformConfigTest
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
String expected = "No supported source and target mimetypes could be added to the transformer \"5\" as " +
"the final step should not have a target mimetype. Read from readFromB";
assertEquals(1, registry.errorMessages.size());
assertEquals(expected, registry.errorMessages.get(0));
assertEquals(2, registry.errorMessages.size());
assertEquals("No supported source and target mimetypes could be added to the transformer \"5\" as " +
"the final step should not have a target mimetype. Read from readFromB", registry.errorMessages.get(0));
assertEquals("Transformer \"5\" has no supported source and target mimetypes, "
+ "so will be ignored. Read from readFromB", registry.errorMessages.get(1));
}
@Test
@@ -737,6 +737,173 @@ public class CombinedTransformConfigTest
assertEquals(expected, registry.warnMessages.get(0));
}
@Test
public void testPipelineWithValidSupportedSourceAndTarget()
{
// Tests the case where a pipeline's declared SupportedSourceAndTarget are supported by the step
// transformers.
final Transformer pipeline = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("3", null)))
.withSupportedSourceAndTargetList(Set.of(
SupportedSourceAndTarget.builder()
.withSourceMediaType("mimetype/a")
.withTargetMediaType("mimetype/d")
.build()))
.build();
final TransformConfig transformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
TRANSFORMER2_A2C,
TRANSFORMER3_C2D,
pipeline))
.build();
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
config.registerCombinedTransformers(registry);
assertEquals(0, registry.errorMessages.size());
assertEquals(3, config.buildTransformConfig().getTransformers().size());
assertEquals("1", registry.findTransformerName("mimetype/a", -1,
"mimetype/d", emptyMap(), null));
assertEquals("2", registry.findTransformerName("mimetype/a", -1,
"mimetype/c", emptyMap(), null));
assertEquals("3", registry.findTransformerName("mimetype/c", -1,
"mimetype/d", emptyMap(), null));
}
@Test
public void testPipelineWithValidWildcardSupportedSourceAndTarget()
{
// Tests the case where a pipeline's declared SupportedSourceAndTarget are NOT supplied so should
// be the cartesian product of the step transformers a->d
final Transformer pipeline = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("3", null)))
.build();
final TransformConfig transformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
TRANSFORMER2_A2C,
TRANSFORMER3_C2D,
pipeline))
.build();
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
config.registerCombinedTransformers(registry);
assertEquals(0, registry.errorMessages.size());
assertEquals(3, config.buildTransformConfig().getTransformers().size());
assertEquals("1", registry.findTransformerName("mimetype/a", -1,
"mimetype/d", emptyMap(), null));
}
@Test
public void testIgnorePipelineWithInvalidSupportedSourceAndTarget()
{
// Tests the case where a pipeline's declared SupportedSourceAndTargets are NOT supported by the step
// transformers.
final Transformer pipeline = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("2", null)))
.withSupportedSourceAndTargetList(Set.of(
SupportedSourceAndTarget.builder()
.withSourceMediaType("mimetype/a")
.withTargetMediaType("mimetype/d")
.build()
))
.build();
final TransformConfig transformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
TRANSFORMER2_B2C, // Does NOT support a->c so steps are invalid for a->d
TRANSFORMER3_C2D,
pipeline))
.build();
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
config.registerCombinedTransformers(registry);
String expected = "Pipeline transformer \"1\" steps do not support (mimetype/a->mimetype/d) so will be ignored";
assertEquals(1, registry.errorMessages.size());
assertEquals(expected, registry.errorMessages.get(0));
assertEquals(2, config.buildTransformConfig().getTransformers().size());
assertEquals("2", registry.findTransformerName("mimetype/b", -1,
"mimetype/c", emptyMap(), null));
assertEquals("3", registry.findTransformerName("mimetype/c", -1,
"mimetype/d", emptyMap(), null));
}
@Test
public void testIgnorePipelineWithMultipleInvalidSupportedSourceAndTarget()
{
// Same as testIgnorePipelineWithInvalidSupportedSourceAndTarget, but with multiple invalid declared
// SupportedSourceAndTargets
final Transformer pipeline = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("2", null)))
.withSupportedSourceAndTargetList(Set.of(
SupportedSourceAndTarget.builder()
.withSourceMediaType("mimetype/a")
.withTargetMediaType("mimetype/d")
.build(),
SupportedSourceAndTarget.builder() // Always going to be invalid with these steps
.withSourceMediaType("mimetype/x")
.withTargetMediaType("mimetype/y")
.build()
))
.build();
final TransformConfig transformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
TRANSFORMER2_B2C, // Does NOT support a->c so steps are invalid for a->d
TRANSFORMER3_C2D,
pipeline))
.build();
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
config.registerCombinedTransformers(registry);
String expected = "Pipeline transformer \"1\" steps do not support (mimetype/a->mimetype/d, mimetype/x->mimetype/y) so will be ignored";
assertEquals(1, registry.errorMessages.size());
assertEquals(expected, registry.errorMessages.get(0));
}
@Test
public void testIgnorePipelineWithInvalidWildcardSupportedSourceAndTarget()
{
// Tests the case where a pipeline's declared SupportedSourceAndTarget are NOT supplied so should
// be the cartesian product of the step transformers BUT AND THERE ARE NONE.
final Transformer pipeline = Transformer.builder().withTransformerName("1")
.withTransformerPipeline(List.of(
new TransformStep("2", "mimetype/c"),
new TransformStep("2", null)))
.build();
final TransformConfig transformConfig = TransformConfig.builder()
.withTransformers(ImmutableList.of(
TRANSFORMER2_B2X, // Does NOT support <anything>->c so steps are invalid
TRANSFORMER3_C2D,
pipeline))
.build();
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
config.registerCombinedTransformers(registry);
assertEquals(2, registry.errorMessages.size());
assertEquals("No supported source and target mimetypes could be added to the transformer \"1\" as "
+ "the first step transformer \"2\" does not support to \"mimetype/c\". Read from readFromB",
registry.errorMessages.get(0));
assertEquals("Transformer \"1\" has no supported source and target mimetypes, so will be ignored. "
+ "Read from readFromB",
registry.errorMessages.get(1));
assertEquals(2, config.buildTransformConfig().getTransformers().size());
}
@Test
public void testInvalidCircularTransformStep()
{
@@ -858,12 +1025,14 @@ public class CombinedTransformConfigTest
config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry);
config.combineTransformerConfig(registry);
String expected = expectedWildcardError("the step transformer \"2\" does not support \"mimetype/b\" to \"mimetype/c\"");
assertEquals(1, registry.errorMessages.size());
assertEquals(expected, registry.errorMessages.get(0));
assertEquals(2, registry.errorMessages.size());
assertEquals(expectedWildcardError("the step transformer \"2\" does not support \"mimetype/b\" "
+ "to \"mimetype/c\""), registry.errorMessages.get(0));
assertEquals("Transformer \"4\" has no supported source and target mimetypes, so will be ignored. "
+ "Read from readFromB", registry.errorMessages.get(1));
// 4: the pipeline is not removed, but will not be used as it has no supported transforms.
assertEquals(4, config.buildTransformConfig().getTransformers().size());
// The pipeline is removed, so 3 are expected
assertEquals(3, config.buildTransformConfig().getTransformers().size());
}
@Test