From 7ca8a483adbed2daed416e5e508d492e40126c5d Mon Sep 17 00:00:00 2001 From: alandavis Date: Sun, 11 Sep 2022 19:55:45 +0100 Subject: [PATCH] ACS-3476 Checking the size of intermediate results in a pipeline IS NOT the root cause of the ai-rendition test failures. Adding extra error messages to the t-config checking, for the case where a pipeline specifies source and target mimetypes that cannot be provided by the step transformers, so that it will be clearer that the pipeline t-config is wrong. In the case of the AI rendition tests the AI-transform t-config has a pipeline that uses libreoffice as a step transformer, to transform some source to text/plain before asking AWS_AI to process it into the final mimetype. However libreoffice does not convert to text/plain. What is happening is that the request was still being sent to the all-in-one t-engine that contains libreoffice, and it workout that it should be using the tika transformer. As a result the pipeline works by accident. The size check that was commented out (uncommented now) was just finding out that libreoffice was unable to do the transform and was reporting it. officeToComprehendPiiEntityTypesViaText is the pipeline with the error. --- .../base/registry/TransformRegistry.java | 22 +- .../base/registry/TransformRegistryTest.java | 8 +- .../registry/CombinedTransformConfig.java | 77 ++++++- .../registry/CombinedTransformConfigTest.java | 197 ++++++++++++++++-- 4 files changed, 271 insertions(+), 33 deletions(-) diff --git a/engines/base/src/main/java/org/alfresco/transform/base/registry/TransformRegistry.java b/engines/base/src/main/java/org/alfresco/transform/base/registry/TransformRegistry.java index 772098d9..2aa05661 100644 --- a/engines/base/src/main/java/org/alfresco/transform/base/registry/TransformRegistry.java +++ b/engines/base/src/main/java/org/alfresco/transform/base/registry/TransformRegistry.java @@ -360,19 +360,15 @@ public class TransformRegistry extends AbstractTransformRegistry public boolean checkSourceSize(String transformerName, String sourceMediaType, Long sourceSize, String targetMediaType) { - //TODO issue mentioned in ACS-3476, - //commenting out changes to code due to issues with libreoffice blocking a release - return true; - -// return Optional.ofNullable(getTransformer(transformerName)). -// map(transformer -> transformer.getSupportedSourceAndTargetList().stream(). -// filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) && -// supported.getTargetMediaType().equals(targetMediaType)). -// findFirst(). -// map(supported -> supported.getMaxSourceSizeBytes() == -1 || -// supported.getMaxSourceSizeBytes() >= sourceSize). -// orElse(false)). -// orElse(false); + return Optional.ofNullable(getTransformer(transformerName)). + map(transformer -> transformer.getSupportedSourceAndTargetList().stream(). + filter(supported -> supported.getSourceMediaType().equals(sourceMediaType) && + supported.getTargetMediaType().equals(targetMediaType)). + findFirst(). + map(supported -> supported.getMaxSourceSizeBytes() == -1 || + supported.getMaxSourceSizeBytes() >= sourceSize). + orElse(false)). + orElse(false); } public String getEngineName(String transformerName) diff --git a/engines/base/src/test/java/org/alfresco/transform/base/registry/TransformRegistryTest.java b/engines/base/src/test/java/org/alfresco/transform/base/registry/TransformRegistryTest.java index b2af4f14..2de11a64 100644 --- a/engines/base/src/test/java/org/alfresco/transform/base/registry/TransformRegistryTest.java +++ b/engines/base/src/test/java/org/alfresco/transform/base/registry/TransformRegistryTest.java @@ -266,10 +266,8 @@ public class TransformRegistryTest assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_WORD, Long.MAX_VALUE, MIMETYPE_PDF)); assertTrue( transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12345L, MIMETYPE_PDF)); -//TODO issue mentioned in ACS-3476, -//commenting out changes to code due to issues with libreoffice blocking a release -// assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF)); -// assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF)); -// assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF)); + assertFalse(transformRegistry.checkSourceSize("transformerName", MIMETYPE_EXCEL, 12346L, MIMETYPE_PDF)); + assertFalse(transformRegistry.checkSourceSize("transformerName", "doesNotExist", 12345L, MIMETYPE_PDF)); + assertFalse(transformRegistry.checkSourceSize("doesNotExist", MIMETYPE_WORD, 12345L, MIMETYPE_PDF)); } } diff --git a/model/src/main/java/org/alfresco/transform/registry/CombinedTransformConfig.java b/model/src/main/java/org/alfresco/transform/registry/CombinedTransformConfig.java index cfcb29df..858a948c 100644 --- a/model/src/main/java/org/alfresco/transform/registry/CombinedTransformConfig.java +++ b/model/src/main/java/org/alfresco/transform/registry/CombinedTransformConfig.java @@ -40,6 +40,8 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; +import java.util.TreeSet; +import java.util.function.Function; import java.util.stream.Collectors; import static java.util.stream.Collectors.toSet; @@ -264,6 +266,7 @@ public class CombinedTransformConfig sortTransformers(registry); applyDefaults(); addWildcardSupportedSourceAndTarget(registry); + removePipelinesWithUnsupportedTransforms(registry); setCoreVersionOnCombinedMultiStepTransformers(); } @@ -312,7 +315,7 @@ public class CombinedTransformConfig if (isPipeline && isFailover) { throw new IllegalArgumentException("Transformer " + transformerName(name) + - " cannot have pipeline and failover sections. Read from " + readFrom); + " cannot have both pipeline and failover sections. Read from " + readFrom); } // Remove transforms as they may override each other or be invalid @@ -816,6 +819,78 @@ public class CombinedTransformConfig return transformOptions; } + /** + * Removes pipeline transformers if the step transformers cannot be chained together via the intermediary types + * to produce the set of claimed source and target mimetypes. + * @param registry used to log messages + */ + private void removePipelinesWithUnsupportedTransforms(AbstractTransformRegistry registry) + { + Map transformersByName = combinedTransformers.stream() + .map(Origin::get) + .collect(Collectors.toMap(Transformer::getTransformerName, Function.identity())); + + for (int i=0; i transformAndItsOrigin = combinedTransformers.get(i); + Transformer transformer = transformAndItsOrigin.get(); + String readFrom = transformAndItsOrigin.getReadFrom(); + String name = transformer.getTransformerName(); + List pipeline = transformer.getTransformerPipeline(); + boolean isPipeline = pipeline != null && !pipeline.isEmpty(); + + if (isPipeline) + { + if (transformer.getSupportedSourceAndTargetList().isEmpty()) + { + throw new IllegalStateException("Transformer " + transformerName(name) + + " has no supported source and target mimetypes, so will be ignored. Read from " + readFrom); + } + + List unsupported = new ArrayList<>(); + transformer.getSupportedSourceAndTargetList() + .forEach(supportedSourceAndTarget -> { + String nextSource = supportedSourceAndTarget.getSourceMediaType(); + for (TransformStep step : pipeline) + { + String source = nextSource; + String target = step.getTargetMediaType() == null + ? supportedSourceAndTarget.getTargetMediaType() + : step.getTargetMediaType(); + + if (transformersByName.get(step.getTransformerName()).getSupportedSourceAndTargetList().stream() + .noneMatch(stepsSupportedSourceAndTarget -> source.equals(stepsSupportedSourceAndTarget.getSourceMediaType()) + && target.equals(stepsSupportedSourceAndTarget.getTargetMediaType()))) + { + unsupported.add(supportedSourceAndTarget.getSourceMediaType()+"->"+supportedSourceAndTarget.getTargetMediaType()); + break; + } + nextSource = target; + } + }); + + if (!unsupported.isEmpty()) + { + throw new IllegalStateException("Pipeline transformer " + transformerName(name) + + " steps do not support ("+ + unsupported.stream() + .sorted() + .collect(Collectors.joining(", "))+ + ") so will be ignored"); + } + } + } + catch (IllegalStateException e) + { + String msg = e.getMessage(); + registry.logError(msg); + combinedTransformers.remove(i--); + } + } + } + private void setCoreVersionOnCombinedMultiStepTransformers() { setCoreVersionOnMultiStepTransformers(combinedTransformOptions, combinedTransformers.stream() diff --git a/model/src/test/java/org/alfresco/transform/registry/CombinedTransformConfigTest.java b/model/src/test/java/org/alfresco/transform/registry/CombinedTransformConfigTest.java index 146ea4f3..0d53c0e1 100644 --- a/model/src/test/java/org/alfresco/transform/registry/CombinedTransformConfigTest.java +++ b/model/src/test/java/org/alfresco/transform/registry/CombinedTransformConfigTest.java @@ -41,7 +41,6 @@ import static java.util.Collections.emptySet; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * Test the CombinedTransformConfig, extended by both T-Router and ACS repository. @@ -87,14 +86,14 @@ public class CombinedTransformConfigTest .build(); // Not static as pipelines gets modified. - private static final Transformer PIPELINE1_2C3 = Transformer.builder().withTransformerName("1") + private final Transformer PIPELINE1_2C3 = Transformer.builder().withTransformerName("1") .withTransformerPipeline(List.of( new TransformStep("2", "mimetype/c"), new TransformStep("3", null))) .build(); // Not static as pipelines gets modified. - private static final Transformer PIPELINE1_2C3_COPY = Transformer.builder().withTransformerName("1") + private final Transformer PIPELINE1_2C3_COPY = Transformer.builder().withTransformerName("1") .withTransformerPipeline(List.of( new TransformStep("2", "mimetype/c"), new TransformStep("3", null))) @@ -349,7 +348,7 @@ public class CombinedTransformConfigTest config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); config.combineTransformerConfig(registry); - String expected = "Transformer \"2\" cannot have pipeline and failover sections. Read from readFromB"; + String expected = "Transformer \"2\" cannot have both pipeline and failover sections. Read from readFromB"; assertEquals(1, registry.errorMessages.size()); assertEquals(expected, registry.errorMessages.get(0)); } @@ -566,7 +565,7 @@ public class CombinedTransformConfigTest @Test public void testInvalidIndexToRemoveBeforeCurrent() { - // indexToRemove is is before the current i value, so we are removing an overridden transform + // indexToRemove is before the current i value, so we are removing an overridden transform // Code throws an IllegalArgumentException and i is simply decremented to ignore the current value final TransformConfig tEngineTransformConfig = TransformConfig.builder() .withTransformers(ImmutableList.of( @@ -681,10 +680,11 @@ public class CombinedTransformConfigTest config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); config.combineTransformerConfig(registry); - String expected = "No supported source and target mimetypes could be added to the transformer \"5\" as " + - "the final step should not have a target mimetype. Read from readFromB"; - assertEquals(1, registry.errorMessages.size()); - assertEquals(expected, registry.errorMessages.get(0)); + assertEquals(2, registry.errorMessages.size()); + assertEquals("No supported source and target mimetypes could be added to the transformer \"5\" as " + + "the final step should not have a target mimetype. Read from readFromB", registry.errorMessages.get(0)); + assertEquals("Transformer \"5\" has no supported source and target mimetypes, " + + "so will be ignored. Read from readFromB", registry.errorMessages.get(1)); } @Test @@ -737,6 +737,173 @@ public class CombinedTransformConfigTest assertEquals(expected, registry.warnMessages.get(0)); } + @Test + public void testPipelineWithValidSupportedSourceAndTarget() + { + // Tests the case where a pipeline's declared SupportedSourceAndTarget are supported by the step + // transformers. + final Transformer pipeline = Transformer.builder().withTransformerName("1") + .withTransformerPipeline(List.of( + new TransformStep("2", "mimetype/c"), + new TransformStep("3", null))) + .withSupportedSourceAndTargetList(Set.of( + SupportedSourceAndTarget.builder() + .withSourceMediaType("mimetype/a") + .withTargetMediaType("mimetype/d") + .build())) + .build(); + final TransformConfig transformConfig = TransformConfig.builder() + .withTransformers(ImmutableList.of( + TRANSFORMER2_A2C, + TRANSFORMER3_C2D, + pipeline)) + .build(); + + config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); + config.combineTransformerConfig(registry); + config.registerCombinedTransformers(registry); + + assertEquals(0, registry.errorMessages.size()); + assertEquals(3, config.buildTransformConfig().getTransformers().size()); + assertEquals("1", registry.findTransformerName("mimetype/a", -1, + "mimetype/d", emptyMap(), null)); + assertEquals("2", registry.findTransformerName("mimetype/a", -1, + "mimetype/c", emptyMap(), null)); + assertEquals("3", registry.findTransformerName("mimetype/c", -1, + "mimetype/d", emptyMap(), null)); + } + + @Test + public void testPipelineWithValidWildcardSupportedSourceAndTarget() + { + // Tests the case where a pipeline's declared SupportedSourceAndTarget are NOT supplied so should + // be the cartesian product of the step transformers a->d + final Transformer pipeline = Transformer.builder().withTransformerName("1") + .withTransformerPipeline(List.of( + new TransformStep("2", "mimetype/c"), + new TransformStep("3", null))) + .build(); + final TransformConfig transformConfig = TransformConfig.builder() + .withTransformers(ImmutableList.of( + TRANSFORMER2_A2C, + TRANSFORMER3_C2D, + pipeline)) + .build(); + + config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); + config.combineTransformerConfig(registry); + config.registerCombinedTransformers(registry); + + assertEquals(0, registry.errorMessages.size()); + assertEquals(3, config.buildTransformConfig().getTransformers().size()); + assertEquals("1", registry.findTransformerName("mimetype/a", -1, + "mimetype/d", emptyMap(), null)); + } + + @Test + public void testIgnorePipelineWithInvalidSupportedSourceAndTarget() + { + // Tests the case where a pipeline's declared SupportedSourceAndTargets are NOT supported by the step + // transformers. + final Transformer pipeline = Transformer.builder().withTransformerName("1") + .withTransformerPipeline(List.of( + new TransformStep("2", "mimetype/c"), + new TransformStep("2", null))) + .withSupportedSourceAndTargetList(Set.of( + SupportedSourceAndTarget.builder() + .withSourceMediaType("mimetype/a") + .withTargetMediaType("mimetype/d") + .build() + )) + .build(); + final TransformConfig transformConfig = TransformConfig.builder() + .withTransformers(ImmutableList.of( + TRANSFORMER2_B2C, // Does NOT support a->c so steps are invalid for a->d + TRANSFORMER3_C2D, + pipeline)) + .build(); + + config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); + config.combineTransformerConfig(registry); + config.registerCombinedTransformers(registry); + + String expected = "Pipeline transformer \"1\" steps do not support (mimetype/a->mimetype/d) so will be ignored"; + assertEquals(1, registry.errorMessages.size()); + assertEquals(expected, registry.errorMessages.get(0)); + assertEquals(2, config.buildTransformConfig().getTransformers().size()); + assertEquals("2", registry.findTransformerName("mimetype/b", -1, + "mimetype/c", emptyMap(), null)); + assertEquals("3", registry.findTransformerName("mimetype/c", -1, + "mimetype/d", emptyMap(), null)); + } + + @Test + public void testIgnorePipelineWithMultipleInvalidSupportedSourceAndTarget() + { + // Same as testIgnorePipelineWithInvalidSupportedSourceAndTarget, but with multiple invalid declared + // SupportedSourceAndTargets + final Transformer pipeline = Transformer.builder().withTransformerName("1") + .withTransformerPipeline(List.of( + new TransformStep("2", "mimetype/c"), + new TransformStep("2", null))) + .withSupportedSourceAndTargetList(Set.of( + SupportedSourceAndTarget.builder() + .withSourceMediaType("mimetype/a") + .withTargetMediaType("mimetype/d") + .build(), + SupportedSourceAndTarget.builder() // Always going to be invalid with these steps + .withSourceMediaType("mimetype/x") + .withTargetMediaType("mimetype/y") + .build() + )) + .build(); + final TransformConfig transformConfig = TransformConfig.builder() + .withTransformers(ImmutableList.of( + TRANSFORMER2_B2C, // Does NOT support a->c so steps are invalid for a->d + TRANSFORMER3_C2D, + pipeline)) + .build(); + + config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); + config.combineTransformerConfig(registry); + config.registerCombinedTransformers(registry); + + String expected = "Pipeline transformer \"1\" steps do not support (mimetype/a->mimetype/d, mimetype/x->mimetype/y) so will be ignored"; + assertEquals(1, registry.errorMessages.size()); + assertEquals(expected, registry.errorMessages.get(0)); + } + + @Test + public void testIgnorePipelineWithInvalidWildcardSupportedSourceAndTarget() + { + // Tests the case where a pipeline's declared SupportedSourceAndTarget are NOT supplied so should + // be the cartesian product of the step transformers BUT AND THERE ARE NONE. + final Transformer pipeline = Transformer.builder().withTransformerName("1") + .withTransformerPipeline(List.of( + new TransformStep("2", "mimetype/c"), + new TransformStep("2", null))) + .build(); + final TransformConfig transformConfig = TransformConfig.builder() + .withTransformers(ImmutableList.of( + TRANSFORMER2_B2X, // Does NOT support ->c so steps are invalid + TRANSFORMER3_C2D, + pipeline)) + .build(); + + config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); + config.combineTransformerConfig(registry); + config.registerCombinedTransformers(registry); + + assertEquals(2, registry.errorMessages.size()); + assertEquals("No supported source and target mimetypes could be added to the transformer \"1\" as " + + "the first step transformer \"2\" does not support to \"mimetype/c\". Read from readFromB", + registry.errorMessages.get(0)); + assertEquals("Transformer \"1\" has no supported source and target mimetypes, so will be ignored. " + + "Read from readFromB", + registry.errorMessages.get(1)); + assertEquals(2, config.buildTransformConfig().getTransformers().size()); + } + @Test public void testInvalidCircularTransformStep() { @@ -858,12 +1025,14 @@ public class CombinedTransformConfigTest config.addTransformConfig(transformConfig, READ_FROM_B, BASE_URL_B, registry); config.combineTransformerConfig(registry); - String expected = expectedWildcardError("the step transformer \"2\" does not support \"mimetype/b\" to \"mimetype/c\""); - assertEquals(1, registry.errorMessages.size()); - assertEquals(expected, registry.errorMessages.get(0)); + assertEquals(2, registry.errorMessages.size()); + assertEquals(expectedWildcardError("the step transformer \"2\" does not support \"mimetype/b\" " + + "to \"mimetype/c\""), registry.errorMessages.get(0)); + assertEquals("Transformer \"4\" has no supported source and target mimetypes, so will be ignored. " + + "Read from readFromB", registry.errorMessages.get(1)); - // 4: the pipeline is not removed, but will not be used as it has no supported transforms. - assertEquals(4, config.buildTransformConfig().getTransformers().size()); + // The pipeline is removed, so 3 are expected + assertEquals(3, config.buildTransformConfig().getTransformers().size()); } @Test