diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/java/org/alfresco/transformer/TikaMetadataExtractsIT.java b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/java/org/alfresco/transformer/TikaMetadataExtractsIT.java index b5e35eb2..bcd376d7 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/java/org/alfresco/transformer/TikaMetadataExtractsIT.java +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/java/org/alfresco/transformer/TikaMetadataExtractsIT.java @@ -548,4 +548,28 @@ public class TikaMetadataExtractsIT extends AbstractMetadataExtractsIT ); } + + @ParameterizedTest + @MethodSource("tika2_2_1_upgradeFailures") + public void testTika_2_2_1_upgradeFailures(TestFileInfo testFileInfo) + { + super.testTransformation(testFileInfo); + } + + private static Stream tika2_2_1_upgradeFailures() + { + // When we upgraded to Tika 2.2.1 from 2.2.0: + // - the original OfficeOpenXMLCore.SUBJECT raw metadata value started being null. + // - the replacement TikaCoreProperties.SUBJECT raw metadata changed into a multi value + // The following test files were the ones that failed. + return Stream.of( + testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE, "otg", "quick.otg"), + testFile(MIMETYPE_OPENOFFICE1_WRITER, "sxw", "quick.sxw"), + testFile(MIMETYPE_OPENDOCUMENT_GRAPHICS, "odg", "quick.odg"), + testFile(MIMETYPE_OPENDOCUMENT_TEXT, "odt", "quick.odt"), + testFile(MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE, "ott", "quick.ott"), + testFile(MIMETYPE_OPENDOCUMENT_FORMULA, "odf", "quick.odf"), + testFile(MIMETYPE_PDF, "pdf", "quick.pdf") + ); + } } diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odf_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odf_metadata.json index 64a82c12..e185ac01 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odf_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odf_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1138362922000, "{http://www.alfresco.org/model/content/1.0}author" : null, "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odg_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odg_metadata.json index a542951b..c08f6a81 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odg_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odg_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1138362371000, "{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odt_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odt_metadata.json index 37545ead..18faa8b9 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odt_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.odt_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1126049640000, "{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.otg_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.otg_metadata.json index a542951b..c08f6a81 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.otg_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.otg_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1138362371000, "{http://www.alfresco.org/model/content/1.0}author" : "Derek Hulley", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.ott_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.ott_metadata.json index 37545ead..18faa8b9 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.ott_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.ott_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1126049640000, "{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.pdf_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.pdf_metadata.json index 0f46dcb3..8758c90f 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.pdf_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.pdf_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : "2005-05-26T19:52:58Z", "{http://www.alfresco.org/model/content/1.0}author" : "Nevin Nollop", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.sxw_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.sxw_metadata.json index 37545ead..18faa8b9 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.sxw_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.sxw_metadata.json @@ -1,5 +1,5 @@ { - "{http://www.alfresco.org/model/content/1.0}description" : "Gym class featuring a brown fox and lazy dog", + "{http://www.alfresco.org/model/content/1.0}description" : "Pangram, fox, dog, Gym class featuring a brown fox and lazy dog", "{http://www.alfresco.org/model/content/1.0}created" : 1126049640000, "{http://www.alfresco.org/model/content/1.0}author" : "Jesper Steen Møller", "{http://www.alfresco.org/model/content/1.0}title" : "The quick brown fox jumps over the lazy dog" diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java index c9e8ab60..e52e9394 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2022 Alfresco Software Limited * %% * This file is part of the Alfresco software. * - @@ -269,30 +269,30 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr // Get the subject and description, despite things not // being nearly as consistent as one might hope - String subject = getMetadataValue(metadata, OfficeOpenXMLCore.SUBJECT); + String subject = getMetadataValue(metadata, TikaCoreProperties.SUBJECT); String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION); - if(subject != null && description != null) + if (subject != null && description != null) { putRawValue(KEY_DESCRIPTION, description, rawProperties); putRawValue(KEY_SUBJECT, subject, rawProperties); } - else if(subject != null) + else if (subject != null) { putRawValue(KEY_DESCRIPTION, subject, rawProperties); putRawValue(KEY_SUBJECT, subject, rawProperties); } - else if(description != null) + else if (description != null) { putRawValue(KEY_DESCRIPTION, description, rawProperties); putRawValue(KEY_SUBJECT, description, rawProperties); } // Try for the dates two different ways too - if(metadata.get(TikaCoreProperties.CREATED) != null) + if (metadata.get(TikaCoreProperties.CREATED) != null) { putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties); } - else if(metadata.get(TikaCoreProperties.MODIFIED) != null) + else if (metadata.get(TikaCoreProperties.MODIFIED) != null) { putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties); } @@ -458,7 +458,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr public void characters(char[] ch, int start, int len) { - if(text != null) + if (text != null) { text.append(ch, start, len); } @@ -466,7 +466,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr public void endElement(String namespace, String localname, String qname) { - if(text != null && text.length() > 0) + if (text != null && text.length() > 0) { tags.put(qname, text.toString()); } diff --git a/alfresco-transformer-base/src/test/java/org/alfresco/transformer/AbstractMetadataExtractsIT.java b/alfresco-transformer-base/src/test/java/org/alfresco/transformer/AbstractMetadataExtractsIT.java index f6cd7069..97addbaa 100644 --- a/alfresco-transformer-base/src/test/java/org/alfresco/transformer/AbstractMetadataExtractsIT.java +++ b/alfresco-transformer-base/src/test/java/org/alfresco/transformer/AbstractMetadataExtractsIT.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2022 Alfresco Software Limited * %% * This file is part of the Alfresco software. * - @@ -102,8 +102,8 @@ public abstract class AbstractMetadataExtractsIT jsonObjectMapper.writerWithDefaultPrettyPrinter().writeValue(actualMetadataFile, actualMetadata); Map expectedMetadata = readExpectedMetadata(metadataFilename, actualMetadataFile); - assertEquals(expectedMetadata, actualMetadata, - "The metadata did not match the expected value. It has been saved in "+actualMetadataFile.getAbsolutePath()); + assertEquals(expectedMetadata, actualMetadata, + sourceFile+": The metadata did not match the expected value. It has been saved in "+actualMetadataFile.getAbsolutePath()); actualMetadataFile.delete(); } catch (Exception e) diff --git a/alfresco-transformer-base/src/test/java/org/alfresco/transformer/TestFileInfo.java b/alfresco-transformer-base/src/test/java/org/alfresco/transformer/TestFileInfo.java index 331213a9..15b8fb70 100644 --- a/alfresco-transformer-base/src/test/java/org/alfresco/transformer/TestFileInfo.java +++ b/alfresco-transformer-base/src/test/java/org/alfresco/transformer/TestFileInfo.java @@ -76,4 +76,10 @@ public class TestFileInfo { return new TestFileInfo(mimeType, extension, path, false); } + + @Override + public String toString() + { + return path; + } } diff --git a/pom.xml b/pom.xml index 14deeb10..75d34617 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ ${dependency.jackson.version} 4.13.2 3.5.0 - 2.1.0 + 2.2.1 4.1.2 1.4