diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml b/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml index 6ff5ea13..283e6374 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml @@ -65,9 +65,13 @@ org.apache.tika - tika-parsers-standard-package + tika-parsers ${dependency.tika.version} + + com.tdunning + json + org.bouncycastle bcprov-jdk15on @@ -76,9 +80,10 @@ org.bouncycastle bcmail-jdk15on + - xml-apis - xml-apis + org.quartz-scheduler + quartz diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json index 4aba2f05..75afde88 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json @@ -2,8 +2,7 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null, - "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" + "{http://www.alfresco.org/model/content/1.0}title" : null } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json index 4aba2f05..75afde88 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json +++ 
b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json @@ -2,8 +2,7 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null, - "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" + "{http://www.alfresco.org/model/content/1.0}title" : null } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json index dc61188e..75afde88 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json @@ -2,8 +2,7 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null, - "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" + "{http://www.alfresco.org/model/content/1.0}title" : null } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json index 
96545700..75afde88 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json @@ -2,8 +2,7 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null, - "{http://www.alfresco.org/model/audio/1.0}channelType" : "Mono" + "{http://www.alfresco.org/model/content/1.0}title" : null } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json index 96545700..39086dfb 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json @@ -2,8 +2,7 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "90000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null, - "{http://www.alfresco.org/model/audio/1.0}channelType" : "Mono" + "{http://www.alfresco.org/model/content/1.0}title" : null } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika/pom.xml 
b/alfresco-transform-tika/alfresco-transform-tika/pom.xml index 3672452d..48fe99bc 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/pom.xml +++ b/alfresco-transform-tika/alfresco-transform-tika/pom.xml @@ -27,9 +27,13 @@ org.apache.tika - tika-parsers-standard-package + tika-parsers ${dependency.tika.version} + + com.tdunning + json + org.bouncycastle bcprov-jdk15on @@ -38,6 +42,11 @@ org.bouncycastle bcmail-jdk15on + + + org.quartz-scheduler + quartz + xml-apis xml-apis diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java index c9e8ab60..4a78ae00 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java @@ -28,11 +28,7 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.embedder.Embedder; import org.apache.tika.extractor.DocumentSelector; -import org.apache.tika.metadata.DublinCore; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.OfficeOpenXMLCore; -import org.apache.tika.metadata.Property; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.ContentHandlerDecorator; @@ -62,10 +58,7 @@ import java.util.HashMap; import java.util.LinkedHashSet; import java.util.Locale; import java.util.Map; -import java.util.Objects; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; /** * The parent of all Metadata Extractors which use Apache Tika under the hood. 
This handles all the @@ -90,7 +83,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr protected static final String KEY_CREATED = "created"; protected static final String KEY_DESCRIPTION = "description"; protected static final String KEY_COMMENTS = "comments"; - protected static final String KEY_TAGS = DublinCore.SUBJECT.getName(); + protected static final String KEY_TAGS = "dc:subject"; private static final String METADATA_SEPARATOR = ","; @@ -215,6 +208,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } @Override + @SuppressWarnings( "deprecation" ) public Map extractMetadata(String sourceMimetype, Map transformOptions, File sourceFile) throws Exception { @@ -251,7 +245,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr for (String tikaKey : metadata.names()) { // TODO review this change (part of MNT-15267) - should we really force string concatenation here !? - putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties); + putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties); } // Now, map the common Tika metadata keys onto @@ -260,17 +254,17 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr // to work without needing any changes // The simple ones - putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties); - putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties); - putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties); + putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties); + putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties); + putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties); // Tags putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), 
rawProperties); // Get the subject and description, despite things not // being nearly as consistent as one might hope - String subject = getMetadataValue(metadata, OfficeOpenXMLCore.SUBJECT); - String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION); + String subject = getMetadataValue(metadata, Metadata.SUBJECT); + String description = getMetadataValue(metadata, Metadata.DESCRIPTION); if(subject != null && description != null) { putRawValue(KEY_DESCRIPTION, description, rawProperties); @@ -288,13 +282,13 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } // Try for the dates two different ways too - if(metadata.get(TikaCoreProperties.CREATED) != null) + if(metadata.get(Metadata.CREATION_DATE) != null) { - putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties); + putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties); } - else if(metadata.get(TikaCoreProperties.MODIFIED) != null) + else if(metadata.get(Metadata.DATE) != null) { - putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties); + putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties); } // If people created a specific instance @@ -394,11 +388,24 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr return values.length == 0 ? null : (values.length == 1 ? 
values[0] : values); } - private String getMetadataValue(Metadata metadata, Property key) + private String getMetadataValue(Metadata metadata, String key) { if (metadata.isMultiValued(key)) { - return distinct(metadata.getValues(key)).collect(Collectors.joining(", ")); + String[] parts = metadata.getValues(key); + + // use Set to prevent duplicates + Set value = new LinkedHashSet<>(parts.length); + + for (int i = 0; i < parts.length; i++) + { + value.add(parts[i]); + } + + String valueStr = value.toString(); + + // remove leading/trailing braces [] + return valueStr.substring(1, valueStr.length() - 1); } else { @@ -406,15 +413,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } } - protected static Stream distinct(final String[] strings) - { - return Stream.of(strings) - .filter(Objects::nonNull) - .map(String::strip) - .filter(s -> !s.isEmpty()) - .distinct(); - } - /** * This content handler will capture entries from within * the header of the Tika content XHTML, but ignore the diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java index 47f300ed..9f6ec66e 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -27,7 +27,6 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.dwg.DWGParser; import org.slf4j.Logger; @@ -65,12 +64,13 @@ public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor super(logger); } + @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) { - putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties); - putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIED), properties); + putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); + putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); return properties; } diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java index b6cd2826..d8957863 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -27,7 +27,6 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.XMPDM; import org.apache.tika.parser.Parser; import org.apache.tika.parser.mp3.Mp3Parser; @@ -87,6 +86,7 @@ public class MP3MetadataExtractor extends TikaAudioMetadataExtractor return new Mp3Parser(); } + @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) @@ -98,7 +98,7 @@ public class MP3MetadataExtractor extends TikaAudioMetadataExtractor // We only need these for people who had pre-existing mapping // properties from before the proper audio model was added putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties); - putRawValue(KEY_SONG_TITLE, metadata.get(TikaCoreProperties.TITLE), properties); + putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties); putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties); putRawValue(KEY_COMMENT, metadata.get(XMPDM.LOG_COMMENT), properties); putRawValue(KEY_TRACK_NUMBER, metadata.get(XMPDM.TRACK_NUMBER), properties); diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java index 86d168c6..5d0a047f 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -26,9 +26,7 @@ */ package org.alfresco.transformer.metadataExtractors; -import org.apache.tika.metadata.Message; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.slf4j.Logger; @@ -84,25 +82,26 @@ public class MailMetadataExtractor extends AbstractTikaMetadataExtractor return new OfficeParser(); } + @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) { - putRawValue(KEY_ORIGINATOR, metadata.get(TikaCoreProperties.CREATOR), properties); - putRawValue(KEY_SUBJECT, metadata.get(TikaCoreProperties.TITLE), properties); - putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.SUBJECT), properties); - putRawValue(KEY_SENT_DATE, metadata.get(TikaCoreProperties.MODIFIED), properties); + putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties); + putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties); + putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties); + putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties); // Store the TO, but not cc/bcc in the addressee field - putRawValue(KEY_ADDRESSEE, metadata.get(Message.MESSAGE_TO), properties); + putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties); // Store each of To, CC and BCC in their own fields - putRawValue(KEY_TO_NAMES, metadata.getValues(Message.MESSAGE_TO), properties); - putRawValue(KEY_CC_NAMES, metadata.getValues(Message.MESSAGE_CC), properties); - putRawValue(KEY_BCC_NAMES, metadata.getValues(Message.MESSAGE_BCC), properties); + putRawValue(KEY_TO_NAMES, metadata.getValues(Metadata.MESSAGE_TO), properties); + putRawValue(KEY_CC_NAMES, metadata.getValues(Metadata.MESSAGE_CC), properties); + putRawValue(KEY_BCC_NAMES, metadata.getValues(Metadata.MESSAGE_BCC), properties); // But store all email addresses 
(to/cc/bcc) in the addresses field - putRawValue(KEY_ADDRESSEES, metadata.getValues(Message.MESSAGE_RECIPIENT_ADDRESS), properties); + putRawValue(KEY_ADDRESSEES, metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties); return properties; } diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java index 7612a386..dfca577e 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2021 Alfresco Software Limited + * Copyright (C) 2005 - 2020 Alfresco Software Limited * %% * This file is part of the Alfresco software. * - @@ -27,8 +27,6 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.Office; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.slf4j.Logger; @@ -42,7 +40,7 @@ import java.util.Map; * * Configuration: (see OfficeMetadataExtractor_metadata_extract.properties and tika_engine_config.json) * - * This extractor uses the POI library to extract the following: + * This extractor uses the POI library to extract the following: *
  *   author:             --      cm:author
  *   title:              --      cm:title
@@ -93,20 +91,23 @@ public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
         return new OfficeParser();
     }
 
+    @SuppressWarnings("deprecation")
     @Override
     protected Map extractSpecific(Metadata metadata,
                                                         Map properties, Map headers)
     {
-        putRawValue(KEY_CREATE_DATETIME, metadata.get(TikaCoreProperties.CREATED), properties);
-        putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(TikaCoreProperties.MODIFIED), properties);
-        putRawValue(KEY_EDIT_TIME, metadata.get(TikaCoreProperties.MODIFIED), properties);
-        putRawValue(KEY_FORMAT, metadata.get(TikaCoreProperties.FORMAT), properties);
-        putRawValue(KEY_KEYWORDS, metadata.get(TikaCoreProperties.SUBJECT), properties);
-        putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIER), properties);
-        putRawValue(KEY_LAST_PRINTED, metadata.get(TikaCoreProperties.PRINT_DATE), properties);
-        putRawValue(KEY_PAGE_COUNT, metadata.get(Office.PAGE_COUNT), properties);
-        putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Office.PARAGRAPH_COUNT), properties);
-        putRawValue(KEY_WORD_COUNT, metadata.get(Office.WORD_COUNT), properties);
+        putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
+        putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
+        putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
+        putRawValue(KEY_FORMAT, metadata.get(Metadata.FORMAT), properties);
+        putRawValue(KEY_KEYWORDS, metadata.get(Metadata.KEYWORDS), properties);
+        putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
+        putRawValue(KEY_LAST_PRINTED, metadata.get(Metadata.LAST_PRINTED), properties);
+//       putRawValue(KEY_OS_VERSION, metadata.get(Metadata.OS_VERSION), properties);
+//       putRawValue(KEY_THUMBNAIL, metadata.get(Metadata.THUMBNAIL), properties);
+        putRawValue(KEY_PAGE_COUNT, metadata.get(Metadata.PAGE_COUNT), properties);
+        putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Metadata.PARAGRAPH_COUNT), properties);
+        putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties);
         return properties;
     }
 }
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
index 8014802b..4de536da 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Transform Core
  * %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * -
@@ -26,28 +26,18 @@
  */
 package org.alfresco.transformer.metadataExtractors;
 
-import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
-
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.odf.OpenDocumentMetaParser;
 import org.apache.tika.parser.odf.OpenDocumentParser;
-import org.apache.tika.parser.xml.ElementMetadataHandler;
-import org.apache.tika.sax.TeeContentHandler;
 import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
 
 import java.io.Serializable;
 import java.util.Date;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 /**
  * {@code "application/vnd.oasis.opendocument..."} and {@code "applicationvnd.oasis.opendocument..."} metadata extractor.
@@ -87,7 +77,6 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
     private static final String KEY_INITIAL_CREATOR = "initialCreator";
     private static final String KEY_KEYWORD = "keyword";
     private static final String KEY_LANGUAGE = "language";
-    private static final String KEY_ALFRESCO_CREATOR = "_alfresco:creator";
 
     private static final String CUSTOM_PREFIX = "custom:";
 
@@ -101,33 +90,22 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
     @Override
     protected Parser getParser()
     {
-        OpenDocumentParser parser = new OpenDocumentParser();
-        parser.setMetaParser(new OpenDocumentMetaParser() {
-            @Override
-            protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context)
-            {
-                final ContentHandler superHandler = super.getContentHandler(ch, md, context);
-                final ContentHandler creatorHandler = new ElementMetadataHandler(NAMESPACE_URI_DC, KEY_CREATOR, md, KEY_ALFRESCO_CREATOR);
-                return new TeeContentHandler(superHandler, creatorHandler);
-            }
-        });
-        return parser;
+        return new OpenDocumentParser();
     }
 
+    @SuppressWarnings("deprecation")
     @Override
     protected Map extractSpecific(Metadata metadata,
                                                         Map properties, Map headers)
     {
-        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(TikaCoreProperties.CREATED)), properties);
-        final String creator = getCreator(metadata);
-        putRawValue(KEY_CREATOR, creator, properties);
-        putRawValue(KEY_AUTHOR, creator, properties);
-        putRawValue(KEY_DATE, getDateOrNull(metadata.get(TikaCoreProperties.MODIFIED)), properties);
-        putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.DESCRIPTION), properties);
+        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
+        putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
+        putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
+        putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
         putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
         putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
-        putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties);
-        putRawValue(KEY_LANGUAGE, metadata.get(TikaCoreProperties.LANGUAGE), properties);
+        putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
+        putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);
 
         // Handle user-defined properties dynamically
         Map> mapping = super.getExtractMapping();
@@ -142,18 +120,6 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
         return properties;
     }
 
-    private String getCreator(Metadata metadata)
-    {
-        final List creators = distinct(metadata.getValues(TikaCoreProperties.CREATOR))
-                .collect(Collectors.toUnmodifiableList());
-        if (creators.size() == 1)
-        {
-            return creators.get(0);
-        }
-
-        return metadata.get(KEY_ALFRESCO_CREATOR);
-    }
-
     private Date getDateOrNull(String dateString)
     {
         if (dateString != null && dateString.length() != 0)
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
index e7933ef3..1a8a4a84 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Transform Core
  * %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * -
@@ -28,7 +28,6 @@ package org.alfresco.transformer.metadataExtractors;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
@@ -149,12 +148,13 @@ public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
      * @param metadata     the metadata extracted from the file
      * @return          the description
      */
+    @SuppressWarnings("deprecation")
     private String generateDescription(Metadata metadata)
     {
         StringBuilder result = new StringBuilder();
-        if (metadata.get(TikaCoreProperties.TITLE) != null)
+        if (metadata.get(Metadata.TITLE) != null)
         {
-            result.append(metadata.get(TikaCoreProperties.TITLE));
+            result.append(metadata.get(Metadata.TITLE));
             if (metadata.get(XMPDM.ALBUM) != null)
             {
                 result
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
index 9e15731e..e43677a4 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
@@ -44,9 +44,9 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.output.NullOutputStream;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.NullOutputStream;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -57,7 +57,7 @@ import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.parser.external.ExternalParsersFactory;
 import org.apache.tika.parser.image.ImageParser;
 import org.apache.tika.parser.image.TiffParser;
-import org.apache.tika.parser.image.JpegParser;
+import org.apache.tika.parser.jpeg.JpegParser;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -263,7 +263,7 @@ public class ExifToolParser extends ExternalParser {
      * stream of the given process to the given XHTML content handler.
      * The standard output stream is closed once fully processed.
      *
-     * @param stream stream
+     * @param process process
      * @param xhtml XHTML content handler
      * @throws SAXException if the XHTML SAX events could not be handled
      * @throws IOException if an input error occurred
@@ -315,13 +315,13 @@ public class ExifToolParser extends ExternalParser {
      * standard stream of the given process. Potential exceptions
      * are ignored, and the stream is closed once fully processed.
      *
-     * @param stream stream
+     * @param stream stream to drain and discard
      */
     private void ignoreStream(final InputStream stream) {
         Thread t = new Thread() {
             public void run() {
                 try {
-                    IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
+                    IOUtils.copy(stream, new NullOutputStream());
                 } catch (IOException e) {
                 } finally {
                     IOUtils.closeQuietly(stream);
diff --git a/pom.xml b/pom.xml
index bd0ba4ca..cf5d142f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
         ${dependency.jackson.version}
         4.13.2
         3.5.0
-        2.1.0
+        1.26
         4.1.2
         1.4