diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml b/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml index e60698f0..ce041d99 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/pom.xml @@ -65,13 +65,9 @@ org.apache.tika - tika-parsers + tika-parsers-standard-package ${dependency.tika.version} - - com.tdunning - json - org.bouncycastle bcprov-jdk15on @@ -80,10 +76,9 @@ org.bouncycastle bcmail-jdk15on - - org.quartz-scheduler - quartz + xml-apis + xml-apis diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json index 75afde88..4aba2f05 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3g2_metadata.json @@ -2,7 +2,8 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null + "{http://www.alfresco.org/model/content/1.0}title" : null, + "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json index 75afde88..4aba2f05 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json +++ 
b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.3gp_metadata.json @@ -2,7 +2,8 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "8000", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null + "{http://www.alfresco.org/model/content/1.0}title" : null, + "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json index 75afde88..dc61188e 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.m4v_metadata.json @@ -2,7 +2,8 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null + "{http://www.alfresco.org/model/content/1.0}title" : null, + "{http://www.alfresco.org/model/audio/1.0}channelType" : "Stereo" } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json index 
75afde88..96545700 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mov_metadata.json @@ -2,7 +2,8 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "1000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null + "{http://www.alfresco.org/model/content/1.0}title" : null, + "{http://www.alfresco.org/model/audio/1.0}channelType" : "Mono" } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json index 39086dfb..96545700 100644 --- a/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json +++ b/alfresco-transform-tika/alfresco-transform-tika-boot/src/test/resources/quick.mp4_metadata.json @@ -2,7 +2,8 @@ "{http://www.alfresco.org/model/content/1.0}description" : null, "{http://www.alfresco.org/model/audio/1.0}releaseDate" : null, "{http://www.alfresco.org/model/content/1.0}created" : null, - "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "90000", + "{http://www.alfresco.org/model/audio/1.0}sampleRate" : "22050", "{http://www.alfresco.org/model/content/1.0}author" : null, - "{http://www.alfresco.org/model/content/1.0}title" : null + "{http://www.alfresco.org/model/content/1.0}title" : null, + "{http://www.alfresco.org/model/audio/1.0}channelType" : "Mono" } \ No newline at end of file diff --git a/alfresco-transform-tika/alfresco-transform-tika/pom.xml 
b/alfresco-transform-tika/alfresco-transform-tika/pom.xml index e0d09619..4059bcff 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/pom.xml +++ b/alfresco-transform-tika/alfresco-transform-tika/pom.xml @@ -27,13 +27,9 @@ org.apache.tika - tika-parsers + tika-parsers-standard-package ${dependency.tika.version} - - com.tdunning - json - org.bouncycastle bcprov-jdk15on @@ -42,11 +38,6 @@ org.bouncycastle bcmail-jdk15on - - - org.quartz-scheduler - quartz - xml-apis xml-apis diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java index 4a78ae00..c9e8ab60 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/AbstractTikaMetadataExtractor.java @@ -28,7 +28,11 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.embedder.Embedder; import org.apache.tika.extractor.DocumentSelector; +import org.apache.tika.metadata.DublinCore; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.OfficeOpenXMLCore; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.ContentHandlerDecorator; @@ -58,7 +62,10 @@ import java.util.HashMap; import java.util.LinkedHashSet; import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * The parent of all Metadata Extractors which use Apache Tika under the hood. 
This handles all the @@ -83,7 +90,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr protected static final String KEY_CREATED = "created"; protected static final String KEY_DESCRIPTION = "description"; protected static final String KEY_COMMENTS = "comments"; - protected static final String KEY_TAGS = "dc:subject"; + protected static final String KEY_TAGS = DublinCore.SUBJECT.getName(); private static final String METADATA_SEPARATOR = ","; @@ -208,7 +215,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } @Override - @SuppressWarnings( "deprecation" ) public Map extractMetadata(String sourceMimetype, Map transformOptions, File sourceFile) throws Exception { @@ -245,7 +251,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr for (String tikaKey : metadata.names()) { // TODO review this change (part of MNT-15267) - should we really force string concatenation here !? - putRawValue(tikaKey, getMetadataValue(metadata, tikaKey), rawProperties); + putRawValue(tikaKey, getMetadataValue(metadata, Property.internalText(tikaKey)), rawProperties); } // Now, map the common Tika metadata keys onto @@ -254,17 +260,17 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr // to work without needing any changes // The simple ones - putRawValue(KEY_AUTHOR, getMetadataValue(metadata, Metadata.AUTHOR), rawProperties); - putRawValue(KEY_TITLE, getMetadataValue(metadata, Metadata.TITLE), rawProperties); - putRawValue(KEY_COMMENTS, getMetadataValue(metadata, Metadata.COMMENTS), rawProperties); + putRawValue(KEY_AUTHOR, getMetadataValue(metadata, TikaCoreProperties.CREATOR), rawProperties); + putRawValue(KEY_TITLE, getMetadataValue(metadata, TikaCoreProperties.TITLE), rawProperties); + putRawValue(KEY_COMMENTS, getMetadataValue(metadata, TikaCoreProperties.COMMENTS), rawProperties); // Tags putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), 
rawProperties); // Get the subject and description, despite things not // being nearly as consistent as one might hope - String subject = getMetadataValue(metadata, Metadata.SUBJECT); - String description = getMetadataValue(metadata, Metadata.DESCRIPTION); + String subject = getMetadataValue(metadata, OfficeOpenXMLCore.SUBJECT); + String description = getMetadataValue(metadata, TikaCoreProperties.DESCRIPTION); if(subject != null && description != null) { putRawValue(KEY_DESCRIPTION, description, rawProperties); @@ -282,13 +288,13 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } // Try for the dates two different ways too - if(metadata.get(Metadata.CREATION_DATE) != null) + if(metadata.get(TikaCoreProperties.CREATED) != null) { - putRawValue(KEY_CREATED, metadata.get(Metadata.CREATION_DATE), rawProperties); + putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.CREATED), rawProperties); } - else if(metadata.get(Metadata.DATE) != null) + else if(metadata.get(TikaCoreProperties.MODIFIED) != null) { - putRawValue(KEY_CREATED, metadata.get(Metadata.DATE), rawProperties); + putRawValue(KEY_CREATED, metadata.get(TikaCoreProperties.MODIFIED), rawProperties); } // If people created a specific instance @@ -388,24 +394,11 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr return values.length == 0 ? null : (values.length == 1 ? 
values[0] : values); } - private String getMetadataValue(Metadata metadata, String key) + private String getMetadataValue(Metadata metadata, Property key) { if (metadata.isMultiValued(key)) { - String[] parts = metadata.getValues(key); - - // use Set to prevent duplicates - Set value = new LinkedHashSet<>(parts.length); - - for (int i = 0; i < parts.length; i++) - { - value.add(parts[i]); - } - - String valueStr = value.toString(); - - // remove leading/trailing braces [] - return valueStr.substring(1, valueStr.length() - 1); + return distinct(metadata.getValues(key)).collect(Collectors.joining(", ")); } else { @@ -413,6 +406,15 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr } } + protected static Stream distinct(final String[] strings) + { + return Stream.of(strings) + .filter(Objects::nonNull) + .map(String::strip) + .filter(s -> !s.isEmpty()) + .distinct(); + } + /** * This content handler will capture entries from within * the header of the Tika content XHTML, but ignore the diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java index 9f6ec66e..47f300ed 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/DWGMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -27,6 +27,7 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.dwg.DWGParser; import org.slf4j.Logger; @@ -64,13 +65,12 @@ public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor super(logger); } - @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) { - putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); - putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties); + putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties); + putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIER), properties); return properties; } diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java index d8957863..b6cd2826 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MP3MetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -27,6 +27,7 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.XMPDM; import org.apache.tika.parser.Parser; import org.apache.tika.parser.mp3.Mp3Parser; @@ -86,7 +87,6 @@ public class MP3MetadataExtractor extends TikaAudioMetadataExtractor return new Mp3Parser(); } - @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) @@ -98,7 +98,7 @@ public class MP3MetadataExtractor extends TikaAudioMetadataExtractor // We only need these for people who had pre-existing mapping // properties from before the proper audio model was added putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties); - putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties); + putRawValue(KEY_SONG_TITLE, metadata.get(TikaCoreProperties.TITLE), properties); putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties); putRawValue(KEY_COMMENT, metadata.get(XMPDM.LOG_COMMENT), properties); putRawValue(KEY_TRACK_NUMBER, metadata.get(XMPDM.TRACK_NUMBER), properties); diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java index 5d0a047f..86d168c6 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/MailMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. 
* - @@ -26,7 +26,9 @@ */ package org.alfresco.transformer.metadataExtractors; +import org.apache.tika.metadata.Message; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.slf4j.Logger; @@ -82,26 +84,25 @@ public class MailMetadataExtractor extends AbstractTikaMetadataExtractor return new OfficeParser(); } - @SuppressWarnings("deprecation") @Override protected Map extractSpecific(Metadata metadata, Map properties, Map headers) { - putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties); - putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties); - putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties); - putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties); + putRawValue(KEY_ORIGINATOR, metadata.get(TikaCoreProperties.CREATOR), properties); + putRawValue(KEY_SUBJECT, metadata.get(TikaCoreProperties.TITLE), properties); + putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.SUBJECT), properties); + putRawValue(KEY_SENT_DATE, metadata.get(TikaCoreProperties.MODIFIED), properties); // Store the TO, but not cc/bcc in the addressee field - putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties); + putRawValue(KEY_ADDRESSEE, metadata.get(Message.MESSAGE_TO), properties); // Store each of To, CC and BCC in their own fields - putRawValue(KEY_TO_NAMES, metadata.getValues(Metadata.MESSAGE_TO), properties); - putRawValue(KEY_CC_NAMES, metadata.getValues(Metadata.MESSAGE_CC), properties); - putRawValue(KEY_BCC_NAMES, metadata.getValues(Metadata.MESSAGE_BCC), properties); + putRawValue(KEY_TO_NAMES, metadata.getValues(Message.MESSAGE_TO), properties); + putRawValue(KEY_CC_NAMES, metadata.getValues(Message.MESSAGE_CC), properties); + putRawValue(KEY_BCC_NAMES, metadata.getValues(Message.MESSAGE_BCC), properties); // But store all email addresses 
(to/cc/bcc) in the addresses field - putRawValue(KEY_ADDRESSEES, metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties); + putRawValue(KEY_ADDRESSEES, metadata.getValues(Message.MESSAGE_RECIPIENT_ADDRESS), properties); return properties; } diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java index dfca577e..7612a386 100644 --- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java +++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java @@ -2,7 +2,7 @@ * #%L * Alfresco Transform Core * %% - * Copyright (C) 2005 - 2020 Alfresco Software Limited + * Copyright (C) 2005 - 2021 Alfresco Software Limited * %% * This file is part of the Alfresco software. * - @@ -27,6 +27,8 @@ package org.alfresco.transformer.metadataExtractors; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Office; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.slf4j.Logger; @@ -40,7 +42,7 @@ import java.util.Map; * * Configuration: (see OfficeMetadataExtractor_metadata_extract.properties and tika_engine_config.json) * - * This extracter uses the POI library to extract the following: + * This extractor uses the POI library to extract the following: *
  *   author:             --      cm:author
  *   title:              --      cm:title
@@ -91,23 +93,27 @@ public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
         return new OfficeParser();
     }
 
-    @SuppressWarnings("deprecation")
+    /**
+     * Copies the Office-specific Tika properties into {@code properties} under this
+     * extractor's {@code KEY_*} names and returns that same map.
+     */
     @Override
     protected Map extractSpecific(Metadata metadata,
                                                         Map properties, Map headers)
     {
-        putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
-        putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
-        putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
-        putRawValue(KEY_FORMAT, metadata.get(Metadata.FORMAT), properties);
-        putRawValue(KEY_KEYWORDS, metadata.get(Metadata.KEYWORDS), properties);
-        putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
-        putRawValue(KEY_LAST_PRINTED, metadata.get(Metadata.LAST_PRINTED), properties);
-//       putRawValue(KEY_OS_VERSION, metadata.get(Metadata.OS_VERSION), properties);
-//       putRawValue(KEY_THUMBNAIL, metadata.get(Metadata.THUMBNAIL), properties);
-        putRawValue(KEY_PAGE_COUNT, metadata.get(Metadata.PAGE_COUNT), properties);
-        putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Metadata.PARAGRAPH_COUNT), properties);
-        putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties);
+        putRawValue(KEY_CREATE_DATETIME, metadata.get(TikaCoreProperties.CREATED), properties);
+        putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(TikaCoreProperties.MODIFIED), properties);
+        // KEY_EDIT_TIME replaces the legacy Metadata.EDIT_TIME (time spent editing), so it must not
+        // reuse MODIFIED, which already feeds KEY_LAST_SAVE_DATETIME on the line above.
+        // NOTE(review): assumes OfficeParser reports it as extended-properties:TotalTime - confirm.
+        putRawValue(KEY_EDIT_TIME, metadata.get(org.apache.tika.metadata.OfficeOpenXMLExtended.TOTAL_TIME), properties);
+        putRawValue(KEY_FORMAT, metadata.get(TikaCoreProperties.FORMAT), properties);
+        putRawValue(KEY_KEYWORDS, metadata.get(TikaCoreProperties.SUBJECT), properties);
+        putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIER), properties);
+        putRawValue(KEY_LAST_PRINTED, metadata.get(TikaCoreProperties.PRINT_DATE), properties);
+        putRawValue(KEY_PAGE_COUNT, metadata.get(Office.PAGE_COUNT), properties);
+        putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Office.PARAGRAPH_COUNT), properties);
+        putRawValue(KEY_WORD_COUNT, metadata.get(Office.WORD_COUNT), properties);
         return properties;
     }
 }
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
index 4de536da..8014802b 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Transform Core
  * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * Copyright (C) 2005 - 2021 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * -
@@ -26,18 +26,28 @@
  */
 package org.alfresco.transformer.metadataExtractors;
 
+import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
+
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.odf.OpenDocumentMetaParser;
 import org.apache.tika.parser.odf.OpenDocumentParser;
+import org.apache.tika.parser.xml.ElementMetadataHandler;
+import org.apache.tika.sax.TeeContentHandler;
 import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
 
 import java.io.Serializable;
 import java.util.Date;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 /**
  * {@code "application/vnd.oasis.opendocument..."} and {@code "applicationvnd.oasis.opendocument..."} metadata extractor.
@@ -77,6 +87,7 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
     private static final String KEY_INITIAL_CREATOR = "initialCreator";
     private static final String KEY_KEYWORD = "keyword";
     private static final String KEY_LANGUAGE = "language";
+    private static final String KEY_ALFRESCO_CREATOR = "_alfresco:creator";
 
     private static final String CUSTOM_PREFIX = "custom:";
 
@@ -90,22 +101,33 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
     @Override
     protected Parser getParser()
     {
-        return new OpenDocumentParser();
+        OpenDocumentParser parser = new OpenDocumentParser();
+        parser.setMetaParser(new OpenDocumentMetaParser() {
+            @Override
+            protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context)
+            {
+                final ContentHandler superHandler = super.getContentHandler(ch, md, context);
+                final ContentHandler creatorHandler = new ElementMetadataHandler(NAMESPACE_URI_DC, KEY_CREATOR, md, KEY_ALFRESCO_CREATOR);
+                return new TeeContentHandler(superHandler, creatorHandler);
+            }
+        });
+        return parser;
     }
 
-    @SuppressWarnings("deprecation")
     @Override
     protected Map extractSpecific(Metadata metadata,
                                                         Map properties, Map headers)
     {
-        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
-        putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
-        putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
-        putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
+        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(TikaCoreProperties.CREATED)), properties);
+        final String creator = getCreator(metadata);
+        putRawValue(KEY_CREATOR, creator, properties);
+        putRawValue(KEY_AUTHOR, creator, properties);
+        putRawValue(KEY_DATE, getDateOrNull(metadata.get(TikaCoreProperties.MODIFIED)), properties);
+        putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.DESCRIPTION), properties);
         putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
         putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
-        putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
-        putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);
+        putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties);
+        putRawValue(KEY_LANGUAGE, metadata.get(TikaCoreProperties.LANGUAGE), properties);
 
         // Handle user-defined properties dynamically
         Map> mapping = super.getExtractMapping();
@@ -120,6 +142,27 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
         return properties;
     }
 
+    /**
+     * Resolves the single creator value to report for the document.
+     * When {@code TikaCoreProperties.CREATOR} holds exactly one value after {@code distinct(...)}
+     * has been applied, that value is returned; otherwise the lookup falls back to
+     * {@code KEY_ALFRESCO_CREATOR}.
+     *
+     * @param metadata the Tika metadata gathered for the document
+     * @return the creator, or the result of {@code metadata.get(KEY_ALFRESCO_CREATOR)}
+     */
+    private String getCreator(Metadata metadata)
+    {
+        final List<String> creators = distinct(metadata.getValues(TikaCoreProperties.CREATOR))
+                .collect(Collectors.toUnmodifiableList());
+        if (creators.size() == 1)
+        {
+            return creators.get(0);
+        }
+
+        // Zero or several distinct creators: use the value captured under KEY_ALFRESCO_CREATOR.
+        return metadata.get(KEY_ALFRESCO_CREATOR);
+    }
+
     private Date getDateOrNull(String dateString)
     {
         if (dateString != null && dateString.length() != 0)
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
index 1a8a4a84..e7933ef3 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
@@ -2,7 +2,7 @@
  * #%L
  * Alfresco Transform Core
  * %%
- * Copyright (C) 2005 - 2020 Alfresco Software Limited
+ * Copyright (C) 2005 - 2021 Alfresco Software Limited
  * %%
  * This file is part of the Alfresco software.
  * -
@@ -28,6 +28,7 @@ package org.alfresco.transformer.metadataExtractors;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
@@ -148,13 +149,12 @@ public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
      * @param metadata     the metadata extracted from the file
      * @return          the description
      */
-    @SuppressWarnings("deprecation")
     private String generateDescription(Metadata metadata)
     {
         StringBuilder result = new StringBuilder();
-        if (metadata.get(Metadata.TITLE) != null)
+        if (metadata.get(TikaCoreProperties.TITLE) != null)
         {
-            result.append(metadata.get(Metadata.TITLE));
+            result.append(metadata.get(TikaCoreProperties.TITLE));
             if (metadata.get(XMPDM.ALBUM) != null)
             {
                 result
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
index e43677a4..9e15731e 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
@@ -44,9 +44,9 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.output.NullOutputStream;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.NullOutputStream;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -57,7 +57,7 @@ import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.parser.external.ExternalParsersFactory;
 import org.apache.tika.parser.image.ImageParser;
 import org.apache.tika.parser.image.TiffParser;
-import org.apache.tika.parser.jpeg.JpegParser;
+import org.apache.tika.parser.image.JpegParser;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -263,7 +263,7 @@ public class ExifToolParser extends ExternalParser {
      * stream of the given process to the given XHTML content handler.
      * The standard output stream is closed once fully processed.
      *
-     * @param process process
+     * @param stream stream
      * @param xhtml XHTML content handler
      * @throws SAXException if the XHTML SAX events could not be handled
      * @throws IOException if an input error occurred
@@ -315,13 +315,13 @@ public class ExifToolParser extends ExternalParser {
      * standard stream of the given process. Potential exceptions
      * are ignored, and the stream is closed once fully processed.
      *
-     * @param process process
+     * @param stream stream
      */
     private void ignoreStream(final InputStream stream) {
         Thread t = new Thread() {
             public void run() {
                 try {
-                    IOUtils.copy(stream, new NullOutputStream());
+                    IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
                 } catch (IOException e) {
                 } finally {
                     IOUtils.closeQuietly(stream);
diff --git a/pom.xml b/pom.xml
index e0916686..edef853b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
         ${dependency.jackson.version}
         4.13.2
         3.5.0
-        1.26
+        2.1.0
         4.1.2
         1.4