headers)
{
- putRawValue(KEY_ORIGINATOR, metadata.get(TikaCoreProperties.CREATOR), properties);
- putRawValue(KEY_SUBJECT, metadata.get(TikaCoreProperties.TITLE), properties);
- putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.SUBJECT), properties);
- putRawValue(KEY_SENT_DATE, metadata.get(TikaCoreProperties.MODIFIED), properties);
+ putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties);
+ putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties);
+ putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties);
+ putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties);
// Store the TO, but not cc/bcc in the addressee field
- putRawValue(KEY_ADDRESSEE, metadata.get(Message.MESSAGE_TO), properties);
+ putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties);
// Store each of To, CC and BCC in their own fields
- putRawValue(KEY_TO_NAMES, metadata.getValues(Message.MESSAGE_TO), properties);
- putRawValue(KEY_CC_NAMES, metadata.getValues(Message.MESSAGE_CC), properties);
- putRawValue(KEY_BCC_NAMES, metadata.getValues(Message.MESSAGE_BCC), properties);
+ putRawValue(KEY_TO_NAMES, metadata.getValues(Metadata.MESSAGE_TO), properties);
+ putRawValue(KEY_CC_NAMES, metadata.getValues(Metadata.MESSAGE_CC), properties);
+ putRawValue(KEY_BCC_NAMES, metadata.getValues(Metadata.MESSAGE_BCC), properties);
// But store all email addresses (to/cc/bcc) in the addresses field
- putRawValue(KEY_ADDRESSEES, metadata.getValues(Message.MESSAGE_RECIPIENT_ADDRESS), properties);
+ putRawValue(KEY_ADDRESSEES, metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties);
return properties;
}
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java
index 7612a386..dfca577e 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OfficeMetadataExtractor.java
@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -27,8 +27,6 @@
package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
-import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
@@ -42,7 +40,7 @@ import java.util.Map;
*
* Configuration: (see OfficeMetadataExtractor_metadata_extract.properties and tika_engine_config.json)
*
- * This extractor uses the POI library to extract the following:
+ * This extractor uses the POI library to extract the following:
*
* author: -- cm:author
* title: -- cm:title
@@ -93,20 +91,23 @@ public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
return new OfficeParser();
}
+ @SuppressWarnings("deprecation")
@Override
protected Map extractSpecific(Metadata metadata,
Map properties, Map headers)
{
- putRawValue(KEY_CREATE_DATETIME, metadata.get(TikaCoreProperties.CREATED), properties);
- putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(TikaCoreProperties.MODIFIED), properties);
- putRawValue(KEY_EDIT_TIME, metadata.get(TikaCoreProperties.MODIFIED), properties);
- putRawValue(KEY_FORMAT, metadata.get(TikaCoreProperties.FORMAT), properties);
- putRawValue(KEY_KEYWORDS, metadata.get(TikaCoreProperties.SUBJECT), properties);
- putRawValue(KEY_LAST_AUTHOR, metadata.get(TikaCoreProperties.MODIFIER), properties);
- putRawValue(KEY_LAST_PRINTED, metadata.get(TikaCoreProperties.PRINT_DATE), properties);
- putRawValue(KEY_PAGE_COUNT, metadata.get(Office.PAGE_COUNT), properties);
- putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Office.PARAGRAPH_COUNT), properties);
- putRawValue(KEY_WORD_COUNT, metadata.get(Office.WORD_COUNT), properties);
+ putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
+ putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
+ putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
+ putRawValue(KEY_FORMAT, metadata.get(Metadata.FORMAT), properties);
+ putRawValue(KEY_KEYWORDS, metadata.get(Metadata.KEYWORDS), properties);
+ putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
+ putRawValue(KEY_LAST_PRINTED, metadata.get(Metadata.LAST_PRINTED), properties);
+// putRawValue(KEY_OS_VERSION, metadata.get(Metadata.OS_VERSION), properties);
+// putRawValue(KEY_THUMBNAIL, metadata.get(Metadata.THUMBNAIL), properties);
+ putRawValue(KEY_PAGE_COUNT, metadata.get(Metadata.PAGE_COUNT), properties);
+ putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Metadata.PARAGRAPH_COUNT), properties);
+ putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties);
return properties;
}
}
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
index 8014802b..4de536da 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/OpenDocumentMetadataExtractor.java
@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,28 +26,18 @@
*/
package org.alfresco.transformer.metadataExtractors;
-import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
-
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.odf.OpenDocumentMetaParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
-import org.apache.tika.parser.xml.ElementMetadataHandler;
-import org.apache.tika.sax.TeeContentHandler;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
import java.io.Serializable;
import java.util.Date;
-import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.stream.Collectors;
/**
* {@code "application/vnd.oasis.opendocument..."} and {@code "applicationvnd.oasis.opendocument..."} metadata extractor.
@@ -87,7 +77,6 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
private static final String KEY_INITIAL_CREATOR = "initialCreator";
private static final String KEY_KEYWORD = "keyword";
private static final String KEY_LANGUAGE = "language";
- private static final String KEY_ALFRESCO_CREATOR = "_alfresco:creator";
private static final String CUSTOM_PREFIX = "custom:";
@@ -101,33 +90,22 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
@Override
protected Parser getParser()
{
- OpenDocumentParser parser = new OpenDocumentParser();
- parser.setMetaParser(new OpenDocumentMetaParser() {
- @Override
- protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context)
- {
- final ContentHandler superHandler = super.getContentHandler(ch, md, context);
- final ContentHandler creatorHandler = new ElementMetadataHandler(NAMESPACE_URI_DC, KEY_CREATOR, md, KEY_ALFRESCO_CREATOR);
- return new TeeContentHandler(superHandler, creatorHandler);
- }
- });
- return parser;
+ return new OpenDocumentParser();
}
+ @SuppressWarnings("deprecation")
@Override
protected Map extractSpecific(Metadata metadata,
Map properties, Map headers)
{
- putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(TikaCoreProperties.CREATED)), properties);
- final String creator = getCreator(metadata);
- putRawValue(KEY_CREATOR, creator, properties);
- putRawValue(KEY_AUTHOR, creator, properties);
- putRawValue(KEY_DATE, getDateOrNull(metadata.get(TikaCoreProperties.MODIFIED)), properties);
- putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.DESCRIPTION), properties);
+ putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
+ putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
+ putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
+ putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
- putRawValue(KEY_KEYWORD, metadata.get(TikaCoreProperties.SUBJECT), properties);
- putRawValue(KEY_LANGUAGE, metadata.get(TikaCoreProperties.LANGUAGE), properties);
+ putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
+ putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);
// Handle user-defined properties dynamically
Map> mapping = super.getExtractMapping();
@@ -142,18 +120,6 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
return properties;
}
- private String getCreator(Metadata metadata)
- {
- final List creators = distinct(metadata.getValues(TikaCoreProperties.CREATOR))
- .collect(Collectors.toUnmodifiableList());
- if (creators.size() == 1)
- {
- return creators.get(0);
- }
-
- return metadata.get(KEY_ALFRESCO_CREATOR);
- }
-
private Date getDateOrNull(String dateString)
{
if (dateString != null && dateString.length() != 0)
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
index e7933ef3..1a8a4a84 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/metadataExtractors/TikaAudioMetadataExtractor.java
@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
- * Copyright (C) 2005 - 2021 Alfresco Software Limited
+ * Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -28,7 +28,6 @@ package org.alfresco.transformer.metadataExtractors;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
@@ -149,12 +148,13 @@ public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
* @param metadata the metadata extracted from the file
* @return the description
*/
+ @SuppressWarnings("deprecation")
private String generateDescription(Metadata metadata)
{
StringBuilder result = new StringBuilder();
- if (metadata.get(TikaCoreProperties.TITLE) != null)
+ if (metadata.get(Metadata.TITLE) != null)
{
- result.append(metadata.get(TikaCoreProperties.TITLE));
+ result.append(metadata.get(Metadata.TITLE));
if (metadata.get(XMPDM.ALBUM) != null)
{
result
diff --git a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
index 9e15731e..e43677a4 100644
--- a/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
+++ b/alfresco-transform-tika/alfresco-transform-tika/src/main/java/org/alfresco/transformer/tika/parsers/ExifToolParser.java
@@ -44,9 +44,9 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.output.NullOutputStream;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.NullOutputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -57,7 +57,7 @@ import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.external.ExternalParsersFactory;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
-import org.apache.tika.parser.image.JpegParser;
+import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -263,7 +263,7 @@ public class ExifToolParser extends ExternalParser {
* stream of the given process to the given XHTML content handler.
* The standard output stream is closed once fully processed.
*
- * @param stream stream
+ * @param process process
* @param xhtml XHTML content handler
* @throws SAXException if the XHTML SAX events could not be handled
* @throws IOException if an input error occurred
@@ -315,13 +315,13 @@ public class ExifToolParser extends ExternalParser {
* standard stream of the given process. Potential exceptions
* are ignored, and the stream is closed once fully processed.
*
- * @param stream stream
+ * @param stream stream
*/
private void ignoreStream(final InputStream stream) {
Thread t = new Thread() {
public void run() {
try {
- IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
+ IOUtils.copy(stream, new NullOutputStream());
} catch (IOException e) {
} finally {
IOUtils.closeQuietly(stream);
diff --git a/pom.xml b/pom.xml
index bd0ba4ca..cf5d142f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
${dependency.jackson.version}
4.13.2
3.5.0
- 2.1.0
+ 1.26
4.1.2
1.4