diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java similarity index 97% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java index 3a9ae1cf..a0297c0c 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/AbstractTikaMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java @@ -24,7 +24,7 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata; import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; @@ -51,7 +51,6 @@ import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.Locator; -import java.io.FileInputStream; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; @@ -81,7 +80,7 @@ import java.util.stream.Stream; * @author Nick Burch * @author adavis */ -public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor +public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractor { protected static final String KEY_AUTHOR = "author"; protected static final String KEY_TITLE = "title"; @@ -96,7 +95,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr private final DateTimeFormatter tikaUTCDateFormater; private final DateTimeFormatter tikaDateFormater; - public AbstractTikaMetadataExtractor(Type type, Logger logger) + public AbstractTikaMetadataExtractorEmbeddor(Type type, Logger logger) { super(type, logger); @@ -153,11 +152,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr return dateStr; } - /** - * Returns the correct Tika Parser to process the document. - * If you don't know which you want, use {@link TikaAutoMetadataExtractor} - * which makes use of the Tika auto-detection. - */ protected abstract Parser getParser(); /** @@ -168,7 +162,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr */ protected Embedder getEmbedder() { - // TODO make this an abstract method once more extracters support embedding return null; } diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/DWGMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java similarity index 94% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/DWGMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java index 977454ff..1af7e54e 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/DWGMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.Parser; @@ -57,7 +58,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor +public class DWGMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(DWGMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java similarity index 94% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java index 93d80295..e9012403 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.alfresco.transform.tika.parsers.ExifToolParser; import org.apache.commons.lang3.StringUtils; import org.apache.tika.metadata.Metadata; @@ -44,7 +45,7 @@ import java.util.regex.Pattern; import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; @Component -public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor +public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class); @@ -118,7 +119,7 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor * @return dateStrings in Iso8601 format * @see #iptcToIso8601DateString */ - protected String[] iptcToIso8601DateStrings(String[] dateStrings) + public String[] iptcToIso8601DateStrings(String[] dateStrings) { for (int i = 0; i < dateStrings.length; i++) { diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MP3MetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MP3MetadataExtractor.java similarity index 98% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MP3MetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MP3MetadataExtractor.java index 041efbcb..b22dc0db 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MP3MetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MP3MetadataExtractor.java @@ -24,7 +24,7 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MailMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java similarity index 96% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MailMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java index 3554c4f0..132e7ac8 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/MailMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.metadata.Message; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -63,7 +64,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class MailMetadataExtractor extends AbstractTikaMetadataExtractor +public class MailMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(MailMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OfficeMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java similarity index 96% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OfficeMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java index c7190a0b..51bc7ff1 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OfficeMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Office; import org.apache.tika.metadata.TikaCoreProperties; @@ -71,7 +72,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor +public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(OfficeMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OpenDocumentMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java similarity index 97% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OpenDocumentMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java index a67c2a09..fe1a7cfa 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/OpenDocumentMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; @@ -79,7 +80,7 @@ import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC; * @author adavis */ @Component -public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor +public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(OpenDocumentMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PdfBoxMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java similarity index 94% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PdfBoxMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java index d0722051..01415e80 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PdfBoxMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.alfresco.transform.tika.transformers.Tika; import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.metadata.Metadata; @@ -56,7 +57,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class PdfBoxMetadataExtractor extends AbstractTikaMetadataExtractor +public class PdfBoxMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(PdfBoxMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java similarity index 82% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java index 9872ee4e..11918e6d 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java @@ -24,27 +24,15 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; -import org.apache.poi.ooxml.POIXMLProperties; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.apache.tika.embedder.Embedder; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Collections; -import java.util.Set; -import java.util.StringJoiner; - import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; /** @@ -66,7 +54,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class PoiMetadataExtractor extends AbstractTikaMetadataExtractor +public class PoiMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(PoiMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAudioMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java similarity index 97% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAudioMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java index 2092244d..634fb2f1 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAudioMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.config.TikaConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -69,7 +70,7 @@ import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; * @author adavis */ @Component -public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor +public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(TikaAudioMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAutoMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java similarity index 97% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAutoMetadataExtractor.java rename to engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java index ed4aad6b..dfda0518 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/TikaAutoMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.extractors; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.tika.config.TikaConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TIFF; @@ -63,7 +64,7 @@ import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; * @author adavis */ @Component -public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractor +public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(TikaAutoMetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataEmbedder.java b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java similarity index 83% rename from engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataEmbedder.java rename to engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java index 8102547d..a670e5f3 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadataExtractors/PoiMetadataEmbedder.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java @@ -24,8 +24,9 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; +package org.alfresco.transform.tika.metadata.embedders; +import org.alfresco.transform.tika.metadata.AbstractTikaMetadataExtractorEmbeddor; import org.apache.poi.ooxml.POIXMLProperties; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.tika.embedder.Embedder; @@ -45,50 +46,20 @@ import java.util.Collections; import java.util.Set; import java.util.StringJoiner; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER; /** * Sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add * metadata. This is not production code, so no supported mimetypes exist in the {@code tika_engine_config.json}. - * Adding the following would make it available: - * - *
- * {
- *   "transformOptions": {
- *     ...
- *     "metadataEmbedOptions": [
- *       {"value": {"name": "metadata", "required": true}}
- *     ]
- *   },
- *   "transformers": [
- *     ...
- *     {
- *       "transformerName": "PoiMetadataEmbedder",
- *       "supportedSourceAndTargetList": [
- *         ...
- *         {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "alfresco-metadata-embed"}
- *       ],
- *       "transformOptions": [
- *         "metadataEmbedOptions"
- *       ]
- *     }
- *   ]
- * }
- * 
- - * @author Nick Burch - * @author Neil McErlean - * @author Dmitry Velichkevich - * @author adavis */ @Component -public class PoiMetadataEmbedder extends AbstractTikaMetadataExtractor +public class PoiMetadataEmbedder extends AbstractTikaMetadataExtractorEmbeddor { private static final Logger logger = LoggerFactory.getLogger(PoiMetadataEmbedder.class); public PoiMetadataEmbedder() { - super(EXTRACTOR, logger); + super(EMBEDDER, logger); } @Override @@ -127,7 +98,7 @@ public class PoiMetadataEmbedder extends AbstractTikaMetadataExtractor for (String name : metadata.names()) { metadata.isMultiValued("description"); - String value = null; + String value; if (metadata.isMultiValued(name)) { String[] values = metadata.getValues(name); diff --git a/engines/tika/src/test/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractorTest.java b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java similarity index 96% rename from engines/tika/src/test/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractorTest.java rename to engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java index b16a5df8..f4e8cd86 100644 --- a/engines/tika/src/test/java/org/alfresco/transform/tika/metadataExtractors/IPTCMetadataExtractorTest.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractorTest.java @@ -24,12 +24,12 @@ * along with Alfresco. If not, see . * #L% */ -package org.alfresco.transform.tika.metadataExtractors; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; +package org.alfresco.transform.tika.metadata.extractors; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + public class IPTCMetadataExtractorTest { IPTCMetadataExtractor extractor = new IPTCMetadataExtractor();