From 2e73c394883e8e8613aa86428e962e31763bf612 Mon Sep 17 00:00:00 2001 From: alandavis Date: Tue, 26 Jul 2022 15:14:38 +0100 Subject: [PATCH] Save point: [skip ci] * metadata package split --- .../AbstractMetadataExtractorEmbedder.java} | 10 +++++----- .../misc/metadataExtractors/HtmlMetadataExtractor.java | 7 ++----- .../metadataExtractors/RFC822MetadataExtractor.java | 7 ++----- .../AbstractTikaMetadataExtractorEmbeddor.java | 6 ++++-- .../tika/metadata/extractors/DWGMetadataExtractor.java | 2 +- .../metadata/extractors/IPTCMetadataExtractor.java | 2 +- .../metadata/extractors/MailMetadataExtractor.java | 2 +- .../metadata/extractors/OfficeMetadataExtractor.java | 2 +- .../extractors/OpenDocumentMetadataExtractor.java | 2 +- .../metadata/extractors/PdfBoxMetadataExtractor.java | 2 +- .../tika/metadata/extractors/PoiMetadataExtractor.java | 2 +- .../extractors/TikaAudioMetadataExtractor.java | 2 +- .../metadata/extractors/TikaAutoMetadataExtractor.java | 2 +- .../tika/metadata/embedders/PoiMetadataEmbedder.java | 2 +- 14 files changed, 23 insertions(+), 27 deletions(-) rename engines/base/src/main/java/org/alfresco/transform/base/{metadataExtractors/AbstractMetadataExtractor.java => metadata/AbstractMetadataExtractorEmbedder.java} (98%) diff --git a/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java b/engines/base/src/main/java/org/alfresco/transform/base/metadata/AbstractMetadataExtractorEmbedder.java similarity index 98% rename from engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java rename to engines/base/src/main/java/org/alfresco/transform/base/metadata/AbstractMetadataExtractorEmbedder.java index 10571fb6..84f62300 100644 --- a/engines/base/src/main/java/org/alfresco/transform/base/metadataExtractors/AbstractMetadataExtractor.java +++ b/engines/base/src/main/java/org/alfresco/transform/base/metadata/AbstractMetadataExtractorEmbedder.java @@ -24,7 +24,7 
@@ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>. * #L% */ -package org.alfresco.transform.base.metadataExtractors; +package org.alfresco.transform.base.metadata; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; @@ -50,7 +50,7 @@ import java.util.Set; import java.util.StringTokenizer; import java.util.TreeMap; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER; /** * Helper methods for metadata extract and embed. @@ -94,7 +94,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author Derek Hulley * @author adavis */ -public abstract class AbstractMetadataExtractor implements CustomTransformer +public abstract class AbstractMetadataExtractorEmbedder implements CustomTransformer { private static final String EXTRACT = "extract"; private static final String EMBED = "embed"; @@ -126,7 +126,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer private final Type type; - public AbstractMetadataExtractor(Type type, Logger logger) + public AbstractMetadataExtractorEmbedder(Type type, Logger logger) { this.type = type; this.logger = logger; @@ -385,7 +385,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer Properties properties = null; try { - InputStream inputStream = AbstractMetadataExtractor.class.getClassLoader().getResourceAsStream(filename); + InputStream inputStream = AbstractMetadataExtractorEmbedder.class.getClassLoader().getResourceAsStream(filename); if (inputStream != null) { properties = new Properties(); diff --git a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java index ba839029..9ed7a6e9 100644 --- 
a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java +++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/HtmlMetadataExtractor.java @@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors; import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.TransformManager; -import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,8 +36,6 @@ import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; -import java.io.File; -import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; @@ -47,7 +44,7 @@ import java.io.Serializable; import java.util.HashMap; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * Metadata extractor for HTML and XHTML. 
@@ -66,7 +63,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author Derek Hulley * @author adavis */ -public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer +public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer { private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class); diff --git a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java index 2550e973..0c8e7241 100644 --- a/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java +++ b/engines/misc/src/main/java/org/alfresco/transform/misc/metadataExtractors/RFC822MetadataExtractor.java @@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors; import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.TransformManager; -import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; @@ -38,8 +37,6 @@ import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; import javax.mail.internet.MimeMessage.RecipientType; import javax.mail.internet.MimeUtility; -import java.io.File; -import java.io.FileInputStream; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; @@ -50,7 +47,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * Metadata extractor for RFC822 mime emails. 
@@ -73,7 +70,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt * @author adavis */ @Component -public class RFC822MetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer +public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer { private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class); diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java index a0297c0c..91856139 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/AbstractTikaMetadataExtractorEmbeddor.java @@ -27,7 +27,7 @@ package org.alfresco.transform.tika.metadata; import org.alfresco.transform.base.TransformManager; -import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; +import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder; import org.apache.tika.embedder.Embedder; import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.metadata.DublinCore; @@ -65,6 +65,8 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type; + /** * The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the * common parts of processing the files, and the common mappings. 
@@ -80,7 +82,7 @@ import java.util.stream.Stream; * @author Nick Burch * @author adavis */ -public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractor +public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractorEmbedder { protected static final String KEY_AUTHOR = "author"; protected static final String KEY_TITLE = "title"; diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java index 1af7e54e..f72e5208 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/DWGMetadataExtractor.java @@ -38,7 +38,7 @@ import org.springframework.stereotype.Component; import java.io.Serializable; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor. 
diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java index e9012403..0384ba9a 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/IPTCMetadataExtractor.java @@ -42,7 +42,7 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; @Component public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java index 132e7ac8..a7b580a2 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/MailMetadataExtractor.java @@ -39,7 +39,7 @@ import org.springframework.stereotype.Component; import java.io.Serializable; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * Outlook MAPI format email metadata extractor. 
diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java index 51bc7ff1..5088ff4d 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OfficeMetadataExtractor.java @@ -39,7 +39,7 @@ import org.springframework.stereotype.Component; import java.io.Serializable; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * Office file format metadata extractor. diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java index fe1a7cfa..c42daa34 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/OpenDocumentMetadataExtractor.java @@ -49,7 +49,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC; /** diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java index 01415e80..648d7c30 100644 --- 
a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PdfBoxMetadataExtractor.java @@ -36,7 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * Metadata extractor for the PDF documents. diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java index 11918e6d..edc99fcd 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/PoiMetadataExtractor.java @@ -33,7 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; /** * POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI. 
diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java index 634fb2f1..f1e31f36 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAudioMetadataExtractor.java @@ -44,7 +44,7 @@ import java.io.Serializable; import java.util.Calendar; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; /** diff --git a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java index dfda0518..2baebebf 100644 --- a/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java +++ b/engines/tika/src/main/java/org/alfresco/transform/tika/metadata/extractors/TikaAutoMetadataExtractor.java @@ -39,7 +39,7 @@ import org.springframework.stereotype.Component; import java.io.Serializable; import java.util.Map; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR; import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG; import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; diff --git a/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java 
b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java index a670e5f3..7d4e0d35 100644 --- a/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java +++ b/engines/tika/src/test/java/org/alfresco/transform/tika/metadata/embedders/PoiMetadataEmbedder.java @@ -46,7 +46,7 @@ import java.util.Collections; import java.util.Set; import java.util.StringJoiner; -import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER; +import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER; /** * Sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add