Save point: [skip ci]

* metadata package split
This commit is contained in:
alandavis
2022-07-26 15:14:38 +01:00
parent c5a8958c26
commit 2e73c39488
14 changed files with 23 additions and 27 deletions

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>. * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L% * #L%
*/ */
package org.alfresco.transform.base.metadataExtractors; package org.alfresco.transform.base.metadata;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
@@ -50,7 +50,7 @@ import java.util.Set;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.TreeMap; import java.util.TreeMap;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER;
/** /**
* Helper methods for metadata extract and embed. * Helper methods for metadata extract and embed.
@@ -94,7 +94,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author Derek Hulley * @author Derek Hulley
* @author adavis * @author adavis
*/ */
public abstract class AbstractMetadataExtractor implements CustomTransformer public abstract class AbstractMetadataExtractorEmbedder implements CustomTransformer
{ {
private static final String EXTRACT = "extract"; private static final String EXTRACT = "extract";
private static final String EMBED = "embed"; private static final String EMBED = "embed";
@@ -126,7 +126,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
private final Type type; private final Type type;
public AbstractMetadataExtractor(Type type, Logger logger) public AbstractMetadataExtractorEmbedder(Type type, Logger logger)
{ {
this.type = type; this.type = type;
this.logger = logger; this.logger = logger;
@@ -385,7 +385,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
Properties properties = null; Properties properties = null;
try try
{ {
InputStream inputStream = AbstractMetadataExtractor.class.getClassLoader().getResourceAsStream(filename); InputStream inputStream = AbstractMetadataExtractorEmbedder.class.getClassLoader().getResourceAsStream(filename);
if (inputStream != null) if (inputStream != null)
{ {
properties = new Properties(); properties = new Properties();

View File

@@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -37,8 +36,6 @@ import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML; import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator; import javax.swing.text.html.parser.ParserDelegator;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.OutputStream; import java.io.OutputStream;
@@ -47,7 +44,7 @@ import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* Metadata extractor for HTML and XHTML. * Metadata extractor for HTML and XHTML.
@@ -66,7 +63,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author Derek Hulley * @author Derek Hulley
* @author adavis * @author adavis
*/ */
public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer
{ {
private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class); private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class);

View File

@@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@@ -38,8 +37,6 @@ import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage; import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMessage.RecipientType; import javax.mail.internet.MimeMessage.RecipientType;
import javax.mail.internet.MimeUtility; import javax.mail.internet.MimeUtility;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.Serializable; import java.io.Serializable;
@@ -50,7 +47,7 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* Metadata extractor for RFC822 mime emails. * Metadata extractor for RFC822 mime emails.
@@ -73,7 +70,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author adavis * @author adavis
*/ */
@Component @Component
public class RFC822MetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer
{ {
private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class); private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class);

View File

@@ -27,7 +27,7 @@
package org.alfresco.transform.tika.metadata; package org.alfresco.transform.tika.metadata;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
import org.apache.tika.embedder.Embedder; import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.DublinCore; import org.apache.tika.metadata.DublinCore;
@@ -65,6 +65,8 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type;
/** /**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the * The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings. * common parts of processing the files, and the common mappings.
@@ -80,7 +82,7 @@ import java.util.stream.Stream;
* @author Nick Burch * @author Nick Burch
* @author adavis * @author adavis
*/ */
public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractor public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractorEmbedder
{ {
protected static final String KEY_AUTHOR = "author"; protected static final String KEY_AUTHOR = "author";
protected static final String KEY_TITLE = "title"; protected static final String KEY_TITLE = "title";

View File

@@ -38,7 +38,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable; import java.io.Serializable;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor. * {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor.

View File

@@ -42,7 +42,7 @@ import java.util.Set;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
@Component @Component
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable; import java.io.Serializable;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* Outlook MAPI format email metadata extractor. * Outlook MAPI format email metadata extractor.

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable; import java.io.Serializable;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* Office file format metadata extractor. * Office file format metadata extractor.

View File

@@ -49,7 +49,7 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC; import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
/** /**

View File

@@ -36,7 +36,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* Metadata extractor for the PDF documents. * Metadata extractor for the PDF documents.

View File

@@ -33,7 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/** /**
* POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI. * POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI.

View File

@@ -44,7 +44,7 @@ import java.io.Serializable;
import java.util.Calendar; import java.util.Calendar;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
/** /**

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable; import java.io.Serializable;
import java.util.Map; import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG; import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig; import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;

View File

@@ -46,7 +46,7 @@ import java.util.Collections;
import java.util.Set; import java.util.Set;
import java.util.StringJoiner; import java.util.StringJoiner;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER; import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER;
/** /**
* Sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add * Sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add