Save point: [skip ci]

* metadata package split
This commit is contained in:
alandavis
2022-07-26 15:14:38 +01:00
parent c5a8958c26
commit 2e73c39488
14 changed files with 23 additions and 27 deletions

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.base.metadataExtractors;
package org.alfresco.transform.base.metadata;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
@@ -50,7 +50,7 @@ import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER;
/**
* Helper methods for metadata extract and embed.
@@ -94,7 +94,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author Derek Hulley
* @author adavis
*/
public abstract class AbstractMetadataExtractor implements CustomTransformer
public abstract class AbstractMetadataExtractorEmbedder implements CustomTransformer
{
private static final String EXTRACT = "extract";
private static final String EMBED = "embed";
@@ -126,7 +126,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
private final Type type;
public AbstractMetadataExtractor(Type type, Logger logger)
public AbstractMetadataExtractorEmbedder(Type type, Logger logger)
{
this.type = type;
this.logger = logger;
@@ -385,7 +385,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
Properties properties = null;
try
{
InputStream inputStream = AbstractMetadataExtractor.class.getClassLoader().getResourceAsStream(filename);
InputStream inputStream = AbstractMetadataExtractorEmbedder.class.getClassLoader().getResourceAsStream(filename);
if (inputStream != null)
{
properties = new Properties();

View File

@@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -37,8 +36,6 @@ import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
@@ -47,7 +44,7 @@ import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* Metadata extractor for HTML and XHTML.
@@ -66,7 +63,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author Derek Hulley
* @author adavis
*/
public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer
public class HtmlMetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer
{
private static final Logger logger = LoggerFactory.getLogger(HtmlMetadataExtractor.class);

View File

@@ -28,7 +28,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
@@ -38,8 +37,6 @@ import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMessage.RecipientType;
import javax.mail.internet.MimeUtility;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
@@ -50,7 +47,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* Metadata extractor for RFC822 mime emails.
@@ -73,7 +70,7 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* @author adavis
*/
@Component
public class RFC822MetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer
public class RFC822MetadataExtractor extends AbstractMetadataExtractorEmbedder implements CustomTransformer
{
private static final Logger logger = LoggerFactory.getLogger(RFC822MetadataExtractor.class);

View File

@@ -27,7 +27,7 @@
package org.alfresco.transform.tika.metadata;
import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.DublinCore;
@@ -65,6 +65,8 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type;
/**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings.
@@ -80,7 +82,7 @@ import java.util.stream.Stream;
* @author Nick Burch
* @author adavis
*/
public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractor
public abstract class AbstractTikaMetadataExtractorEmbeddor extends AbstractMetadataExtractorEmbedder
{
protected static final String KEY_AUTHOR = "author";
protected static final String KEY_TITLE = "title";

View File

@@ -38,7 +38,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor.

View File

@@ -42,7 +42,7 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
@Component
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* Outlook MAPI format email metadata extractor.

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* Office file format metadata extractor.

View File

@@ -49,7 +49,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
/**

View File

@@ -36,7 +36,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* Metadata extractor for PDF documents.

View File

@@ -33,7 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
/**
* POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI.

View File

@@ -44,7 +44,7 @@ import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
/**

View File

@@ -39,7 +39,7 @@ import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EXTRACTOR;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EXTRACTOR;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;

View File

@@ -46,7 +46,7 @@ import java.util.Collections;
import java.util.Set;
import java.util.StringJoiner;
import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor.Type.EMBEDDER;
import static org.alfresco.transform.base.metadata.AbstractMetadataExtractorEmbedder.Type.EMBEDDER;
/**
* Sample POI metadata embedder to demonstrate it is possible to add custom T-Engines that will add