Save point: [skip ci]

* Moved iptc reference info to model rather than having a separate models directory
* Use InputStream and OutputStream in metadata extractors rather than Files.
This commit is contained in:
alandavis
2022-07-12 10:18:15 +01:00
parent 2e17c3ec53
commit 6bac1c4287
24 changed files with 211 additions and 235 deletions

View File

@@ -38,7 +38,8 @@ public interface TransformManager
{ {
/** /**
* Allows a {@link CustomTransformer} to use a local source {@code File} rather than the supplied {@code InputStream}. * Allows a {@link CustomTransformer} to use a local source {@code File} rather than the supplied {@code InputStream}.
* The file will be deleted once the request is completed. * The file will be deleted once the request is completed. To avoid creating extra files, if a File has already
* been created by the base t-engine, it is returned.
* If possible this method should be avoided as it is better not to leave content on disk. * If possible this method should be avoided as it is better not to leave content on disk.
* @throws IllegalStateException if this method has already been called. * @throws IllegalStateException if this method has already been called.
*/ */
@@ -46,14 +47,14 @@ public interface TransformManager
/** /**
* Allows a {@link CustomTransformer} to use a local target {@code File} rather than the supplied {@code OutputStream}. * Allows a {@link CustomTransformer} to use a local target {@code File} rather than the supplied {@code OutputStream}.
* The file will be deleted once the request is completed. * The file will be deleted once the request is completed. To avoid creating extra files, if a File has already
* been created by the base t-engine, it is returned.
* If possible this method should be avoided as it is better not to leave content on disk. * If possible this method should be avoided as it is better not to leave content on disk.
* @throws IllegalStateException if this method has already been called. A call to {@link #respondWithFragment(Integer)} * @throws IllegalStateException if this method has already been called. A call to {@link #respondWithFragment(Integer)}
* allows the method to be called again. * allows the method to be called again.
*/ */
File createTargetFile(); File createTargetFile();
// TODO: Do we want to support the following?
/** /**
* Allows a single transform request to have multiple transform responses. For example images from a video at * Allows a single transform request to have multiple transform responses. For example images from a video at
* different time offsets or different pages of a document. Following a call to this method a transform response is * different time offsets or different pages of a document. Following a call to this method a transform response is

View File

@@ -33,7 +33,6 @@ import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.slf4j.Logger; import org.slf4j.Logger;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@@ -64,19 +63,17 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* *
* The transform results in a Map of extracted properties encoded as json being returned to the content repository. * The transform results in a Map of extracted properties encoded as json being returned to the content repository.
* <ul> * <ul>
* <li>The content repository will use a transform in preference to any metadata extractors it might have defined * <li>The method extracts ALL available metadata from the document with
* locally for the same MIMETYPE.</li> * {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)} and then calls
* <li>The T-Engine's Controller class will call a method in a class that extends {@link AbstractMetadataExtractor} * {@link #mapMetadataAndWrite(OutputStream, Map, Map)}.</li>
* based on the source and target mediatypes in the normal way.</li>
* <li>The method extracts ALL available metadata is extracted from the document and then calls
* {@link #mapMetadataAndWrite(File, Map, Map)}.</li>
* <li>Selected values from the available metadata are mapped into content repository property names and values, * <li>Selected values from the available metadata are mapped into content repository property names and values,
* depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li> * depending on what is defined in a {@code "<classname>_metadata_extract.properties"} file.</li>
* <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values * <li>The selected values are set back to the content repository as a JSON representation of a Map, where the values
* are applied to the source node.</li> * are applied to the source node.</li>
* </ul> * </ul>
* To support the same functionality as metadata extractors configured inside the content repository, * To support the same functionality as metadata extractors configured inside the content repository,
* extra key value pairs may be returned from {@link #extractMetadata}. These are: * extra key value pairs may be returned from {@link #extractMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}.
* These are:
* <ul> * <ul>
* <li>{@code "sys:overwritePolicy"} which can specify the * <li>{@code "sys:overwritePolicy"} which can specify the
* {@code org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy} name. Defaults to "PRAGMATIC".</li> * {@code org.alfresco.repo.content.metadata.MetadataExtracter.OverwritePolicy} name. Defaults to "PRAGMATIC".</li>
@@ -89,7 +86,8 @@ import static org.alfresco.transform.base.metadataExtractors.AbstractMetadataExt
* If a transform specifies that it can convert from {@code "<MIMETYPE>"} to {@code "alfresco-metadata-embed"}, it is * If a transform specifies that it can convert from {@code "<MIMETYPE>"} to {@code "alfresco-metadata-embed"}, it is
* indicating that it can embed metadata in {@code <MIMETYPE>}. * indicating that it can embed metadata in {@code <MIMETYPE>}.
* *
* The transform results in a new version of supplied source file that contains the metadata supplied in the transform * The transform calls {@link #embedMetadata(String, InputStream, String, OutputStream, Map, TransformManager)}
* which should result in a new version of the supplied source file that contains the metadata supplied in the transform
* options. * options.
* *
* @author Jesper Steen Møller * @author Jesper Steen Møller
@@ -162,24 +160,13 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
} }
else else
{ {
extractMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager); extractMapAndWriteMetadata(sourceMimetype, inputStream, targetMimetype, outputStream, transformOptions, transformManager);
} }
} }
public void embedMetadata(String sourceMimetype, InputStream inputStream, public abstract void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
String targetMimetype, OutputStream outputStream, OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
Map<String, String> transformOptions, TransformManager transformManager) throws Exception throws Exception;
{
File sourceFile = transformManager.createSourceFile();
File targetFile = transformManager.createTargetFile();
embedMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
public void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
// Default nothing, as embedding is not supported in most cases
}
protected Map<String, Serializable> getMetadata(Map<String, String> transformOptions) protected Map<String, Serializable> getMetadata(Map<String, String> transformOptions)
{ {
@@ -507,31 +494,18 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return true; return true;
} }
public void extractMetadata(String sourceMimetype, InputStream inputStream, private void extractMapAndWriteMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
String targetMimetype, OutputStream outputStream, OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
Map<String, String> transformOptions, TransformManager transformManager) throws Exception throws Exception
{ {
File sourceFile = transformManager.createSourceFile();
File targetFile = transformManager.createTargetFile();
extractMetadata(sourceMimetype, transformOptions, sourceFile, targetFile);
}
/**
* The {@code transformOptions} may contain a replacement set of mappings. These will be used in place of the
* default mappings from read from file if supplied.
*/
public void extractMetadata(String sourceMimetype, Map<String, String> transformOptions, File sourceFile,
File targetFile) throws Exception
{
Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
// Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping. // Use a ThreadLocal to avoid changing method signatures of methods that currently call getExtractMapping.
Map<String, Set<String>> mapping = getExtractMappingFromOptions(transformOptions, defaultExtractMapping);
try try
{ {
extractMapping.set(mapping); extractMapping.set(mapping);
Map<String, Serializable> metadata = extractMetadata(sourceMimetype, transformOptions, sourceFile); Map<String, Serializable> metadata = extractMetadata(sourceMimetype, inputStream, targetMimetype,
mapMetadataAndWrite(targetFile, metadata, mapping); outputStream, transformOptions, transformManager);
mapMetadataAndWrite(outputStream, metadata, mapping);
} }
finally finally
{ {
@@ -539,8 +513,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
} }
} }
public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions, public abstract Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
File sourceFile) throws Exception; String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
TransformManager transformManager) throws Exception;
private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String, private Map<String, Set<String>> getExtractMappingFromOptions(Map<String, String> transformOptions, Map<String,
Set<String>> defaultExtractMapping) Set<String>> defaultExtractMapping)
@@ -561,17 +536,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return defaultExtractMapping; return defaultExtractMapping;
} }
/** public void mapMetadataAndWrite(OutputStream outputStream, Map<String, Serializable> metadata,
* @deprecated use {@link #extractMetadata(String, Map, File, File)} rather than calling this method.
* By default call the overloaded method with the default {@code extractMapping}.
*/
@Deprecated
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata) throws IOException
{
mapMetadataAndWrite(targetFile, metadata, defaultExtractMapping);
}
public void mapMetadataAndWrite(File targetFile, Map<String, Serializable> metadata,
Map<String, Set<String>> extractMapping) throws IOException Map<String, Set<String>> extractMapping) throws IOException
{ {
if (logger.isDebugEnabled()) if (logger.isDebugEnabled())
@@ -581,7 +546,7 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
} }
metadata = mapRawToSystem(metadata, extractMapping); metadata = mapRawToSystem(metadata, extractMapping);
writeMetadata(targetFile, metadata); writeMetadata(outputStream, metadata);
} }
/** /**
@@ -633,9 +598,9 @@ public abstract class AbstractMetadataExtractor implements CustomTransformer
return new TreeMap<String, Serializable>(systemProperties); return new TreeMap<String, Serializable>(systemProperties);
} }
private void writeMetadata(File targetFile, Map<String, Serializable> results) private void writeMetadata(OutputStream outputStream, Map<String, Serializable> results)
throws IOException throws IOException
{ {
jsonObjectMapper.writeValue(targetFile, results); jsonObjectMapper.writeValue(outputStream, results);
} }
} }

View File

@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.alfresco.transform.common.TransformException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -87,8 +86,17 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
} }
@Override @Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions, public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
File sourceFile) throws Exception OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
throws Exception
{
// This extractor is only used for extract, so the embed implementation is intentionally empty.
}
@Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
TransformManager transformManager) throws Exception
{ {
final Map<String, Serializable> rawProperties = new HashMap<>(); final Map<String, Serializable> rawProperties = new HashMap<>();
@@ -175,10 +183,10 @@ public class HtmlMetadataExtractor extends AbstractMetadataExtractor implements
rawProperties.clear(); rawProperties.clear();
Reader r = null; Reader r = null;
try (InputStream cis = new FileInputStream(sourceFile)) try
{ {
// TODO: for now, use default charset; we should attempt to map from html meta-data // TODO: for now, use default charset; we should attempt to map from html meta-data
r = new InputStreamReader(cis, charsetGuess); r = new InputStreamReader(inputStream, charsetGuess);
HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.Parser parser = new ParserDelegator();
parser.parse(r, callback, tries > 0); parser.parse(r, callback, tries > 0);
break; break;

View File

@@ -29,7 +29,6 @@ package org.alfresco.transform.misc.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer; import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.TransformManager; import org.alfresco.transform.base.TransformManager;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor; import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.alfresco.transform.common.TransformException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -89,14 +88,21 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
} }
@Override @Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions, public void embedMetadata(String sourceMimetype, InputStream inputStream, String targetMimetype,
File sourceFile) throws Exception OutputStream outputStream, Map<String, String> transformOptions, TransformManager transformManager)
throws Exception
{
// This extractor is only used for extract, so the embed implementation is intentionally empty.
}
@Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
TransformManager transformManager) throws Exception
{ {
final Map<String, Serializable> rawProperties = new HashMap<>(); final Map<String, Serializable> rawProperties = new HashMap<>();
try (InputStream is = new FileInputStream(sourceFile)) MimeMessage mimeMessage = new MimeMessage(null, inputStream);
{
MimeMessage mimeMessage = new MimeMessage(null, is);
if (mimeMessage != null) if (mimeMessage != null)
{ {
@@ -187,7 +193,6 @@ public class RFC822MetadataExtractor extends AbstractMetadataExtractor implement
} }
} }
} }
}
return rawProperties; return rawProperties;
} }

View File

@@ -51,7 +51,6 @@ import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler; import org.xml.sax.ContentHandler;
import org.xml.sax.Locator; import org.xml.sax.Locator;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@@ -215,13 +214,12 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
} }
@Override @Override
public Map<String, Serializable> extractMetadata(String sourceMimetype, Map<String, String> transformOptions, public Map<String, Serializable> extractMetadata(String sourceMimetype, InputStream inputStream,
File sourceFile) throws Exception String targetMimetype, OutputStream outputStream, Map<String, String> transformOptions,
TransformManager transformManager) throws Exception
{ {
Map<String, Serializable> rawProperties = new HashMap<>(); Map<String, Serializable> rawProperties = new HashMap<>();
try (InputStream is = new FileInputStream(sourceFile))
{
Parser parser = getParser(); Parser parser = getParser();
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
@@ -243,7 +241,7 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
handler = new NullContentHandler(); handler = new NullContentHandler();
} }
parser.parse(is, handler, metadata, context); parser.parse(inputStream, handler, metadata, context);
// First up, copy all the Tika metadata over // First up, copy all the Tika metadata over
// This allows people to map any of the Tika // This allows people to map any of the Tika
@@ -303,7 +301,6 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
// existing namespace so that older properties // existing namespace so that older properties
// files continue to map correctly // files continue to map correctly
rawProperties = extractSpecific(metadata, rawProperties, headers); rawProperties = extractSpecific(metadata, rawProperties, headers);
}
return rawProperties; return rawProperties;
} }