mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-14 17:58:27 +00:00
REPO-4334 Move metadata extraction into T-Engines (#247)
* Metadata extract code added to T-Engines * Required a refactor of duplicate code to avoid 3x more duplication: - try catches used to return exit codes - calls to java libraries or commands to external processes - building of transform options in controllers, adaptors * integration tests based on current extracts performed in the repo * included extract code for libreoffice, and embed code even though not used out of the box any more. There may well be custom extracts using them that move to T-Engines * removal of unused imports * minor autoOrient / allowEnlargement bug fixes that were not included in Paddington on the T-Engine side.
This commit is contained in:
@@ -26,10 +26,7 @@
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -43,7 +40,9 @@ import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
|
||||
@@ -60,6 +59,9 @@ public class Application
|
||||
return registry -> registry.config().commonTags("containerName", containerName);
|
||||
}
|
||||
|
||||
// To run the LibreOffice T-Engine from the command line on a Mac, you generally need to
|
||||
// install LibreOffice and add: -Dtransform.core.libreoffice.path=/Applications/LibreOffice.app/Contents/
|
||||
// to the start up command.
|
||||
public static void main(String[] args)
|
||||
{
|
||||
SpringApplication.run(Application.class, args);
|
||||
|
@@ -2,7 +2,7 @@
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2019 Alfresco Software Limited
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
@@ -26,37 +26,21 @@
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transformer.fs.FileManager.createAttachment;
|
||||
import static org.alfresco.transformer.fs.FileManager.createSourceFile;
|
||||
import static org.alfresco.transformer.fs.FileManager.createTargetFile;
|
||||
import static org.alfresco.transformer.fs.FileManager.createTargetFileName;
|
||||
import static org.springframework.http.HttpStatus.OK;
|
||||
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA_VALUE;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
|
||||
import org.alfresco.transformer.logging.LogEntry;
|
||||
import org.alfresco.transformer.probes.ProbeTestTransform;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Controller for the Docker based LibreOffice transformer.
|
||||
*
|
||||
*
|
||||
* Status Codes:
|
||||
*
|
||||
* 200 Success
|
||||
@@ -112,44 +96,23 @@ public class LibreOfficeController extends AbstractTransformerController
|
||||
@Override
|
||||
protected void executeTransformCommand(File sourceFile, File targetFile)
|
||||
{
|
||||
transform(null, null, null, Collections.emptyMap(), sourceFile, targetFile);
|
||||
javaExecutor.call(sourceFile, targetFile);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
//todo: the "timeout" request parameter is ignored; the timeout is preset at JodConverter creation
|
||||
@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
|
||||
public ResponseEntity<Resource> transform(HttpServletRequest request,
|
||||
@RequestParam("file") MultipartFile sourceMultipartFile,
|
||||
@RequestParam("targetExtension") String targetExtension,
|
||||
@RequestParam(value = "timeout", required = false) Long timeout,
|
||||
@RequestParam(value = "testDelay", required = false) Long testDelay)
|
||||
@Override
|
||||
protected String getTransformerName(final File sourceFile, final String sourceMimetype,
|
||||
final String targetMimetype, final Map<String, String> transformOptions)
|
||||
{
|
||||
String targetFilename = createTargetFileName(sourceMultipartFile.getOriginalFilename(),
|
||||
targetExtension);
|
||||
getProbeTestTransform().incrementTransformerCount();
|
||||
File sourceFile = createSourceFile(request, sourceMultipartFile);
|
||||
File targetFile = createTargetFile(request, targetFilename);
|
||||
// Both files are deleted by TransformInterceptor.afterCompletion
|
||||
|
||||
javaExecutor.call(sourceFile, targetFile);
|
||||
|
||||
final ResponseEntity<Resource> body = createAttachment(targetFilename, targetFile);
|
||||
LogEntry.setTargetSize(targetFile.length());
|
||||
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
|
||||
time += LogEntry.addDelay(testDelay);
|
||||
getProbeTestTransform().recordTransformTime(time);
|
||||
return body;
|
||||
return null; // does not matter what value is returned, as it is not used because there is only one.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processTransform(final File sourceFile, final File targetFile,
|
||||
final String sourceMimetype, final String targetMimetype,
|
||||
final Map<String, String> transformOptions, final Long timeout)
|
||||
protected void transform(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions, File sourceFile, File targetFile)
|
||||
{
|
||||
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
|
||||
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);
|
||||
|
||||
javaExecutor.call(sourceFile, targetFile);
|
||||
javaExecutor.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1 @@
|
||||
#logging.level.org.alfresco.transformer.LibreOfficeController=debug
|
@@ -6,6 +6,8 @@
|
||||
<form method="POST" enctype="multipart/form-data" action="/transform">
|
||||
<table>
|
||||
<tr><td><div style="text-align:right">file *</div></td><td><input type="file" name="file" /></td></tr>
|
||||
<tr><td><div style="text-align:right">sourceMimetype *</div></td><td><input type="text" name="sourceMimetype" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">targetMimetype</div></td><td><input type="text" name="targetMimetype" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">targetExtension *</div></td><td><input type="text" name="targetExtension" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">timeout</div></td><td><input type="text" name="timeout" value="" /></td></tr>
|
||||
<tr><td><div style="text-align:right">testDelay</div></td><td><input type="text" name="testDelay" value="" /></td></tr>
|
||||
|
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.executors;
|
||||
|
||||
import com.sun.star.beans.PropertyValue;
|
||||
import com.sun.star.beans.UnknownPropertyException;
|
||||
import com.sun.star.beans.XPropertySet;
|
||||
import com.sun.star.document.XDocumentInfoSupplier;
|
||||
import com.sun.star.frame.XComponentLoader;
|
||||
import com.sun.star.io.IOException;
|
||||
import com.sun.star.lang.IllegalArgumentException;
|
||||
import com.sun.star.lang.WrappedTargetException;
|
||||
import com.sun.star.lang.XComponent;
|
||||
import com.sun.star.task.ErrorCodeIOException;
|
||||
import com.sun.star.util.CloseVetoException;
|
||||
import com.sun.star.util.XCloseable;
|
||||
import com.sun.star.util.XRefreshable;
|
||||
import org.artofsolving.jodconverter.office.OfficeContext;
|
||||
import org.artofsolving.jodconverter.office.OfficeException;
|
||||
import org.artofsolving.jodconverter.office.OfficeTask;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.artofsolving.jodconverter.office.OfficeUtils.SERVICE_DESKTOP;
|
||||
import static org.artofsolving.jodconverter.office.OfficeUtils.cast;
|
||||
import static org.artofsolving.jodconverter.office.OfficeUtils.toUrl;
|
||||
|
||||
/**
|
||||
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1
|
||||
*
|
||||
* Extracts values from Open Office documents into the following:
|
||||
* <pre>
|
||||
* <b>author:</b> -- cm:author
|
||||
* <b>title:</b> -- cm:title
|
||||
* <b>description:</b> -- cm:description
|
||||
* </pre>
|
||||
*
|
||||
* @author Neil McErlean
|
||||
* @author adavis
|
||||
*/
|
||||
@Deprecated
|
||||
public class LibreOfficeExtractMetadataTask implements OfficeTask
|
||||
{
|
||||
/*
|
||||
* These keys are used by Alfresco to map properties into a content model and do need to
|
||||
* have lower-case initial letters.
|
||||
*/
|
||||
private static final String KEY_AUTHOR = "author";
|
||||
private static final String KEY_TITLE = "title";
|
||||
private static final String KEY_DESCRIPTION = "description";
|
||||
|
||||
private File inputFile;
|
||||
private Map<String, Serializable> metadata = new HashMap<String, Serializable>();
|
||||
|
||||
public LibreOfficeExtractMetadataTask(File inputFile)
|
||||
{
|
||||
this.inputFile = inputFile;
|
||||
}
|
||||
|
||||
public Map<String, Serializable> getMetadata()
|
||||
{
|
||||
return metadata;
|
||||
}
|
||||
|
||||
public void execute(OfficeContext context)
|
||||
{
|
||||
XComponent document = null;
|
||||
try
|
||||
{
|
||||
if (!inputFile.exists())
|
||||
{
|
||||
throw new OfficeException("input document not found");
|
||||
}
|
||||
XComponentLoader loader = cast(XComponentLoader.class, context
|
||||
.getService(SERVICE_DESKTOP));
|
||||
|
||||
// Need to set the Hidden property to ensure that OOo GUI does not appear.
|
||||
PropertyValue hiddenOOo = new PropertyValue();
|
||||
hiddenOOo.Name = "Hidden";
|
||||
hiddenOOo.Value = Boolean.TRUE;
|
||||
PropertyValue readOnly = new PropertyValue();
|
||||
readOnly.Name = "ReadOnly";
|
||||
readOnly.Value = Boolean.TRUE;
|
||||
|
||||
try
|
||||
{
|
||||
// TODO The following call fails. Not debugged why as it appears this extractor is not used any more.
|
||||
document = loader.loadComponentFromURL(toUrl(inputFile), "_blank", 0,
|
||||
new PropertyValue[]{hiddenOOo, readOnly});
|
||||
}
|
||||
catch (IllegalArgumentException illegalArgumentException)
|
||||
{
|
||||
throw new OfficeException("could not load document: "
|
||||
+ inputFile.getName(), illegalArgumentException);
|
||||
}
|
||||
catch (ErrorCodeIOException errorCodeIOException)
|
||||
{
|
||||
throw new OfficeException("could not load document: "
|
||||
+ inputFile.getName() + "; errorCode: "
|
||||
+ errorCodeIOException.ErrCode, errorCodeIOException);
|
||||
}
|
||||
catch (IOException ioException)
|
||||
{
|
||||
throw new OfficeException("could not load document: "
|
||||
+ inputFile.getName(), ioException);
|
||||
}
|
||||
if (document == null)
|
||||
{
|
||||
throw new OfficeException("could not load document: "
|
||||
+ inputFile.getName());
|
||||
}
|
||||
XRefreshable refreshable = cast(XRefreshable.class, document);
|
||||
if (refreshable != null)
|
||||
{
|
||||
refreshable.refresh();
|
||||
}
|
||||
|
||||
XDocumentInfoSupplier docInfoSupplier = cast(XDocumentInfoSupplier.class, document);
|
||||
XPropertySet propSet = cast(XPropertySet.class, docInfoSupplier.getDocumentInfo());
|
||||
|
||||
// The strings below are property names as used by OOo. They need upper-case
|
||||
// initial letters.
|
||||
Object author = getPropertyValueIfAvailable(propSet, "Author");
|
||||
Object description = getPropertyValueIfAvailable(propSet, "Subject");
|
||||
Object title = getPropertyValueIfAvailable(propSet, "Title");
|
||||
|
||||
metadata = new HashMap<String, Serializable>(3);
|
||||
metadata.put(KEY_AUTHOR, author == null ? null : author.toString());
|
||||
metadata.put(KEY_DESCRIPTION, description == null ? null : description.toString());
|
||||
metadata.put(KEY_TITLE, title == null ? null : title.toString());
|
||||
}
|
||||
catch (OfficeException officeException)
|
||||
{
|
||||
throw officeException;
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
throw new OfficeException("conversion failed", exception);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (document != null)
|
||||
{
|
||||
XCloseable closeable = cast(XCloseable.class, document);
|
||||
if (closeable != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
closeable.close(true);
|
||||
}
|
||||
catch (CloseVetoException closeVetoException)
|
||||
{
|
||||
// whoever raised the veto should close the document
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
document.dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* OOo throws exceptions if we ask for properties that aren't there, so we'll tread carefully.
|
||||
*
|
||||
* @param propSet
|
||||
* @param propertyName property name as used by the OOo API.
|
||||
* @throws UnknownPropertyException
|
||||
* @throws WrappedTargetException
|
||||
*/
|
||||
private Object getPropertyValueIfAvailable(XPropertySet propSet, String propertyName)
|
||||
throws UnknownPropertyException, WrappedTargetException
|
||||
{
|
||||
if (propSet.getPropertySetInfo().hasPropertyByName(propertyName))
|
||||
{
|
||||
return propSet.getPropertyValue(propertyName);
|
||||
}
|
||||
else
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@@ -26,12 +26,8 @@
|
||||
*/
|
||||
package org.alfresco.transformer.executors;
|
||||
|
||||
import static org.springframework.http.HttpStatus.BAD_REQUEST;
|
||||
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.sun.star.task.ErrorCodeIOException;
|
||||
import org.alfresco.transform.exceptions.TransformException;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
@@ -42,7 +38,13 @@ import org.artofsolving.jodconverter.office.OfficeManager;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.sun.star.task.ErrorCodeIOException;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.springframework.http.HttpStatus.BAD_REQUEST;
|
||||
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
||||
|
||||
/**
|
||||
* JavaExecutor implementation for running LibreOffice transformations. It loads the
|
||||
@@ -50,6 +52,8 @@ import com.sun.star.task.ErrorCodeIOException;
|
||||
*/
|
||||
public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
{
|
||||
// Identifier returned by getTransformerId(); it never changes, so it is a true constant.
private static final String ID = "libreoffice";
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(LibreOfficeJavaExecutor.class);
|
||||
|
||||
private static final int JODCONVERTER_TRANSFORMATION_ERROR_CODE = 3088;
|
||||
@@ -58,7 +62,9 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
|
||||
public static final String LICENCE = "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt";
|
||||
|
||||
private JodConverter jodconverter;
|
||||
private final JodConverter jodconverter;
|
||||
|
||||
private final ObjectMapper jsonObjectMapper = new ObjectMapper();
|
||||
|
||||
public LibreOfficeJavaExecutor(String path)
|
||||
{
|
||||
@@ -76,7 +82,7 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
|
||||
final JodConverterSharedInstance jodconverter = new JodConverterSharedInstance();
|
||||
|
||||
jodconverter.setOfficeHome(LIBREOFFICE_HOME); // jodconverter.officeHome
|
||||
jodconverter.setOfficeHome(LIBREOFFICE_HOME); // jodconverter.officeHome
|
||||
jodconverter.setMaxTasksPerProcess("200"); // jodconverter.maxTasksPerProcess
|
||||
jodconverter.setTaskExecutionTimeout(timeout); // jodconverter.maxTaskExecutionTimeout
|
||||
jodconverter.setTaskQueueTimeout(timeout); // jodconverter.taskQueueTimeout
|
||||
@@ -89,6 +95,19 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
return jodconverter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTransformerId()
|
||||
{
|
||||
return ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String transformName, String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile)
|
||||
{
|
||||
call(sourceFile, targetFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void call(File sourceFile, File targetFile, String... args)
|
||||
{
|
||||
@@ -147,7 +166,7 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
|
||||
PDPage pdfPage = new PDPage();
|
||||
try (PDDocument pdfDoc = new PDDocument();
|
||||
PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, pdfPage))
|
||||
PDPageContentStream ignore = new PDPageContentStream(pdfDoc, pdfPage))
|
||||
{
|
||||
// Even though, we want an empty PDF, some libs (e.g. PDFRenderer) object to PDFs
|
||||
// that have literally nothing in them. So we'll put a content stream in it.
|
||||
@@ -162,4 +181,48 @@ public class LibreOfficeJavaExecutor implements JavaExecutor
|
||||
"Error creating empty PDF file", iox);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated The JodConverterMetadataExtracter has not been in use since 6.0.1.
|
||||
* This code exists in case there are custom implementations, that need to be converted to T-Engines.
|
||||
* It is simply a copy and paste from the content repository and has received limited testing.
|
||||
*/
|
||||
@Override
|
||||
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile)
|
||||
{
|
||||
OfficeManager officeManager = jodconverter.getOfficeManager();
|
||||
LibreOfficeExtractMetadataTask extractMetadataTask = new LibreOfficeExtractMetadataTask(sourceFile);
|
||||
try
|
||||
{
|
||||
officeManager.execute(extractMetadataTask);
|
||||
}
|
||||
catch (OfficeException e)
|
||||
{
|
||||
throw new TransformException(BAD_REQUEST.value(),
|
||||
"LibreOffice metadata extract failed: \n" +
|
||||
" from file: " + sourceFile, e);
|
||||
}
|
||||
Map<String, Serializable> metadata = extractMetadataTask.getMetadata();
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
metadata.forEach((k,v) -> logger.debug(k+"="+v));
|
||||
}
|
||||
|
||||
writeMetadataIntoTargetFile(targetFile, metadata);
|
||||
}
|
||||
|
||||
private void writeMetadataIntoTargetFile(File targetFile, Map<String, Serializable> results)
|
||||
{
|
||||
try
|
||||
{
|
||||
jsonObjectMapper.writeValue(targetFile, results);
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new TransformException(INTERNAL_SERVER_ERROR.value(), "Failed to write metadata to targetFile", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user