Save point: [skip ci]

* Beginnings of new t-base (using TransformEngine and CustomTransformer, no need for a controller or Application in t-engine modules)
* Using org.alfresco.transform.<module> package
* Beginnings of new Tika t-engine
This commit is contained in:
alandavis
2022-06-30 13:39:24 +01:00
parent 78c82c9a01
commit b619f27207
157 changed files with 8740 additions and 1543 deletions

View File

@@ -102,6 +102,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -24,11 +24,12 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.config.TransformConfig;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transform.base.executors.Transformer;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -45,8 +46,8 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.config.CoreVersionDecorator.setOrClearCoreVersion;
import static org.alfresco.transform.common.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transformer.util.RequestParamMap.CONFIG_VERSION;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transform.base.util.RequestParamMap.CONFIG_VERSION;
import static org.alfresco.transform.base.util.RequestParamMap.SOURCE_ENCODING;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;

View File

@@ -24,15 +24,16 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.aio.AIOTransformRegistry;
import org.alfresco.transform.registry.TransformServiceRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.alfresco.transform.imagemagick.transformers.ImageMagickCommandExecutor;
import org.alfresco.transform.office.transformers.LibreOfficeJavaExecutor;
import org.alfresco.transform.pdfRenderer.transformers.PdfRendererCommandExecutor;
import org.alfresco.transform.base.executors.Transformer;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transform.misc.transformers.SelectingTransformer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -113,7 +114,7 @@ public class AIOCustomConfig
List<Transformer> getTEnginesSortedByName()
{
return Stream.of(new SelectingTransformer(),
new TikaJavaExecutor(notExtractBookmarksTextDefault),
// new TikaJavaExecutor(notExtractBookmarksTextDefault),
new ImageMagickCommandExecutor(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath),
new LibreOfficeJavaExecutor(libreofficePath, libreofficeMaxTasksPerProcess, libreofficeTimeout, libreofficePortNumbers, libreofficeTemplateProfileDir, libreofficeIsEnabled),
new PdfRendererCommandExecutor(pdfRendererPath))

View File

@@ -1,85 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
/**
 * Spring Boot entry point that starts the application context and, once it is
 * ready, logs the licence notices of the executors/transformers it references
 * (ImageMagick, LibreOffice, Tika, PDF renderer and SelectingTransformer).
 *
 * <p>{@link DataSourceAutoConfiguration} is explicitly excluded from
 * auto-configuration.
 * NOTE(review): presumably because this application has no database - confirm.
 */
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
// Injected from the "container.name" property; used as a common metrics tag below.
@Value("${container.name}")
private String containerName;
/**
 * Registers a customizer that adds a common {@code containerName} tag, set to
 * the injected {@code container.name} value, to the {@link MeterRegistry} configuration.
 *
 * @return the customizer bean applied to the meter registry
 */
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
/**
 * Launches the Spring application using this class as the primary source.
 *
 * @param args command line arguments passed through to {@link SpringApplication#run}
 */
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
/**
 * Invoked on {@link ApplicationReadyEvent}: writes the licence texts at INFO
 * level between two separator lines, followed by a final "Done" message.
 */
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
// Licence strings that are split on newlines are logged one line per log entry.
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
logger.info(ImageMagickCommandExecutor.LICENCE);
logger.info(LibreOfficeJavaExecutor.LICENCE);
Arrays.stream(TikaJavaExecutor.LICENCE.split("\\n")).forEach(logger::info);
logger.info(PdfRendererCommandExecutor.LICENCE);
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,14 +24,19 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.aio.AIOTransformRegistry;
import org.alfresco.transform.imagemagick.ImageMagickControllerTest;
import org.alfresco.transform.base.executors.Transformer;
import org.alfresco.transformer.AbstractTransformerController;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
@@ -51,8 +56,7 @@ public class AIOControllerImageMagickTest extends ImageMagickControllerTest
{
// All tests contained in ImageMagickControllerTest
@Autowired
AIOTransformRegistry transformRegistry;
@Autowired AIOTransformRegistry transformRegistry;
@BeforeEach @Override
public void before() throws IOException

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,14 +24,19 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.aio.AIOTransformRegistry;
import org.alfresco.transform.office.LibreOfficeControllerTest;
import org.alfresco.transform.office.transformers.LibreOfficeJavaExecutor;
import org.alfresco.transform.base.executors.Transformer;
import org.alfresco.transformer.AbstractTransformerController;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
@@ -56,8 +61,7 @@ public class AIOControllerLibreOfficeTest extends LibreOfficeControllerTest
assertTrue(controller instanceof AIOController,"Wrong controller wired for test");
}
@Autowired
AIOTransformRegistry transformRegistry;
@Autowired AIOTransformRegistry transformRegistry;
@Override
// Used by the super class to mock the javaExecutor, a different implementation is required here

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,13 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.misc.MiscControllerTest;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,14 +24,18 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.aio.AIOTransformRegistry;
import org.alfresco.transform.pdfRenderer.AlfrescoPdfRendererControllerTest;
import org.alfresco.transform.base.executors.Transformer;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
@@ -48,8 +52,7 @@ import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilde
*/
public class AIOControllerPdfRendererTest extends AlfrescoPdfRendererControllerTest
{
@Autowired
AIOTransformRegistry transformRegistry;
@Autowired AIOTransformRegistry transformRegistry;
@Override
protected void setFields()

View File

@@ -24,12 +24,15 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import java.io.IOException;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.config.TransformConfig;
import org.alfresco.transformer.AbstractTransformerController;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,13 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.alfresco.transform.tika.TikaControllerTest;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.aio.AIOController;
import org.alfresco.transform.aio.AIOCustomConfig;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.imagemagick.ImageMagickTransformationIT;
public class AIOImageMagickIT extends ImageMagickTransformationIT
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.office.LibreOfficeTransformationIT;
public class AIOLibreOfficeTransformationIT extends LibreOfficeTransformationIT
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.misc.MiscMetadataExtractsIT;
/**
* Metadata integration tests in the Misc T-Engine, but run from the AIO T-Engine.

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.misc.MiscTransformsIT;
public class AIOMiscTransformsIT extends MiscTransformsIT
{

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.pdfRenderer.AlfrescoPdfRendererTransformationIT;
public class AIOPdfRendererIT extends AlfrescoPdfRendererTransformationIT {
// Tests are in AlfrescoPdfRendererTransformationIT

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
@@ -32,6 +32,7 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.tika.TikaMetadataExtractsIT;
/**
* Metadata integration tests in the Tika T-Engine, but run from the AIO T-Engine.

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import org.alfresco.transform.tika.TikaTransformationIT;
public class AIOTikaTransformationIT extends TikaTransformationIT
{

View File

@@ -24,14 +24,14 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.config.TransformConfig;
import org.alfresco.transform.registry.AbstractTransformRegistry;
import org.alfresco.transform.registry.CombinedTransformConfig;
import org.alfresco.transform.registry.TransformCache;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transform.base.executors.Transformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -24,13 +24,12 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.aio;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.aio.AIOTransformRegistry;
import org.alfresco.transform.config.TransformConfig;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.alfresco.transform.base.executors.Transformer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.BeforeEach;
@@ -50,7 +49,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.alfresco.transformer.util.RequestParamMap.PAGE_LIMIT;
import static org.alfresco.transform.base.util.RequestParamMap.PAGE_LIMIT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -67,8 +66,8 @@ public class AIOTransformRegistryTest
@BeforeEach
public void before() throws Exception
{
aioTransformerRegistry.registerTransformer(new SelectingTransformer());
aioTransformerRegistry.registerTransformer(new TikaJavaExecutor());
// aioTransformerRegistry.registerTransformer(new SelectingTransformer());
// aioTransformerRegistry.registerTransformer(new TikaJavaExecutor());
aioTransformerRegistry.registerCombinedTransformers();
}

View File

@@ -66,6 +66,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transform.imagemagick.transformers.ImageMagickCommandExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -24,9 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transform.imagemagick.transformers.ImageMagickCommandExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.hamcrest.Matchers.containsString;
@@ -58,7 +58,9 @@ import javax.annotation.PostConstruct;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transform.imagemagick.transformers.ImageMagickCommandExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.AbstractTransformerControllerTest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.executors.RuntimeExec.ExecutionResult;
import org.alfresco.transformer.model.FileRefEntity;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_PNG;
@@ -32,6 +32,7 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_PNG;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import static java.text.MessageFormat.format;
import static java.util.Collections.emptyMap;
@@ -76,6 +76,7 @@ import java.util.stream.Stream;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.TestFileInfo;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.imagemagick;
import com.google.common.collect.ImmutableList;
import org.alfresco.transform.common.TransformException;

View File

@@ -24,10 +24,12 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.imagemagick.transformers;
import org.alfresco.transform.imagemagick.ImageMagickOptionsBuilder;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transformer.ImageMagickOptionsBuilder;
import org.alfresco.transformer.executors.AbstractCommandExecutor;
import org.alfresco.transformer.executors.RuntimeExec;
import java.io.File;
import java.util.HashMap;

View File

@@ -74,6 +74,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transform.office.transformers.LibreOfficeJavaExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -24,9 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transform.office.transformers.LibreOfficeJavaExecutor;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
@@ -58,7 +58,9 @@ import javax.annotation.PostConstruct;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.AbstractTransformerControllerTest;
import org.alfresco.transform.office.transformers.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.RuntimeExec.ExecutionResult;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,13 +24,14 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.office;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
@@ -54,7 +54,6 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_SPREADSHEET_TEMPLATE_MACRO;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION_SLIDESHOW_MACRO;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_DITA;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_SXI;
@@ -72,6 +71,7 @@ import java.util.stream.Stream;
import com.google.common.collect.ImmutableSet;
import org.alfresco.transformer.TestFileInfo;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.office.transformers;
import org.artofsolving.jodconverter.office.OfficeManager;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.office.transformers;
import static java.util.Arrays.asList;
import static java.util.Objects.requireNonNull;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.office.transformers;
import com.sun.star.beans.PropertyValue;
import com.sun.star.beans.UnknownPropertyException;

View File

@@ -24,11 +24,12 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.office.transformers;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.sun.star.task.ErrorCodeIOException;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transformer.executors.JavaExecutor;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

View File

@@ -62,6 +62,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.alfresco.transform.misc.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -24,10 +24,11 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.alfresco.transform.misc.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
@@ -39,7 +40,6 @@ import java.util.Map;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@Controller
public class MiscController extends AbstractTransformerController

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
@@ -48,6 +48,8 @@ import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.AbstractTransformerControllerTest;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.BeforeEach;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_RFC822;
@@ -33,6 +33,8 @@ import static org.alfresco.transformer.TestFileInfo.testFile;
import java.util.stream.Stream;
import org.alfresco.transformer.AbstractMetadataExtractsIT;
import org.alfresco.transformer.TestFileInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
@@ -32,6 +32,7 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.misc;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
@@ -63,6 +63,8 @@ import static org.springframework.http.HttpStatus.OK;
import java.util.Map;
import java.util.stream.Stream;
import org.alfresco.transformer.SourceTarget;
import org.alfresco.transformer.TestFileInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* Copyright (C) 2005-2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,7 +26,7 @@
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.alfresco.transform.misc.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005-2020 Alfresco Software Limited
* Copyright (C) 2005-2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -26,7 +26,7 @@
*/
package org.alfresco.transformer.metadataExtractors;
import org.alfresco.transformer.transformers.SelectableTransformer;
import org.alfresco.transform.misc.transformers.SelectableTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import java.io.File;
import java.util.Map;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.junit.jupiter.api.Test;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
package org.alfresco.transform.misc.transformers;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

View File

@@ -0,0 +1,63 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.common;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.alfresco.transform.config.TransformConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
/**
 * Spring component that loads a {@link TransformConfig} from a JSON resource.
 *
 * The resource is located through the injected {@link ResourceLoader} and parsed with a Jackson
 * {@link ObjectMapper}.
 */
@Component
public class TransformConfigResourceReader
{
    @Autowired ResourceLoader resourceLoader;

    private ObjectMapper jsonObjectMapper = new ObjectMapper();

    /**
     * Reads the resource at the supplied location as UTF-8 text and maps its JSON content onto a
     * {@link TransformConfig}.
     *
     * @param engineConfigLocation the location handed to the {@link ResourceLoader}.
     * @return the {@link TransformConfig} deserialized from the resource.
     * @throws TransformException with an INTERNAL_SERVER_ERROR status code if an {@link IOException}
     *         is raised while opening or parsing the resource. The original exception is kept as the cause.
     */
    public TransformConfig read(String engineConfigLocation)
    {
        // The reader (and the stream it wraps) is closed by the try-with-resources statement.
        try (Reader configReader = new InputStreamReader(
                resourceLoader.getResource(engineConfigLocation).getInputStream(), UTF_8))
        {
            return jsonObjectMapper.readValue(configReader, TransformConfig.class);
        }
        catch (IOException ioException)
        {
            throw new TransformException(INTERNAL_SERVER_ERROR.value(),
                    "Could not read " + engineConfigLocation, ioException);
        }
    }
}

View File

@@ -62,6 +62,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -24,9 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transform.pdfRenderer.transformers.PdfRendererCommandExecutor;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,10 +24,10 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transform.pdfRenderer.transformers.PdfRendererCommandExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.hamcrest.Matchers.containsString;
@@ -59,7 +59,9 @@ import javax.annotation.PostConstruct;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.AbstractTransformerController;
import org.alfresco.transformer.AbstractTransformerControllerTest;
import org.alfresco.transform.pdfRenderer.transformers.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.executors.RuntimeExec.ExecutionResult;
import org.alfresco.transformer.model.FileRefEntity;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import org.alfresco.transformer.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
@@ -32,6 +32,7 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
@@ -38,6 +38,7 @@ import static org.springframework.http.HttpStatus.OK;
import java.util.Map;
import java.util.stream.Stream;
import org.alfresco.transformer.TestFileInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.pdfRenderer;
import static org.alfresco.transformer.util.Util.stringToBoolean;
import static org.alfresco.transformer.util.Util.stringToInteger;

View File

@@ -24,10 +24,12 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.pdfRenderer.transformers;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transformer.PdfRendererOptionsBuilder;
import org.alfresco.transform.pdfRenderer.PdfRendererOptionsBuilder;
import org.alfresco.transformer.executors.AbstractCommandExecutor;
import org.alfresco.transformer.executors.RuntimeExec;
import java.io.File;
import java.util.HashMap;

View File

@@ -20,12 +20,12 @@
<dependencies>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<artifactId>alfresco-t-engine-base</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<artifactId>alfresco-t-engine-base</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
@@ -146,6 +146,9 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.alfresco.transform.base.Application</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@@ -1,111 +0,0 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
 * Controller for the Docker based Tika transformers. All transform work is delegated to a
 * {@link TikaJavaExecutor}.
 *
 * Status Codes:
 *
 * 200 Success
 * 400 Bad Request: Invalid target mimetype &lt;mimetype&gt;
 * 400 Bad Request: Request parameter &lt;name&gt; is missing (missing mandatory parameter)
 * 400 Bad Request: Request parameter &lt;name&gt; is of the wrong type
 * 400 Bad Request: Transformer exit code was not 0 (possible problem with the source file)
 * 400 Bad Request: The source filename was not supplied
 * 500 Internal Server Error: (no message with low level IO problems)
 * 500 Internal Server Error: The target filename was not supplied (should not happen as targetExtension is checked)
 * 500 Internal Server Error: Transformer version check exit code was not 0
 * 500 Internal Server Error: Transformer version check failed to create any output
 * 500 Internal Server Error: Could not read the target file
 * 500 Internal Server Error: The target filename was malformed (should not happen because of other checks)
 * 500 Internal Server Error: Transformer failed to create an output file (the exit code was 0, so there should be some content)
 * 500 Internal Server Error: Filename encoding error
 * 507 Insufficient Storage: Failed to store the source file
 */
@Controller
public class TikaController extends AbstractTransformerController
{
    private static final Logger logger = LoggerFactory.getLogger(TikaController.class);

    private TikaJavaExecutor javaExecutor;

    /**
     * Creates the controller and the executor it delegates to.
     *
     * @param notExtractBookmarksTextDefault value of the
     *        {@code transform.core.tika.pdfBox.notExtractBookmarksTextDefault} property
     *        ({@code false} when the property is not set), passed on to the {@link TikaJavaExecutor}.
     */
    public TikaController(
            @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}") boolean notExtractBookmarksTextDefault)
    {
        this.javaExecutor = new TikaJavaExecutor(notExtractBookmarksTextDefault);
    }

    /**
     * @return the fixed transformer name, {@code "Tika"}.
     */
    @Override
    public String getTransformerName()
    {
        return "Tika";
    }

    /**
     * @return a fixed availability message rather than a real version number.
     */
    @Override
    public String version()
    {
        return "Tika available";
    }

    /**
     * Builds the probe used to test this engine: a PDF to plain text transform of
     * {@code quick.pdf} using the {@code PDF_BOX} transformer.
     */
    @Override
    public ProbeTestTransform getProbeTestTransform()
    {
        // See the Javadoc on this method and Probes.md for the choice of these values.
        // The livenessPercentage is a little large as Tika does tend to suffer from slow transforms that clash with a GC.
        return new ProbeTestTransform(this, "quick.pdf", "quick.txt",
                60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20)
        {
            @Override
            protected void executeTransformCommand(File sourceFile, File targetFile)
            {
                // The probe runs without any transform options.
                Map<String, String> noOptions = new HashMap<>();
                transformImpl(PDF_BOX, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, noOptions, sourceFile, targetFile);
            }
        };
    }

    /**
     * Hands the request straight to {@link TikaJavaExecutor#transformExtractOrEmbed}.
     *
     * @param transformName name of the transformer to use.
     * @param sourceMimetype mimetype of the source file.
     * @param targetMimetype mimetype of the target file.
     * @param transformOptions transform options, passed through unchanged.
     * @param sourceFile the file to read.
     * @param targetFile the file to write.
     */
    @Override
    public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
    {
        javaExecutor.transformExtractOrEmbed(
                transformName, sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }
}

View File

@@ -24,30 +24,30 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.tika;
import static java.nio.file.Files.readAllBytes;
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
import static org.alfresco.transformer.executors.Tika.ARCHIVE;
import static org.alfresco.transformer.executors.Tika.CSV;
import static org.alfresco.transformer.executors.Tika.DOC;
import static org.alfresco.transformer.executors.Tika.DOCX;
import static org.alfresco.transformer.executors.Tika.HTML;
import static org.alfresco.transformer.executors.Tika.MSG;
import static org.alfresco.transformer.executors.Tika.OUTLOOK_MSG;
import static org.alfresco.transformer.executors.Tika.PDF;
import static org.alfresco.transformer.executors.Tika.PDF_BOX;
import static org.alfresco.transformer.executors.Tika.POI;
import static org.alfresco.transformer.executors.Tika.POI_OFFICE;
import static org.alfresco.transformer.executors.Tika.POI_OO_XML;
import static org.alfresco.transformer.executors.Tika.PPTX;
import static org.alfresco.transformer.executors.Tika.TEXT_MINING;
import static org.alfresco.transformer.executors.Tika.TIKA_AUTO;
import static org.alfresco.transformer.executors.Tika.TXT;
import static org.alfresco.transformer.executors.Tika.XHTML;
import static org.alfresco.transformer.executors.Tika.XML;
import static org.alfresco.transformer.executors.Tika.XSLX;
import static org.alfresco.transformer.executors.Tika.ZIP;
import static org.alfresco.transform.tika.transformers.Tika.ARCHIVE;
import static org.alfresco.transform.tika.transformers.Tika.CSV;
import static org.alfresco.transform.tika.transformers.Tika.DOC;
import static org.alfresco.transform.tika.transformers.Tika.DOCX;
import static org.alfresco.transform.tika.transformers.Tika.HTML;
import static org.alfresco.transform.tika.transformers.Tika.MSG;
import static org.alfresco.transform.tika.transformers.Tika.OUTLOOK_MSG;
import static org.alfresco.transform.tika.transformers.Tika.PDF;
import static org.alfresco.transform.tika.transformers.Tika.PDF_BOX;
import static org.alfresco.transform.tika.transformers.Tika.POI;
import static org.alfresco.transform.tika.transformers.Tika.OFFICE;
import static org.alfresco.transform.tika.transformers.Tika.OOXML;
import static org.alfresco.transform.tika.transformers.Tika.PPTX;
import static org.alfresco.transform.tika.transformers.Tika.TEXT_MINING;
import static org.alfresco.transform.tika.transformers.Tika.TIKA_AUTO;
import static org.alfresco.transform.tika.transformers.Tika.TXT;
import static org.alfresco.transform.tika.transformers.Tika.XHTML;
import static org.alfresco.transform.tika.transformers.Tika.XML;
import static org.alfresco.transform.tika.transformers.Tika.XSLX;
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_METADATA_EMBED;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_PRESENTATION;
@@ -61,8 +61,8 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_WORD;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XHTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_XML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_ZIP;
import static org.alfresco.transformer.util.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transformer.util.RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transform.base.util.RequestParamMap.INCLUDE_CONTENTS;
import static org.alfresco.transform.base.util.RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -91,12 +91,14 @@ import java.util.UUID;
import javax.servlet.http.HttpServletRequest;
import org.alfresco.transform.base.AbstractTransformerControllerTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.client.model.TransformReply;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transformer.executors.RuntimeExec;
import org.alfresco.transformer.model.FileRefEntity;
import org.alfresco.transformer.model.FileRefResponse;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transform.base.executors.RuntimeExec;
import org.alfresco.transform.base.model.FileRefEntity;
import org.alfresco.transform.base.model.FileRefResponse;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeEach;
@@ -142,9 +144,6 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Mock
private RuntimeExec mockCheckCommand;
@Autowired
protected AbstractTransformerController controller;
private String targetEncoding = "UTF-8";
private String targetMimetype = MIMETYPE_TEXT_PLAIN;
@@ -236,12 +235,6 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
when(mockExecutionResult.getStdOut()).thenReturn("STDOUT");
}
@Override
protected AbstractTransformerController getController()
{
return controller;
}
private void transform(String transform, String sourceExtension, String targetExtension,
String sourceMimetype, String targetMimetype,
Boolean includeContents, String expectedContentContains) throws Exception
@@ -284,9 +277,10 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
public void testImmutableEmptyMap()
{
// See ACS-373
ProbeTestTransform probeTestTransform = getController().getProbeTestTransform();
TransformController controller = getController();
ProbeTestTransform probeTestTransform = getProbeTestTransform();
ReflectionTestUtils.setField(probeTestTransform, "livenessTransformEnabled", true);
probeTestTransform.doTransformOrNothing(httpServletRequest, true);
probeTestTransform.doTransformOrNothing(httpServletRequest, true, controller);
}
@Test
@@ -483,14 +477,14 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void msgToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
transform(OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_MSG_CONTENT_CONTAINS);
}
@Test
public void docToTxtOfficeTest() throws Exception
{
transform(POI_OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
transform(OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@@ -508,14 +502,14 @@ public class TikaControllerTest extends AbstractTransformerControllerTest
@Test
public void docxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
transform(OOXML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}
@Test
public void pptxToTxtOoXmlTest() throws Exception
{
transform(POI_OO_XML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
transform(OOXML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
EXPECTED_TEXT_CONTENT_CONTAINS);
}

View File

@@ -24,15 +24,11 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.springframework.http.MediaType.MULTIPART_FORM_DATA;
package org.alfresco.transform.tika;
import org.alfresco.transform.base.AbstractHttpRequestTest;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
import org.springframework.core.io.ClassPathResource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.util.LinkedMultiValueMap;
/**

View File

@@ -24,11 +24,11 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.tika;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_APP_DWG;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OUTLOOK_MSG;
import static org.alfresco.transformer.TestFileInfo.testFile;
import static org.alfresco.transform.base.TestFileInfo.testFile;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_AUDIO_MP4;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_EXCEL;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_BMP;
@@ -74,6 +74,8 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_RAW_NEF;
import java.util.stream.Stream;
import org.alfresco.transform.base.AbstractMetadataExtractsIT;
import org.alfresco.transform.base.TestFileInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.tika;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
@@ -32,6 +32,7 @@ import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.base.AbstractQueueTransformServiceIT;
import org.springframework.boot.test.context.SpringBootTest;
/**

View File

@@ -24,11 +24,11 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
package org.alfresco.transform.tika;
import static java.text.MessageFormat.format;
import static java.util.function.Function.identity;
import static org.alfresco.transformer.EngineClient.sendTRequest;
import static org.alfresco.transform.base.EngineClient.sendTRequest;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import static org.springframework.http.HttpStatus.OK;

View File

@@ -14,7 +14,7 @@
<dependencies>
<dependency>
<groupId>org.alfresco</groupId>
<artifactId>alfresco-transformer-base</artifactId>
<artifactId>alfresco-t-engine-base</artifactId>
<version>${project.version}</version>
</dependency>

View File

@@ -0,0 +1,78 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika;
import org.alfresco.transform.base.TransformEngine;
import org.alfresco.transform.base.probes.ProbeTestTransform;
import org.alfresco.transform.common.TransformConfigResourceReader;
import org.alfresco.transform.config.TransformConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.Collections;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
 * {@link TransformEngine} implementation describing the Tika t-engine: its name, the
 * licence text shown at startup, its transform configuration and the probe transform
 * used for liveness and readiness checks.
 */
@Component
public class TikaTransformEngine implements TransformEngine
{
    /** Licence notice returned as the startup message. */
    private static final String LICENCE =
        "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n" +
        "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt";

    /** Reader used to load the engine's transform configuration resource. */
    @Autowired
    private TransformConfigResourceReader transformConfigResourceReader;

    /** Location of the engine config; defaults to {@code classpath:engine_config.json}. */
    @Value("${transform.core.config.location:classpath:engine_config.json}")
    private String engineConfigLocation;

    /**
     * @return the fixed engine name {@code "0001-Tika"}
     */
    @Override
    public String getTransformEngineName()
    {
        return "0001-Tika";
    }

    /**
     * @return the licence notice for the third party tools used by this engine
     */
    @Override
    public String getStartupMessage()
    {
        return LICENCE;
    }

    /**
     * Reads the transform configuration from the configured location.
     *
     * @return the configuration read from {@code engineConfigLocation}
     */
    @Override
    public TransformConfig getTransformConfig()
    {
        final String location = this.engineConfigLocation;
        return this.transformConfigResourceReader.read(location);
    }

    /**
     * Builds the probe transform: {@code quick.pdf} (PDF) to {@code quick.txt} (plain text)
     * with no transform options.
     * <p>
     * NOTE(review): a new {@link ProbeTestTransform} is created on every call - confirm the
     * caller only invokes this once if the probe is expected to keep state between checks.
     *
     * @return a newly created probe transform
     */
    @Override
    public ProbeTestTransform getLivenessAndReadinessProbeTestTransform()
    {
        final ProbeTestTransform probe = new ProbeTestTransform(
            "quick.pdf", "quick.txt",
            MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN,
            Collections.emptyMap(),
            60, 16, 400, 10240, 60 * 30 + 1, 60 * 15 + 20);
        return probe;
    }
}

View File

@@ -24,13 +24,15 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.common.TransformException;
import org.alfresco.transform.base.metadataExtractors.AbstractMetadataExtractor;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
@@ -67,6 +69,8 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* The parent of all Metadata Extractors which use Apache Tika under the hood. This handles all the
* common parts of processing the files, and the common mappings.
@@ -82,7 +86,7 @@ import java.util.stream.Stream;
* @author Nick Burch
* @author adavis
*/
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor
public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtractor implements CustomTransformer
{
protected static final String KEY_AUTHOR = "author";
protected static final String KEY_TITLE = "title";
@@ -97,9 +101,17 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
private final DateTimeFormatter tikaUTCDateFormater;
private final DateTimeFormatter tikaDateFormater;
public AbstractTikaMetadataExtractor(Logger logger)
/**
 * Selects which operation {@code transform(...)} performs: {@code EXTRACTOR} routes the
 * request to {@code extractMetadata(...)}, any other value to {@code embedMetadata(...)}.
 */
public static enum Type
{
EXTRACTOR, EMBEDDER
}
private final Type type;
public AbstractTikaMetadataExtractor(Type type, Logger logger)
{
super(logger);
this.type = type;
// TODO Once TIKA-451 is fixed this list will get nicer
DateTimeParser[] parsersUTC = {
@@ -118,6 +130,26 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
tikaDateFormater = new DateTimeFormatterBuilder().append(null, parsers).toFormatter();
}
/**
 * @return the simple (unqualified) name of the concrete extractor class
 */
@Override
public String getTransformerName()
{
    final String simpleClassName = getClass().getSimpleName();
    return simpleClassName;
}
/**
 * Dispatches the request according to the {@code type} this instance was constructed with:
 * {@code EXTRACTOR} instances extract metadata, all others embed it.
 * {@code targetMimetype} is not used by this method.
 *
 * @throws Exception whatever {@code extractMetadata} or {@code embedMetadata} throws
 */
@Override
public void transform(String sourceMimetype, String sourceEncoding, InputStream inputStream,
        String targetMimetype, String targetEncoding, OutputStream outputStream,
        Map<String, String> transformOptions) throws Exception
{
    if (type != EXTRACTOR)
    {
        embedMetadata(sourceMimetype, transformOptions, sourceEncoding, inputStream, targetEncoding, outputStream);
        return;
    }

    extractMetadata(sourceMimetype, transformOptions, sourceEncoding, inputStream, targetEncoding, outputStream);
}
/**
* Version which also tries the ISO-8601 formats (in order..),
* and similar formats, which Tika makes use of
@@ -308,6 +340,14 @@ public abstract class AbstractTikaMetadataExtractor extends AbstractMetadataExtr
return rawProperties;
}
/**
 * Placeholder for metadata embedding: not implemented yet, so every call fails.
 *
 * @throws TransformException always, with status 500 and the message {@code "TODO embedMetadata"}
 */
public void embedMetadata(String sourceMimetype, Map<String, String> transformOptions,
        String sourceEncoding, InputStream inputStream,
        String targetEncoding, OutputStream outputStream) throws Exception
{
    // TODO
    final String message = "TODO embedMetadata";
    throw new TransformException(500, message);
}
/**
* @deprecated The content repository's TikaPoweredMetadataExtracter provides no non test implementations.
* This code exists in case there are custom implementations, that need to be converted to T-Engines.

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -32,10 +32,13 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.dwg.DWGParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* {@code "application/dwg"} and {@code "image/vnd.dwg"} metadata extractor.
*
@@ -53,6 +56,7 @@ import java.util.Map;
* @author Nick Burch
* @author adavis
*/
@Component
public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(DWGMetadataExtractor.class);
@@ -62,7 +66,7 @@ public class DWGMetadataExtractor extends AbstractTikaMetadataExtractor
public DWGMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import java.io.Serializable;
import java.util.Arrays;
@@ -33,13 +33,17 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.alfresco.transformer.tika.parsers.ExifToolParser;
import org.alfresco.transform.tika.parsers.ExifToolParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
@Component
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
{
@@ -53,7 +57,7 @@ public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
public IPTCMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -33,10 +33,13 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp3.Mp3Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* MP3 file metadata extractor.
*
@@ -63,6 +66,7 @@ import java.util.Map;
* @author Nick Burch
* @author adavis
*/
@Component
public class MP3MetadataExtractor extends TikaAudioMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(MP3MetadataExtractor.class);

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Message;
import org.apache.tika.metadata.Metadata;
@@ -33,10 +33,13 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* Outlook MAPI format email metadata extractor.
*
@@ -59,6 +62,7 @@ import java.util.Map;
* @author Kevin Roast
* @author adavis
*/
@Component
public class MailMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(MailMetadataExtractor.class);
@@ -74,7 +78,7 @@ public class MailMetadataExtractor extends AbstractTikaMetadataExtractor
public MailMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
@@ -33,10 +33,13 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* Office file format metadata extractor.
*
@@ -67,6 +70,7 @@ import java.util.Map;
* @author Nick Burch
* @author adavis
*/
@Component
public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(OfficeMetadataExtractor.class);
@@ -84,7 +88,7 @@ public class OfficeMetadataExtractor extends AbstractTikaMetadataExtractor
public OfficeMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,8 +24,9 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
import static org.apache.tika.metadata.DublinCore.NAMESPACE_URI_DC;
import org.apache.tika.metadata.Metadata;
@@ -40,6 +41,7 @@ import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.xml.sax.ContentHandler;
import java.io.Serializable;
@@ -76,6 +78,7 @@ import java.util.stream.Collectors;
* @author Derek Hulley
* @author adavis
*/
@Component
public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(OpenDocumentMetadataExtractor.class);
@@ -95,7 +98,7 @@ public class OpenDocumentMetadataExtractor extends AbstractTikaMetadataExtractor
public OpenDocumentMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,15 +24,18 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transformer.executors.Tika;
import org.alfresco.transform.tika.transformers.Tika;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* Metadata extractor for the PDF documents.
@@ -52,13 +55,14 @@ import org.slf4j.LoggerFactory;
* @author Derek Hulley
* @author adavis
*/
@Component
public class PdfBoxMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(PdfBoxMetadataExtractor.class);
public PdfBoxMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
@@ -36,6 +36,7 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
@@ -44,6 +45,8 @@ import java.util.Collections;
import java.util.Set;
import java.util.StringJoiner;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* POI-based metadata extractor for Office 07 documents. See http://poi.apache.org/ for information on POI.
*
@@ -92,13 +95,14 @@ import java.util.StringJoiner;
* @author Dmitry Velichkevich
* @author adavis
*/
@Component
public class PoiMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(PoiMetadataExtractor.class);
public PoiMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
}
@Override

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
@@ -37,12 +37,14 @@ import org.gagravarr.tika.FlacParser;
import org.gagravarr.tika.VorbisParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import static org.alfresco.transformer.executors.Tika.readTikaConfig;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* A Metadata Extractor which makes use of the Apache Tika Audio Parsers to extract metadata from media files.
@@ -66,6 +68,7 @@ import static org.alfresco.transformer.executors.Tika.readTikaConfig;
* @author Nick Burch
* @author adavis
*/
@Component
public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(TikaAudioMetadataExtractor.class);
@@ -86,7 +89,7 @@ public class TikaAudioMetadataExtractor extends AbstractTikaMetadataExtractor
public TikaAudioMetadataExtractor(Logger logger)
{
super(logger);
super(EXTRACTOR, logger);
tikaConfig = readTikaConfig(logger);
}

View File

@@ -24,9 +24,8 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
package org.alfresco.transform.tika.metadataExtractors;
import org.alfresco.transform.common.Mimetype;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TIFF;
@@ -34,12 +33,14 @@ import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.Map;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transformer.executors.Tika.readTikaConfig;
import static org.alfresco.transform.tika.transformers.Tika.readTikaConfig;
import static org.alfresco.transform.tika.metadataExtractors.AbstractTikaMetadataExtractor.Type.EXTRACTOR;
/**
* A Metadata Extractor which makes use of the Apache Tika auto-detection to select the best parser to extract the
@@ -61,6 +62,7 @@ import static org.alfresco.transformer.executors.Tika.readTikaConfig;
* @author Nick Burch
* @author adavis
*/
@Component
public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(TikaAutoMetadataExtractor.class);
@@ -75,7 +77,7 @@ public class TikaAutoMetadataExtractor extends AbstractTikaMetadataExtractor
public TikaAutoMetadataExtractor()
{
super(logger);
super(EXTRACTOR, logger);
tikaConfig = readTikaConfig(logger);
}

View File

@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
package org.alfresco.transform.tika.parsers;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;

View File

@@ -2,7 +2,7 @@
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2019 Alfresco Software Limited
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
@@ -24,7 +24,7 @@
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
package org.alfresco.transform.tika.parsers;
import java.io.IOException;
import java.io.InputStream;

View File

@@ -0,0 +1,42 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
/**
 * Tika transformer whose parser is the {@code packageParser} of the inherited
 * {@code tika} helper.
 */
@Component
public class ArchiveTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code packageParser} held by the inherited {@code tika} field
     */
    @Override
    protected Parser getParser()
    {
        final Parser packageParser = tika.packageParser;
        return packageParser;
    }
}

View File

@@ -0,0 +1,146 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.alfresco.transform.base.CustomTransformer;
import org.alfresco.transform.base.logging.LogEntry;
import org.alfresco.transform.base.util.RequestParamMap;
import org.alfresco.transform.common.TransformException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Map;
import java.util.StringJoiner;
import static java.lang.Boolean.parseBoolean;
/**
 * Base class for Tika backed {@link CustomTransformer}s. Subclasses supply the Tika
 * {@link Parser} (and optionally a {@link DocumentSelector}); this class turns the transform
 * options into the argument list handed to the shared {@link Tika} helper.
 */
public abstract class GenericTikaTransformer implements CustomTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(GenericTikaTransformer.class);

    /** Suffix removed from the simple class name to form the transformer name. */
    private static final String TRANSFORMER_SUFFIX = "Transformer";

    /** Default for the "not extract bookmarks text" option when the request does not set it. */
    @Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
    boolean notExtractBookmarksTextDefault;

    @Autowired
    protected Tika tika;

    /**
     * @return the Tika parser this transformer passes to {@code tika.transform}
     */
    protected abstract Parser getParser();

    /**
     * @return the document selector passed to {@code tika.transform}; {@code null} unless overridden
     */
    protected DocumentSelector getDocumentSelector()
    {
        return null;
    }

    /**
     * Derives the transformer name from the simple class name by dropping a trailing
     * {@code "Transformer"}. Class names without that suffix (including the empty simple
     * name of an anonymous class) are returned unchanged rather than being truncated
     * blindly, which previously produced a wrong name or a
     * {@link StringIndexOutOfBoundsException}.
     *
     * @return the simple class name without its {@code "Transformer"} suffix
     */
    @Override
    public String getTransformerName()
    {
        String simpleClassName = getClass().getSimpleName();
        if (!simpleClassName.endsWith(TRANSFORMER_SUFFIX))
        {
            return simpleClassName;
        }
        return simpleClassName.substring(0, simpleClassName.length() - TRANSFORMER_SUFFIX.length());
    }

    /**
     * Stream based transform. Not implemented yet: always fails.
     *
     * @throws TransformException always, with status 500
     */
    @Override
    public void transform(String sourceMimetype, String sourceEncoding, InputStream inputStream,
            String targetMimetype, String targetEncoding, OutputStream outputStream,
            Map<String, String> transformOptions) throws Exception
    {
        // TODO
        throw new TransformException(500, "TODO GenericTikaTransformer transform with InputStreams");
    }

    /**
     * File based transform. Reads the include-contents, not-extract-bookmarks-text and
     * target encoding options (the latter defaulting to {@code UTF-8}) and forwards them,
     * together with the target mimetype, to {@link #call(File, File, String...)}.
     *
     * @param transformName    passed through as the first argument to {@code call}
     * @param sourceMimetype   not used by this method
     * @param targetMimetype   appended to {@code Tika.TARGET_MIMETYPE}
     * @param transformOptions request options; must not be {@code null}
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     * @throws Exception if the underlying call fails
     */
    public void transform(String transformName, String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        final boolean includeContents = parseBoolean(
                transformOptions.getOrDefault(RequestParamMap.INCLUDE_CONTENTS, "false"));
        final boolean notExtractBookmarksText = parseBoolean(
                transformOptions.getOrDefault(RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT, String.valueOf(notExtractBookmarksTextDefault)));
        final String targetEncoding = transformOptions.getOrDefault("targetEncoding", "UTF-8");

        // Only traced when the request left the option unset and the configured default is true.
        if (transformOptions.get(RequestParamMap.NOT_EXTRACT_BOOKMARKS_TEXT) == null && notExtractBookmarksTextDefault)
        {
            logger.trace("notExtractBookmarksText default value has been overridden to {}", notExtractBookmarksTextDefault);
        }

        // Flags that are switched off are passed as null and dropped by buildArgs.
        call(sourceFile, targetFile, transformName,
                includeContents ? Tika.INCLUDE_CONTENTS : null,
                notExtractBookmarksText ? Tika.NOT_EXTRACT_BOOKMARKS_TEXT : null,
                Tika.TARGET_MIMETYPE + targetMimetype, Tika.TARGET_ENCODING + targetEncoding);
    }

    /**
     * Builds the final argument list and runs {@code tika.transform} with this
     * transformer's parser and document selector.
     */
    void call(File sourceFile, File targetFile, String... args)
    {
        Parser parser = getParser();
        DocumentSelector documentSelector = getDocumentSelector();
        args = buildArgs(sourceFile, targetFile, args);
        tika.transform(parser, documentSelector, args);
    }

    /**
     * Returns the non-null {@code args} followed by the absolute paths of the source and
     * target files, and records a space separated summary via {@code LogEntry.setOptions}.
     */
    private static String[] buildArgs(File sourceFile, File targetFile, String[] args)
    {
        ArrayList<String> methodArgs = new ArrayList<>(args.length + 2);
        StringJoiner sj = new StringJoiner(" ");
        for (String arg : args)
        {
            addArg(methodArgs, sj, arg);
        }

        addFileArg(methodArgs, sj, sourceFile);
        addFileArg(methodArgs, sj, targetFile);

        LogEntry.setOptions(sj.toString());

        return methodArgs.toArray(new String[0]);
    }

    /** Adds {@code arg} to both the argument list and the summary; {@code null} is skipped. */
    private static void addArg(ArrayList<String> methodArgs, StringJoiner sj, String arg)
    {
        if (arg != null)
        {
            sj.add(arg);
            methodArgs.add(arg);
        }
    }

    /**
     * Adds the file's absolute path to the argument list and only its extension
     * ({@code "???"} when it has none) to the summary; {@code null} is skipped.
     */
    private static void addFileArg(ArrayList<String> methodArgs, StringJoiner sj, File arg)
    {
        if (arg != null)
        {
            // Look for the extension in the file name only, so a dot in a parent
            // directory name is never mistaken for the start of an extension.
            String fileName = arg.getName();
            int i = fileName.lastIndexOf('.');
            String ext = i == -1 ? "???" : fileName.substring(i + 1);
            sj.add(ext);
            methodArgs.add(arg.getAbsolutePath());
        }
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Tika transformer whose parser is the {@code ooXmlParser} of the inherited
 * {@code tika} helper.
 */
@Component
public class OOXMLTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code ooXmlParser} held by the inherited {@code tika} field
     */
    @Override
    protected Parser getParser()
    {
        final Parser ooXmlParser = tika.ooXmlParser;
        return ooXmlParser;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Spring component extending {@link GenericTikaTransformer} that supplies the
 * {@code officeParser} held by the {@code tika} field (declared outside this class).
 */
@Component
public class OfficeTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code officeParser} instance exposed by {@code tika}
     */
    @Override
    protected Parser getParser()
    {
        final Parser officeParser = tika.officeParser;
        return officeParser;
    }
}

View File

@@ -0,0 +1,40 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Spring component extending {@link GenericTikaTransformer} that supplies the
 * {@code officeParser} held by the {@code tika} field (declared outside this class),
 * i.e. the same parser field that {@code OfficeTransformer} returns.
 */
@Component
public class OutlookMsgTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code officeParser} instance exposed by {@code tika}
     */
    @Override
    protected Parser getParser()
    {
        final Parser officeParser = tika.officeParser;
        return officeParser;
    }
}

View File

@@ -0,0 +1,47 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.tika.transformers;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.parser.Parser;
import org.springframework.stereotype.Component;
/**
 * Spring component extending {@link GenericTikaTransformer} that supplies the
 * {@code pdfParser} and a dedicated {@link DocumentSelector}, both taken from the
 * {@code tika} field (declared outside this class).
 */
@Component
public class PdfBoxTransformer extends GenericTikaTransformer
{
    /**
     * @return the {@code pdfParser} instance exposed by {@code tika}
     */
    @Override
    protected Parser getParser()
    {
        final Parser pdfParser = tika.pdfParser;
        return pdfParser;
    }

    /**
     * @return the {@code pdfBoxEmbededDocumentSelector} instance exposed by {@code tika}
     */
    @Override
    protected DocumentSelector getDocumentSelector()
    {
        final DocumentSelector selector = tika.pdfBoxEmbededDocumentSelector;
        return selector;
    }
}

Some files were not shown because too many files have changed in this diff Show More