fix mixed-line-ending

This commit is contained in:
Marcello Teodori
2022-02-23 22:40:19 +01:00
parent 157e261dde
commit 4175ac34da
56 changed files with 5160 additions and 5160 deletions

86
.gitignore vendored
View File

@@ -1,43 +1,43 @@
*.class
# Eclipse
.classpath
.settings
.project
# Intellij
.idea/
*.iml
*.iws
# vscode
.vscode
# Mac
.DS_Store
# Maven
target
*.log
*.log.*
# Mobile Tools for Java (J2ME)
.mtj
.tmp/
# Package Files #
*.jar
!quick.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
alf_data
/src/main/resources/alfresco-global.properties
/src/main/resources/alfresco/extension/custom-log4j.properties
libreoffice-dist-*-linux.gz
*.class
# Eclipse
.classpath
.settings
.project
# Intellij
.idea/
*.iml
*.iws
# vscode
.vscode
# Mac
.DS_Store
# Maven
target
*.log
*.log.*
# Mobile Tools for Java (J2ME)
.mtj
.tmp/
# Package Files #
*.jar
!quick.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
alf_data
/src/main/resources/alfresco-global.properties
/src/main/resources/alfresco/extension/custom-log4j.properties
libreoffice-dist-*-linux.gz

View File

@@ -1,126 +1,126 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestParam;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.client.model.config.CoreVersionDecorator.setOrClearCoreVersion;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transformer.util.RequestParamMap.CONFIG_VERSION;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
@Controller
public class AIOController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(AIOController.class);
@Autowired
private AIOTransformRegistry transformRegistry;
@Override
public String getTransformerName()
{
return "All in One Transformer";
}
@Override
public String version()
{
return getTransformerName() + " available";
}
// TODO ATS-713 Currently uses the Misc probeTest. The implementation will need to be changed such that the test can be selected based on the required transform
@Override
public ProbeTestTransform getProbeTestTransform()
{
// HtmlParserContentTransformer html -> text
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.html", "quick.txt",
119, 30, 150, 1024,
60 * 2 + 1, 60 * 2)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public ResponseEntity<TransformConfig> info(
@RequestParam(value = CONFIG_VERSION, defaultValue = CONFIG_VERSION_DEFAULT) int configVersion)
{
logger.info("GET Transform Config version: " + configVersion);
TransformConfig transformConfig = transformRegistry.getTransformConfig();
transformConfig = setOrClearCoreVersion(transformConfig, configVersion);
return new ResponseEntity<>(transformConfig, OK);
}
@Override
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing transform with: transformName; '{}', sourceFile '{}', targetFile '{}', transformOptions" +
" {}", transformName, sourceFile, targetFile, transformOptions);
Transformer transformer = transformRegistry.getByTransformName(transformName);
if (transformer == null)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), "No transformer mapping for - transform:"
+ transformName + " sourceMimetype:" + sourceMimetype + " targetMimetype:" + targetMimetype);
}
if (logger.isDebugEnabled())
{
logger.debug("Performing transform with name '{}' using transformer with id '{}'.", transformName, transformer.getTransformerId());
}
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestParam;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.client.model.config.CoreVersionDecorator.setOrClearCoreVersion;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transformer.util.RequestParamMap.CONFIG_VERSION;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
import static org.springframework.http.HttpStatus.OK;
@Controller
public class AIOController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(AIOController.class);
@Autowired
private AIOTransformRegistry transformRegistry;
@Override
public String getTransformerName()
{
return "All in One Transformer";
}
@Override
public String version()
{
return getTransformerName() + " available";
}
// TODO ATS-713 Currently uses the Misc probeTest. The implementation will need to be changed such that the test can be selected based on the required transform
@Override
public ProbeTestTransform getProbeTestTransform()
{
// HtmlParserContentTransformer html -> text
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.html", "quick.txt",
119, 30, 150, 1024,
60 * 2 + 1, 60 * 2)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public ResponseEntity<TransformConfig> info(
@RequestParam(value = CONFIG_VERSION, defaultValue = CONFIG_VERSION_DEFAULT) int configVersion)
{
logger.info("GET Transform Config version: " + configVersion);
TransformConfig transformConfig = transformRegistry.getTransformConfig();
transformConfig = setOrClearCoreVersion(transformConfig, configVersion);
return new ResponseEntity<>(transformConfig, OK);
}
@Override
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
logger.debug("Processing transform with: transformName; '{}', sourceFile '{}', targetFile '{}', transformOptions" +
" {}", transformName, sourceFile, targetFile, transformOptions);
Transformer transformer = transformRegistry.getByTransformName(transformName);
if (transformer == null)
{
throw new TransformException(INTERNAL_SERVER_ERROR.value(), "No transformer mapping for - transform:"
+ transformName + " sourceMimetype:" + sourceMimetype + " targetMimetype:" + targetMimetype);
}
if (logger.isDebugEnabled())
{
logger.debug("Performing transform with name '{}' using transformer with id '{}'.", transformName, transformer.getTransformerId());
}
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -1,123 +1,123 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Configuration
public class AIOCustomConfig
{
@Value("${transform.core.libreoffice.path}")
private String libreofficePath;
@Value("${transform.core.libreoffice.maxTasksPerProcess}")
private String libreofficeMaxTasksPerProcess;
@Value("${transform.core.libreoffice.timeout}")
private String libreofficeTimeout;
@Value("${transform.core.libreoffice.portNumbers}")
private String libreofficePortNumbers;
@Value("${transform.core.libreoffice.templateProfileDir}")
private String libreofficeTemplateProfileDir;
@Value("${transform.core.libreoffice.isEnabled}")
private String libreofficeIsEnabled;
@Value("${transform.core.pdfrenderer.exe}")
private String pdfRendererPath;
@Value("${transform.core.imagemagick.exe}")
private String imageMagickExePath;
@Value("${transform.core.imagemagick.dyn}")
private String imageMagickDynPath;
@Value("${transform.core.imagemagick.root}")
private String imageMagickRootPath;
@Value("${transform.core.imagemagick.coders}")
private String imageMagickCodersPath;
@Value("${transform.core.imagemagick.config}")
private String imageMagickConfigPath;
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
private boolean notExtractBookmarksTextDefault;
@Value("${transform.core.version}")
private String coreVersion;
/**
*
* @return Override the TransformRegistryImpl used in {@link AbstractTransformerController}
*/
@Bean
@Primary
public TransformServiceRegistry aioTransformRegistry() throws Exception
{
AIOTransformRegistry aioTransformRegistry = new AIOTransformRegistry();
aioTransformRegistry.setCoreVersion(coreVersion);
// T-Engines are sorted by name so they are combined in the same order as in the T-Router
// and Content Repository with individual T-Engines. See TransformersConfigRegistry#retrieveRemoteConfig and
// LocalTransformServiceRegistry#getTEngineUrlsSortedByName.
for (Transformer tEngine : getTEnginesSortedByName())
{
aioTransformRegistry.registerTransformer(tEngine); // now a poor name - should be combineTransformers
}
aioTransformRegistry.registerCombinedTransformers();
return aioTransformRegistry;
}
List<Transformer> getTEnginesSortedByName()
{
return Stream.of(new SelectingTransformer(),
new TikaJavaExecutor(notExtractBookmarksTextDefault),
new ImageMagickCommandExecutor(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath),
new LibreOfficeJavaExecutor(libreofficePath, libreofficeMaxTasksPerProcess, libreofficeTimeout, libreofficePortNumbers, libreofficeTemplateProfileDir, libreofficeIsEnabled),
new PdfRendererCommandExecutor(pdfRendererPath))
.sorted(Comparator.comparing(Transformer::getTransformerId))
.collect(Collectors.toList());
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transform.client.registry.TransformServiceRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Configuration
public class AIOCustomConfig
{
@Value("${transform.core.libreoffice.path}")
private String libreofficePath;
@Value("${transform.core.libreoffice.maxTasksPerProcess}")
private String libreofficeMaxTasksPerProcess;
@Value("${transform.core.libreoffice.timeout}")
private String libreofficeTimeout;
@Value("${transform.core.libreoffice.portNumbers}")
private String libreofficePortNumbers;
@Value("${transform.core.libreoffice.templateProfileDir}")
private String libreofficeTemplateProfileDir;
@Value("${transform.core.libreoffice.isEnabled}")
private String libreofficeIsEnabled;
@Value("${transform.core.pdfrenderer.exe}")
private String pdfRendererPath;
@Value("${transform.core.imagemagick.exe}")
private String imageMagickExePath;
@Value("${transform.core.imagemagick.dyn}")
private String imageMagickDynPath;
@Value("${transform.core.imagemagick.root}")
private String imageMagickRootPath;
@Value("${transform.core.imagemagick.coders}")
private String imageMagickCodersPath;
@Value("${transform.core.imagemagick.config}")
private String imageMagickConfigPath;
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
private boolean notExtractBookmarksTextDefault;
@Value("${transform.core.version}")
private String coreVersion;
/**
*
* @return Override the TransformRegistryImpl used in {@link AbstractTransformerController}
*/
@Bean
@Primary
public TransformServiceRegistry aioTransformRegistry() throws Exception
{
AIOTransformRegistry aioTransformRegistry = new AIOTransformRegistry();
aioTransformRegistry.setCoreVersion(coreVersion);
// T-Engines are sorted by name so they are combined in the same order as in the T-Router
// and Content Repository with individual T-Engines. See TransformersConfigRegistry#retrieveRemoteConfig and
// LocalTransformServiceRegistry#getTEngineUrlsSortedByName.
for (Transformer tEngine : getTEnginesSortedByName())
{
aioTransformRegistry.registerTransformer(tEngine); // now a poor name - should be combineTransformers
}
aioTransformRegistry.registerCombinedTransformers();
return aioTransformRegistry;
}
List<Transformer> getTEnginesSortedByName()
{
return Stream.of(new SelectingTransformer(),
new TikaJavaExecutor(notExtractBookmarksTextDefault),
new ImageMagickCommandExecutor(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath),
new LibreOfficeJavaExecutor(libreofficePath, libreofficeMaxTasksPerProcess, libreofficeTimeout, libreofficePortNumbers, libreofficeTemplateProfileDir, libreofficeIsEnabled),
new PdfRendererCommandExecutor(pdfRendererPath))
.sorted(Comparator.comparing(Transformer::getTransformerId))
.collect(Collectors.toList());
}
}

View File

@@ -1,85 +1,85 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
@Value("${container.name}")
private String containerName;
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
logger.info(ImageMagickCommandExecutor.LICENCE);
logger.info(LibreOfficeJavaExecutor.LICENCE);
Arrays.stream(TikaJavaExecutor.LICENCE.split("\\n")).forEach(logger::info);
logger.info(PdfRendererCommandExecutor.LICENCE);
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.executors.ImageMagickCommandExecutor;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.PdfRendererCommandExecutor;
import org.alfresco.transformer.executors.TikaJavaExecutor;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
@Value("${container.name}")
private String containerName;
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
logger.info(ImageMagickCommandExecutor.LICENCE);
logger.info(LibreOfficeJavaExecutor.LICENCE);
Arrays.stream(TikaJavaExecutor.LICENCE.split("\\n")).forEach(logger::info);
logger.info(PdfRendererCommandExecutor.LICENCE);
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}

View File

@@ -1,127 +1,127 @@
The "Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people involved,
and so on. (You will not be required to justify it to the
Copyright Holder, but only to the computing community at large
as a market that must bear the fee.)
"Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the item.
It also means that recipients of the item may redistribute it
under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A Package
modified in such a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site such as uunet.uu.net, or by allowing the Copyright Holder to include
your modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial) software
distribution provided that you do not advertise this Package as a
product of your own. You may embed this Package's interpreter within
an executable of yours (by linking); this shall be construed as a mere
form of aggregation, provided that the complete Standard Version of the
interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of this Package with a commercial distribution is always
permitted provided that the use of this Package is embedded; that is,
when no overt attempt is made to make this Package's interfaces visible
to the end user of the commercial distribution. Such use shall not be
construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End
The "Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people involved,
and so on. (You will not be required to justify it to the
Copyright Holder, but only to the computing community at large
as a market that must bear the fee.)
"Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the item.
It also means that recipients of the item may redistribute it
under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A Package
modified in such a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site such as uunet.uu.net, or by allowing the Copyright Holder to include
your modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial) software
distribution provided that you do not advertise this Package as a
product of your own. You may embed this Package's interpreter within
an executable of yours (by linking); this shall be construed as a mere
form of aggregation, provided that the complete Standard Version of the
interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of this Package with a commercial distribution is always
permitted provided that the use of this Package is embedded; that is,
when no overt attempt is made to make this Package's interfaces visible
to the end user of the commercial distribution. Such use shall not be
construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End

View File

@@ -1,49 +1,49 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
public class AIOControllerHttpRequestTest extends AbstractHttpRequestTest
{
@Override
protected String getTransformerName()
{
return "All in One Transformer";
}
@Override
protected String getSourceExtension()
{
// Currently using same extension as ImageMagick tests
return "jpg";
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.SpringBootTest.WebEnvironment;
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
public class AIOControllerHttpRequestTest extends AbstractHttpRequestTest
{
@Override
protected String getTransformerName()
{
return "All in One Transformer";
}
@Override
protected String getSourceExtension()
{
// Currently using same extension as ImageMagick tests
return "jpg";
}
}

View File

@@ -1,141 +1,141 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
/**
* Test the AIOController ImageMagick transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerImageMagickTest extends ImageMagickControllerTest
{
// All tests contained in ImageMagickControllerTest
@Autowired
AIOTransformRegistry transformRegistry;
@BeforeEach @Override
public void before() throws IOException
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("imagemagick", commandExecutor);
mockTransformCommand("jpg", "png", "image/jpeg", true);
}
@Override
protected AbstractTransformerController getController()
{
return controller;
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void noTargetFileTest()
{
// Ignore the test in super class as the AIO transforms will not be selected .
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
/**
* Test the AIOController ImageMagick transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerImageMagickTest extends ImageMagickControllerTest
{
// All tests contained in ImageMagickControllerTest
@Autowired
AIOTransformRegistry transformRegistry;
@BeforeEach @Override
public void before() throws IOException
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("imagemagick", commandExecutor);
mockTransformCommand("jpg", "png", "image/jpeg", true);
}
@Override
protected AbstractTransformerController getController()
{
return controller;
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void noTargetFileTest()
{
// Ignore the test in super class as the AIO transforms will not be selected .
// It is the mock that returns a zero length file for other transformers, when we supply an invalid targetExtension.
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}

View File

@@ -1,119 +1,119 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController without a server.
* Super class includes tests for the LibreOfficeController and AbstractTransformerController.
*/
public class AIOControllerLibreOfficeTest extends LibreOfficeControllerTest
{
//Tests contained in LibreOfficeControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController,"Wrong controller wired for test");
}
@Autowired
AIOTransformRegistry transformRegistry;
@Override
// Used by the super class to mock the javaExecutor, a different implementation is required here
protected void setJavaExecutor(AbstractTransformerController controller, LibreOfficeJavaExecutor javaExecutor)
{
//Need to wire in the mocked javaExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("libreoffice", javaExecutor);
// No need to set the transform registry to the controller as it is @Autowired in
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.LibreOfficeJavaExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController without a server.
* Super class includes tests for the LibreOfficeController and AbstractTransformerController.
*/
public class AIOControllerLibreOfficeTest extends LibreOfficeControllerTest
{
//Tests contained in LibreOfficeControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController,"Wrong controller wired for test");
}
@Autowired
AIOTransformRegistry transformRegistry;
@Override
// Used by the super class to mock the javaExecutor, a different implementation is required here
protected void setJavaExecutor(AbstractTransformerController controller, LibreOfficeJavaExecutor javaExecutor)
{
//Need to wire in the mocked javaExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("libreoffice", javaExecutor);
// No need to set the transform registry to the controller as it is @Autowired in
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}

View File

@@ -1,85 +1,85 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerMiscTest extends MiscControllerTest
{
//Tests contained in MiscControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerMiscTest extends MiscControllerTest
{
//Tests contained in MiscControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}

View File

@@ -1,119 +1,119 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController PDF Renderer transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
public class AIOControllerPdfRendererTest extends AlfrescoPdfRendererControllerTest
{
@Autowired
AIOTransformRegistry transformRegistry;
@Override
protected void setFields()
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("pdfrenderer", commandExecutor);
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Map;
import org.alfresco.transformer.executors.Transformer;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.util.ReflectionTestUtils;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController PDF Renderer transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
public class AIOControllerPdfRendererTest extends AlfrescoPdfRendererControllerTest
{
@Autowired
AIOTransformRegistry transformRegistry;
@Override
protected void setFields()
{
ReflectionTestUtils.setField(commandExecutor, "transformCommand", mockTransformCommand);
ReflectionTestUtils.setField(commandExecutor, "checkCommand", mockCheckCommand);
//Need to wire in the mocked commandExecutor into the controller...
Map<String,Transformer> transformers = transformRegistry.getTransformerEngineMapping();
transformers.replace("pdfrenderer", commandExecutor);
}
@Override
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile,
String... params)
{
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
return builder;
}
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController, "Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
}

View File

@@ -1,93 +1,93 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import java.io.IOException;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.http.ResponseEntity;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_LATEST;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerTest //extends AbstractTransformerControllerTest
{
@Value("${transform.core.version}")
private String coreVersion;
@Autowired
AIOController aioController;
//@Override
protected void mockTransformCommand(String sourceExtension, String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException {
// TODO Auto-generated method stub
}
//@Override
protected AbstractTransformerController getController() {
// TODO Auto-generated method stub
return null;
}
//@Override
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest) {
// TODO Auto-generated method stub
}
@Test
public void emptyTest()
{
ResponseEntity<TransformConfig> responseEntity = aioController.info(Integer.valueOf(CONFIG_VERSION_DEFAULT));
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a null coreValue but was " + transformer.getCoreVersion());
});
}
@Test
public void emptyTestWithLatestVersion()
{
ResponseEntity<TransformConfig> responseEntity = aioController.info(CONFIG_VERSION_LATEST);
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNotNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a coreValue but was null. Should have been " + coreVersion);
});
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import java.io.IOException;
import org.alfresco.transform.client.model.TransformRequest;
import org.alfresco.transform.client.model.config.TransformConfig;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.http.ResponseEntity;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transform.client.util.RequestParamMap.CONFIG_VERSION_LATEST;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
public class AIOControllerTest //extends AbstractTransformerControllerTest
{
@Value("${transform.core.version}")
private String coreVersion;
@Autowired
AIOController aioController;
//@Override
protected void mockTransformCommand(String sourceExtension, String targetExtension, String sourceMimetype,
boolean readTargetFileBytes) throws IOException {
// TODO Auto-generated method stub
}
//@Override
protected AbstractTransformerController getController() {
// TODO Auto-generated method stub
return null;
}
//@Override
protected void updateTransformRequestWithSpecificOptions(TransformRequest transformRequest) {
// TODO Auto-generated method stub
}
@Test
public void emptyTest()
{
ResponseEntity<TransformConfig> responseEntity = aioController.info(Integer.valueOf(CONFIG_VERSION_DEFAULT));
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a null coreValue but was " + transformer.getCoreVersion());
});
}
@Test
public void emptyTestWithLatestVersion()
{
ResponseEntity<TransformConfig> responseEntity = aioController.info(CONFIG_VERSION_LATEST);
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNotNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a coreValue but was null. Should have been " + coreVersion);
});
}
}

View File

@@ -1,99 +1,99 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController Tika transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
public class AIOControllerTikaTest extends TikaControllerTest
{
// All tests contained in TikaControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController,"Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void xlsxEmbedTest()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
// It uses the real class path rather than the test one.
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.context.annotation.Import;
@WebMvcTest(AIOController.class)
@Import(AIOCustomConfig.class)
/**
* Test the AIOController Tika transforms without a server.
* Super class includes tests for the AbstractTransformerController.
*/
public class AIOControllerTikaTest extends TikaControllerTest
{
// All tests contained in TikaControllerTest
@Test
public void testTestValidity()
{
// just test that we are actually testing against the AIOController (instead of MiscController)
assertTrue(controller instanceof AIOController,"Wrong controller wired for test");
}
@Test
@Override
public void testGetTransformConfigInfo()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetTransformConfigInfoExcludingCoreVersion()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithDuplicates()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithEmptyTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void testGetInfoFromConfigWithNoTransformOptions()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
}
@Test
@Override
public void xlsxEmbedTest()
{
// Ignore the test in super class as the way the AIO transformer provides config is fundamentally different.
// It uses the real class path rather than the test one.
}
}

View File

@@ -1,32 +1,32 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOImageMagickIT extends ImageMagickTransformationIT
{
// Tests are in ImageMagickTransformationIT
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOImageMagickIT extends ImageMagickTransformationIT
{
// Tests are in ImageMagickTransformationIT
}

View File

@@ -1,32 +1,32 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOLibreOfficeTransformationIT extends LibreOfficeTransformationIT
{
// Tests are in LibreOfficeTransformationIT
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOLibreOfficeTransformationIT extends LibreOfficeTransformationIT
{
// Tests are in LibreOfficeTransformationIT
}

View File

@@ -1,32 +1,32 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOMiscTransformsIT extends MiscTransformsIT
{
// Tests are in ImageMagickTransformationIT
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOMiscTransformsIT extends MiscTransformsIT
{
// Tests are in ImageMagickTransformationIT
}

View File

@@ -1,31 +1,31 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOPdfRendererIT extends AlfrescoPdfRendererTransformationIT {
// Tests are in AlfrescoPdfRendererTransformationIT
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOPdfRendererIT extends AlfrescoPdfRendererTransformationIT {
// Tests are in AlfrescoPdfRendererTransformationIT
}

View File

@@ -1,60 +1,60 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
/**
* @author David Edwards
* created on 21/04/2020
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
properties = {"activemq.url=nio://localhost:61616"})
public class AIOQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
@Override
protected TransformRequest buildRequest()
{
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_HTML)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
.withTargetExtension("txt")
.withSchema(1)
.withClientData("ACS")
.withSourceReference(UUID.randomUUID().toString())
.withSourceSize(32L)
.build();
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
/**
* @author David Edwards
* created on 21/04/2020
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
properties = {"activemq.url=nio://localhost:61616"})
public class AIOQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
@Override
protected TransformRequest buildRequest()
{
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_HTML)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
.withTargetExtension("txt")
.withSchema(1)
.withClientData("ACS")
.withSourceReference(UUID.randomUUID().toString())
.withSourceSize(32L)
.build();
}
}

View File

@@ -1,32 +1,32 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOTikaTransformationIT extends TikaTransformationIT
{
// Tests are in ImageMagickTransformationIT
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
public class AIOTikaTransformationIT extends TikaTransformationIT
{
// Tests are in ImageMagickTransformationIT
}

View File

@@ -1,77 +1,77 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
@Value("${container.name}")
private String containerName;
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
private static final Logger logger = LoggerFactory.getLogger(Application.class);
@Value("${container.name}")
private String containerName;
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
{
return registry -> registry.config().commonTags("containerName", containerName);
}
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
}
@EventListener(ApplicationReadyEvent.class)
public void startup()
{
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
logger.info("Starting application components... Done");
}
}

View File

@@ -1,89 +1,89 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@Controller
public class MiscController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
private SelectingTransformer transformer = new SelectingTransformer();
@Override
public String getTransformerName()
{
return "Miscellaneous Transformers";
}
@Override
public String version()
{
return getTransformerName() + " available";
}
@Override
public ProbeTestTransform getProbeTestTransform()
{
// HtmlParserContentTransformer html -> text
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.html", "quick.txt",
119, 30, 150, 1024,
60 * 2 + 1, 60 * 2)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
@Controller
public class MiscController extends AbstractTransformerController
{
private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
private SelectingTransformer transformer = new SelectingTransformer();
@Override
public String getTransformerName()
{
return "Miscellaneous Transformers";
}
@Override
public String version()
{
return getTransformerName() + " available";
}
@Override
public ProbeTestTransform getProbeTestTransform()
{
// HtmlParserContentTransformer html -> text
// See the Javadoc on this method and Probes.md for the choice of these values.
return new ProbeTestTransform(this, "quick.html", "quick.txt",
119, 30, 150, 1024,
60 * 2 + 1, 60 * 2)
{
@Override
protected void executeTransformCommand(File sourceFile, File targetFile)
{
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
}
};
}
@Override
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions, File sourceFile, File targetFile)
{
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
}

View File

@@ -1,55 +1,55 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
properties = {"activemq.url=nio://localhost:61616"})
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
@Override
protected TransformRequest buildRequest()
{
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_HTML)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
.withTargetExtension("txt")
.withSchema(1)
.withClientData("ACS")
.withSourceReference(UUID.randomUUID().toString())
.withSourceSize(32L).build();
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
properties = {"activemq.url=nio://localhost:61616"})
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
@Override
protected TransformRequest buildRequest()
{
return TransformRequest
.builder()
.withRequestId(UUID.randomUUID().toString())
.withSourceMediaType(MIMETYPE_HTML)
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
.withTargetExtension("txt")
.withSchema(1)
.withClientData("ACS")
.withSourceReference(UUID.randomUUID().toString())
.withSourceSize(32L).build();
}
}

View File

@@ -1,48 +1,48 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
/**
* Tests MiscController with a server test harness.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
{
@Override
protected String getTransformerName()
{
return "Miscellaneous Transformers";
}
@Override
protected String getSourceExtension()
{
return "html";
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
/**
* Tests MiscController with a server test harness.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
{
@Override
protected String getTransformerName()
{
return "Miscellaneous Transformers";
}
@Override
protected String getSourceExtension()
{
return "html";
}
}

View File

@@ -1,30 +1,30 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
alternative plain text
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">alternative html text</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
Parts form an multipart/alternative should represent the same content in different formats
In this eml example the content differs with the purpose of determining if right part was used in transformation
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
alternative plain text
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">alternative html text</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
Parts form an multipart/alternative should represent the same content in different formats
In this eml example the content differs with the purpose of determining if right part was used in transformation

View File

@@ -1,44 +1,44 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
This is a multipart message in MIME format.
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
Mail with attachment content
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Mail with attachment content</div>
------=_NextPart_001_0001_01D06C6A.D04F3750--
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: text/plain;
name="alt.txt"
Content-Transfer-Encoding: quoted-printable
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
Content-Disposition: attachment;
filename="alt.txt"
File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
This is a multipart message in MIME format.
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
Mail with attachment content
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Mail with attachment content</div>
------=_NextPart_001_0001_01D06C6A.D04F3750--
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: text/plain;
name="alt.txt"
Content-Transfer-Encoding: quoted-printable
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
Content-Disposition: attachment;
filename="alt.txt"
File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--

View File

@@ -1,28 +1,28 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
html special characters
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
html special characters
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--

View File

@@ -1,41 +1,41 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/related;
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
type="multipart/alternative"
This is a multi-part message in MIME format.
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
nested alternative plain text
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">nested alternative html text</div>
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: image/jpeg; name="image001.jpg"
Content-Transfer-Encoding: base64
Content-ID: <image001.jpg@01D146F0.63006280>
image
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/related;
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
type="multipart/alternative"
This is a multi-part message in MIME format.
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
nested alternative plain text
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">nested alternative html text</div>
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: image/jpeg; name="image001.jpg"
Content-Transfer-Encoding: base64
Content-ID: <image001.jpg@01D146F0.63006280>
image
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--

View File

@@ -1,31 +1,31 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: The quick brown fox jumps over the lazy dog
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
This is a multipart message in MIME format.
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
El rápido zorro marrón salta sobre el perro perezoso
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
perezoso&nbsp;<br></div>
------=_NextPart_000_0009_01D06BC5.14D754D0--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: The quick brown fox jumps over the lazy dog
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
This is a multipart message in MIME format.
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
El rápido zorro marrón salta sobre el perro perezoso
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
perezoso&nbsp;<br></div>
------=_NextPart_000_0009_01D06BC5.14D754D0--

View File

@@ -1,115 +1,115 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
* newer one. Both formats have the same mimetype.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Neil Mc Erlean
* @author eknizat
* @since 4.0
*/
public class AppleIWorksContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
AppleIWorksContentTransformer.class);
// Apple's zip entry names for previews in iWorks have changed over time.
private static final List<String> PDF_PATHS = ImmutableList.of(
"QuickLook/Preview.pdf"); // iWorks 2008/9
private static final List<String> JPG_PATHS = ImmutableList.of(
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
// (225 x 173) preview-web.jpg
// (53 x 41) preview-micro.jpg
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile)
{
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
sourceMimetype, targetMimetype);
// iWorks files are zip (or package) files.
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
new BufferedInputStream(new FileInputStream(sourceFile))))
{
// Look through the zip file entries for the preview/thumbnail.
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
ZipArchiveEntry entry;
boolean found = false;
while ((entry = iWorksZip.getNextZipEntry()) != null)
{
String name = entry.getName();
if (paths.contains(name))
{
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
found = true;
break;
}
}
if (!found)
{
throw new RuntimeException(
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
}
}
catch (IOException e)
{
throw new RuntimeException(
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
e);
}
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
* newer one. Both formats have the same mimetype.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Neil Mc Erlean
* @author eknizat
* @since 4.0
*/
public class AppleIWorksContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
AppleIWorksContentTransformer.class);
// Apple's zip entry names for previews in iWorks have changed over time.
private static final List<String> PDF_PATHS = ImmutableList.of(
"QuickLook/Preview.pdf"); // iWorks 2008/9
private static final List<String> JPG_PATHS = ImmutableList.of(
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
// (225 x 173) preview-web.jpg
// (53 x 41) preview-micro.jpg
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile)
{
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
sourceMimetype, targetMimetype);
// iWorks files are zip (or package) files.
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
new BufferedInputStream(new FileInputStream(sourceFile))))
{
// Look through the zip file entries for the preview/thumbnail.
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
ZipArchiveEntry entry;
boolean found = false;
while ((entry = iWorksZip.getNextZipEntry()) != null)
{
String name = entry.getName();
if (paths.contains(name))
{
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
found = true;
break;
}
}
if (!found)
{
throw new RuntimeException(
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
}
}
catch (IOException e)
{
throw new RuntimeException(
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
e);
}
}
}

View File

@@ -1,232 +1,232 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
* messages. Searches for all text content parts, and returns them. Any
* attachments are ignored. TIKA Note - could be replaced with the Tika email
* parser. Would require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing text and html
* related parsers.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*/
public class EMLTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
private static final String CHARSET = "charset";
private static final String DEFAULT_ENCODING = "UTF-8";
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
logger.debug("Performing RFC822 to text transform.");
// Use try with resource
try (InputStream contentInputStream = new BufferedInputStream(
new FileInputStream(sourceFile));
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
{
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
contentInputStream);
final StringBuilder sb = new StringBuilder();
Object content = mimeMessage.getContent();
if (content instanceof Multipart)
{
processMultiPart((Multipart) content, sb);
}
else
{
sb.append(content.toString());
}
bufferedFileWriter.write(sb.toString());
}
}
/**
* Find "text" parts of message recursively and appends it to sb StringBuilder
*
* @param multipart Multipart to process
* @param sb StringBuilder
* @throws MessagingException
* @throws IOException
*/
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
IOException
{
boolean isAlternativeMultipart = multipart.getContentType().contains(
MIMETYPE_MULTIPART_ALTERNATIVE);
if (isAlternativeMultipart)
{
processAlternativeMultipart(multipart, sb);
}
else
{
for (int i = 0, n = multipart.getCount(); i < n; i++)
{
Part part = multipart.getBodyPart(i);
if (part.getContent() instanceof Multipart)
{
processMultiPart((Multipart) part.getContent(), sb);
}
else
{
processPart(part, sb);
}
}
}
}
/**
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
*
* @param multipart
* @param sb
* @throws IOException
* @throws MessagingException
*/
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
IOException, MessagingException
{
Part partToUse = null;
for (int i = 0, n = multipart.getCount(); i < n; i++)
{
Part part = multipart.getBodyPart(i);
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
{
partToUse = part;
break;
}
else if (part.getContentType().contains(MIMETYPE_HTML))
{
partToUse = part;
}
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
{
if (part.getContent() instanceof Multipart)
{
processAlternativeMultipart((Multipart) part.getContent(), sb);
}
}
}
if (partToUse != null)
{
processPart(partToUse, sb);
}
}
/**
* Finds text on a given mail part. Accepted parts types are text/html and text/plain.
* Attachments are ignored
*
* @param part
* @param sb
* @throws IOException
* @throws MessagingException
*/
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
{
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
if (isAttachment)
{
return;
}
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
{
sb.append(part.getContent().toString());
}
else if (part.getContentType().contains(MIMETYPE_HTML))
{
String mailPartContent = part.getContent().toString();
//create a temporary html file with same mail part content and encoding
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
".html");
String encoding = getMailPartContentEncoding(part);
try (OutputStreamWriter osWriter = new OutputStreamWriter(
new FileOutputStream(tempHtmlFile), encoding))
{
osWriter.write(mailPartContent);
}
//transform html file's content to plain text
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
extractor.setCollapse(false);
extractor.setLinks(false);
extractor.setReplaceNonBreakingSpaces(false);
extractor.setURL(tempHtmlFile, encoding);
sb.append(extractor.getStrings());
tempHtmlFile.delete();
}
}
private String getMailPartContentEncoding(Part part) throws MessagingException
{
String encoding = DEFAULT_ENCODING;
String contentType = part.getContentType();
int startIndex = contentType.indexOf(CHARSET);
if (startIndex > 0)
{
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
.replaceAll("\"", "");
}
return encoding;
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
* messages. Searches for all text content parts, and returns them. Any
* attachments are ignored. TIKA Note - could be replaced with the Tika email
* parser. Would require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing text and html
* related parsers.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*/
public class EMLTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
private static final String CHARSET = "charset";
private static final String DEFAULT_ENCODING = "UTF-8";
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
logger.debug("Performing RFC822 to text transform.");
// Use try with resource
try (InputStream contentInputStream = new BufferedInputStream(
new FileInputStream(sourceFile));
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
{
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
contentInputStream);
final StringBuilder sb = new StringBuilder();
Object content = mimeMessage.getContent();
if (content instanceof Multipart)
{
processMultiPart((Multipart) content, sb);
}
else
{
sb.append(content.toString());
}
bufferedFileWriter.write(sb.toString());
}
}
/**
* Find "text" parts of message recursively and appends it to sb StringBuilder
*
* @param multipart Multipart to process
* @param sb StringBuilder
* @throws MessagingException
* @throws IOException
*/
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
IOException
{
boolean isAlternativeMultipart = multipart.getContentType().contains(
MIMETYPE_MULTIPART_ALTERNATIVE);
if (isAlternativeMultipart)
{
processAlternativeMultipart(multipart, sb);
}
else
{
for (int i = 0, n = multipart.getCount(); i < n; i++)
{
Part part = multipart.getBodyPart(i);
if (part.getContent() instanceof Multipart)
{
processMultiPart((Multipart) part.getContent(), sb);
}
else
{
processPart(part, sb);
}
}
}
}
/**
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
*
* @param multipart
* @param sb
* @throws IOException
* @throws MessagingException
*/
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
IOException, MessagingException
{
Part partToUse = null;
for (int i = 0, n = multipart.getCount(); i < n; i++)
{
Part part = multipart.getBodyPart(i);
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
{
partToUse = part;
break;
}
else if (part.getContentType().contains(MIMETYPE_HTML))
{
partToUse = part;
}
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
{
if (part.getContent() instanceof Multipart)
{
processAlternativeMultipart((Multipart) part.getContent(), sb);
}
}
}
if (partToUse != null)
{
processPart(partToUse, sb);
}
}
/**
* Finds text on a given mail part. Accepted parts types are text/html and text/plain.
* Attachments are ignored
*
* @param part
* @param sb
* @throws IOException
* @throws MessagingException
*/
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
{
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
if (isAttachment)
{
return;
}
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
{
sb.append(part.getContent().toString());
}
else if (part.getContentType().contains(MIMETYPE_HTML))
{
String mailPartContent = part.getContent().toString();
//create a temporary html file with same mail part content and encoding
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
".html");
String encoding = getMailPartContentEncoding(part);
try (OutputStreamWriter osWriter = new OutputStreamWriter(
new FileOutputStream(tempHtmlFile), encoding))
{
osWriter.write(mailPartContent);
}
//transform html file's content to plain text
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
extractor.setCollapse(false);
extractor.setLinks(false);
extractor.setReplaceNonBreakingSpaces(false);
extractor.setURL(tempHtmlFile, encoding);
sb.append(extractor.getStrings());
tempHtmlFile.delete();
}
}
private String getMailPartContentEncoding(Part part) throws MessagingException
{
String encoding = DEFAULT_ENCODING;
String contentType = part.getContentType();
int startIndex = contentType.indexOf(CHARSET);
if (startIndex > 0)
{
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
.replaceAll("\"", "");
}
return encoding;
}
}

View File

@@ -1,192 +1,192 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* <p>
* Since HTML Parser was updated from v1.6 to v2.1, META tags
* defining an encoding for the content via http-equiv=Content-Type
* will ONLY be respected if the encoding of the content item
* itself is set to ISO-8859-1.
* </p>
*
* <p>
* Tika Note - could be converted to use the Tika HTML parser,
* but we'd potentially need a custom text handler to replicate
* the current settings around links and non-breaking spaces.
* </p>
*
* @author Derek Hulley
* @author eknizat
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
* @see org.htmlparser.beans.StringBean
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
*/
public class HtmlParserContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
HtmlParserContentTransformer.class);
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
if (logger.isDebugEnabled())
{
logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
}
// Create the extractor
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
extractor.setCollapse(false);
extractor.setLinks(false);
extractor.setReplaceNonBreakingSpaces(false);
extractor.setURL(sourceFile, sourceEncoding);
// get the text
String text = extractor.getStrings();
// write it to the writer
try (Writer writer = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile))))
{
writer.write(text);
}
}
private void checkEncodingParameter(String encoding, String parameterName)
{
try
{
if (encoding != null && !Charset.isSupported(encoding))
{
throw new IllegalArgumentException(
parameterName + "=" + encoding + " is not supported by the JVM.");
}
}
catch (IllegalCharsetNameException e)
{
throw new IllegalArgumentException(
parameterName + "=" + encoding + " is not a valid encoding.");
}
}
/**
* <p>
* This code is based on a class of the same name, originally implemented in alfresco-repository.
* </p>
*
* A version of {@link StringBean} which allows control of the
* encoding in the underlying HTML Parser.
* Unfortunately, StringBean doesn't allow easy over-riding of
* this, so we have to duplicate some code to control this.
* This allows us to correctly handle HTML files where the encoding
* is specified against the content property (rather than in the
* HTML Head Meta), see ALF-10466 for details.
*/
public static class EncodingAwareStringBean extends StringBean
{
private static final long serialVersionUID = -9033414360428669553L;
/**
* Sets the File to extract strings from, and the encoding
* it's in (if known to Alfresco)
*
* @param file The File that text should be fetched from.
* @param encoding The encoding of the input
*/
public void setURL(File file, String encoding)
{
String previousURL = getURL();
String newURL = file.getAbsolutePath();
if (previousURL == null || !newURL.equals(previousURL))
{
try
{
URLConnection conn = getConnection();
if (null == mParser)
{
mParser = new Parser(newURL);
}
else
{
mParser.setURL(newURL);
}
if (encoding != null)
{
mParser.setEncoding(encoding);
}
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
getURL());
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
mParser.getConnection());
setStrings();
}
catch (ParserException pe)
{
updateStrings(pe.toString());
}
}
}
public String getEncoding()
{
return mParser.getEncoding();
}
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* <p>
* Since HTML Parser was updated from v1.6 to v2.1, META tags
* defining an encoding for the content via http-equiv=Content-Type
* will ONLY be respected if the encoding of the content item
* itself is set to ISO-8859-1.
* </p>
*
* <p>
* Tika Note - could be converted to use the Tika HTML parser,
* but we'd potentially need a custom text handler to replicate
* the current settings around links and non-breaking spaces.
* </p>
*
* @author Derek Hulley
* @author eknizat
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
* @see org.htmlparser.beans.StringBean
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
*/
public class HtmlParserContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
HtmlParserContentTransformer.class);
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
if (logger.isDebugEnabled())
{
logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
}
// Create the extractor
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
extractor.setCollapse(false);
extractor.setLinks(false);
extractor.setReplaceNonBreakingSpaces(false);
extractor.setURL(sourceFile, sourceEncoding);
// get the text
String text = extractor.getStrings();
// write it to the writer
try (Writer writer = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile))))
{
writer.write(text);
}
}
private void checkEncodingParameter(String encoding, String parameterName)
{
try
{
if (encoding != null && !Charset.isSupported(encoding))
{
throw new IllegalArgumentException(
parameterName + "=" + encoding + " is not supported by the JVM.");
}
}
catch (IllegalCharsetNameException e)
{
throw new IllegalArgumentException(
parameterName + "=" + encoding + " is not a valid encoding.");
}
}
/**
* <p>
* This code is based on a class of the same name, originally implemented in alfresco-repository.
* </p>
*
* A version of {@link StringBean} which allows control of the
* encoding in the underlying HTML Parser.
* Unfortunately, StringBean doesn't allow easy over-riding of
* this, so we have to duplicate some code to control this.
* This allows us to correctly handle HTML files where the encoding
* is specified against the content property (rather than in the
* HTML Head Meta), see ALF-10466 for details.
*/
public static class EncodingAwareStringBean extends StringBean
{
private static final long serialVersionUID = -9033414360428669553L;
/**
* Sets the File to extract strings from, and the encoding
* it's in (if known to Alfresco)
*
* @param file The File that text should be fetched from.
* @param encoding The encoding of the input
*/
public void setURL(File file, String encoding)
{
String previousURL = getURL();
String newURL = file.getAbsolutePath();
if (previousURL == null || !newURL.equals(previousURL))
{
try
{
URLConnection conn = getConnection();
if (null == mParser)
{
mParser = new Parser(newURL);
}
else
{
mParser.setURL(newURL);
}
if (encoding != null)
{
mParser.setEncoding(encoding);
}
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
getURL());
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
mParser.getConnection());
setStrings();
}
catch (ParserException pe)
{
updateStrings(pe.toString());
}
}
}
public String getEncoding()
{
return mParser.getEncoding();
}
}
}

View File

@@ -1,130 +1,130 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
* This transformer will only work for OOXML files where thumbnailing was enabled,
* which isn't on by default on Windows, but is more common on Mac.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Nick Burch
* @author eknizat
*/
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
OOXMLThumbnailContentTransformer.class);
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
if (logger.isDebugEnabled())
{
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
+ " targetMimetype=" + targetMimetype);
}
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
{
// Does it have a thumbnail?
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
PackageRelationshipTypes.THUMBNAIL);
if (rels.size() > 0)
{
// Get the thumbnail part
PackageRelationship tRel = rels.getRelationship(0);
PackagePart tPart = pkg.getPart(tRel);
// Write it to the target
InputStream tStream = tPart.getInputStream();
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
tStream.close();
}
else
{
logger.debug("No thumbnail present in file.");
throw new Exception(
"No thumbnail present in file, unable to generate " + targetMimetype);
}
}
catch (IOException e)
{
throw new RuntimeException("Unable to transform file.", e);
}
}
/*
// TODO Add this back to engine_config.json when the transformer is fixed for java 11
{
"transformerName": "ooxmlThumbnail",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
],
"transformOptions": [
]
}
*/
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
* This transformer will only work for OOXML files where thumbnailing was enabled,
* which isn't on by default on Windows, but is more common on Mac.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Nick Burch
* @author eknizat
*/
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(
OOXMLThumbnailContentTransformer.class);
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
if (logger.isDebugEnabled())
{
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
+ " targetMimetype=" + targetMimetype);
}
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
{
// Does it have a thumbnail?
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
PackageRelationshipTypes.THUMBNAIL);
if (rels.size() > 0)
{
// Get the thumbnail part
PackageRelationship tRel = rels.getRelationship(0);
PackagePart tPart = pkg.getPart(tRel);
// Write it to the target
InputStream tStream = tPart.getInputStream();
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
tStream.close();
}
else
{
logger.debug("No thumbnail present in file.");
throw new Exception(
"No thumbnail present in file, unable to generate " + targetMimetype);
}
}
catch (IOException e)
{
throw new RuntimeException("Unable to transform file.", e);
}
}
/*
// TODO Add this back to engine_config.json when the transformer is fixed for java 11
{
"transformerName": "ooxmlThumbnail",
"supportedSourceAndTargetList": [
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
],
"transformOptions": [
]
}
*/
}

View File

@@ -1,53 +1,53 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.util.Map;
/**
* Implemented by transformers used by {@link SelectingTransformer}.
*
* @author eknizat
*/
public interface SelectableTransformer
{
default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
File sourceFile, File targetFile) throws Exception
{
}
default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.util.Map;
/**
* Implemented by transformers used by {@link SelectingTransformer}.
*
* @author eknizat
*/
public interface SelectableTransformer
{
default void transform(String sourceMimetype, String targetMimetype, Map<String, String> parameters,
File sourceFile, File targetFile) throws Exception
{
}
default void extractMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
default void embedMetadata(String sourceMimetype, String targetMimetype, Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
}
}

View File

@@ -1,114 +1,114 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* The SelectingTransformer selects a registered {@link SelectableTransformer}
* and delegates the transformation to its implementation.
*
* @author eknizat
*/
public class SelectingTransformer implements Transformer
{
private static final String ID = "misc";
public static final String LICENCE =
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
"Additional libraries used:\n" +
"* htmlparser http://htmlparser.sourceforge.net/license.html";
private final Map<String, SelectableTransformer> transformers = ImmutableMap
.<String, SelectableTransformer>builder()
.put("appleIWorks", new AppleIWorksContentTransformer())
.put("html", new HtmlParserContentTransformer())
.put("string", new StringExtractingContentTransformer())
.put("textToPdf", new TextToPdfContentTransformer())
.put("rfc822", new EMLTransformer())
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
.build();
@Override
public String getTransformerId()
{
return ID;
}
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
{
StringJoiner sj = new StringJoiner(" ");
parameters.forEach((k, v) ->
{
if (!TRANSFORM_NAME_PARAMETER.equals(k))
{
sj.add("--" + k + "=" + v);
}
}); // keeping the existing style used in other T-Engines
sj.add(getExtension(sourceFile));
sj.add(getExtension(targetFile));
LogEntry.setOptions(sj.toString());
}
private static String getExtension(File file)
{
final String name = file.getName();
int i = name.lastIndexOf('.');
return i == -1 ? "???" : name.substring(i + 1);
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
* The SelectingTransformer selects a registered {@link SelectableTransformer}
* and delegates the transformation to its implementation.
*
* @author eknizat
*/
public class SelectingTransformer implements Transformer
{
private static final String ID = "misc";
public static final String LICENCE =
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
"Additional libraries used:\n" +
"* htmlparser http://htmlparser.sourceforge.net/license.html";
private final Map<String, SelectableTransformer> transformers = ImmutableMap
.<String, SelectableTransformer>builder()
.put("appleIWorks", new AppleIWorksContentTransformer())
.put("html", new HtmlParserContentTransformer())
.put("string", new StringExtractingContentTransformer())
.put("textToPdf", new TextToPdfContentTransformer())
.put("rfc822", new EMLTransformer())
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
.build();
@Override
public String getTransformerId()
{
return ID;
}
@Override
public void transform(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
Map<String, String> transformOptions,
File sourceFile, File targetFile) throws Exception
{
final SelectableTransformer transformer = transformers.get(transformName);
logOptions(sourceFile, targetFile, transformOptions);
transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
}
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
{
StringJoiner sj = new StringJoiner(" ");
parameters.forEach((k, v) ->
{
if (!TRANSFORM_NAME_PARAMETER.equals(k))
{
sj.add("--" + k + "=" + v);
}
}); // keeping the existing style used in other T-Engines
sj.add(getExtension(sourceFile));
sj.add(getExtension(targetFile));
LogEntry.setOptions(sj.toString());
}
private static String getExtension(File file)
{
final String name = file.getName();
int i = name.lastIndexOf('.');
return i == -1 ? "???" : name.substring(i + 1);
}
}

View File

@@ -1,158 +1,158 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
/**
* Converts any textual format to plain text.
* <p>
* The transformation is sensitive to the source and target string encodings.
*
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Derek Hulley
* @author eknizat
*/
public class StringExtractingContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
/**
* Text to text conversions are done directly using the content reader and writer string
* manipulation methods.
* <p>
* Extraction of text from binary content attempts to take the possible character
* encoding into account. The text produced from this will, if the encoding was correct,
* be unformatted but valid.
*/
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String targetEncoding = parameters.get(TARGET_ENCODING);
if (logger.isDebugEnabled())
{
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
+ " targetEncoding=" + targetEncoding);
}
Reader charReader = null;
Writer charWriter = null;
try
{
// Build reader
if (sourceEncoding == null)
{
charReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile)));
}
else
{
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
charReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
}
// Build writer
if (targetEncoding == null)
{
charWriter = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile)));
}
else
{
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
charWriter = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
}
// copy from the one to the other
char[] buffer = new char[8192];
int readCount = 0;
while (readCount > -1)
{
// write the last read count number of bytes
charWriter.write(buffer, 0, readCount);
// fill the buffer again
readCount = charReader.read(buffer);
}
}
finally
{
if (charReader != null)
{
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
}
if (charWriter != null)
{
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
}
}
// done
}
private void checkEncodingParameter(String encoding, String paramterName)
{
try
{
if (!Charset.isSupported(encoding))
{
throw new IllegalArgumentException(
paramterName + "=" + encoding + " is not supported by the JVM.");
}
}
catch (IllegalCharsetNameException e)
{
throw new IllegalArgumentException(
paramterName + "=" + encoding + " is not a valid encoding.");
}
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
/**
* Converts any textual format to plain text.
* <p>
* The transformation is sensitive to the source and target string encodings.
*
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Derek Hulley
* @author eknizat
*/
public class StringExtractingContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
/**
* Text to text conversions are done directly using the content reader and writer string
* manipulation methods.
* <p>
* Extraction of text from binary content attempts to take the possible character
* encoding into account. The text produced from this will, if the encoding was correct,
* be unformatted but valid.
*/
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String targetEncoding = parameters.get(TARGET_ENCODING);
if (logger.isDebugEnabled())
{
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
+ " targetEncoding=" + targetEncoding);
}
Reader charReader = null;
Writer charWriter = null;
try
{
// Build reader
if (sourceEncoding == null)
{
charReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile)));
}
else
{
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
charReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
}
// Build writer
if (targetEncoding == null)
{
charWriter = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile)));
}
else
{
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
charWriter = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
}
// copy from the one to the other
char[] buffer = new char[8192];
int readCount = 0;
while (readCount > -1)
{
// write the last read count number of bytes
charWriter.write(buffer, 0, readCount);
// fill the buffer again
readCount = charReader.read(buffer);
}
}
finally
{
if (charReader != null)
{
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
}
if (charWriter != null)
{
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
}
}
// done
}
private void checkEncodingParameter(String encoding, String paramterName)
{
try
{
if (!Charset.isSupported(encoding))
{
throw new IllegalArgumentException(
paramterName + "=" + encoding + " is not supported by the JVM.");
}
}
catch (IllegalCharsetNameException e)
{
throw new IllegalArgumentException(
paramterName + "=" + encoding + " is not a valid encoding.");
}
}
}

View File

@@ -1,448 +1,448 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
*
* @author Derek Hulley
* @author eknizat
*/
public class TextToPdfContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
private static final byte FE = (byte) 0xFE;
private static final byte FF = (byte) 0xFF;
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
private final PagedTextToPDF transformer;
public TextToPdfContentTransformer()
{
transformer = new PagedTextToPDF();
}
public void setStandardFont(String fontName)
{
try
{
transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
}
catch (Throwable e)
{
throw new RuntimeException(
"Unable to set Standard Font for PDF generation: " + fontName, e);
}
}
public void setFontSize(int fontSize)
{
try
{
transformer.setFontSize(fontSize);
}
catch (Throwable e)
{
throw new RuntimeException(
"Unable to set Font Size for PDF generation: " + fontSize);
}
}
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String stringPageLimit = parameters.get(PAGE_LIMIT);
int pageLimit = -1;
if (stringPageLimit != null)
{
pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
}
PDDocument pdf = null;
try (InputStream is = new FileInputStream(sourceFile);
Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
{
//TransformationOptionLimits limits = getLimits(reader, writer, options);
//TransformationOptionPair pageLimits = limits.getPagesPair();
pdf = transformer.createPDFFromText(ir, pageLimit);
pdf.save(os);
}
finally
{
if (pdf != null)
{
try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
}
}
}
protected InputStreamReader buildReader(InputStream is, String encoding)
{
// If they gave an encoding, try to use it
if (encoding != null)
{
Charset charset = null;
try
{
charset = Charset.forName(encoding);
}
catch (Exception e)
{
logger.warn("JVM doesn't understand encoding '" + encoding +
"' when transforming text to pdf");
}
if (charset != null)
{
// Handles the situation where there is a BOM even though the encoding indicates that normally
// there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
// which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
// in the first few character. XML files even when not in European languages tend to have more
// even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
// Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
// other transformers do.
String name = charset.displayName();
if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
{
logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
charset = Charset.forName("UTF-16");
is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
{
boolean bomRead;
boolean switchByteOrder;
boolean evenByte = true;
@Override
public int read(byte[] bytes, int off, int len) throws IOException
{
int i = 0;
int b = 0;
for (; i<len; i++)
{
b = read();
if (b == -1)
{
break;
}
bytes[off+i] = (byte)b;
}
return i == 0 && b == -1 ? -1 : i;
}
@Override
public int read() throws IOException
{
if (!bomRead)
{
bomRead = true;
boolean switchBom = false;
byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
int evenZeros = countZeros(bytes, 0);
int oddZeros = countZeros(bytes, 1);
if (evenZeros > oddZeros)
{
if (bytes[0] == FF && bytes[1] == FE)
{
switchByteOrder = true;
switchBom = true;
logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
}
else
{
logger.debug("More even zero bytes, so normal read for big-endian");
}
}
else
{
if (bytes[0] == FE && bytes[1] == FF)
{
switchBom = true;
logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
}
else
{
switchByteOrder = true;
logger.debug("More odd zero bytes, so switch bytes from little-endian");
}
}
if (switchBom)
{
byte b = bytes[0];
bytes[0] = bytes[1];
bytes[1] = b;
}
for (int i = end-1; i>=0; i--)
{
unread(bytes[i]);
}
}
if (switchByteOrder)
{
if (evenByte)
{
int b1 = super.read();
int b2 = super.read();
if (b1 != -1)
{
unread(b1);
}
if (b2 != -1)
{
unread(b2);
}
}
evenByte = !evenByte;
}
return super.read();
}
// Counts the number of even or odd 00 bytes
private int countZeros(byte[] b, int offset)
{
int count = 0;
for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
{
if (b[i] == 0)
{
count++;
}
}
return count;
}
};
}
logger.debug("Processing plain text in encoding " + name);
return new InputStreamReader(is, charset);
}
}
// Fall back on the system default
logger.debug("Processing plain text using system default encoding");
return new InputStreamReader(is);
}
private static class PagedTextToPDF extends TextToPDF
{
// REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
// before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
static PDType1Font getStandardFont(String name)
{
return STANDARD_14.get(name);
}
private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
static
{
STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
PDType1Font.TIMES_BOLD_ITALIC);
STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
PDType1Font.HELVETICA_OBLIQUE);
STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
PDType1Font.HELVETICA_BOLD_OBLIQUE);
STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
PDType1Font.COURIER_BOLD_OBLIQUE);
STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
}
//duplicating until here
// The following code is based on the code in TextToPDF with the addition of
// checks for page limits.
// The calling code must close the PDDocument once finished with it.
public PDDocument createPDFFromText(Reader text, int pageLimit)
throws IOException
{
PDDocument doc = null;
int pageCount = 0;
try
{
final int margin = 40;
float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
//calculate font height and increase by 5 percent.
height = height * getFontSize() * 1.05f;
doc = new PDDocument();
BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
String nextLine;
PDPage page = new PDPage();
PDPageContentStream contentStream = null;
float y = -1;
float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
// There is a special case of creating a PDF document from an empty string.
boolean textIsEmpty = true;
outer:
while ((nextLine = data.readLine()) != null)
{
// The input text is nonEmpty. New pages will be created and added
// to the PDF document as they are needed, depending on the length of
// the text.
textIsEmpty = false;
String[] lineWords = nextLine.trim().split(" ");
int lineIndex = 0;
while (lineIndex < lineWords.length)
{
final StringBuilder nextLineToDraw = new StringBuilder();
float lengthIfUsingNextWord = 0;
do
{
nextLineToDraw.append(lineWords[lineIndex]);
nextLineToDraw.append(" ");
lineIndex++;
if (lineIndex < lineWords.length)
{
String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
lengthIfUsingNextWord =
(getFont().getStringWidth(
lineWithNextWord) / 1000) * getFontSize();
}
}
while (lineIndex < lineWords.length &&
lengthIfUsingNextWord < maxStringLength);
if (y < margin)
{
int test = pageCount + 1;
if (pageLimit > 0 && (pageCount++ >= pageLimit))
{
break outer;
}
// We have crossed the end-of-page boundary and need to extend the
// document by another page.
page = new PDPage();
doc.addPage(page);
if (contentStream != null)
{
contentStream.endText();
contentStream.close();
}
contentStream = new PDPageContentStream(doc, page);
contentStream.setFont(getFont(), getFontSize());
contentStream.beginText();
y = page.getMediaBox().getHeight() - margin + height;
contentStream.moveTextPositionByAmount(margin, y);
}
if (contentStream == null)
{
throw new IOException("Error:Expected non-null content stream.");
}
contentStream.moveTextPositionByAmount(0, -height);
y -= height;
contentStream.drawString(nextLineToDraw.toString());
}
}
// If the input text was the empty string, then the above while loop will have short-circuited
// and we will not have added any PDPages to the document.
// So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
if (textIsEmpty)
{
doc.addPage(page);
}
if (contentStream != null)
{
contentStream.endText();
contentStream.close();
}
}
catch (IOException io)
{
if (doc != null)
{
doc.close();
}
throw io;
}
return doc;
}
}
private int parseInt(String s, String paramName)
{
try
{
return Integer.valueOf(s);
}
catch (NumberFormatException e)
{
throw new IllegalArgumentException(paramName + " parameter must be an integer.");
}
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
*
* @author Derek Hulley
* @author eknizat
*/
public class TextToPdfContentTransformer implements SelectableTransformer
{
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
private static final byte FE = (byte) 0xFE;
private static final byte FF = (byte) 0xFF;
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
private final PagedTextToPDF transformer;
public TextToPdfContentTransformer()
{
transformer = new PagedTextToPDF();
}
public void setStandardFont(String fontName)
{
try
{
transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
}
catch (Throwable e)
{
throw new RuntimeException(
"Unable to set Standard Font for PDF generation: " + fontName, e);
}
}
public void setFontSize(int fontSize)
{
try
{
transformer.setFontSize(fontSize);
}
catch (Throwable e)
{
throw new RuntimeException(
"Unable to set Font Size for PDF generation: " + fontSize);
}
}
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
final File sourceFile, final File targetFile) throws Exception
{
String sourceEncoding = parameters.get(SOURCE_ENCODING);
String stringPageLimit = parameters.get(PAGE_LIMIT);
int pageLimit = -1;
if (stringPageLimit != null)
{
pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
}
PDDocument pdf = null;
try (InputStream is = new FileInputStream(sourceFile);
Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
{
//TransformationOptionLimits limits = getLimits(reader, writer, options);
//TransformationOptionPair pageLimits = limits.getPagesPair();
pdf = transformer.createPDFFromText(ir, pageLimit);
pdf.save(os);
}
finally
{
if (pdf != null)
{
try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
}
}
}
protected InputStreamReader buildReader(InputStream is, String encoding)
{
// If they gave an encoding, try to use it
if (encoding != null)
{
Charset charset = null;
try
{
charset = Charset.forName(encoding);
}
catch (Exception e)
{
logger.warn("JVM doesn't understand encoding '" + encoding +
"' when transforming text to pdf");
}
if (charset != null)
{
// Handles the situation where there is a BOM even though the encoding indicates that normally
// there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
// which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
// in the first few character. XML files even when not in European languages tend to have more
// even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
// Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
// other transformers do.
String name = charset.displayName();
if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
{
logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
charset = Charset.forName("UTF-16");
is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
{
boolean bomRead;
boolean switchByteOrder;
boolean evenByte = true;
@Override
public int read(byte[] bytes, int off, int len) throws IOException
{
int i = 0;
int b = 0;
for (; i<len; i++)
{
b = read();
if (b == -1)
{
break;
}
bytes[off+i] = (byte)b;
}
return i == 0 && b == -1 ? -1 : i;
}
@Override
public int read() throws IOException
{
if (!bomRead)
{
bomRead = true;
boolean switchBom = false;
byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
int evenZeros = countZeros(bytes, 0);
int oddZeros = countZeros(bytes, 1);
if (evenZeros > oddZeros)
{
if (bytes[0] == FF && bytes[1] == FE)
{
switchByteOrder = true;
switchBom = true;
logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
}
else
{
logger.debug("More even zero bytes, so normal read for big-endian");
}
}
else
{
if (bytes[0] == FE && bytes[1] == FF)
{
switchBom = true;
logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
}
else
{
switchByteOrder = true;
logger.debug("More odd zero bytes, so switch bytes from little-endian");
}
}
if (switchBom)
{
byte b = bytes[0];
bytes[0] = bytes[1];
bytes[1] = b;
}
for (int i = end-1; i>=0; i--)
{
unread(bytes[i]);
}
}
if (switchByteOrder)
{
if (evenByte)
{
int b1 = super.read();
int b2 = super.read();
if (b1 != -1)
{
unread(b1);
}
if (b2 != -1)
{
unread(b2);
}
}
evenByte = !evenByte;
}
return super.read();
}
// Counts the number of even or odd 00 bytes
private int countZeros(byte[] b, int offset)
{
int count = 0;
for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
{
if (b[i] == 0)
{
count++;
}
}
return count;
}
};
}
logger.debug("Processing plain text in encoding " + name);
return new InputStreamReader(is, charset);
}
}
// Fall back on the system default
logger.debug("Processing plain text using system default encoding");
return new InputStreamReader(is);
}
private static class PagedTextToPDF extends TextToPDF
{
// REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
// before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
static PDType1Font getStandardFont(String name)
{
return STANDARD_14.get(name);
}
private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
static
{
STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
PDType1Font.TIMES_BOLD_ITALIC);
STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
PDType1Font.HELVETICA_OBLIQUE);
STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
PDType1Font.HELVETICA_BOLD_OBLIQUE);
STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
PDType1Font.COURIER_BOLD_OBLIQUE);
STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
}
//duplicating until here
// The following code is based on the code in TextToPDF with the addition of
// checks for page limits.
// The calling code must close the PDDocument once finished with it.
public PDDocument createPDFFromText(Reader text, int pageLimit)
throws IOException
{
PDDocument doc = null;
int pageCount = 0;
try
{
final int margin = 40;
float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
//calculate font height and increase by 5 percent.
height = height * getFontSize() * 1.05f;
doc = new PDDocument();
BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
String nextLine;
PDPage page = new PDPage();
PDPageContentStream contentStream = null;
float y = -1;
float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
// There is a special case of creating a PDF document from an empty string.
boolean textIsEmpty = true;
outer:
while ((nextLine = data.readLine()) != null)
{
// The input text is nonEmpty. New pages will be created and added
// to the PDF document as they are needed, depending on the length of
// the text.
textIsEmpty = false;
String[] lineWords = nextLine.trim().split(" ");
int lineIndex = 0;
while (lineIndex < lineWords.length)
{
final StringBuilder nextLineToDraw = new StringBuilder();
float lengthIfUsingNextWord = 0;
do
{
nextLineToDraw.append(lineWords[lineIndex]);
nextLineToDraw.append(" ");
lineIndex++;
if (lineIndex < lineWords.length)
{
String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
lengthIfUsingNextWord =
(getFont().getStringWidth(
lineWithNextWord) / 1000) * getFontSize();
}
}
while (lineIndex < lineWords.length &&
lengthIfUsingNextWord < maxStringLength);
if (y < margin)
{
int test = pageCount + 1;
if (pageLimit > 0 && (pageCount++ >= pageLimit))
{
break outer;
}
// We have crossed the end-of-page boundary and need to extend the
// document by another page.
page = new PDPage();
doc.addPage(page);
if (contentStream != null)
{
contentStream.endText();
contentStream.close();
}
contentStream = new PDPageContentStream(doc, page);
contentStream.setFont(getFont(), getFontSize());
contentStream.beginText();
y = page.getMediaBox().getHeight() - margin + height;
contentStream.moveTextPositionByAmount(margin, y);
}
if (contentStream == null)
{
throw new IOException("Error:Expected non-null content stream.");
}
contentStream.moveTextPositionByAmount(0, -height);
y -= height;
contentStream.drawString(nextLineToDraw.toString());
}
}
// If the input text was the empty string, then the above while loop will have short-circuited
// and we will not have added any PDPages to the document.
// So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
if (textIsEmpty)
{
doc.addPage(page);
}
if (contentStream != null)
{
contentStream.endText();
contentStream.close();
}
}
catch (IOException io)
{
if (doc != null)
{
doc.close();
}
throw io;
}
return doc;
}
}
private int parseInt(String s, String paramName)
{
try
{
return Integer.valueOf(s);
}
catch (NumberFormatException e)
{
throw new IllegalArgumentException(paramName + " parameter must be an integer.");
}
}
}

View File

@@ -1,12 +1,12 @@
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,162 +1,162 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class HtmlParserContentTransformerTest
{
private static final String SOURCE_MIMETYPE = "text/html";
private static final String TARGET_MIMETYPE = "text/plain";
HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
/**
* Checks that we correctly handle text in different encodings,
* no matter if the encoding is specified on the Content Property
* or in a meta tag within the HTML itself. (ALF-10466)
*
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
* so we must be careful when checking the returned text
*/
@Test
public void testEncodingHandling() throws Exception
{
final String NEWLINE = System.getProperty("line.separator");
final String TITLE = "Testing!";
final String TEXT_P1 = "This is some text in English";
final String TEXT_P2 = "This is more text in English";
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
String partC = "</body></html>";
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
File tmpS = null;
File tmpD = null;
try
{
// Content set to ISO 8859-1
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Content set to UTF-8
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "UTF-8");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Content set to UTF-16
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "UTF-16");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-16");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Note - since HTML Parser 2.0 META tags specifying the
// document encoding will ONLY be respected if the original
// content type was set to ISO-8859-1.
//
// This means there is now only one test which we can perform
// to ensure that this now-limited overriding of the encoding
// takes effect.
// Content set to ISO 8859-1, meta set to UTF-8
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
String str = partA +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
partB + partC;
writeToFile(tmpS, str, "UTF-8");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Note - we can't test UTF-16 with only a meta encoding,
// because without that the parser won't know about the
// 2 byte format so won't be able to identify the meta tag
}
finally
{
if (tmpS != null && tmpS.exists()) tmpS.delete();
if (tmpD != null && tmpD.exists()) tmpD.delete();
}
}
private void writeToFile(File file, String content, String encoding) throws Exception
{
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
{
ow.append(content);
}
}
private String readFromFile(File file, final String encoding) throws Exception
{
return new String(Files.readAllBytes(file.toPath()), encoding);
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class HtmlParserContentTransformerTest
{
private static final String SOURCE_MIMETYPE = "text/html";
private static final String TARGET_MIMETYPE = "text/plain";
HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
/**
* Checks that we correctly handle text in different encodings,
* no matter if the encoding is specified on the Content Property
* or in a meta tag within the HTML itself. (ALF-10466)
*
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
* so we must be careful when checking the returned text
*/
@Test
public void testEncodingHandling() throws Exception
{
final String NEWLINE = System.getProperty("line.separator");
final String TITLE = "Testing!";
final String TEXT_P1 = "This is some text in English";
final String TEXT_P2 = "This is more text in English";
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
String partC = "</body></html>";
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
File tmpS = null;
File tmpD = null;
try
{
// Content set to ISO 8859-1
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
Map<String, String> parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Content set to UTF-8
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "UTF-8");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-8");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Content set to UTF-16
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
writeToFile(tmpS, partA + partB + partC, "UTF-16");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "UTF-16");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Note - since HTML Parser 2.0 META tags specifying the
// document encoding will ONLY be respected if the original
// content type was set to ISO-8859-1.
//
// This means there is now only one test which we can perform
// to ensure that this now-limited overriding of the encoding
// takes effect.
// Content set to ISO 8859-1, meta set to UTF-8
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
String str = partA +
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
partB + partC;
writeToFile(tmpS, str, "UTF-8");
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
parameters = new HashMap<>();
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
tmpS.delete();
tmpD.delete();
// Note - we can't test UTF-16 with only a meta encoding,
// because without that the parser won't know about the
// 2 byte format so won't be able to identify the meta tag
}
finally
{
if (tmpS != null && tmpS.exists()) tmpS.delete();
if (tmpD != null && tmpD.exists()) tmpD.delete();
}
}
private void writeToFile(File file, String content, String encoding) throws Exception
{
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
{
ow.append(content);
}
}
private String readFromFile(File file, final String encoding) throws Exception
{
return new String(Files.readAllBytes(file.toPath()), encoding);
}
}

View File

@@ -1,127 +1,127 @@
The "Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people involved,
and so on. (You will not be required to justify it to the
Copyright Holder, but only to the computing community at large
as a market that must bear the fee.)
"Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the item.
It also means that recipients of the item may redistribute it
under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A Package
modified in such a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site such as uunet.uu.net, or by allowing the Copyright Holder to include
your modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial) software
distribution provided that you do not advertise this Package as a
product of your own. You may embed this Package's interpreter within
an executable of yours (by linking); this shall be construed as a mere
form of aggregation, provided that the complete Standard Version of the
interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of this Package with a commercial distribution is always
permitted provided that the use of this Package is embedded; that is,
when no overt attempt is made to make this Package's interfaces visible
to the end user of the commercial distribution. Such use shall not be
construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End
The "Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people involved,
and so on. (You will not be required to justify it to the
Copyright Holder, but only to the computing community at large
as a market that must bear the fee.)
"Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the item.
It also means that recipients of the item may redistribute it
under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A Package
modified in such a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site such as uunet.uu.net, or by allowing the Copyright Holder to include
your modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial) software
distribution provided that you do not advertise this Package as a
product of your own. You may embed this Package's interpreter within
an executable of yours (by linking); this shall be construed as a mere
form of aggregation, provided that the complete Standard Version of the
interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of this Package with a commercial distribution is always
permitted provided that the use of this Package is embedded; that is,
when no overt attempt is made to make this Package's interfaces visible
to the end user of the commercial distribution. Such use shall not be
construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End

View File

@@ -1,158 +1,158 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.alfresco.transformer.tika.parsers.ExifToolParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);
private static Set<String> IPTC_DATE_KEYS = Set.of("XMP-photoshop:DateCreated", "XMP-iptcExt:ArtworkDateCreated");
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
private ExifToolParser parser;
public IPTCMetadataExtractor()
{
super(logger);
}
@Override
protected Parser getParser()
{
if (this.parser == null) {
this.parser = new ExifToolParser();
}
return this.parser;
}
/**
* Because some of the mimetypes that IPTCMetadataExtractor now parse, were previously handled
* by TikaAutoMetadataExtractor we call the TikaAutoMetadataExtractor.extractSpecific method to
* ensure that the returned properties contains the expected entries.
*/
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties,
Map<String, String> headers)
{
properties = new TikaAutoMetadataExtractor().extractSpecific(metadata, properties, headers);
ExifToolParser etParser = (ExifToolParser)this.getParser();
if (etParser.getSeparator()!=null)
{
for (String key : properties.keySet())
{
if (properties.get(key) instanceof String)
{
String value = (String) properties.get(key);
String separator = etParser.getSeparator();
if (value.contains(separator))
{
if (value.contains(String.format("\"%s\"",separator)))
{
separator = String.format("\"%s\"",separator);
}
String [] values = StringUtils.splitByWholeSeparator(value, separator);
// Change dateTime format. MM converted ':' to '-'
if (IPTC_DATE_KEYS.contains(key)){
values = iptcToIso8601DateStrings(values);
}
putRawValue(key, (Serializable) Arrays.asList(values), properties);
}
else if (IPTC_DATE_KEYS.contains(key)) {
// Handle property with a single date string
putRawValue(key, (Serializable) iptcToIso8601DateString(value), properties);
}
}
}
}
return properties;
}
/**
* Converts a date or date time strings into Iso8601 format <p>
*
* @param dateStrings
* @return dateStrings in Iso8601 format
* @see #iptcToIso8601DateString
*/
protected String[] iptcToIso8601DateStrings(String[] dateStrings)
{
for (int i = 0; i < dateStrings.length; i++)
{
dateStrings[i] = iptcToIso8601DateString(dateStrings[i]);
}
return dateStrings;
}
/**
* Converts a date or date time string into Iso8601 format <p>
* Converts any ':' in the year portion of a date string characters to '-'. <p>
* Expects the year in the format YYYY:MM:DD or YYYY-MM-DD <p>
* Will add the correct delimiter, 'T', to any dateTime strings, where | can be any char other than ,'T':
* YYYY:MM:DD|HH:mm:ss.... or YYYY-MM-DD|HH:mm:ss....
* <p>
* Examples: <p><ul>
* <li>"1919:10:16" will convert to "1919-10-16"</li>
* <li>"1901:02:01 00:00:00.000Z" will convert to "1901-02-01T00:00:00.000Z"</li>
* <li>"2001:02:01 16:15+00:00" will convert to "2001-02-01T16:15+00:00"</li>
* <li>"2021-06-11 05:36-01:00" will convert to "2021-06-11T05:36-01:00"</li>
* </ul>
* @param dateStr
* @return dateStr in Iso8601 format
*/
protected String iptcToIso8601DateString(String dateStr)
{
char timeSeparator = 'T';
Matcher yearMatcher = YEAR_IPTC.matcher(dateStr);
if (yearMatcher.find())
{
String year = yearMatcher.group(1);
dateStr = yearMatcher.replaceFirst(year.replaceAll(":", "-"));
if (dateStr.length()>year.length() && dateStr.charAt(year.length())!=timeSeparator)
{
dateStr = dateStr.replace(dateStr.charAt(year.length()), timeSeparator);
}
}
return dateStr;
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.metadataExtractors;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.alfresco.transformer.tika.parsers.ExifToolParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class IPTCMetadataExtractor extends AbstractTikaMetadataExtractor
{
private static final Logger logger = LoggerFactory.getLogger(IPTCMetadataExtractor.class);
private static Set<String> IPTC_DATE_KEYS = Set.of("XMP-photoshop:DateCreated", "XMP-iptcExt:ArtworkDateCreated");
private static final Pattern YEAR_IPTC = Pattern.compile("(\\d{4}[:|-]\\d{2}[:|-]\\d{2})");
private ExifToolParser parser;
public IPTCMetadataExtractor()
{
super(logger);
}
@Override
protected Parser getParser()
{
if (this.parser == null) {
this.parser = new ExifToolParser();
}
return this.parser;
}
/**
* Because some of the mimetypes that IPTCMetadataExtractor now parse, were previously handled
* by TikaAutoMetadataExtractor we call the TikaAutoMetadataExtractor.extractSpecific method to
* ensure that the returned properties contains the expected entries.
*/
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties,
Map<String, String> headers)
{
properties = new TikaAutoMetadataExtractor().extractSpecific(metadata, properties, headers);
ExifToolParser etParser = (ExifToolParser)this.getParser();
if (etParser.getSeparator()!=null)
{
for (String key : properties.keySet())
{
if (properties.get(key) instanceof String)
{
String value = (String) properties.get(key);
String separator = etParser.getSeparator();
if (value.contains(separator))
{
if (value.contains(String.format("\"%s\"",separator)))
{
separator = String.format("\"%s\"",separator);
}
String [] values = StringUtils.splitByWholeSeparator(value, separator);
// Change dateTime format. MM converted ':' to '-'
if (IPTC_DATE_KEYS.contains(key)){
values = iptcToIso8601DateStrings(values);
}
putRawValue(key, (Serializable) Arrays.asList(values), properties);
}
else if (IPTC_DATE_KEYS.contains(key)) {
// Handle property with a single date string
putRawValue(key, (Serializable) iptcToIso8601DateString(value), properties);
}
}
}
}
return properties;
}
/**
* Converts a date or date time strings into Iso8601 format <p>
*
* @param dateStrings
* @return dateStrings in Iso8601 format
* @see #iptcToIso8601DateString
*/
protected String[] iptcToIso8601DateStrings(String[] dateStrings)
{
for (int i = 0; i < dateStrings.length; i++)
{
dateStrings[i] = iptcToIso8601DateString(dateStrings[i]);
}
return dateStrings;
}
/**
* Converts a date or date time string into Iso8601 format <p>
* Converts any ':' in the year portion of a date string characters to '-'. <p>
* Expects the year in the format YYYY:MM:DD or YYYY-MM-DD <p>
* Will add the correct delimiter, 'T', to any dateTime strings, where | can be any char other than ,'T':
* YYYY:MM:DD|HH:mm:ss.... or YYYY-MM-DD|HH:mm:ss....
* <p>
* Examples: <p><ul>
* <li>"1919:10:16" will convert to "1919-10-16"</li>
* <li>"1901:02:01 00:00:00.000Z" will convert to "1901-02-01T00:00:00.000Z"</li>
* <li>"2001:02:01 16:15+00:00" will convert to "2001-02-01T16:15+00:00"</li>
* <li>"2021-06-11 05:36-01:00" will convert to "2021-06-11T05:36-01:00"</li>
* </ul>
* @param dateStr
* @return dateStr in Iso8601 format
*/
protected String iptcToIso8601DateString(String dateStr)
{
char timeSeparator = 'T';
Matcher yearMatcher = YEAR_IPTC.matcher(dateStr);
if (yearMatcher.find())
{
String year = yearMatcher.group(1);
dateStr = yearMatcher.replaceFirst(year.replaceAll(":", "-"));
if (dateStr.length()>year.length() && dateStr.charAt(year.length())!=timeSeparator)
{
dateStr = dateStr.replace(dateStr.charAt(year.length()), timeSeparator);
}
}
return dateStr;
}
}

View File

@@ -1,372 +1,372 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_TIFF;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.external.ExternalParsersFactory;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.image.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class ExifToolParser extends ExternalParser {
private static final Logger logger = LoggerFactory.getLogger(ExifToolParser.class);
private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";
protected static final String DEFAULT_SEPARATOR = ", ";
protected static final String SEPARATOR_SETTING = "-sep";
private String separator;
public ExifToolParser() {
super();
try {
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
// if ExifTool is not installed then no parsers are returned
if (eParsers.size() > 0) {
ExternalParser eParser = eParsers.get(0);
this.setCommand(eParser.getCommand());
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
this.setSupportedTypes(eParser.getSupportedTypes());
} else {
logger.error(
"Error creating ExifToolParser from config, ExifToolExtractions not enabled. Please check ExifTool is installed correctly.");
}
} catch (IOException | TikaException e) {
logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: ", e);
}
}
private URL getExternalParserConfigURL(){
ClassLoader classLoader = ExifToolParser.class.getClassLoader();
return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
}
public void setSeparator(String sep) {
this.separator = sep;
}
public String getSeparator() {
return this.separator;
}
@Override
public void setCommand(String... command){
super.setCommand(command);
if (command.length==1) {
setSeparator(findSeparator(command[0]));
}
else {
setSeparator(DEFAULT_SEPARATOR);
}
}
protected String findSeparator(String command) {
if (command.contains(SEPARATOR_SETTING)) {
int start = command.indexOf(SEPARATOR_SETTING)+SEPARATOR_SETTING.length()+1;
String separator = DEFAULT_SEPARATOR;
if (command.charAt(start)=='\"') {
//get all chars up to the next \"
int end = command.indexOf("\"", start+1);
separator = command.substring(start+1, end);
}
else {
int end = command.indexOf(" ", start);
separator = command.substring(start, end);
}
return separator;
}
return DEFAULT_SEPARATOR;
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}
* due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
* Executes the configured external command and passes the given document
* stream as a simple XHTML document to the given SAX content handler.
* Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
* has been called to set patterns.
*/
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
MediaType mediaType = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
TemporaryResources tmp = new TemporaryResources();
try {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
if (this.getSupportedTypes().contains(mediaType)) {
parse(tis, xhtml, metadata, tmp);
}
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
case MIMETYPE_IMAGE_JPEG:
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
break;
case MIMETYPE_IMAGE_TIFF:
parseAdditional(new TiffParser(), tis, handler, metadata, context, mediaType);
break;
default:
parseAdditional(new ImageParser(), tis, handler, metadata, context, mediaType);
}
} finally {
tmp.dispose();
}
}
private void parseAdditional(Parser parser, TikaInputStream tis, ContentHandler handler, Metadata metadata, ParseContext context,
MediaType mediaType) throws IOException, SAXException, TikaException {
if (parser.getSupportedTypes(context).contains(mediaType)) {
parser.parse(tis, handler, metadata, context);
}
}
private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp)
throws IOException, SAXException, TikaException {
boolean inputToStdIn = true;
boolean outputFromStdOut = true;
boolean hasPatterns = (getMetadataExtractionPatterns() != null && !getMetadataExtractionPatterns().isEmpty());
File output = null;
// Build our getCommand()
String[] cmd;
if (getCommand().length == 1) {
cmd = getCommand()[0].split(" ");
} else {
cmd = new String[getCommand().length];
System.arraycopy(getCommand(), 0, cmd, 0, getCommand().length);
}
for (int i = 0; i < cmd.length; i++) {
if (cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
inputToStdIn = false;
}
if (cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
output = tmp.createTemporaryFile();
outputFromStdOut = false;
cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
}
}
// Execute
Process process = null;
try {
if (cmd.length == 1) {
process = Runtime.getRuntime().exec(cmd[0]);
} else {
process = Runtime.getRuntime().exec(cmd);
}
} catch (Exception e) {
e.printStackTrace();
}
try {
if (inputToStdIn) {
sendInput(process, stream);
} else {
process.getOutputStream().close();
}
InputStream out = process.getInputStream();
InputStream err = process.getErrorStream();
if (hasPatterns) {
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
extractMetadata(out, metadata);
}
} else {
ignoreStream(err);
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
ignoreStream(out);
}
}
} finally {
try {
process.waitFor();
} catch (InterruptedException ignore) {
}
}
// Grab the output if we haven't already
if (!outputFromStdOut) {
extractOutput(new FileInputStream(output), xhtml);
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that extracts the contents of the standard output
* stream of the given process to the given XHTML content handler.
* The standard output stream is closed once fully processed.
*
* @param stream stream
* @param xhtml XHTML content handler
* @throws SAXException if the XHTML SAX events could not be handled
* @throws IOException if an input error occurred
*/
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
try (Reader reader = new InputStreamReader(stream, UTF_8)) {
xhtml.startDocument();
xhtml.startElement("p");
char[] buffer = new char[1024];
for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
xhtml.characters(buffer, 0, n);
}
xhtml.endElement("p");
xhtml.endDocument();
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that sends the contents of the given input stream
* to the standard input stream of the given process. Potential
* exceptions are ignored, and the standard input stream is closed
* once fully processed. Note that the given input stream is <em>not</em>
* closed by this method.
*
* @param process process
* @param stream input stream
*/
private void sendInput(final Process process, final InputStream stream) {
Thread t = new Thread() {
public void run() {
OutputStream stdin = process.getOutputStream();
try {
IOUtils.copy(stream, stdin);
} catch (IOException e) {
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that reads and discards the contents of the
* standard stream of the given process. Potential exceptions
* are ignored, and the stream is closed once fully processed.
*
* @param stream stream
*/
private void ignoreStream(final InputStream stream) {
Thread t = new Thread() {
public void run() {
try {
IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
} catch (IOException e) {
} finally {
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
private void extractMetadata(final InputStream stream, final Metadata metadata) {
Thread t = new Thread() {
public void run() {
BufferedReader reader;
reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
try {
String line;
while ((line = reader.readLine()) != null) {
for (Pattern p : getMetadataExtractionPatterns().keySet()) {
Matcher m = p.matcher(line);
if (m.find()) {
if (getMetadataExtractionPatterns().get(p) != null
&& !getMetadataExtractionPatterns().get(p).equals("")) {
metadata.add(getMetadataExtractionPatterns().get(p), m.group(1));
} else {
metadata.add(m.group(1), m.group(2));
}
}
}
}
} catch (IOException e) {
// Ignore
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_TIFF;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.external.ExternalParsersFactory;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.image.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class ExifToolParser extends ExternalParser {
private static final Logger logger = LoggerFactory.getLogger(ExifToolParser.class);
private static final String EXIFTOOL_PARSER_CONFIG = "parsers/external/config/exiftool-parser.xml";
protected static final String DEFAULT_SEPARATOR = ", ";
protected static final String SEPARATOR_SETTING = "-sep";
private String separator;
public ExifToolParser() {
super();
try {
List<ExternalParser> eParsers = ExternalParsersFactory.create(getExternalParserConfigURL());
// if ExifTool is not installed then no parsers are returned
if (eParsers.size() > 0) {
ExternalParser eParser = eParsers.get(0);
this.setCommand(eParser.getCommand());
this.setIgnoredLineConsumer(eParser.getIgnoredLineConsumer());
this.setMetadataExtractionPatterns(eParser.getMetadataExtractionPatterns());
this.setSupportedTypes(eParser.getSupportedTypes());
} else {
logger.error(
"Error creating ExifToolParser from config, ExifToolExtractions not enabled. Please check ExifTool is installed correctly.");
}
} catch (IOException | TikaException e) {
logger.error("Error creating ExifToolParser from config, ExifToolExtractions not enabled: ", e);
}
}
private URL getExternalParserConfigURL(){
ClassLoader classLoader = ExifToolParser.class.getClassLoader();
return classLoader.getResource(EXIFTOOL_PARSER_CONFIG);
}
public void setSeparator(String sep) {
this.separator = sep;
}
public String getSeparator() {
return this.separator;
}
@Override
public void setCommand(String... command){
super.setCommand(command);
if (command.length==1) {
setSeparator(findSeparator(command[0]));
}
else {
setSeparator(DEFAULT_SEPARATOR);
}
}
protected String findSeparator(String command) {
if (command.contains(SEPARATOR_SETTING)) {
int start = command.indexOf(SEPARATOR_SETTING)+SEPARATOR_SETTING.length()+1;
String separator = DEFAULT_SEPARATOR;
if (command.charAt(start)=='\"') {
//get all chars up to the next \"
int end = command.indexOf("\"", start+1);
separator = command.substring(start+1, end);
}
else {
int end = command.indexOf(" ", start);
separator = command.substring(start, end);
}
return separator;
}
return DEFAULT_SEPARATOR;
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}
* due to errors attempting to {@link #extractMetadata} from the errorStream in original implementation. <p>
* Executes the configured external command and passes the given document
* stream as a simple XHTML document to the given SAX content handler.
* Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
* has been called to set patterns.
*/
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
MediaType mediaType = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
TemporaryResources tmp = new TemporaryResources();
try {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
if (this.getSupportedTypes().contains(mediaType)) {
parse(tis, xhtml, metadata, tmp);
}
switch (mediaType.getType()+"/"+mediaType.getSubtype()) {
case MIMETYPE_IMAGE_JPEG:
parseAdditional(new JpegParser(), tis, handler, metadata, context, mediaType);
break;
case MIMETYPE_IMAGE_TIFF:
parseAdditional(new TiffParser(), tis, handler, metadata, context, mediaType);
break;
default:
parseAdditional(new ImageParser(), tis, handler, metadata, context, mediaType);
}
} finally {
tmp.dispose();
}
}
private void parseAdditional(Parser parser, TikaInputStream tis, ContentHandler handler, Metadata metadata, ParseContext context,
MediaType mediaType) throws IOException, SAXException, TikaException {
if (parser.getSupportedTypes(context).contains(mediaType)) {
parser.parse(tis, handler, metadata, context);
}
}
private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp)
throws IOException, SAXException, TikaException {
boolean inputToStdIn = true;
boolean outputFromStdOut = true;
boolean hasPatterns = (getMetadataExtractionPatterns() != null && !getMetadataExtractionPatterns().isEmpty());
File output = null;
// Build our getCommand()
String[] cmd;
if (getCommand().length == 1) {
cmd = getCommand()[0].split(" ");
} else {
cmd = new String[getCommand().length];
System.arraycopy(getCommand(), 0, cmd, 0, getCommand().length);
}
for (int i = 0; i < cmd.length; i++) {
if (cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
inputToStdIn = false;
}
if (cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
output = tmp.createTemporaryFile();
outputFromStdOut = false;
cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
}
}
// Execute
Process process = null;
try {
if (cmd.length == 1) {
process = Runtime.getRuntime().exec(cmd[0]);
} else {
process = Runtime.getRuntime().exec(cmd);
}
} catch (Exception e) {
e.printStackTrace();
}
try {
if (inputToStdIn) {
sendInput(process, stream);
} else {
process.getOutputStream().close();
}
InputStream out = process.getInputStream();
InputStream err = process.getErrorStream();
if (hasPatterns) {
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
extractMetadata(out, metadata);
}
} else {
ignoreStream(err);
if (outputFromStdOut) {
extractOutput(out, xhtml);
} else {
ignoreStream(out);
}
}
} finally {
try {
process.waitFor();
} catch (InterruptedException ignore) {
}
}
// Grab the output if we haven't already
if (!outputFromStdOut) {
extractOutput(new FileInputStream(output), xhtml);
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that extracts the contents of the standard output
* stream of the given process to the given XHTML content handler.
* The standard output stream is closed once fully processed.
*
* @param stream stream
* @param xhtml XHTML content handler
* @throws SAXException if the XHTML SAX events could not be handled
* @throws IOException if an input error occurred
*/
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
try (Reader reader = new InputStreamReader(stream, UTF_8)) {
xhtml.startDocument();
xhtml.startElement("p");
char[] buffer = new char[1024];
for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
xhtml.characters(buffer, 0, n);
}
xhtml.endElement("p");
xhtml.endDocument();
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that sends the contents of the given input stream
* to the standard input stream of the given process. Potential
* exceptions are ignored, and the standard input stream is closed
* once fully processed. Note that the given input stream is <em>not</em>
* closed by this method.
*
* @param process process
* @param stream input stream
*/
private void sendInput(final Process process, final InputStream stream) {
Thread t = new Thread() {
public void run() {
OutputStream stdin = process.getOutputStream();
try {
IOUtils.copy(stream, stdin);
} catch (IOException e) {
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
/**
* Adapted from {@link org.apache.tika.parser.external.ExternalParser}<p>
* Starts a thread that reads and discards the contents of the
* standard stream of the given process. Potential exceptions
* are ignored, and the stream is closed once fully processed.
*
* @param stream stream
*/
private void ignoreStream(final InputStream stream) {
Thread t = new Thread() {
public void run() {
try {
IOUtils.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM);
} catch (IOException e) {
} finally {
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
private void extractMetadata(final InputStream stream, final Metadata metadata) {
Thread t = new Thread() {
public void run() {
BufferedReader reader;
reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
try {
String line;
while ((line = reader.readLine()) != null) {
for (Pattern p : getMetadataExtractionPatterns().keySet()) {
Matcher m = p.matcher(line);
if (m.find()) {
if (getMetadataExtractionPatterns().get(p) != null
&& !getMetadataExtractionPatterns().get(p).equals("")) {
metadata.add(getMetadataExtractionPatterns().get(p), m.group(1));
} else {
metadata.add(m.group(1), m.group(2));
}
}
}
}
} catch (IOException e) {
// Ignore
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(stream);
}
}
};
t.start();
try {
t.join();
} catch (InterruptedException ignore) {
}
}
}

View File

@@ -1,12 +1,12 @@
#
# DWGMetadataExtracter - default mapping
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
#
# DWGMetadataExtracter - default mapping
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,30 +1,30 @@
#
# MP3MetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor
#
# MP3MetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,14 +1,14 @@
#
# MailMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description
#
# MailMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
sentDate=cm:sentdate
originator=cm:originator, cm:author
addressee=cm:addressee
addressees=cm:addressees
subjectLine=cm:subjectline, cm:description

View File

@@ -1,14 +1,14 @@
#
# OfficeMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified
#
# OfficeMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified

View File

@@ -1,21 +1,21 @@
#
# OpenDocumentMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title
#
# OpenDocumentMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title

View File

@@ -1,13 +1,13 @@
#
# PdfBoxMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
created=cm:created
#
# PdfBoxMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
subject=cm:description
created=cm:created

View File

@@ -1,13 +1,13 @@
#
# PoiMetadataExtracter - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
#
# PoiMetadataExtracter - default mapping
#
# author: Neil McErlean
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created

View File

@@ -1,34 +1,34 @@
#
# TikaAudioMetadataExtracter - audio mapping
#
# This is used to map from the Tika audio metadata onto your
# content model. This will be used for any Audio content
# for which an explicit extractor isn't defined
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor
#
# TikaAudioMetadataExtracter - audio mapping
#
# This is used to map from the Tika audio metadata onto your
# content model. This will be used for any Audio content
# for which an explicit extractor isn't defined
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Core mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
# Audio descriptive mappings
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
# Audio specific mappings
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,52 +1,52 @@
#
# TikaAutoMetadataExtracter - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This will be used for any
# content for which an explicit extractor isn't defined,
# by using Tika's auto-selection facilities.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor
#
# TikaAutoMetadataExtracter - default mapping
#
# This is used to map from the Tika and standard namespaces
# onto your content model. This will be used for any
# content for which an explicit extractor isn't defined,
# by using Tika's auto-selection facilities.
#
# author: Nick Burch
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.exif=http://www.alfresco.org/model/exif/1.0
namespace.prefix.audio=http://www.alfresco.org/model/audio/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
created=cm:created
geo\:lat=cm:latitude
geo\:long=cm:longitude
tiff\:ImageWidth=exif:pixelXDimension
tiff\:ImageLength=exif:pixelYDimension
tiff\:Make=exif:manufacturer
tiff\:Model=exif:model
tiff\:Software=exif:software
tiff\:Orientation=exif:orientation
tiff\:XResolution=exif:xResolution
tiff\:YResolution=exif:yResolution
tiff\:ResolutionUnit=exif:resolutionUnit
exif\:Flash=exif:flash
exif\:ExposureTime=exif:exposureTime
exif\:FNumber=exif:fNumber
exif\:FocalLength=exif:focalLength
exif\:IsoSpeedRatings=exif:isoSpeedRatings
exif\:DateTimeOriginal=exif:dateTimeOriginal
xmpDM\:album=audio:album
xmpDM\:artist=audio:artist
xmpDM\:composer=audio:composer
xmpDM\:engineer=audio:engineer
xmpDM\:genre=audio:genre
xmpDM\:trackNumber=audio:trackNumber
xmpDM\:releaseDate=audio:releaseDate
#xmpDM:logComment
xmpDM\:audioSampleRate=audio:sampleRate
xmpDM\:audioSampleType=audio:sampleType
xmpDM\:audioChannelType=audio:channelType
xmpDM\:audioCompressor=audio:compressor

View File

@@ -1,35 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<external-parsers>
<parser>
<check>
<command>exiftool -ver</command>
<error-codes>126,127</error-codes>
</check>
<command>env FOO=${OUTPUT} exiftool -args -G1 -sep "|||" ${INPUT}</command>
<mime-types>
<mime-type>image/x-raw-hasselblad</mime-type>
<mime-type>image/x-raw-sony</mime-type>
<mime-type>image/x-raw-canon</mime-type>
<mime-type>image/x-raw-adobe</mime-type>
<mime-type>image/gif</mime-type>
<mime-type>image/jp2</mime-type>
<mime-type>image/jpeg</mime-type>
<mime-type>image/x-raw-kodak</mime-type>
<mime-type>image/x-raw-minolta</mime-type>
<mime-type>image/x-raw-nikon</mime-type>
<mime-type>image/x-raw-olympus</mime-type>
<mime-type>image/x-raw-pentax</mime-type>
<mime-type>image/png</mime-type>
<mime-type>image/x-raw-fuji</mime-type>
<mime-type>image/x-raw-panasonic</mime-type>
<mime-type>image/tiff</mime-type>
<mime-type>image/webp</mime-type>
</mime-types>
<metadata>
<!-- Default output-->
<match>\s*([A-Za-z0-9/ \(\)]+\S{1})\s+:\s+([A-Za-z0-9\(\)\[\] \:\-\.]+)\s*</match>
<!-- args format-->
<match>^-([\S]+)\=(.*)</match>
</metadata>
</parser>
</external-parsers>
<?xml version="1.0" encoding="UTF-8"?>
<external-parsers>
<parser>
<check>
<command>exiftool -ver</command>
<error-codes>126,127</error-codes>
</check>
<command>env FOO=${OUTPUT} exiftool -args -G1 -sep "|||" ${INPUT}</command>
<mime-types>
<mime-type>image/x-raw-hasselblad</mime-type>
<mime-type>image/x-raw-sony</mime-type>
<mime-type>image/x-raw-canon</mime-type>
<mime-type>image/x-raw-adobe</mime-type>
<mime-type>image/gif</mime-type>
<mime-type>image/jp2</mime-type>
<mime-type>image/jpeg</mime-type>
<mime-type>image/x-raw-kodak</mime-type>
<mime-type>image/x-raw-minolta</mime-type>
<mime-type>image/x-raw-nikon</mime-type>
<mime-type>image/x-raw-olympus</mime-type>
<mime-type>image/x-raw-pentax</mime-type>
<mime-type>image/png</mime-type>
<mime-type>image/x-raw-fuji</mime-type>
<mime-type>image/x-raw-panasonic</mime-type>
<mime-type>image/tiff</mime-type>
<mime-type>image/webp</mime-type>
</mime-types>
<metadata>
<!-- Default output-->
<match>\s*([A-Za-z0-9/ \(\)]+\S{1})\s+:\s+([A-Za-z0-9\(\)\[\] \:\-\.]+)\s*</match>
<!-- args format-->
<match>^-([\S]+)\=(.*)</match>
</metadata>
</parser>
</external-parsers>

View File

@@ -1,126 +1,126 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.clearInvocations;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
@ExtendWith(MockitoExtension.class)
public class TikaJavaExecutorTest {
@Test
public void testNotExtractBookmarkTextDefault() throws Exception
{
TikaJavaExecutor executorSpyDefaultTrue = spy(new TikaJavaExecutor(true));
TikaJavaExecutor executorSpyDefaultFalse = spy(new TikaJavaExecutor(false));
File mockSourceFile = mock(File.class);
File mockTargetFile = mock(File.class);
String transformName = "transformName";
String sourceMimetype = "sourceMimetype";
String targetMimetype = "targetMimetype";
String defaultEncoding = "UTF-8";
// no need to continue execution passed here or check values as we're checking the correct params passed to this method later.
lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any());
lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any());
Map<String, String> transformOptions = new HashMap<String,String>();
// use empty transformOptions to test defaults
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to true
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("notExtractBookmarksText", "true");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to false
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.replace("notExtractBookmarksText", "true", "false");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use full set of pdfbox transformOptions just to be safe
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("targetEncoding", "anyEncoding");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT but the encoding will change
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.executors;
import static org.alfresco.transformer.executors.Tika.NOT_EXTRACT_BOOKMARKS_TEXT;
import static org.alfresco.transformer.executors.Tika.TARGET_ENCODING;
import static org.alfresco.transformer.executors.Tika.TARGET_MIMETYPE;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.clearInvocations;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
@ExtendWith(MockitoExtension.class)
public class TikaJavaExecutorTest {
@Test
public void testNotExtractBookmarkTextDefault() throws Exception
{
TikaJavaExecutor executorSpyDefaultTrue = spy(new TikaJavaExecutor(true));
TikaJavaExecutor executorSpyDefaultFalse = spy(new TikaJavaExecutor(false));
File mockSourceFile = mock(File.class);
File mockTargetFile = mock(File.class);
String transformName = "transformName";
String sourceMimetype = "sourceMimetype";
String targetMimetype = "targetMimetype";
String defaultEncoding = "UTF-8";
// no need to continue execution passed here or check values as we're checking the correct params passed to this method later.
lenient().doNothing().when(executorSpyDefaultTrue).call(any(), any(), any(), any(), any(), any(), any());
lenient().doNothing().when(executorSpyDefaultFalse).call(any(), any(), any(), any(), any(), any(), any());
Map<String, String> transformOptions = new HashMap<String,String>();
// use empty transformOptions to test defaults
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// when default set to true, with no options passed we should get a call method with NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// when default set to false, with no options passed we should get a call method without NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to true
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("notExtractBookmarksText", "true");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions,
mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null,
NOT_EXTRACT_BOOKMARKS_TEXT, TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use transforms with notExtractBookmarksText set to false
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.replace("notExtractBookmarksText", "true", "false");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + defaultEncoding);
// use full set of pdfbox transformOptions just to be safe
clearInvocations(executorSpyDefaultTrue, executorSpyDefaultFalse);
transformOptions.put("targetEncoding", "anyEncoding");
executorSpyDefaultTrue.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
executorSpyDefaultFalse.transform(transformName, sourceMimetype, targetMimetype, transformOptions, mockSourceFile, mockTargetFile);
// both call methods should have NOT_EXTRACT_BOOKMARKS_TEXT but the encoding will change
verify(executorSpyDefaultTrue, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
verify(executorSpyDefaultFalse, times(1)).call(mockSourceFile, mockTargetFile, transformName, null, null,
TARGET_MIMETYPE + targetMimetype, TARGET_ENCODING + "anyEncoding");
}
}

View File

@@ -1,59 +1,59 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
public class ExifToolParserTest {
ExifToolParser exifToolParser = new ExifToolParser();
@Test
public void testFindSeparator() {
String testCommand = "env FOO=${OUTPUT} exiftool -args -G1 " + ExifToolParser.SEPARATOR_SETTING
+ " \"|||\" ${INPUT}";
String expected = "|||";
String actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
expected = "TESTWITHOUTQUOTES";
testCommand = "nothing matters until the " + ExifToolParser.SEPARATOR_SETTING + " " + expected
+ " now all this extra should be ignored";
actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
expected = "Test something bonkers 112!£$%^£$^";
testCommand = ExifToolParser.SEPARATOR_SETTING + " \""+expected+"\"";
actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.tika.parsers;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
public class ExifToolParserTest {
ExifToolParser exifToolParser = new ExifToolParser();
@Test
public void testFindSeparator() {
String testCommand = "env FOO=${OUTPUT} exiftool -args -G1 " + ExifToolParser.SEPARATOR_SETTING
+ " \"|||\" ${INPUT}";
String expected = "|||";
String actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
expected = "TESTWITHOUTQUOTES";
testCommand = "nothing matters until the " + ExifToolParser.SEPARATOR_SETTING + " " + expected
+ " now all this extra should be ignored";
actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
expected = "Test something bonkers 112!£$%^£$^";
testCommand = ExifToolParser.SEPARATOR_SETTING + " \""+expected+"\"";
actual = exifToolParser.findSeparator(testCommand);
assertEquals(expected, actual);
}
}

View File

@@ -1,50 +1,50 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.util;
/**
* Extends the list of transform options with historic request parameters or 'extra' parameters used in testing
* or communication in the all-in-one transformer.
*/
public interface RequestParamMap extends org.alfresco.transform.client.util.RequestParamMap
{
// This property can be sent by acs repository's legacy transformers to force a transform,
// instead of letting this T-Engine determine it based on the request parameters.
// This allows clients to specify transform names as they appear in the engine config files, for example:
// imagemagick, libreoffice, PdfBox, TikaAuto, ....
// See ATS-731.
@Deprecated
String TRANSFORM_NAME_PROPERTY = "transformName";
String TRANSFORM_NAME_PARAMETER = "alfresco.transform-name-parameter";
String FILE = "file";
String SOURCE_EXTENSION = "sourceExtension";
String SOURCE_MIMETYPE = "sourceMimetype";
String TARGET_EXTENSION = "targetExtension";
String TARGET_MIMETYPE = "targetMimetype";
String TEST_DELAY = "testDelay";
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.util;
/**
* Extends the list of transform options with historic request parameters or 'extra' parameters used in testing
* or communication in the all-in-one transformer.
*/
public interface RequestParamMap extends org.alfresco.transform.client.util.RequestParamMap
{
// This property can be sent by acs repository's legacy transformers to force a transform,
// instead of letting this T-Engine determine it based on the request parameters.
// This allows clients to specify transform names as they appear in the engine config files, for example:
// imagemagick, libreoffice, PdfBox, TikaAuto, ....
// See ATS-731.
@Deprecated
String TRANSFORM_NAME_PROPERTY = "transformName";
String TRANSFORM_NAME_PARAMETER = "alfresco.transform-name-parameter";
String FILE = "file";
String SOURCE_EXTENSION = "sourceExtension";
String SOURCE_MIMETYPE = "sourceMimetype";
String TARGET_EXTENSION = "targetExtension";
String TARGET_MIMETYPE = "targetMimetype";
String TEST_DELAY = "testDelay";
}