mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-07-31 17:38:33 +00:00
Fix/mnt 25089 html transformations with ootb aio create extra whitespace (#1079)
This commit is contained in:
@@ -40,6 +40,7 @@ The following externalized T-engines properties are available:
|
|||||||
| FILE_STORE_URL | T-Engine Port. | http://localhost:8099/alfresco/api/-default-/private/sfs/versions/1/file |
|
| FILE_STORE_URL | T-Engine Port. | http://localhost:8099/alfresco/api/-default-/private/sfs/versions/1/file |
|
||||||
| TRANSFORM_ENGINE_REQUEST_QUEUE | T-Engine queue used for async requests. | org.alfresco.transform.engine.misc.acs |
|
| TRANSFORM_ENGINE_REQUEST_QUEUE | T-Engine queue used for async requests. | org.alfresco.transform.engine.misc.acs |
|
||||||
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |
|
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |
|
||||||
|
| MISC_HTML_COLLAPSE | HTML collapsing option for HTML to TXT transformation | true |
|
||||||
|
|
||||||
## Libreoffice
|
## Libreoffice
|
||||||
| Property | Description | Default value |
|
| Property | Description | Default value |
|
||||||
@@ -99,3 +100,4 @@ The following externalized T-engines properties are available:
|
|||||||
| IMAGEMAGICK_CODERS | Path to Imagemagick custom coders. | |
|
| IMAGEMAGICK_CODERS | Path to Imagemagick custom coders. | |
|
||||||
| IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. | |
|
| IMAGEMAGICK_CONFIG | Path to Imagemagick custom config. | |
|
||||||
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |
|
| MISC_PDFBOX_DEFAULT_FONT | Default font used by PdfBox | NotoSans-Regular |
|
||||||
|
| MISC_HTML_COLLAPSE | HTML collapsing option for HTML to TXT transformation explicitly for Misc Engine | true |
|
@@ -27,3 +27,5 @@ transform:
|
|||||||
misc:
|
misc:
|
||||||
pdfBox:
|
pdfBox:
|
||||||
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
|
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
|
||||||
|
htmlOptions:
|
||||||
|
collapseHtml: ${MISC_HTML_COLLAPSE:true}
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,9 +26,18 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.aio;
|
package org.alfresco.transform.aio;
|
||||||
|
|
||||||
import org.alfresco.transform.base.AbstractBaseTest;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import org.alfresco.transform.base.TransformController;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import org.alfresco.transform.config.TransformConfig;
|
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||||
|
|
||||||
|
import static org.alfresco.transform.base.TransformControllerTest.getLogMessagesFor;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.*;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.util.StringJoiner;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@@ -36,20 +45,12 @@ import org.springframework.http.ResponseEntity;
|
|||||||
import org.springframework.mock.web.MockMultipartFile;
|
import org.springframework.mock.web.MockMultipartFile;
|
||||||
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
||||||
|
|
||||||
import java.nio.file.Files;
|
import org.alfresco.transform.base.AbstractBaseTest;
|
||||||
import java.util.StringJoiner;
|
import org.alfresco.transform.base.TransformController;
|
||||||
|
import org.alfresco.transform.config.TransformConfig;
|
||||||
import static org.alfresco.transform.base.TransformControllerTest.getLogMessagesFor;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.CONFIG_VERSION_DEFAULT;
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.CONFIG_VERSION_LATEST;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test All-In-One.
|
* Test All-In-One
|
||||||
*/
|
*/
|
||||||
public class AIOTest extends AbstractBaseTest
|
public class AIOTest extends AbstractBaseTest
|
||||||
{
|
{
|
||||||
@@ -66,7 +67,7 @@ public class AIOTest extends AbstractBaseTest
|
|||||||
expectedOptions = null;
|
expectedOptions = null;
|
||||||
expectedSourceSuffix = null;
|
expectedSourceSuffix = null;
|
||||||
sourceFileBytes = readTestFile(sourceExtension);
|
sourceFileBytes = readTestFile(sourceExtension);
|
||||||
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick2." + targetExtension, true).toPath());
|
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick3." + targetExtension, true).toPath());
|
||||||
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
|
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +76,9 @@ public class AIOTest extends AbstractBaseTest
|
|||||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
||||||
{
|
{
|
||||||
return super.mockMvcRequest(url, sourceFile, params)
|
return super.mockMvcRequest(url, sourceFile, params)
|
||||||
.param("targetMimetype", targetMimetype)
|
.param("targetMimetype", targetMimetype)
|
||||||
.param("sourceMimetype", sourceMimetype);
|
.param("sourceMimetype", sourceMimetype)
|
||||||
|
.param(HTML_COLLAPSE, "true");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -107,20 +109,20 @@ public class AIOTest extends AbstractBaseTest
|
|||||||
controller.startup();
|
controller.startup();
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
"--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
||||||
+ "If the Alfresco software was purchased under a paid Alfresco license, the terms of the paid license agreement \n"
|
+ "If the Alfresco software was purchased under a paid Alfresco license, the terms of the paid license agreement \n"
|
||||||
+ "will prevail. Otherwise, the software is provided under terms of the GNU LGPL v3 license. \n"
|
+ "will prevail. Otherwise, the software is provided under terms of the GNU LGPL v3 license. \n"
|
||||||
+ "See the license at http://www.gnu.org/licenses/lgpl-3.0.txt. or in /LICENSE.txt \n"
|
+ "See the license at http://www.gnu.org/licenses/lgpl-3.0.txt. or in /LICENSE.txt \n"
|
||||||
+ "\n"
|
+ "\n"
|
||||||
+ "This transformer uses ImageMagick from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt\n"
|
+ "This transformer uses ImageMagick from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt\n"
|
||||||
+ "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt\n"
|
+ "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt\n"
|
||||||
+ "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\n"
|
+ "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\n"
|
||||||
+ "This transformer uses htmlparser. See the license at http://htmlparser.sourceforge.net/license.html\n"
|
+ "This transformer uses htmlparser. See the license at http://htmlparser.sourceforge.net/license.html\n"
|
||||||
+ "This transformer uses alfresco-pdf-renderer which uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt\n"
|
+ "This transformer uses alfresco-pdf-renderer which uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt\n"
|
||||||
+ "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n"
|
+ "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n"
|
||||||
+ "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt\n"
|
+ "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt\n"
|
||||||
+ "--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
+ "--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
||||||
+ "Starting application components... Done",
|
+ "Starting application components... Done",
|
||||||
controllerLogMessages.toString());
|
controllerLogMessages.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,12 +26,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.aio;
|
package org.alfresco.transform.aio;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import org.alfresco.transform.tika.TikaTest;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
|
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import org.alfresco.transform.tika.TikaTest;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Tika functionality in All-In-One.
|
* Test Tika functionality in All-In-One.
|
||||||
@@ -73,8 +75,8 @@ public class AIOTikaTest extends TikaTest
|
|||||||
"thumbnail",
|
"thumbnail",
|
||||||
"width",
|
"width",
|
||||||
"pdfFont",
|
"pdfFont",
|
||||||
"pdfFontSize"
|
"pdfFontSize",
|
||||||
),
|
"collapseHtml"),
|
||||||
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
|
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,21 +26,22 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.misc;
|
package org.alfresco.transform.misc;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
|
||||||
import org.alfresco.transform.base.TransformEngine;
|
|
||||||
import org.alfresco.transform.base.probes.ProbeTransform;
|
|
||||||
import org.alfresco.transform.config.reader.TransformConfigResourceReader;
|
|
||||||
import org.alfresco.transform.config.TransformConfig;
|
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import static org.alfresco.transform.base.logging.StandardMessages.COMMUNITY_LICENCE;
|
import static org.alfresco.transform.base.logging.StandardMessages.COMMUNITY_LICENCE;
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import org.alfresco.transform.base.TransformEngine;
|
||||||
|
import org.alfresco.transform.base.probes.ProbeTransform;
|
||||||
|
import org.alfresco.transform.config.TransformConfig;
|
||||||
|
import org.alfresco.transform.config.reader.TransformConfigResourceReader;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
public class MiscTransformEngine implements TransformEngine
|
public class MiscTransformEngine implements TransformEngine
|
||||||
{
|
{
|
||||||
@@ -74,6 +75,6 @@ public class MiscTransformEngine implements TransformEngine
|
|||||||
public ProbeTransform getProbeTransform()
|
public ProbeTransform getProbeTransform()
|
||||||
{
|
{
|
||||||
return new ProbeTransform("probe.html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, transformOptions,
|
return new ProbeTransform("probe.html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, transformOptions,
|
||||||
119, 30, 150, 1024, 60 * 2 + 1, 60 * 2);
|
107, 30, 150, 1024, 60 * 2 + 1, 60 * 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,14 +26,8 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.misc.transformers;
|
package org.alfresco.transform.misc.transformers;
|
||||||
|
|
||||||
import org.alfresco.transform.base.TransformManager;
|
import static org.alfresco.transform.common.RequestParamMap.HTML_COLLAPSE;
|
||||||
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||||
import org.htmlparser.Parser;
|
|
||||||
import org.htmlparser.beans.StringBean;
|
|
||||||
import org.htmlparser.util.ParserException;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
import java.io.BufferedWriter;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@@ -45,27 +39,30 @@ import java.nio.charset.Charset;
|
|||||||
import java.nio.charset.IllegalCharsetNameException;
|
import java.nio.charset.IllegalCharsetNameException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
import org.htmlparser.Parser;
|
||||||
|
import org.htmlparser.beans.StringBean;
|
||||||
|
import org.htmlparser.util.ParserException;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import org.alfresco.transform.base.TransformManager;
|
||||||
|
import org.alfresco.transform.base.util.CustomTransformerFileAdaptor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Content transformer which wraps the HTML Parser library for
|
* Content transformer which wraps the HTML Parser library for parsing HTML content.
|
||||||
* parsing HTML content.
|
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Since HTML Parser was updated from v1.6 to v2.1, META tags
|
* Since HTML Parser was updated from v1.6 to v2.1, META tags defining an encoding for the content via http-equiv=Content-Type will ONLY be respected if the encoding of the content item itself is set to ISO-8859-1.
|
||||||
* defining an encoding for the content via http-equiv=Content-Type
|
|
||||||
* will ONLY be respected if the encoding of the content item
|
|
||||||
* itself is set to ISO-8859-1.
|
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Tika Note - could be converted to use the Tika HTML parser,
|
* Tika Note - could be converted to use the Tika HTML parser, but we'd potentially need a custom text handler to replicate the current settings around links and non-breaking spaces.
|
||||||
* but we'd potentially need a custom text handler to replicate
|
|
||||||
* the current settings around links and non-breaking spaces.
|
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @author Derek Hulley
|
* @author Derek Hulley
|
||||||
@@ -78,7 +75,10 @@ import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
|||||||
public class HtmlParserContentTransformer implements CustomTransformerFileAdaptor
|
public class HtmlParserContentTransformer implements CustomTransformerFileAdaptor
|
||||||
{
|
{
|
||||||
private static final Logger logger = LoggerFactory.getLogger(
|
private static final Logger logger = LoggerFactory.getLogger(
|
||||||
HtmlParserContentTransformer.class);
|
HtmlParserContentTransformer.class);
|
||||||
|
|
||||||
|
@Value("${transform.core.misc.htmlOptions.collapseHtml:true}")
|
||||||
|
private String collapseOptionDefault;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getTransformerName()
|
public String getTransformerName()
|
||||||
@@ -88,11 +88,28 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void transform(final String sourceMimetype, final String targetMimetype,
|
public void transform(final String sourceMimetype, final String targetMimetype,
|
||||||
final Map<String, String> transformOptions,
|
final Map<String, String> transformOptions,
|
||||||
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
final File sourceFile, final File targetFile, TransformManager transformManager) throws Exception
|
||||||
{
|
{
|
||||||
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
String sourceEncoding = transformOptions.get(SOURCE_ENCODING);
|
||||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||||
|
boolean collapse;
|
||||||
|
|
||||||
|
var collapseOption = transformOptions.get(HTML_COLLAPSE);
|
||||||
|
// If the collapse option is set, use it, otherwise use the default value
|
||||||
|
if (collapseOption != null && (collapseOption.trim().equalsIgnoreCase("true") || collapseOption.trim().equalsIgnoreCase("false")))
|
||||||
|
{
|
||||||
|
collapse = Boolean.parseBoolean(collapseOption);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Use the default value from the configuration
|
||||||
|
collapse = collapseOptionDefault == null || Boolean.parseBoolean(collapseOptionDefault);
|
||||||
|
if (logger.isDebugEnabled())
|
||||||
|
{
|
||||||
|
logger.debug("Using default html collapse option: " + collapseOptionDefault);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (logger.isDebugEnabled())
|
if (logger.isDebugEnabled())
|
||||||
{
|
{
|
||||||
@@ -101,7 +118,7 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
|
|
||||||
// Create the extractor
|
// Create the extractor
|
||||||
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
|
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
|
||||||
extractor.setCollapse(false);
|
extractor.setCollapse(collapse);
|
||||||
extractor.setLinks(false);
|
extractor.setLinks(false);
|
||||||
extractor.setReplaceNonBreakingSpaces(false);
|
extractor.setReplaceNonBreakingSpaces(false);
|
||||||
extractor.setURL(sourceFile, sourceEncoding);
|
extractor.setURL(sourceFile, sourceEncoding);
|
||||||
@@ -110,7 +127,7 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
|
|
||||||
// write it to the writer
|
// write it to the writer
|
||||||
try (Writer writer = new BufferedWriter(
|
try (Writer writer = new BufferedWriter(
|
||||||
new OutputStreamWriter(new FileOutputStream(targetFile))))
|
new OutputStreamWriter(new FileOutputStream(targetFile))))
|
||||||
{
|
{
|
||||||
writer.write(text);
|
writer.write(text);
|
||||||
}
|
}
|
||||||
@@ -123,13 +140,13 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
if (encoding != null && !Charset.isSupported(encoding))
|
if (encoding != null && !Charset.isSupported(encoding))
|
||||||
{
|
{
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
parameterName + "=" + encoding + " is not supported by the JVM.");
|
parameterName + "=" + encoding + " is not supported by the JVM.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (IllegalCharsetNameException e)
|
catch (IllegalCharsetNameException e)
|
||||||
{
|
{
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
parameterName + "=" + encoding + " is not a valid encoding.");
|
parameterName + "=" + encoding + " is not a valid encoding.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,24 +155,19 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
* This code is based on a class of the same name, originally implemented in alfresco-repository.
|
* This code is based on a class of the same name, originally implemented in alfresco-repository.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* A version of {@link StringBean} which allows control of the
|
* A version of {@link StringBean} which allows control of the encoding in the underlying HTML Parser. Unfortunately, StringBean doesn't allow easy over-riding of this, so we have to duplicate some code to control this. This allows us to correctly handle HTML files where the encoding is specified against the content property (rather than in the HTML Head Meta), see ALF-10466 for details.
|
||||||
* encoding in the underlying HTML Parser.
|
|
||||||
* Unfortunately, StringBean doesn't allow easy over-riding of
|
|
||||||
* this, so we have to duplicate some code to control this.
|
|
||||||
* This allows us to correctly handle HTML files where the encoding
|
|
||||||
* is specified against the content property (rather than in the
|
|
||||||
* HTML Head Meta), see ALF-10466 for details.
|
|
||||||
*/
|
*/
|
||||||
public static class EncodingAwareStringBean extends StringBean
|
public static class EncodingAwareStringBean extends StringBean
|
||||||
{
|
{
|
||||||
private static final long serialVersionUID = -9033414360428669553L;
|
private static final long serialVersionUID = -9033414360428669553L;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the File to extract strings from, and the encoding
|
* Sets the File to extract strings from, and the encoding it's in (if known to Alfresco)
|
||||||
* it's in (if known to Alfresco)
|
|
||||||
*
|
*
|
||||||
* @param file The File that text should be fetched from.
|
* @param file
|
||||||
* @param encoding The encoding of the input
|
* The File that text should be fetched from.
|
||||||
|
* @param encoding
|
||||||
|
* The encoding of the input
|
||||||
*/
|
*/
|
||||||
public void setURL(File file, String encoding)
|
public void setURL(File file, String encoding)
|
||||||
{
|
{
|
||||||
@@ -183,9 +195,9 @@ public class HtmlParserContentTransformer implements CustomTransformerFileAdapto
|
|||||||
}
|
}
|
||||||
|
|
||||||
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
|
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
|
||||||
getURL());
|
getURL());
|
||||||
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
|
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
|
||||||
mParser.getConnection());
|
mParser.getConnection());
|
||||||
setStrings();
|
setStrings();
|
||||||
}
|
}
|
||||||
catch (ParserException pe)
|
catch (ParserException pe)
|
||||||
|
@@ -5,3 +5,5 @@ transform:
|
|||||||
misc:
|
misc:
|
||||||
pdfBox:
|
pdfBox:
|
||||||
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
|
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
|
||||||
|
htmlOptions:
|
||||||
|
collapseHtml: ${MISC_HTML_COLLAPSE:true}
|
||||||
|
@@ -1,5 +1,8 @@
|
|||||||
{
|
{
|
||||||
"transformOptions": {
|
"transformOptions": {
|
||||||
|
"htmlOptions": [
|
||||||
|
{"value": {"name": "collapseHtml"}}
|
||||||
|
],
|
||||||
"textToPdfOptions": [
|
"textToPdfOptions": [
|
||||||
{"value": {"name": "pageLimit"}},
|
{"value": {"name": "pageLimit"}},
|
||||||
{"value": {"name": "pdfFont"}},
|
{"value": {"name": "pdfFont"}},
|
||||||
@@ -24,8 +27,7 @@
|
|||||||
"supportedSourceAndTargetList": [
|
"supportedSourceAndTargetList": [
|
||||||
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
|
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": ["htmlOptions"]
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"transformerName": "string",
|
"transformerName": "string",
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,7 +26,30 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.misc;
|
package org.alfresco.transform.misc;
|
||||||
|
|
||||||
import org.alfresco.transform.base.AbstractBaseTest;
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
||||||
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||||
|
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_KEYNOTE;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_NUMBERS;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_RFC822;
|
||||||
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.HTML_COLLAPSE;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.SOURCE_MIMETYPE;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.TARGET_MIMETYPE;
|
||||||
|
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.text.PDFTextStripper;
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
@@ -35,35 +58,15 @@ import org.springframework.mock.web.MockMultipartFile;
|
|||||||
import org.springframework.test.web.servlet.MvcResult;
|
import org.springframework.test.web.servlet.MvcResult;
|
||||||
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
||||||
|
|
||||||
import java.io.StringWriter;
|
import org.alfresco.transform.base.AbstractBaseTest;
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
|
|
||||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IMAGE_JPEG;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_KEYNOTE;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_IWORK_NUMBERS;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_OPENXML_WORDPROCESSING;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_PDF;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_RFC822;
|
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.ENDPOINT_TRANSFORM;
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_MIMETYPE;
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.TARGET_MIMETYPE;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Misc. Includes calling the 3rd party libraries.
|
* Test Misc. Includes calling the 3rd party libraries.
|
||||||
*/
|
*/
|
||||||
public class MiscTest extends AbstractBaseTest
|
public class MiscTest extends AbstractBaseTest
|
||||||
{
|
{
|
||||||
protected final String sourceEncoding = "UTF-8";
|
protected static final String sourceEncoding = "UTF-8";
|
||||||
protected final String targetEncoding = "UTF-8";
|
protected static final String targetEncoding = "UTF-8";
|
||||||
protected final String targetMimetype = MIMETYPE_TEXT_PLAIN;
|
protected final String targetMimetype = MIMETYPE_TEXT_PLAIN;
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
@@ -75,7 +78,7 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
expectedOptions = null;
|
expectedOptions = null;
|
||||||
expectedSourceSuffix = null;
|
expectedSourceSuffix = null;
|
||||||
sourceFileBytes = readTestFile(sourceExtension);
|
sourceFileBytes = readTestFile(sourceExtension);
|
||||||
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick2." + targetExtension, true).toPath());
|
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick3." + targetExtension, true).toPath());
|
||||||
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
|
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -83,9 +86,10 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
||||||
{
|
{
|
||||||
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
|
final MockHttpServletRequestBuilder builder = super.mockMvcRequest(url, sourceFile, params)
|
||||||
.param("sourceEncoding", sourceEncoding)
|
.param("sourceEncoding", sourceEncoding)
|
||||||
.param("targetMimetype", targetMimetype)
|
.param("targetMimetype", targetMimetype)
|
||||||
.param("sourceMimetype", sourceMimetype);
|
.param("sourceMimetype", sourceMimetype)
|
||||||
|
.param(HTML_COLLAPSE, "true");
|
||||||
|
|
||||||
// Only the 'string' transformer should have the targetEncoding.
|
// Only the 'string' transformer should have the targetEncoding.
|
||||||
if (!"message/rfc822".equals(sourceMimetype) && !"text/html".equals(sourceMimetype))
|
if (!"message/rfc822".equals(sourceMimetype) && !"text/html".equals(sourceMimetype))
|
||||||
@@ -103,16 +107,16 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
String expected = "Gym class featuring a brown fox and lazy dog";
|
String expected = "Gym class featuring a brown fox and lazy dog";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("eml"));
|
readTestFile("eml"));
|
||||||
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
||||||
"Content from eml transform didn't contain expected value. ");
|
"Content from eml transform didn't contain expected value. ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -123,17 +127,17 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
String expected = "El r\u00E1pido zorro marr\u00F3n salta sobre el perro perezoso";
|
String expected = "El r\u00E1pido zorro marr\u00F3n salta sobre el perro perezoso";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null, readTestFile("spanish.eml"));
|
null, readTestFile("spanish.eml"));
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
|
String contentResult = new String(result.getResponse().getContentAsByteArray(), UTF_8);
|
||||||
assertTrue(contentResult.contains(expected),
|
assertTrue(contentResult.contains(expected),
|
||||||
"Content from eml transform didn't contain expected value. ");
|
"Content from eml transform didn't contain expected value. ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -145,16 +149,16 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
String expected = "Mail with attachment content";
|
String expected = "Mail with attachment content";
|
||||||
String notExpected = "File attachment content";
|
String notExpected = "File attachment content";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("attachment.eml"));
|
readTestFile("attachment.eml"));
|
||||||
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
||||||
"Content from eml transform didn't contain expected value. ");
|
"Content from eml transform didn't contain expected value. ");
|
||||||
assertFalse(result.getResponse().getContentAsString().contains(notExpected));
|
assertFalse(result.getResponse().getContentAsString().contains(notExpected));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,16 +170,16 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
String expected = "alternative plain text";
|
String expected = "alternative plain text";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("alternative.eml"));
|
readTestFile("alternative.eml"));
|
||||||
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
||||||
"Content from eml transform didn't contain expected value. ");
|
"Content from eml transform didn't contain expected value. ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -186,16 +190,16 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
String expected = "nested alternative plain text";
|
String expected = "nested alternative plain text";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("nested.alternative.eml"));
|
readTestFile("nested.alternative.eml"));
|
||||||
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
assertTrue(result.getResponse().getContentAsString().contains(expected),
|
||||||
"Content from eml transform didn't contain expected value. ");
|
"Content from eml transform didn't contain expected value. ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -204,21 +208,20 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
@Test
|
@Test
|
||||||
public void testExtractMetadataRFC822() throws Exception
|
public void testExtractMetadataRFC822() throws Exception
|
||||||
{
|
{
|
||||||
String expected =
|
String expected = "{" +
|
||||||
"{"+
|
"\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}addressee\":\"Nevin Nollop <nevin.nollop@gmail.com>\","+
|
"\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}addressees\":\"Nevin Nollop <nevinn@alfresco.com>\","+
|
"\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}description\":\"The quick brown fox jumps over the lazy dog\","+
|
"\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}originator\":\"Nevin Nollop <nevin.nollop@alfresco.com>\","+
|
"\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}sentdate\":1086351802000,"+
|
"\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}subjectline\":\"The quick brown fox jumps over the lazy dog\","+
|
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+
|
"\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}dateSent\":1086351802000,"+
|
"\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageCc\":\"Nevin Nollop <nevinn@alfresco.com>\","+
|
"\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageFrom\":\"Nevin Nollop <nevin.nollop@alfresco.com>\","+
|
"\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\"," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageId\":\"<20040604122322.GV1905@phoenix.home>\","+
|
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\","+
|
"\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\"" +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageTo\":\"Nevin Nollop <nevin.nollop@gmail.com>\""+
|
|
||||||
"}";
|
"}";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
@@ -240,18 +243,16 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
public void testExtractMetadataOptionRFC822() throws Exception
|
public void testExtractMetadataOptionRFC822() throws Exception
|
||||||
{
|
{
|
||||||
// {"messageSubject":["{http://www.alfresco.org/model/imap/1.0}messageSubject","{http://www.alfresco.org/model/content/1.0}subjectline","{http://www.alfresco.org/model/content/1.0}description","{http://www.alfresco.org/model/content/1.0}title"],"Thread-Index":["{http://www.alfresco.org/model/imap/1.0}threadIndex"],"messageTo":["{http://www.alfresco.org/model/imap/1.0}messageTo","{http://www.alfresco.org/model/content/1.0}addressee"],"messageSent":["{http://www.alfresco.org/model/content/1.0}sentdate","{http://www.alfresco.org/model/imap/1.0}dateSent"],"Message-ID":["{http://www.alfresco.org/model/imap/1.0}messageId"],"messageCc":["{http://www.alfresco.org/model/imap/1.0}messageCc","{http://www.alfresco.org/model/content/1.0}addressees"],"messageReceived":["{http://www.alfresco.org/model/imap/1.0}dateReceived"],"messageFrom":["{http://www.alfresco.org/model/imap/1.0}messageFrom","{http://www.alfresco.org/model/content/1.0}originator"]}
|
// {"messageSubject":["{http://www.alfresco.org/model/imap/1.0}messageSubject","{http://www.alfresco.org/model/content/1.0}subjectline","{http://www.alfresco.org/model/content/1.0}description","{http://www.alfresco.org/model/content/1.0}title"],"Thread-Index":["{http://www.alfresco.org/model/imap/1.0}threadIndex"],"messageTo":["{http://www.alfresco.org/model/imap/1.0}messageTo","{http://www.alfresco.org/model/content/1.0}addressee"],"messageSent":["{http://www.alfresco.org/model/content/1.0}sentdate","{http://www.alfresco.org/model/imap/1.0}dateSent"],"Message-ID":["{http://www.alfresco.org/model/imap/1.0}messageId"],"messageCc":["{http://www.alfresco.org/model/imap/1.0}messageCc","{http://www.alfresco.org/model/content/1.0}addressees"],"messageReceived":["{http://www.alfresco.org/model/imap/1.0}dateReceived"],"messageFrom":["{http://www.alfresco.org/model/imap/1.0}messageFrom","{http://www.alfresco.org/model/content/1.0}originator"]}
|
||||||
String extractMapping =
|
String extractMapping = "{\"messageSubject\":[" +
|
||||||
"{\"messageSubject\":[" +
|
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\"," +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\"," +
|
"\"{http://www.alfresco.org/model/content/1.0}title\"]," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}title\"]," +
|
|
||||||
"\"Thread-Index\":[" +
|
"\"Thread-Index\":[" +
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}threadIndex\"]," +
|
"\"{http://www.alfresco.org/model/imap/1.0}threadIndex\"]," +
|
||||||
"\"messageFrom\":[" +
|
"\"messageFrom\":[" +
|
||||||
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n";
|
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\"]}\n";
|
||||||
String expected =
|
String expected = "{\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\"," +
|
||||||
"{\"{http://www.alfresco.org/model/content/1.0}title\":\"The quick brown fox jumps over the lazy dog\","+
|
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
||||||
"\"{http://www.alfresco.org/model/dod5015/1.0}dodProp1\":\"Nevin Nollop <nevin.nollop@alfresco.com>\"," +
|
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"}";
|
||||||
"\"{http://www.alfresco.org/model/imap/1.0}messageSubject\":\"The quick brown fox jumps over the lazy dog\"}";
|
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
@@ -273,14 +274,14 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
String expected = " ";
|
String expected = " ";
|
||||||
MvcResult result = sendRequest("eml",
|
MvcResult result = sendRequest("eml",
|
||||||
null,
|
null,
|
||||||
MIMETYPE_RFC822,
|
MIMETYPE_RFC822,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
readTestFile("htmlChars.eml"));
|
readTestFile("htmlChars.eml"));
|
||||||
assertFalse(result.getResponse().getContentAsString().contains(expected));
|
assertFalse(result.getResponse().getContentAsString().contains(expected));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -294,23 +295,23 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
||||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
||||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
||||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
||||||
String partC = "</body></html>";
|
String partC = "</body></html>";
|
||||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
final String expected = TITLE + " " + TEXT_P1 + " " + TEXT_P2 + " " + TEXT_P3;
|
||||||
|
|
||||||
MvcResult result = sendRequest("html",
|
MvcResult result = sendRequest("html",
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
MIMETYPE_HTML,
|
MIMETYPE_HTML,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
expected.getBytes());
|
expected.getBytes());
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
||||||
targetEncoding);
|
targetEncoding);
|
||||||
assertTrue(contentResult.contains(expected), "The content did not include \"" + expected);
|
assertTrue(contentResult.contains(expected), "The content did not include \"" + expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -330,17 +331,17 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
MvcResult result = sendRequest("txt",
|
MvcResult result = sendRequest("txt",
|
||||||
"MacDingbat",
|
"MacDingbat",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
content);
|
content);
|
||||||
|
|
||||||
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
String contentResult = new String(result.getResponse().getContentAsByteArray(),
|
||||||
targetEncoding);
|
targetEncoding);
|
||||||
assertTrue(contentResult.contains(expected), "The content did not include \"" + expected);
|
assertTrue(contentResult.contains(expected), "The content did not include \"" + expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -351,17 +352,17 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
byte[] content = new byte[0];
|
byte[] content = new byte[0];
|
||||||
|
|
||||||
MvcResult result = sendRequest("txt",
|
MvcResult result = sendRequest("txt",
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"txt",
|
"txt",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
content);
|
content);
|
||||||
|
|
||||||
assertEquals(0, result.getResponse().getContentLength(),
|
assertEquals(0, result.getResponse().getContentLength(),
|
||||||
"Returned content should be empty for an empty source file");
|
"Returned content should be empty for an empty source file");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -377,14 +378,14 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
String expected = sb.toString();
|
String expected = sb.toString();
|
||||||
|
|
||||||
MvcResult result = sendRequest("txt",
|
MvcResult result = sendRequest("txt",
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
MIMETYPE_TEXT_PLAIN,
|
MIMETYPE_TEXT_PLAIN,
|
||||||
"pdf",
|
"pdf",
|
||||||
MIMETYPE_PDF,
|
MIMETYPE_PDF,
|
||||||
null,
|
null,
|
||||||
"1",
|
"1",
|
||||||
null,
|
null,
|
||||||
expected.getBytes());
|
expected.getBytes());
|
||||||
|
|
||||||
// Read back in the PDF and check it
|
// Read back in the PDF and check it
|
||||||
PDDocument doc = PDDocument.load(result.getResponse().getContentAsByteArray());
|
PDDocument doc = PDDocument.load(result.getResponse().getContentAsByteArray());
|
||||||
@@ -403,56 +404,55 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
public void testAppleIWorksPages() throws Exception
|
public void testAppleIWorksPages() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("pages"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("pages"));
|
||||||
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
||||||
"Expected image content but content is empty.");
|
"Expected image content but content is empty.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAppleIWorksNumbers() throws Exception
|
public void testAppleIWorksNumbers() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
MvcResult result = sendRequest("numbers", null, MIMETYPE_IWORK_NUMBERS,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("numbers"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("numbers"));
|
||||||
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
||||||
"Expected image content but content is empty.");
|
"Expected image content but content is empty.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAppleIWorksKey() throws Exception
|
public void testAppleIWorksKey() throws Exception
|
||||||
{
|
{
|
||||||
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
|
MvcResult result = sendRequest("key", null, MIMETYPE_IWORK_KEYNOTE,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("key"));
|
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("key"));
|
||||||
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
||||||
"Expected image content but content is empty.");
|
"Expected image content but content is empty.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Test
|
// @Test
|
||||||
// TODO Doesn't work with java 11, enable when fixed
|
// TODO Doesn't work with java 11, enable when fixed
|
||||||
public void testOOXML() throws Exception
|
// public void testOOXML() throws Exception
|
||||||
{
|
// {
|
||||||
MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
|
// MvcResult result = sendRequest("docx", null, MIMETYPE_OPENXML_WORDPROCESSING,
|
||||||
"jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("docx"));
|
// "jpeg", MIMETYPE_IMAGE_JPEG, null, null, null, readTestFile("docx"));
|
||||||
assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
// assertTrue(result.getResponse().getContentAsByteArray().length > 0L,
|
||||||
"Expected image content but content is empty.");
|
// "Expected image content but content is empty.");
|
||||||
}
|
// }
|
||||||
|
|
||||||
private MvcResult sendRequest(String sourceExtension,
|
private MvcResult sendRequest(String sourceExtension,
|
||||||
String sourceEncoding,
|
String sourceEncoding,
|
||||||
String sourceMimetype,
|
String sourceMimetype,
|
||||||
String targetExtension,
|
String targetExtension,
|
||||||
String targetMimetype,
|
String targetMimetype,
|
||||||
String targetEncoding,
|
String targetEncoding,
|
||||||
String pageLimit,
|
String pageLimit,
|
||||||
String extractMapping,
|
String extractMapping,
|
||||||
byte[] content) throws Exception
|
byte[] content) throws Exception
|
||||||
{
|
{
|
||||||
final MockMultipartFile sourceFile = new MockMultipartFile("file",
|
final MockMultipartFile sourceFile = new MockMultipartFile("file",
|
||||||
"test_file." + sourceExtension, sourceMimetype, content);
|
"test_file." + sourceExtension, sourceMimetype, content);
|
||||||
|
|
||||||
final MockHttpServletRequestBuilder requestBuilder = super
|
final MockHttpServletRequestBuilder requestBuilder = super.mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile)
|
||||||
.mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile)
|
.param(TARGET_MIMETYPE, targetMimetype)
|
||||||
.param(TARGET_MIMETYPE, targetMimetype)
|
.param(SOURCE_MIMETYPE, sourceMimetype);
|
||||||
.param(SOURCE_MIMETYPE, sourceMimetype);
|
|
||||||
|
|
||||||
// SourceEncoding is available in the options but is not used to select the transformer as it is a known
|
// SourceEncoding is available in the options but is not used to select the transformer as it is a known
|
||||||
// like the source mimetype.
|
// like the source mimetype.
|
||||||
@@ -474,12 +474,12 @@ public class MiscTest extends AbstractBaseTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
return mockMvc.perform(requestBuilder)
|
return mockMvc.perform(requestBuilder)
|
||||||
.andExpect(status().isOk())
|
.andExpect(status().isOk())
|
||||||
.andExpect(header().string("Content-Disposition",
|
.andExpect(header().string("Content-Disposition",
|
||||||
"attachment; filename*=" +
|
"attachment; filename*=" +
|
||||||
(targetEncoding == null ? "UTF-8" : targetEncoding) +
|
(targetEncoding == null ? "UTF-8" : targetEncoding) +
|
||||||
"''transform." + targetExtension))
|
"''transform." + targetExtension))
|
||||||
.andReturn();
|
.andReturn();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String clean(String text)
|
private String clean(String text)
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,7 +26,11 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.misc.transformers;
|
package org.alfresco.transform.misc.transformers;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.fail;
|
||||||
|
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.HTML_COLLAPSE;
|
||||||
|
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
@@ -35,38 +39,35 @@ import java.nio.file.Files;
|
|||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.alfresco.transform.common.RequestParamMap.SOURCE_ENCODING;
|
import org.junit.jupiter.api.Test;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
|
import org.junit.jupiter.params.provider.ValueSource;
|
||||||
|
|
||||||
public class HtmlParserContentTransformerTest
|
public class HtmlParserContentTransformerTest
|
||||||
{
|
{
|
||||||
private static final String SOURCE_MIMETYPE = "text/html";
|
private static final String SOURCE_MIMETYPE = "text/html";
|
||||||
private static final String TARGET_MIMETYPE = "text/plain";
|
private static final String TARGET_MIMETYPE = "text/plain";
|
||||||
|
|
||||||
HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks that we correctly handle text in different encodings,
|
* Checks that we correctly handle text in different encodings, no matter if the encoding is specified on the Content Property or in a meta tag within the HTML itself. (ALF-10466)
|
||||||
* no matter if the encoding is specified on the Content Property
|
|
||||||
* or in a meta tag within the HTML itself. (ALF-10466)
|
|
||||||
*
|
*
|
||||||
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
|
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line so we must be careful when checking the returned text
|
||||||
* so we must be careful when checking the returned text
|
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testEncodingHandling() throws Exception
|
public void testEncodingHandling() throws Exception
|
||||||
{
|
{
|
||||||
final String NEWLINE = System.getProperty("line.separator");
|
final HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
||||||
final String TITLE = "Testing!";
|
final String newline = System.getProperty("line.separator");
|
||||||
final String TEXT_P1 = "This is some text in English";
|
final String title = "Testing!";
|
||||||
final String TEXT_P2 = "This is more text in English";
|
final String textp1 = "This is some text in English";
|
||||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
final String textp2 = "This is more text in English";
|
||||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
final String textp3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
String partA = "<html><head><title>" + title + "</title></head>" + newline;
|
||||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
String partB = "<body><p>" + textp1 + "</p>" + newline +
|
||||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
"<p>" + textp2 + "</p>" + newline +
|
||||||
|
"<p>" + textp3 + "</p>" + newline;
|
||||||
String partC = "</body></html>";
|
String partC = "</body></html>";
|
||||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
final String expected = title + newline + textp1 + newline + textp2 + newline + textp3;
|
||||||
|
|
||||||
File tmpS = null;
|
File tmpS = null;
|
||||||
File tmpD = null;
|
File tmpD = null;
|
||||||
@@ -81,6 +82,7 @@ public class HtmlParserContentTransformerTest
|
|||||||
|
|
||||||
Map<String, String> parameters = new HashMap<>();
|
Map<String, String> parameters = new HashMap<>();
|
||||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||||
|
parameters.put(HTML_COLLAPSE, String.valueOf(true));
|
||||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
|
|
||||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
@@ -94,6 +96,7 @@ public class HtmlParserContentTransformerTest
|
|||||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||||
parameters = new HashMap<>();
|
parameters = new HashMap<>();
|
||||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||||
|
parameters.put(HTML_COLLAPSE, String.valueOf(true));
|
||||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
tmpS.delete();
|
tmpS.delete();
|
||||||
@@ -105,6 +108,7 @@ public class HtmlParserContentTransformerTest
|
|||||||
|
|
||||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||||
parameters = new HashMap<>();
|
parameters = new HashMap<>();
|
||||||
|
parameters.put(HTML_COLLAPSE, String.valueOf(true));
|
||||||
parameters.put(SOURCE_ENCODING, "UTF-16");
|
parameters.put(SOURCE_ENCODING, "UTF-16");
|
||||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
@@ -122,8 +126,8 @@ public class HtmlParserContentTransformerTest
|
|||||||
// Content set to ISO 8859-1, meta set to UTF-8
|
// Content set to ISO 8859-1, meta set to UTF-8
|
||||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||||
String str = partA +
|
String str = partA +
|
||||||
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
|
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
|
||||||
partB + partC;
|
partB + partC;
|
||||||
|
|
||||||
writeToFile(tmpS, str, "UTF-8");
|
writeToFile(tmpS, str, "UTF-8");
|
||||||
|
|
||||||
@@ -131,32 +135,166 @@ public class HtmlParserContentTransformerTest
|
|||||||
|
|
||||||
parameters = new HashMap<>();
|
parameters = new HashMap<>();
|
||||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||||
|
parameters.put(HTML_COLLAPSE, String.valueOf(true));
|
||||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
tmpS.delete();
|
tmpS.delete();
|
||||||
tmpD.delete();
|
tmpD.delete();
|
||||||
|
|
||||||
// Note - we can't test UTF-16 with only a meta encoding,
|
// Note - we can't test UTF-16 with only a meta encoding,
|
||||||
// because without that the parser won't know about the
|
// because without that the parser won't know about the
|
||||||
// 2 byte format so won't be able to identify the meta tag
|
// 2 byte format so won't be able to identify the meta tag
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
fail("Test Failed: " + e.getMessage()); // fail the test if any exception occurs
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
if (tmpS != null && tmpS.exists()) tmpS.delete();
|
if (tmpS != null && tmpS.exists())
|
||||||
if (tmpD != null && tmpD.exists()) tmpD.delete();
|
{
|
||||||
|
tmpS.delete();
|
||||||
|
}
|
||||||
|
if (tmpD != null && tmpD.exists())
|
||||||
|
{
|
||||||
|
tmpD.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeToFile(File file, String content, String encoding) throws Exception
|
/**
|
||||||
|
* Tests the transformer with different collapsing methods. If the collapsing is set to false, it should not collapse the new lines between paragraphs. If the collapsing is set to true, it should collapse the new lines.
|
||||||
|
*/
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(booleans = {true, false})
|
||||||
|
public void testTransformerWithDifferentCollapsingMethods(boolean shouldCollapse)
|
||||||
|
{
|
||||||
|
final HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
||||||
|
|
||||||
|
final String newline = System.getProperty("line.separator");
|
||||||
|
final String title = "Testing!";
|
||||||
|
final String textp1 = "This is some text in English";
|
||||||
|
final String textp2 = "This is more text in English";
|
||||||
|
final String textp3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||||
|
String partA = "<html><head><title>" + title + "</title></head>" + newline;
|
||||||
|
String partB = "<body><p>" + textp1 + "</p>" + newline +
|
||||||
|
"<p>" + textp2 + "</p>" + newline +
|
||||||
|
"<p>" + textp3 + "</p>" + newline;
|
||||||
|
String partC = "</body></html>";
|
||||||
|
final String expected = title + newline + textp1 + newline + textp2 + newline + textp3 + (shouldCollapse ? "" : newline); // Just a added newline if collapsing is not collapsing
|
||||||
|
|
||||||
|
File tmpS = null;
|
||||||
|
File tmpD = null;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||||
|
writeToFile(tmpS, partA + partB + partC, "UTF-8");
|
||||||
|
|
||||||
|
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||||
|
Map<String, String> parameters = new HashMap<>();
|
||||||
|
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||||
|
parameters.put(HTML_COLLAPSE, String.valueOf(shouldCollapse));
|
||||||
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
|
tmpS.delete();
|
||||||
|
tmpD.delete();
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
fail("Test Failed: " + e.getMessage()); // fail the test if any exception occurs
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
if (tmpS != null && tmpS.exists())
|
||||||
|
{
|
||||||
|
tmpS.delete();
|
||||||
|
}
|
||||||
|
if (tmpD != null && tmpD.exists())
|
||||||
|
{
|
||||||
|
tmpD.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the transformer with wrong boolean values for the collapse option. It should not throw an exception and should use the default value for collapsing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(strings = {"cat", "dog", "", "1234abcd", "@#$%"})
|
||||||
|
public void testTransformerWithWrongBooleanValues(String booleanValues)
|
||||||
|
{
|
||||||
|
final HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
||||||
|
|
||||||
|
final String newline = System.getProperty("line.separator");
|
||||||
|
final String title = "Testing!";
|
||||||
|
final String textp1 = "This is some text in English";
|
||||||
|
final String textp2 = "This is more text in English";
|
||||||
|
final String textp3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||||
|
String partA = "<html><head><title>" + title + "</title></head>" + newline;
|
||||||
|
String partB = "<body><p>" + textp1 + "</p>" + newline +
|
||||||
|
"<p>" + textp2 + "</p>" + newline +
|
||||||
|
"<p>" + textp3 + "</p>" + newline;
|
||||||
|
String partC = "</body></html>";
|
||||||
|
final String expected = title + newline + textp1 + newline + textp2 + newline + textp3;
|
||||||
|
|
||||||
|
File tmpS = null;
|
||||||
|
File tmpD = null;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||||
|
writeToFile(tmpS, partA + partB + partC, "UTF-8");
|
||||||
|
|
||||||
|
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||||
|
Map<String, String> parameters = new HashMap<>();
|
||||||
|
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||||
|
parameters.put(HTML_COLLAPSE, booleanValues);
|
||||||
|
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD, null);
|
||||||
|
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||||
|
tmpS.delete();
|
||||||
|
tmpD.delete();
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
fail("Test Failed: " + e.getMessage()); // fail the test if any exception occurs
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
if (tmpS != null && tmpS.exists())
|
||||||
|
{
|
||||||
|
tmpS.delete();
|
||||||
|
}
|
||||||
|
if (tmpD != null && tmpD.exists())
|
||||||
|
{
|
||||||
|
tmpD.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeToFile(File file, String content, String encoding)
|
||||||
{
|
{
|
||||||
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
||||||
{
|
{
|
||||||
ow.append(content);
|
ow.append(content);
|
||||||
}
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
fail("Failed to write to file: " + e.getMessage()); // fail the test if any exception occurs
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String readFromFile(File file, final String encoding) throws Exception
|
private String readFromFile(File file, final String encoding)
|
||||||
{
|
{
|
||||||
return new String(Files.readAllBytes(file.toPath()), encoding);
|
try
|
||||||
|
{
|
||||||
|
return new String(Files.readAllBytes(file.toPath()), encoding);
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
fail("Failed to read from file: " + e.getMessage());
|
||||||
|
return null; // Return null if there is an error reading the file
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -1,5 +1,8 @@
|
|||||||
{
|
{
|
||||||
"transformOptions": {
|
"transformOptions": {
|
||||||
|
"htmlOptions": [
|
||||||
|
{"value": {"name": "collapseHtml"}}
|
||||||
|
],
|
||||||
"textToPdfOptions": [
|
"textToPdfOptions": [
|
||||||
{"value": {"name": "pageLimit"}}
|
{"value": {"name": "pageLimit"}}
|
||||||
],
|
],
|
||||||
@@ -17,6 +20,7 @@
|
|||||||
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
|
{"sourceMediaType": "text/html", "targetMediaType": "text/plain"}
|
||||||
],
|
],
|
||||||
"transformOptions": [
|
"transformOptions": [
|
||||||
|
"htmlOptions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
2
engines/misc/src/test/resources/quick3.txt
Normal file
2
engines/misc/src/test/resources/quick3.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
The quick brown fox jumps over the lazy dog
|
||||||
|
The quick brown fox jumps over the lazy dog
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Core
|
* Alfresco Transform Core
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This file is part of the Alfresco software.
|
* This file is part of the Alfresco software.
|
||||||
* -
|
* -
|
||||||
@@ -26,31 +26,21 @@
|
|||||||
*/
|
*/
|
||||||
package org.alfresco.transform.tika;
|
package org.alfresco.transform.tika;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import org.alfresco.transform.base.AbstractBaseTest;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import org.alfresco.transform.base.executors.RuntimeExec;
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
import org.alfresco.transform.base.model.FileRefEntity;
|
import static org.mockito.Mockito.when;
|
||||||
import org.alfresco.transform.base.model.FileRefResponse;
|
import static org.springframework.http.HttpHeaders.ACCEPT;
|
||||||
import org.alfresco.transform.client.model.TransformReply;
|
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
|
||||||
import org.alfresco.transform.client.model.TransformRequest;
|
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
|
||||||
import org.apache.poi.ooxml.POIXMLProperties;
|
import static org.springframework.http.HttpStatus.CREATED;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import static org.springframework.http.HttpStatus.OK;
|
||||||
import org.junit.jupiter.api.Test;
|
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
|
||||||
import org.mockito.Mock;
|
import static org.springframework.http.MediaType.APPLICATION_PDF_VALUE;
|
||||||
import org.springframework.core.io.FileSystemResource;
|
import static org.springframework.http.MediaType.TEXT_PLAIN_VALUE;
|
||||||
import org.springframework.core.io.Resource;
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
||||||
import org.springframework.http.HttpHeaders;
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||||
import org.springframework.http.ResponseEntity;
|
|
||||||
import org.springframework.mock.web.MockMultipartFile;
|
|
||||||
import org.springframework.test.web.servlet.MvcResult;
|
|
||||||
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
|
||||||
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.UUID;
|
|
||||||
|
|
||||||
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
|
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
|
||||||
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
|
||||||
@@ -89,21 +79,33 @@ import static org.alfresco.transform.tika.transformers.Tika.XHTML;
|
|||||||
import static org.alfresco.transform.tika.transformers.Tika.XLSX;
|
import static org.alfresco.transform.tika.transformers.Tika.XLSX;
|
||||||
import static org.alfresco.transform.tika.transformers.Tika.XML;
|
import static org.alfresco.transform.tika.transformers.Tika.XML;
|
||||||
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
|
import static org.alfresco.transform.tika.transformers.Tika.ZIP;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import java.io.ByteArrayInputStream;
|
||||||
import static org.mockito.ArgumentMatchers.any;
|
import java.io.File;
|
||||||
import static org.mockito.Mockito.when;
|
import java.io.IOException;
|
||||||
import static org.springframework.http.HttpHeaders.ACCEPT;
|
import java.util.UUID;
|
||||||
import static org.springframework.http.HttpHeaders.CONTENT_DISPOSITION;
|
|
||||||
import static org.springframework.http.HttpHeaders.CONTENT_TYPE;
|
import com.google.common.collect.ImmutableSet;
|
||||||
import static org.springframework.http.HttpStatus.CREATED;
|
import org.apache.poi.ooxml.POIXMLProperties;
|
||||||
import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
import static org.springframework.http.HttpStatus.OK;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;
|
import org.junit.jupiter.api.Test;
|
||||||
import static org.springframework.http.MediaType.APPLICATION_PDF_VALUE;
|
import org.mockito.Mock;
|
||||||
import static org.springframework.http.MediaType.TEXT_PLAIN_VALUE;
|
import org.springframework.core.io.FileSystemResource;
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.header;
|
import org.springframework.core.io.Resource;
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
import org.springframework.http.HttpHeaders;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.mock.web.MockMultipartFile;
|
||||||
|
import org.springframework.test.web.servlet.MvcResult;
|
||||||
|
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
|
||||||
|
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
|
||||||
|
|
||||||
|
import org.alfresco.transform.base.AbstractBaseTest;
|
||||||
|
import org.alfresco.transform.base.executors.RuntimeExec;
|
||||||
|
import org.alfresco.transform.base.model.FileRefEntity;
|
||||||
|
import org.alfresco.transform.base.model.FileRefResponse;
|
||||||
|
import org.alfresco.transform.client.model.TransformReply;
|
||||||
|
import org.alfresco.transform.client.model.TransformRequest;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Tika.
|
* Test Tika.
|
||||||
@@ -113,9 +115,9 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
|
private static final String EXPECTED_XHTML_CONTENT_CONTAINS = "<p>The quick brown fox jumps over the lazy dog</p>";
|
||||||
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
|
private static final String EXPECTED_TEXT_CONTENT_CONTAINS = "The quick brown fox jumps over the lazy dog";
|
||||||
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
|
private static final String EXPECTED_MSG_CONTENT_CONTAINS = "Recipients\n" +
|
||||||
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
|
"\tmark.rogers@alfresco.com; speedy@quick.com; mrquick@nowhere.com\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"The quick brown fox jumps over the lazy dogs";
|
"The quick brown fox jumps over the lazy dogs";
|
||||||
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
|
private static final String EXPECTED_CSV_CONTENT_CONTAINS = "\"The\",\"quick\",\"brown\",\"fox\"";
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
@@ -139,8 +141,8 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void mockTransformCommand(String sourceExtension,
|
protected void mockTransformCommand(String sourceExtension,
|
||||||
String targetExtension, String sourceMimetype,
|
String targetExtension, String sourceMimetype,
|
||||||
boolean readTargetFileBytes) throws IOException
|
boolean readTargetFileBytes) throws IOException
|
||||||
{
|
{
|
||||||
// Tika transform is not mocked. It is run for real.
|
// Tika transform is not mocked. It is run for real.
|
||||||
|
|
||||||
@@ -160,8 +162,8 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void transform(String transform, String sourceExtension, String targetExtension,
|
private void transform(String transform, String sourceExtension, String targetExtension,
|
||||||
String sourceMimetype, String targetMimetype,
|
String sourceMimetype, String targetMimetype,
|
||||||
Boolean includeContents, String expectedContentContains) throws Exception
|
Boolean includeContents, String expectedContentContains) throws Exception
|
||||||
{
|
{
|
||||||
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
|
// We don't use targetFileBytes as some of the transforms contain different date text based on the os being used.
|
||||||
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
|
mockTransformCommand(sourceExtension, targetExtension, sourceMimetype, false);
|
||||||
@@ -169,18 +171,18 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
|
|
||||||
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
|
System.out.println("Test " + transform + " " + sourceExtension + " to " + targetExtension);
|
||||||
MockHttpServletRequestBuilder requestBuilder = includeContents == null
|
MockHttpServletRequestBuilder requestBuilder = includeContents == null
|
||||||
? mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
? mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
||||||
"targetExtension", this.targetExtension)
|
"targetExtension", this.targetExtension)
|
||||||
: mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
: mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
||||||
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
|
"targetExtension", this.targetExtension, INCLUDE_CONTENTS, includeContents.toString());
|
||||||
MvcResult result = mockMvc.perform(requestBuilder)
|
MvcResult result = mockMvc.perform(requestBuilder)
|
||||||
.andExpect(status().is(OK.value()))
|
.andExpect(status().is(OK.value()))
|
||||||
.andExpect(header().string("Content-Disposition",
|
.andExpect(header().string("Content-Disposition",
|
||||||
"attachment; filename*=UTF-8''transform." + this.targetExtension))
|
"attachment; filename*=UTF-8''transform." + this.targetExtension))
|
||||||
.andReturn();
|
.andReturn();
|
||||||
String content = result.getResponse().getContentAsString();
|
String content = result.getResponse().getContentAsString();
|
||||||
assertTrue(content.contains(expectedContentContains),
|
assertTrue(content.contains(expectedContentContains),
|
||||||
"The content did not include \"" + expectedContentContains);
|
"The content did not include \"" + expectedContentContains);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -188,9 +190,9 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
|
||||||
{
|
{
|
||||||
return super.mockMvcRequest(url, sourceFile, params)
|
return super.mockMvcRequest(url, sourceFile, params)
|
||||||
.param("targetEncoding", targetEncoding)
|
.param("targetEncoding", targetEncoding)
|
||||||
.param("targetMimetype", targetMimetype)
|
.param("targetMimetype", targetMimetype)
|
||||||
.param("sourceMimetype", sourceMimetype);
|
.param("sourceMimetype", sourceMimetype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -199,8 +201,8 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||||
targetEncoding = "rubbish";
|
targetEncoding = "rubbish";
|
||||||
mockMvc.perform(
|
mockMvc.perform(
|
||||||
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension))
|
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension))
|
||||||
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
|
.andExpect(status().is(INTERNAL_SERVER_ERROR.value()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Archive ---
|
// --- Archive ---
|
||||||
@@ -209,55 +211,55 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void zipToTextArchiveTest() throws Exception
|
public void zipToTextArchiveTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
|
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, false,
|
||||||
"quick.html\n" +
|
"quick.html\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.pdf\n" +
|
"quick.pdf\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n");
|
"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void zipToTextIncludeArchiveTest() throws Exception
|
public void zipToTextIncludeArchiveTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
|
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN, true,
|
||||||
"quick.html\n" +
|
"quick.html\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"The quick brown fox jumps over the lazy dog\n" +
|
"The quick brown fox jumps over the lazy dog\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.pdf\n" +
|
"quick.pdf\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"The quick brown fox jumps over the lazy dog" +
|
"The quick brown fox jumps over the lazy dog" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n");
|
"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void zipToTextExcludeArchiveTest() throws Exception
|
public void zipToTextExcludeArchiveTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
|
transform(ARCHIVE, ZIP, TXT, MIMETYPE_ZIP, MIMETYPE_TEXT_PLAIN,
|
||||||
false, "\n" +
|
false, "\n" +
|
||||||
"folder/subfolder/quick.jpg\n" +
|
"folder/subfolder/quick.jpg\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.doc\n" +
|
"quick.doc\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.html\n" +
|
"quick.html\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.pdf\n" +
|
"quick.pdf\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.txt\n" +
|
"quick.txt\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"quick.xml\n" +
|
"quick.xml\n" +
|
||||||
"\n");
|
"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- OutlookMsg ---
|
// --- OutlookMsg ---
|
||||||
@@ -266,7 +268,7 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void msgToTxtOutlookMsgTest() throws Exception
|
public void msgToTxtOutlookMsgTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
transform(OUTLOOK_MSG, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- PdfBox ---
|
// --- PdfBox ---
|
||||||
@@ -275,35 +277,35 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void pdfToTxtPdfBoxTest() throws Exception
|
public void pdfToTxtPdfBoxTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
|
transform(PDF_BOX, PDF, TXT, MIMETYPE_PDF, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pdfToCsvPdfBoxTest() throws Exception
|
public void pdfToCsvPdfBoxTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
|
transform(PDF_BOX, PDF, CSV, MIMETYPE_PDF, MIMETYPE_TEXT_CSV, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
|
EXPECTED_TEXT_CONTENT_CONTAINS); // Yes it is just text
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pdfToXmlPdfBoxTest() throws Exception
|
public void pdfToXmlPdfBoxTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
|
transform(PDF_BOX, PDF, XML, MIMETYPE_PDF, MIMETYPE_XML, null,
|
||||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pdfToXhtmlPdfBoxTest() throws Exception
|
public void pdfToXhtmlPdfBoxTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
|
transform(PDF_BOX, PDF, XHTML, MIMETYPE_PDF, MIMETYPE_XHTML, null,
|
||||||
EXPECTED_XHTML_CONTENT_CONTAINS);
|
EXPECTED_XHTML_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pdfToHtmlPdfBoxTest() throws Exception
|
public void pdfToHtmlPdfBoxTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
|
transform(PDF_BOX, PDF, HTML, MIMETYPE_PDF, MIMETYPE_HTML, null,
|
||||||
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
EXPECTED_XHTML_CONTENT_CONTAINS); // Yes it is just XHTML
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Office ---
|
// --- Office ---
|
||||||
@@ -312,14 +314,14 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void msgToTxtOfficeTest() throws Exception
|
public void msgToTxtOfficeTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
transform(OFFICE, MSG, TXT, MIMETYPE_OUTLOOK_MSG, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_MSG_CONTENT_CONTAINS);
|
EXPECTED_MSG_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void docToTxtOfficeTest() throws Exception
|
public void docToTxtOfficeTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
transform(OFFICE, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Poi ---
|
// --- Poi ---
|
||||||
@@ -328,7 +330,7 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void xslxToCsvPoiTest() throws Exception
|
public void xslxToCsvPoiTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(POI, XLSX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
|
transform(POI, XLSX, CSV, MIMETYPE_OPENXML_SPREADSHEET, MIMETYPE_TEXT_CSV, null,
|
||||||
EXPECTED_CSV_CONTENT_CONTAINS);
|
EXPECTED_CSV_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- OOXML ---
|
// --- OOXML ---
|
||||||
@@ -337,14 +339,14 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void docxToTxtOoXmlTest() throws Exception
|
public void docxToTxtOoXmlTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(OOXML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
transform(OOXML, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pptxToTxtOoXmlTest() throws Exception
|
public void pptxToTxtOoXmlTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(OOXML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
transform(OOXML, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- TikaAuto ---
|
// --- TikaAuto ---
|
||||||
@@ -353,14 +355,14 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void ppxtToTxtTikaAutoTest() throws Exception
|
public void ppxtToTxtTikaAutoTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
transform(TIKA_AUTO, PPTX, TXT, MIMETYPE_OPENXML_PRESENTATION, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void doctToTxtTikaAutoTest() throws Exception
|
public void doctToTxtTikaAutoTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
transform(TIKA_AUTO, DOCX, TXT, MIMETYPE_OPENXML_WORDPROCESSING, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- TextMining ---
|
// --- TextMining ---
|
||||||
@@ -369,7 +371,7 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
public void docToTxtTextMiningTest() throws Exception
|
public void docToTxtTextMiningTest() throws Exception
|
||||||
{
|
{
|
||||||
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
transform(TEXT_MINING, DOC, TXT, MIMETYPE_WORD, MIMETYPE_TEXT_PLAIN, null,
|
||||||
EXPECTED_TEXT_CONTENT_CONTAINS);
|
EXPECTED_TEXT_CONTENT_CONTAINS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -377,24 +379,22 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
mockTransformCommand(XLSX, XLSX, MIMETYPE_OPENXML_SPREADSHEET, false);
|
mockTransformCommand(XLSX, XLSX, MIMETYPE_OPENXML_SPREADSHEET, false);
|
||||||
|
|
||||||
String metadata =
|
String metadata = "{\"{http://www.alfresco.org/model/content/1.0}author\":\"author1\"," +
|
||||||
"{\"{http://www.alfresco.org/model/content/1.0}author\":\"author1\"," +
|
"\"{http://www.alfresco.org/model/content/1.0}title\":\"title1\"," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}title\":\"title1\"," +
|
"\"{http://www.alfresco.org/model/content/1.0}description\":[\"desc1\",\"desc2\"]," +
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}description\":[\"desc1\",\"desc2\"]," +
|
"\"{http://www.alfresco.org/model/content/1.0}created\":\"created1\"}";
|
||||||
"\"{http://www.alfresco.org/model/content/1.0}created\":\"created1\"}";
|
|
||||||
|
|
||||||
MockHttpServletRequestBuilder requestBuilder =
|
MockHttpServletRequestBuilder requestBuilder = super.mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
||||||
super.mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile,
|
"targetExtension", XLSX,
|
||||||
"targetExtension", XLSX,
|
"metadata", metadata,
|
||||||
"metadata", metadata,
|
"targetMimetype", MIMETYPE_METADATA_EMBED,
|
||||||
"targetMimetype", MIMETYPE_METADATA_EMBED,
|
"sourceMimetype", MIMETYPE_OPENXML_SPREADSHEET);
|
||||||
"sourceMimetype", MIMETYPE_OPENXML_SPREADSHEET);
|
|
||||||
|
|
||||||
MvcResult result = mockMvc.perform(requestBuilder)
|
MvcResult result = mockMvc.perform(requestBuilder)
|
||||||
.andExpect(status().is(OK.value()))
|
.andExpect(status().is(OK.value()))
|
||||||
.andExpect(header().string("Content-Disposition",
|
.andExpect(header().string("Content-Disposition",
|
||||||
"attachment; filename*=UTF-8''transform." + targetExtension)).
|
"attachment; filename*=UTF-8''transform." + targetExtension))
|
||||||
andReturn();
|
.andReturn();
|
||||||
|
|
||||||
byte[] bytes = result.getResponse().getContentAsByteArray();
|
byte[] bytes = result.getResponse().getContentAsByteArray();
|
||||||
ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes);
|
ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes);
|
||||||
@@ -414,11 +414,11 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
{
|
{
|
||||||
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
mockTransformCommand(PDF, TXT, MIMETYPE_PDF, true);
|
||||||
mockMvc.perform(
|
mockMvc.perform(
|
||||||
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension).param(
|
mockMvcRequest(ENDPOINT_TRANSFORM, sourceFile, "targetExtension", targetExtension).param(
|
||||||
NOT_EXTRACT_BOOKMARKS_TEXT, "true"))
|
NOT_EXTRACT_BOOKMARKS_TEXT, "true"))
|
||||||
.andExpect(status().is(OK.value()))
|
.andExpect(status().is(OK.value()))
|
||||||
.andExpect(header().string("Content-Disposition",
|
.andExpect(header().string("Content-Disposition",
|
||||||
"attachment; filename*=UTF-8''transform." + targetExtension));
|
"attachment; filename*=UTF-8''transform." + targetExtension));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -445,11 +445,11 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
HttpHeaders headers = new HttpHeaders();
|
HttpHeaders headers = new HttpHeaders();
|
||||||
headers.set(CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
|
headers.set(CONTENT_DISPOSITION, "attachment; filename=quick." + sourceExtension);
|
||||||
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
|
ResponseEntity<Resource> response = new ResponseEntity<>(new FileSystemResource(
|
||||||
sourceFile), headers, OK);
|
sourceFile), headers, OK);
|
||||||
|
|
||||||
when(sharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
|
when(sharedFileStoreClient.retrieveFile(sourceFileRef)).thenReturn(response);
|
||||||
when(sharedFileStoreClient.saveFile(any()))
|
when(sharedFileStoreClient.saveFile(any()))
|
||||||
.thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
|
.thenReturn(new FileRefResponse(new FileRefEntity(targetFileRef)));
|
||||||
when(mockExecutionResult.getExitValue()).thenReturn(0);
|
when(mockExecutionResult.getExitValue()).thenReturn(0);
|
||||||
|
|
||||||
// Update the Transformation Request with any specific params before sending it
|
// Update the Transformation Request with any specific params before sending it
|
||||||
@@ -458,16 +458,16 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
// Serialize and call the transformer
|
// Serialize and call the transformer
|
||||||
String tr = objectMapper.writeValueAsString(transformRequest);
|
String tr = objectMapper.writeValueAsString(transformRequest);
|
||||||
String transformationReplyAsString = mockMvc
|
String transformationReplyAsString = mockMvc
|
||||||
.perform(MockMvcRequestBuilders
|
.perform(MockMvcRequestBuilders
|
||||||
.post(ENDPOINT_TRANSFORM)
|
.post(ENDPOINT_TRANSFORM)
|
||||||
.header(ACCEPT, APPLICATION_JSON_VALUE)
|
.header(ACCEPT, APPLICATION_JSON_VALUE)
|
||||||
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
|
.header(CONTENT_TYPE, APPLICATION_JSON_VALUE)
|
||||||
.content(tr))
|
.content(tr))
|
||||||
.andExpect(status().is(CREATED.value()))
|
.andExpect(status().is(CREATED.value()))
|
||||||
.andReturn().getResponse().getContentAsString();
|
.andReturn().getResponse().getContentAsString();
|
||||||
|
|
||||||
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
|
TransformReply transformReply = objectMapper.readValue(transformationReplyAsString,
|
||||||
TransformReply.class);
|
TransformReply.class);
|
||||||
|
|
||||||
// Assert the reply
|
// Assert the reply
|
||||||
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
|
assertEquals(transformRequest.getRequestId(), transformReply.getRequestId());
|
||||||
@@ -492,6 +492,6 @@ public class TikaTest extends AbstractBaseTest
|
|||||||
"extractMapping",
|
"extractMapping",
|
||||||
"notExtractBookmarksText",
|
"notExtractBookmarksText",
|
||||||
"metadata"),
|
"metadata"),
|
||||||
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
|
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
* #%L
|
* #%L
|
||||||
* Alfresco Transform Model
|
* Alfresco Transform Model
|
||||||
* %%
|
* %%
|
||||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
* Copyright (C) 2005 - 2025 Alfresco Software Limited
|
||||||
* %%
|
* %%
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as
|
* it under the terms of the GNU Lesser General Public License as
|
||||||
@@ -30,54 +30,57 @@ public interface RequestParamMap
|
|||||||
{
|
{
|
||||||
// html parameter names
|
// html parameter names
|
||||||
String FILE = "file";
|
String FILE = "file";
|
||||||
String SOURCE_EXTENSION = "sourceExtension";
|
String SOURCE_EXTENSION = "sourceExtension";
|
||||||
String TARGET_EXTENSION = "targetExtension";
|
String TARGET_EXTENSION = "targetExtension";
|
||||||
String SOURCE_MIMETYPE = "sourceMimetype";
|
String SOURCE_MIMETYPE = "sourceMimetype";
|
||||||
String TARGET_MIMETYPE = "targetMimetype";
|
String TARGET_MIMETYPE = "targetMimetype";
|
||||||
|
|
||||||
// Transform options used in the core transformers.
|
// Transform options used in the core transformers.
|
||||||
String SOURCE_ENCODING = "sourceEncoding";
|
String SOURCE_ENCODING = "sourceEncoding";
|
||||||
String TARGET_ENCODING = "targetEncoding";
|
String TARGET_ENCODING = "targetEncoding";
|
||||||
String PAGE_REQUEST_PARAM = "page";
|
String PAGE_REQUEST_PARAM = "page";
|
||||||
String WIDTH_REQUEST_PARAM = "width";
|
String WIDTH_REQUEST_PARAM = "width";
|
||||||
String HEIGHT_REQUEST_PARAM = "height";
|
String HEIGHT_REQUEST_PARAM = "height";
|
||||||
String ALLOW_PDF_ENLARGEMENT = "allowPdfEnlargement";
|
String ALLOW_PDF_ENLARGEMENT = "allowPdfEnlargement";
|
||||||
String MAINTAIN_PDF_ASPECT_RATIO = "maintainPdfAspectRatio";
|
String MAINTAIN_PDF_ASPECT_RATIO = "maintainPdfAspectRatio";
|
||||||
String START_PAGE = "startPage";
|
String START_PAGE = "startPage";
|
||||||
String END_PAGE = "endPage";
|
String END_PAGE = "endPage";
|
||||||
String ALPHA_REMOVE = "alphaRemove";
|
String ALPHA_REMOVE = "alphaRemove";
|
||||||
String AUTO_ORIENT = "autoOrient";
|
String AUTO_ORIENT = "autoOrient";
|
||||||
String CROP_GRAVITY = "cropGravity";
|
String CROP_GRAVITY = "cropGravity";
|
||||||
String CROP_WIDTH = "cropWidth";
|
String CROP_WIDTH = "cropWidth";
|
||||||
String CROP_HEIGHT = "cropHeight";
|
String CROP_HEIGHT = "cropHeight";
|
||||||
String CROP_PERCENTAGE = "cropPercentage";
|
String CROP_PERCENTAGE = "cropPercentage";
|
||||||
String CROP_X_OFFSET = "cropXOffset";
|
String CROP_X_OFFSET = "cropXOffset";
|
||||||
String CROP_Y_OFFSET = "cropYOffset";
|
String CROP_Y_OFFSET = "cropYOffset";
|
||||||
String THUMBNAIL = "thumbnail";
|
String THUMBNAIL = "thumbnail";
|
||||||
String RESIZE_WIDTH = "resizeWidth";
|
String RESIZE_WIDTH = "resizeWidth";
|
||||||
String RESIZE_HEIGHT = "resizeHeight";
|
String RESIZE_HEIGHT = "resizeHeight";
|
||||||
String RESIZE_PERCENTAGE = "resizePercentage";
|
String RESIZE_PERCENTAGE = "resizePercentage";
|
||||||
String ALLOW_ENLARGEMENT = "allowEnlargement";
|
String ALLOW_ENLARGEMENT = "allowEnlargement";
|
||||||
String MAINTAIN_ASPECT_RATIO = "maintainAspectRatio";
|
String MAINTAIN_ASPECT_RATIO = "maintainAspectRatio";
|
||||||
String COMMAND_OPTIONS = "commandOptions";
|
String COMMAND_OPTIONS = "commandOptions";
|
||||||
String TIMEOUT = "timeout";
|
String TIMEOUT = "timeout";
|
||||||
String INCLUDE_CONTENTS = "includeContents";
|
String INCLUDE_CONTENTS = "includeContents";
|
||||||
String NOT_EXTRACT_BOOKMARKS_TEXT = "notExtractBookmarksText";
|
String NOT_EXTRACT_BOOKMARKS_TEXT = "notExtractBookmarksText";
|
||||||
String PAGE_LIMIT = "pageLimit";
|
String PAGE_LIMIT = "pageLimit";
|
||||||
String PDF_FORMAT = "pdfFormat";
|
String PDF_FORMAT = "pdfFormat";
|
||||||
String PDF_ORIENTATION = "pdfOrientation";
|
String PDF_ORIENTATION = "pdfOrientation";
|
||||||
String PDF_FONT = "pdfFont";
|
String PDF_FONT = "pdfFont";
|
||||||
String PDF_FONT_SIZE = "pdfFontSize";
|
String PDF_FONT_SIZE = "pdfFontSize";
|
||||||
|
|
||||||
|
// Html parameter names for the transform config
|
||||||
|
String HTML_COLLAPSE = "collapseHtml";
|
||||||
|
|
||||||
// Parameters interpreted by the TransformController
|
// Parameters interpreted by the TransformController
|
||||||
String DIRECT_ACCESS_URL = "directAccessUrl";
|
String DIRECT_ACCESS_URL = "directAccessUrl";
|
||||||
|
|
||||||
// An optional parameter (defaults to 1) to be included in the request to the t-engine {@code /transform/config}
|
// An optional parameter (defaults to 1) to be included in the request to the t-engine {@code /transform/config}
|
||||||
// endpoint to specify what version (of the schema) to return. Provides the flexibility to introduce changes
|
// endpoint to specify what version (of the schema) to return. Provides the flexibility to introduce changes
|
||||||
// without getting deserialization issues when we have components at different versions.
|
// without getting deserialization issues when we have components at different versions.
|
||||||
String CONFIG_VERSION = "configVersion";
|
String CONFIG_VERSION = "configVersion";
|
||||||
String CONFIG_VERSION_DEFAULT = "1";
|
String CONFIG_VERSION_DEFAULT = "1";
|
||||||
int CONFIG_VERSION_LATEST = CoreVersionDecorator.CONFIG_VERSION_INCLUDES_CORE_VERSION;
|
int CONFIG_VERSION_LATEST = CoreVersionDecorator.CONFIG_VERSION_INCLUDES_CORE_VERSION;
|
||||||
|
|
||||||
// Endpoints
|
// Endpoints
|
||||||
String ENDPOINT_TRANSFORM = "/transform";
|
String ENDPOINT_TRANSFORM = "/transform";
|
||||||
|
Reference in New Issue
Block a user