Fix/mnt 25089 html transformations with ootb aio create extra whitespace (#1079)

This commit is contained in:
KushalBanik
2025-06-03 13:23:33 +05:30
committed by GitHub
parent 0c534f1081
commit cb9d070c9c
14 changed files with 1496 additions and 1324 deletions

View File

@@ -26,4 +26,6 @@ transform:
unixOS: 'env FOO=#{"$"}{OUTPUT} exiftool -args -G1 -sep "|||" #{"$"}{INPUT}'
misc:
pdfBox:
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
defaultFont: ${MISC_PDFBOX_DEFAULT_FONT:NotoSans-Regular}
htmlOptions:
collapseHtml: ${MISC_HTML_COLLAPSE:true}

View File

@@ -1,126 +1,128 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.aio;
import org.alfresco.transform.base.AbstractBaseTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.config.TransformConfig;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import java.nio.file.Files;
import java.util.StringJoiner;
import static org.alfresco.transform.base.TransformControllerTest.getLogMessagesFor;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.RequestParamMap.CONFIG_VERSION_DEFAULT;
import static org.alfresco.transform.common.RequestParamMap.CONFIG_VERSION_LATEST;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
/**
* Test All-In-One.
*/
public class AIOTest extends AbstractBaseTest
{
@Autowired
private String coreVersion;
@BeforeEach
public void before() throws Exception
{
sourceMimetype = MIMETYPE_HTML;
targetMimetype = MIMETYPE_TEXT_PLAIN;
sourceExtension = "html";
targetExtension = "txt";
expectedOptions = null;
expectedSourceSuffix = null;
sourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick2." + targetExtension, true).toPath());
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype);
}
@Test
public void coreVersionNotSetInOriginalConfigTest()
{
ResponseEntity<TransformConfig> responseEntity = controller.transformConfig(Integer.valueOf(CONFIG_VERSION_DEFAULT));
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a null coreValue but was " + transformer.getCoreVersion());
});
}
@Test
public void coreVersionSetInLatestConfigTest()
{
ResponseEntity<TransformConfig> responseEntity = controller.transformConfig(CONFIG_VERSION_LATEST);
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNotNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a coreValue but was null. Should have been " + coreVersion);
});
}
@Test
public void testStartupLogsIncludeEngineMessages()
{
StringJoiner controllerLogMessages = getLogMessagesFor(TransformController.class);
controller.startup();
assertEquals(
"--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
+ "If the Alfresco software was purchased under a paid Alfresco license, the terms of the paid license agreement \n"
+ "will prevail. Otherwise, the software is provided under terms of the GNU LGPL v3 license. \n"
+ "See the license at http://www.gnu.org/licenses/lgpl-3.0.txt. or in /LICENSE.txt \n"
+ "\n"
+ "This transformer uses ImageMagick from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt\n"
+ "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt\n"
+ "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\n"
+ "This transformer uses htmlparser. See the license at http://htmlparser.sourceforge.net/license.html\n"
+ "This transformer uses alfresco-pdf-renderer which uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt\n"
+ "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n"
+ "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt\n"
+ "--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
+ "Starting application components... Done",
controllerLogMessages.toString());
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2025 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.alfresco.transform.base.TransformControllerTest.getLogMessagesFor;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.common.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transform.common.RequestParamMap.*;
import java.nio.file.Files;
import java.util.StringJoiner;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.request.MockHttpServletRequestBuilder;
import org.alfresco.transform.base.AbstractBaseTest;
import org.alfresco.transform.base.TransformController;
import org.alfresco.transform.config.TransformConfig;
/**
* Test All-In-One
*/
public class AIOTest extends AbstractBaseTest
{
@Autowired
private String coreVersion;
@BeforeEach
public void before() throws Exception
{
sourceMimetype = MIMETYPE_HTML;
targetMimetype = MIMETYPE_TEXT_PLAIN;
sourceExtension = "html";
targetExtension = "txt";
expectedOptions = null;
expectedSourceSuffix = null;
sourceFileBytes = readTestFile(sourceExtension);
expectedTargetFileBytes = Files.readAllBytes(getTestFile("quick3." + targetExtension, true).toPath());
sourceFile = new MockMultipartFile("file", "quick." + sourceExtension, sourceMimetype, sourceFileBytes);
}
@Override
// Add extra required parameters to the request.
protected MockHttpServletRequestBuilder mockMvcRequest(String url, MockMultipartFile sourceFile, String... params)
{
return super.mockMvcRequest(url, sourceFile, params)
.param("targetMimetype", targetMimetype)
.param("sourceMimetype", sourceMimetype)
.param(HTML_COLLAPSE, "true");
}
@Test
public void coreVersionNotSetInOriginalConfigTest()
{
ResponseEntity<TransformConfig> responseEntity = controller.transformConfig(Integer.valueOf(CONFIG_VERSION_DEFAULT));
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a null coreValue but was " + transformer.getCoreVersion());
});
}
@Test
public void coreVersionSetInLatestConfigTest()
{
ResponseEntity<TransformConfig> responseEntity = controller.transformConfig(CONFIG_VERSION_LATEST);
responseEntity.getBody().getTransformers().forEach(transformer -> {
assertNotNull(transformer.getCoreVersion(), transformer.getTransformerName() +
" should have had a coreValue but was null. Should have been " + coreVersion);
});
}
@Test
public void testStartupLogsIncludeEngineMessages()
{
StringJoiner controllerLogMessages = getLogMessagesFor(TransformController.class);
controller.startup();
assertEquals(
"--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
+ "If the Alfresco software was purchased under a paid Alfresco license, the terms of the paid license agreement \n"
+ "will prevail. Otherwise, the software is provided under terms of the GNU LGPL v3 license. \n"
+ "See the license at http://www.gnu.org/licenses/lgpl-3.0.txt. or in /LICENSE.txt \n"
+ "\n"
+ "This transformer uses ImageMagick from ImageMagick Studio LLC. See the license at http://www.imagemagick.org/script/license.php or in /ImageMagick-license.txt\n"
+ "This transformer uses LibreOffice from The Document Foundation. See the license at https://www.libreoffice.org/download/license/ or in /libreoffice.txt\n"
+ "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\n"
+ "This transformer uses htmlparser. See the license at http://htmlparser.sourceforge.net/license.html\n"
+ "This transformer uses alfresco-pdf-renderer which uses the PDFium library from Google Inc. See the license at https://pdfium.googlesource.com/pdfium/+/master/LICENSE or in /pdfium.txt\n"
+ "This transformer uses Tika from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\ 2.0.txt\n"
+ "This transformer uses ExifTool by Phil Harvey. See license at https://exiftool.org/#license. or in /Perl-Artistic-License.txt\n"
+ "--------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
+ "Starting application components... Done",
controllerLogMessages.toString());
}
}

View File

@@ -1,80 +1,82 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license", "the terms of
* the paid license agreement will prevail. Otherwise", "the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation", "either version 3 of the License", "or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not", "see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.aio;
import com.google.common.collect.ImmutableSet;
import org.alfresco.transform.tika.TikaTest;
import org.junit.jupiter.api.Test;
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* Test Tika functionality in All-In-One.
*/
public class AIOTikaTest extends TikaTest
{
@Test
public void optionListTest()
{
assertEquals(ImmutableSet.of(
"allowEnlargement",
"allowPdfEnlargement",
"alphaRemove",
"autoOrient",
"commandOptions",
"cropGravity",
"cropHeight",
"cropPercentage",
"cropWidth",
"cropXOffset",
"cropYOffset",
"endPage",
"extractMapping",
"height",
"includeContents",
"maintainAspectRatio",
"maintainPdfAspectRatio",
"metadata",
"notExtractBookmarksText",
"page",
"pageLimit",
"pdfFormat",
"pdfOrientation",
"resizeHeight",
"resizePercentage",
"resizeWidth",
"startPage",
"targetEncoding",
"thumbnail",
"width",
"pdfFont",
"pdfFontSize"
),
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2025 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license", "the terms of
* the paid license agreement will prevail. Otherwise", "the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation", "either version 3 of the License", "or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not", "see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transform.aio;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.alfresco.transform.base.html.OptionsHelper.getOptionNames;
import com.google.common.collect.ImmutableSet;
import org.junit.jupiter.api.Test;
import org.alfresco.transform.tika.TikaTest;
/**
* Test Tika functionality in All-In-One.
*/
public class AIOTikaTest extends TikaTest
{
@Test
public void optionListTest()
{
assertEquals(ImmutableSet.of(
"allowEnlargement",
"allowPdfEnlargement",
"alphaRemove",
"autoOrient",
"commandOptions",
"cropGravity",
"cropHeight",
"cropPercentage",
"cropWidth",
"cropXOffset",
"cropYOffset",
"endPage",
"extractMapping",
"height",
"includeContents",
"maintainAspectRatio",
"maintainPdfAspectRatio",
"metadata",
"notExtractBookmarksText",
"page",
"pageLimit",
"pdfFormat",
"pdfOrientation",
"resizeHeight",
"resizePercentage",
"resizeWidth",
"startPage",
"targetEncoding",
"thumbnail",
"width",
"pdfFont",
"pdfFontSize",
"collapseHtml"),
getOptionNames(controller.transformConfig(0).getBody().getTransformOptions()));
}
}