mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-08-14 17:58:27 +00:00
fix mixed-line-ending
This commit is contained in:
@@ -1,77 +1,77 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import org.alfresco.transformer.transformers.SelectingTransformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
|
||||
public class Application
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(Application.class);
|
||||
|
||||
@Value("${container.name}")
|
||||
private String containerName;
|
||||
|
||||
@Bean
|
||||
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
|
||||
{
|
||||
return registry -> registry.config().commonTags("containerName", containerName);
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void startup()
|
||||
{
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
|
||||
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
|
||||
logger.info("Starting application components... Done");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import org.alfresco.transformer.transformers.SelectingTransformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
|
||||
public class Application
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(Application.class);
|
||||
|
||||
@Value("${container.name}")
|
||||
private String containerName;
|
||||
|
||||
@Bean
|
||||
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
|
||||
{
|
||||
return registry -> registry.config().commonTags("containerName", containerName);
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void startup()
|
||||
{
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
Arrays.stream(LICENCE.split("\\n")).forEach(logger::info);
|
||||
Arrays.stream(SelectingTransformer.LICENCE.split("\\n")).forEach(logger::info);
|
||||
logger.info("--------------------------------------------------------------------------------------------------------------------------------------------------------------");
|
||||
|
||||
logger.info("Starting application components... Done");
|
||||
}
|
||||
}
|
||||
|
@@ -1,89 +1,89 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.alfresco.transformer.probes.ProbeTestTransform;
|
||||
import org.alfresco.transformer.transformers.SelectingTransformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Controller;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
||||
|
||||
@Controller
|
||||
public class MiscController extends AbstractTransformerController
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
|
||||
|
||||
private SelectingTransformer transformer = new SelectingTransformer();
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "Miscellaneous Transformers";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version()
|
||||
{
|
||||
return getTransformerName() + " available";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProbeTestTransform getProbeTestTransform()
|
||||
{
|
||||
// HtmlParserContentTransformer html -> text
|
||||
// See the Javadoc on this method and Probes.md for the choice of these values.
|
||||
return new ProbeTestTransform(this, "quick.html", "quick.txt",
|
||||
119, 30, 150, 1024,
|
||||
60 * 2 + 1, 60 * 2)
|
||||
{
|
||||
@Override
|
||||
protected void executeTransformCommand(File sourceFile, File targetFile)
|
||||
{
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions, File sourceFile, File targetFile)
|
||||
{
|
||||
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
|
||||
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.alfresco.transformer.probes.ProbeTestTransform;
|
||||
import org.alfresco.transformer.transformers.SelectingTransformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Controller;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
||||
|
||||
@Controller
|
||||
public class MiscController extends AbstractTransformerController
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(MiscController.class);
|
||||
|
||||
private SelectingTransformer transformer = new SelectingTransformer();
|
||||
|
||||
@Override
|
||||
public String getTransformerName()
|
||||
{
|
||||
return "Miscellaneous Transformers";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version()
|
||||
{
|
||||
return getTransformerName() + " available";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProbeTestTransform getProbeTestTransform()
|
||||
{
|
||||
// HtmlParserContentTransformer html -> text
|
||||
// See the Javadoc on this method and Probes.md for the choice of these values.
|
||||
return new ProbeTestTransform(this, "quick.html", "quick.txt",
|
||||
119, 30, 150, 1024,
|
||||
60 * 2 + 1, 60 * 2)
|
||||
{
|
||||
@Override
|
||||
protected void executeTransformCommand(File sourceFile, File targetFile)
|
||||
{
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||
transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, parameters, sourceFile, targetFile);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions, File sourceFile, File targetFile)
|
||||
{
|
||||
transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
|
||||
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,55 +1,55 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
|
||||
properties = {"activemq.url=nio://localhost:61616"})
|
||||
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_HTML)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L).build();
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.alfresco.transform.client.model.TransformRequest;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
|
||||
properties = {"activemq.url=nio://localhost:61616"})
|
||||
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
|
||||
{
|
||||
@Override
|
||||
protected TransformRequest buildRequest()
|
||||
{
|
||||
return TransformRequest
|
||||
.builder()
|
||||
.withRequestId(UUID.randomUUID().toString())
|
||||
.withSourceMediaType(MIMETYPE_HTML)
|
||||
.withTargetMediaType(MIMETYPE_TEXT_PLAIN)
|
||||
.withTargetExtension("txt")
|
||||
.withSchema(1)
|
||||
.withClientData("ACS")
|
||||
.withSourceReference(UUID.randomUUID().toString())
|
||||
.withSourceSize(32L).build();
|
||||
}
|
||||
}
|
||||
|
@@ -1,48 +1,48 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
/**
|
||||
* Tests MiscController with a server test harness.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
|
||||
{
|
||||
@Override
|
||||
protected String getTransformerName()
|
||||
{
|
||||
return "Miscellaneous Transformers";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getSourceExtension()
|
||||
{
|
||||
return "html";
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2021 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer;
|
||||
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
/**
|
||||
* Tests MiscController with a server test harness.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
|
||||
{
|
||||
@Override
|
||||
protected String getTransformerName()
|
||||
{
|
||||
return "Miscellaneous Transformers";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getSourceExtension()
|
||||
{
|
||||
return "html";
|
||||
}
|
||||
}
|
||||
|
@@ -1,30 +1,30 @@
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
alternative plain text
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">alternative html text</div>
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
|
||||
Parts form an multipart/alternative should represent the same content in different formats
|
||||
In this eml example the content differs with the purpose of determining if right part was used in transformation
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
alternative plain text
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">alternative html text</div>
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
|
||||
Parts form an multipart/alternative should represent the same content in different formats
|
||||
In this eml example the content differs with the purpose of determining if right part was used in transformation
|
||||
|
@@ -1,44 +1,44 @@
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/mixed;
|
||||
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
|
||||
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
Mail with attachment content
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">Mail with attachment content</div>
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750--
|
||||
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750
|
||||
Content-Type: text/plain;
|
||||
name="alt.txt"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
|
||||
Content-Disposition: attachment;
|
||||
filename="alt.txt"
|
||||
|
||||
File attachment content
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750--
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/mixed;
|
||||
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
|
||||
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
Mail with attachment content
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">Mail with attachment content</div>
|
||||
|
||||
------=_NextPart_001_0001_01D06C6A.D04F3750--
|
||||
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750
|
||||
Content-Type: text/plain;
|
||||
name="alt.txt"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
|
||||
Content-Disposition: attachment;
|
||||
filename="alt.txt"
|
||||
|
||||
File attachment content
|
||||
------=_NextPart_000_0000_01D06C6A.D04F3750--
|
||||
|
@@ -1,28 +1,28 @@
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
html special characters
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">html special characters</div>
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
html special characters
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">html special characters</div>
|
||||
|
||||
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
|
||||
|
Binary file not shown.
@@ -1,41 +1,41 @@
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/related;
|
||||
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
|
||||
type="multipart/alternative"
|
||||
|
||||
This is a multi-part message in MIME format.
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
|
||||
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
|
||||
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
nested alternative plain text
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
|
||||
Content-Type: text/html; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">nested alternative html text</div>
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
|
||||
Content-Type: image/jpeg; name="image001.jpg"
|
||||
Content-Transfer-Encoding: base64
|
||||
Content-ID: <image001.jpg@01D146F0.63006280>
|
||||
|
||||
image
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
|
||||
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: Attachment test
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/related;
|
||||
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
|
||||
type="multipart/alternative"
|
||||
|
||||
This is a multi-part message in MIME format.
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
|
||||
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
|
||||
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
nested alternative plain text
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
|
||||
Content-Type: text/html; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">nested alternative html text</div>
|
||||
|
||||
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
|
||||
Content-Type: image/jpeg; name="image001.jpg"
|
||||
Content-Transfer-Encoding: base64
|
||||
Content-ID: <image001.jpg@01D146F0.63006280>
|
||||
|
||||
image
|
||||
|
||||
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
|
||||
|
||||
|
@@ -1,31 +1,31 @@
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: The quick brown fox jumps over the lazy dog
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
El rápido zorro marrón salta sobre el perro perezoso
|
||||
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
|
||||
perezoso <br></div>
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0--
|
||||
|
||||
MIME-Version: 1.0
|
||||
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
|
||||
Date: Thu, 16 Aug 2012 16:13:29 +0100
|
||||
Delivered-To: jane.doe@alfresco.com
|
||||
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
|
||||
Subject: The quick brown fox jumps over the lazy dog
|
||||
From: <john.doe@alfresco.com>
|
||||
To: <jane.doe@alfresco.com>
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0
|
||||
Content-Type: text/plain;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
El rápido zorro marrón salta sobre el perro perezoso
|
||||
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0
|
||||
Content-Type: text/html;
|
||||
charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
|
||||
perezoso <br></div>
|
||||
|
||||
------=_NextPart_000_0009_01D06BC5.14D754D0--
|
||||
|
||||
|
@@ -1,115 +1,115 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
|
||||
/**
|
||||
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
|
||||
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
|
||||
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
|
||||
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
|
||||
* newer one. Both formats have the same mimetype.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Neil Mc Erlean
|
||||
* @author eknizat
|
||||
* @since 4.0
|
||||
*/
|
||||
public class AppleIWorksContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
AppleIWorksContentTransformer.class);
|
||||
|
||||
// Apple's zip entry names for previews in iWorks have changed over time.
|
||||
private static final List<String> PDF_PATHS = ImmutableList.of(
|
||||
"QuickLook/Preview.pdf"); // iWorks 2008/9
|
||||
private static final List<String> JPG_PATHS = ImmutableList.of(
|
||||
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
|
||||
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
|
||||
// (225 x 173) preview-web.jpg
|
||||
// (53 x 41) preview-micro.jpg
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile)
|
||||
{
|
||||
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
|
||||
sourceMimetype, targetMimetype);
|
||||
|
||||
// iWorks files are zip (or package) files.
|
||||
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
|
||||
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
|
||||
new BufferedInputStream(new FileInputStream(sourceFile))))
|
||||
{
|
||||
// Look through the zip file entries for the preview/thumbnail.
|
||||
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
|
||||
ZipArchiveEntry entry;
|
||||
boolean found = false;
|
||||
while ((entry = iWorksZip.getNextZipEntry()) != null)
|
||||
{
|
||||
String name = entry.getName();
|
||||
if (paths.contains(name))
|
||||
{
|
||||
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
|
||||
|
||||
/**
|
||||
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
|
||||
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
|
||||
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
|
||||
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
|
||||
* newer one. Both formats have the same mimetype.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Neil Mc Erlean
|
||||
* @author eknizat
|
||||
* @since 4.0
|
||||
*/
|
||||
public class AppleIWorksContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
AppleIWorksContentTransformer.class);
|
||||
|
||||
// Apple's zip entry names for previews in iWorks have changed over time.
|
||||
private static final List<String> PDF_PATHS = ImmutableList.of(
|
||||
"QuickLook/Preview.pdf"); // iWorks 2008/9
|
||||
private static final List<String> JPG_PATHS = ImmutableList.of(
|
||||
"QuickLook/Thumbnail.jpg", // iWorks 2008/9
|
||||
"preview.jpg"); // iWorks 2013/14 (720 x 552) We use the best quality image. Others are:
|
||||
// (225 x 173) preview-web.jpg
|
||||
// (53 x 41) preview-micro.jpg
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile)
|
||||
{
|
||||
logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
|
||||
sourceMimetype, targetMimetype);
|
||||
|
||||
// iWorks files are zip (or package) files.
|
||||
// If it's not a zip file, the resultant ZipException will be caught as an IOException below.
|
||||
try (ZipArchiveInputStream iWorksZip = new ZipArchiveInputStream(
|
||||
new BufferedInputStream(new FileInputStream(sourceFile))))
|
||||
{
|
||||
// Look through the zip file entries for the preview/thumbnail.
|
||||
List<String> paths = MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;
|
||||
ZipArchiveEntry entry;
|
||||
boolean found = false;
|
||||
while ((entry = iWorksZip.getNextZipEntry()) != null)
|
||||
{
|
||||
String name = entry.getName();
|
||||
if (paths.contains(name))
|
||||
{
|
||||
Files.copy(iWorksZip, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,232 +1,232 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.alfresco.transformer.fs.FileManager;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.mail.MessagingException;
|
||||
import javax.mail.Multipart;
|
||||
import javax.mail.Part;
|
||||
import javax.mail.Session;
|
||||
import javax.mail.internet.MimeMessage;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
/**
|
||||
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
|
||||
* messages. Searches for all text content parts, and returns them. Any
|
||||
* attachments are ignored. TIKA Note - could be replaced with the Tika email
|
||||
* parser. Would require a recursing parser to be specified, but not the full
|
||||
* Auto one (we don't want attachments), just one containing text and html
|
||||
* related parsers.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*/
|
||||
public class EMLTransformer implements SelectableTransformer
|
||||
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
|
||||
|
||||
private static final String CHARSET = "charset";
|
||||
private static final String DEFAULT_ENCODING = "UTF-8";
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
logger.debug("Performing RFC822 to text transform.");
|
||||
// Use try with resource
|
||||
try (InputStream contentInputStream = new BufferedInputStream(
|
||||
new FileInputStream(sourceFile));
|
||||
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
|
||||
{
|
||||
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
|
||||
contentInputStream);
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
Object content = mimeMessage.getContent();
|
||||
if (content instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) content, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.append(content.toString());
|
||||
}
|
||||
bufferedFileWriter.write(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find "text" parts of message recursively and appends it to sb StringBuilder
|
||||
*
|
||||
* @param multipart Multipart to process
|
||||
* @param sb StringBuilder
|
||||
* @throws MessagingException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
|
||||
IOException
|
||||
{
|
||||
boolean isAlternativeMultipart = multipart.getContentType().contains(
|
||||
MIMETYPE_MULTIPART_ALTERNATIVE);
|
||||
if (isAlternativeMultipart)
|
||||
{
|
||||
processAlternativeMultipart(multipart, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
processPart(part, sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
|
||||
*
|
||||
* @param multipart
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
|
||||
IOException, MessagingException
|
||||
{
|
||||
Part partToUse = null;
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
partToUse = part;
|
||||
break;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
partToUse = part;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
|
||||
{
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processAlternativeMultipart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (partToUse != null)
|
||||
{
|
||||
processPart(partToUse, sb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds text on a given mail part. Accepted parts types are text/html and text/plain.
|
||||
* Attachments are ignored
|
||||
*
|
||||
* @param part
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
|
||||
{
|
||||
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
|
||||
if (isAttachment)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
sb.append(part.getContent().toString());
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
String mailPartContent = part.getContent().toString();
|
||||
|
||||
//create a temporary html file with same mail part content and encoding
|
||||
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
|
||||
".html");
|
||||
String encoding = getMailPartContentEncoding(part);
|
||||
try (OutputStreamWriter osWriter = new OutputStreamWriter(
|
||||
new FileOutputStream(tempHtmlFile), encoding))
|
||||
{
|
||||
osWriter.write(mailPartContent);
|
||||
}
|
||||
|
||||
//transform html file's content to plain text
|
||||
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(tempHtmlFile, encoding);
|
||||
sb.append(extractor.getStrings());
|
||||
|
||||
tempHtmlFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private String getMailPartContentEncoding(Part part) throws MessagingException
|
||||
{
|
||||
String encoding = DEFAULT_ENCODING;
|
||||
String contentType = part.getContentType();
|
||||
int startIndex = contentType.indexOf(CHARSET);
|
||||
if (startIndex > 0)
|
||||
{
|
||||
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
|
||||
.replaceAll("\"", "");
|
||||
}
|
||||
return encoding;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.alfresco.transformer.fs.FileManager;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.mail.MessagingException;
|
||||
import javax.mail.Multipart;
|
||||
import javax.mail.Part;
|
||||
import javax.mail.Session;
|
||||
import javax.mail.internet.MimeMessage;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
|
||||
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
|
||||
|
||||
/**
|
||||
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
|
||||
* messages. Searches for all text content parts, and returns them. Any
|
||||
* attachments are ignored. TIKA Note - could be replaced with the Tika email
|
||||
* parser. Would require a recursing parser to be specified, but not the full
|
||||
* Auto one (we don't want attachments), just one containing text and html
|
||||
* related parsers.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*/
|
||||
public class EMLTransformer implements SelectableTransformer
|
||||
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);
|
||||
|
||||
private static final String CHARSET = "charset";
|
||||
private static final String DEFAULT_ENCODING = "UTF-8";
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
logger.debug("Performing RFC822 to text transform.");
|
||||
// Use try with resource
|
||||
try (InputStream contentInputStream = new BufferedInputStream(
|
||||
new FileInputStream(sourceFile));
|
||||
Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
|
||||
{
|
||||
MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
|
||||
contentInputStream);
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
Object content = mimeMessage.getContent();
|
||||
if (content instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) content, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.append(content.toString());
|
||||
}
|
||||
bufferedFileWriter.write(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find "text" parts of message recursively and appends it to sb StringBuilder
|
||||
*
|
||||
* @param multipart Multipart to process
|
||||
* @param sb StringBuilder
|
||||
* @throws MessagingException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
|
||||
IOException
|
||||
{
|
||||
boolean isAlternativeMultipart = multipart.getContentType().contains(
|
||||
MIMETYPE_MULTIPART_ALTERNATIVE);
|
||||
if (isAlternativeMultipart)
|
||||
{
|
||||
processAlternativeMultipart(multipart, sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processMultiPart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
else
|
||||
{
|
||||
processPart(part, sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the suitable part from an multipart/alternative and appends it's text content to StringBuilder sb
|
||||
*
|
||||
* @param multipart
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
|
||||
IOException, MessagingException
|
||||
{
|
||||
Part partToUse = null;
|
||||
for (int i = 0, n = multipart.getCount(); i < n; i++)
|
||||
{
|
||||
Part part = multipart.getBodyPart(i);
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
partToUse = part;
|
||||
break;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
partToUse = part;
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
|
||||
{
|
||||
if (part.getContent() instanceof Multipart)
|
||||
{
|
||||
processAlternativeMultipart((Multipart) part.getContent(), sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (partToUse != null)
|
||||
{
|
||||
processPart(partToUse, sb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds text on a given mail part. Accepted parts types are text/html and text/plain.
|
||||
* Attachments are ignored
|
||||
*
|
||||
* @param part
|
||||
* @param sb
|
||||
* @throws IOException
|
||||
* @throws MessagingException
|
||||
*/
|
||||
private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
|
||||
{
|
||||
boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
|
||||
if (isAttachment)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
|
||||
{
|
||||
sb.append(part.getContent().toString());
|
||||
}
|
||||
else if (part.getContentType().contains(MIMETYPE_HTML))
|
||||
{
|
||||
String mailPartContent = part.getContent().toString();
|
||||
|
||||
//create a temporary html file with same mail part content and encoding
|
||||
File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
|
||||
".html");
|
||||
String encoding = getMailPartContentEncoding(part);
|
||||
try (OutputStreamWriter osWriter = new OutputStreamWriter(
|
||||
new FileOutputStream(tempHtmlFile), encoding))
|
||||
{
|
||||
osWriter.write(mailPartContent);
|
||||
}
|
||||
|
||||
//transform html file's content to plain text
|
||||
HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(tempHtmlFile, encoding);
|
||||
sb.append(extractor.getStrings());
|
||||
|
||||
tempHtmlFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private String getMailPartContentEncoding(Part part) throws MessagingException
|
||||
{
|
||||
String encoding = DEFAULT_ENCODING;
|
||||
String contentType = part.getContentType();
|
||||
int startIndex = contentType.indexOf(CHARSET);
|
||||
if (startIndex > 0)
|
||||
{
|
||||
encoding = contentType.substring(startIndex + CHARSET.length() + 1)
|
||||
.replaceAll("\"", "");
|
||||
}
|
||||
return encoding;
|
||||
}
|
||||
}
|
||||
|
@@ -1,192 +1,192 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.htmlparser.Parser;
|
||||
import org.htmlparser.beans.StringBean;
|
||||
import org.htmlparser.util.ParserException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
|
||||
/**
|
||||
* Content transformer which wraps the HTML Parser library for
|
||||
* parsing HTML content.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Since HTML Parser was updated from v1.6 to v2.1, META tags
|
||||
* defining an encoding for the content via http-equiv=Content-Type
|
||||
* will ONLY be respected if the encoding of the content item
|
||||
* itself is set to ISO-8859-1.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Tika Note - could be converted to use the Tika HTML parser,
|
||||
* but we'd potentially need a custom text handler to replicate
|
||||
* the current settings around links and non-breaking spaces.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
|
||||
* @see org.htmlparser.beans.StringBean
|
||||
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
|
||||
*/
|
||||
public class HtmlParserContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
HtmlParserContentTransformer.class);
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
|
||||
}
|
||||
|
||||
// Create the extractor
|
||||
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(sourceFile, sourceEncoding);
|
||||
// get the text
|
||||
String text = extractor.getStrings();
|
||||
|
||||
// write it to the writer
|
||||
try (Writer writer = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile))))
|
||||
{
|
||||
writer.write(text);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String parameterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (encoding != null && !Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
parameterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
parameterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This code is based on a class of the same name, originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* A version of {@link StringBean} which allows control of the
|
||||
* encoding in the underlying HTML Parser.
|
||||
* Unfortunately, StringBean doesn't allow easy over-riding of
|
||||
* this, so we have to duplicate some code to control this.
|
||||
* This allows us to correctly handle HTML files where the encoding
|
||||
* is specified against the content property (rather than in the
|
||||
* HTML Head Meta), see ALF-10466 for details.
|
||||
*/
|
||||
public static class EncodingAwareStringBean extends StringBean
|
||||
{
|
||||
private static final long serialVersionUID = -9033414360428669553L;
|
||||
|
||||
/**
|
||||
* Sets the File to extract strings from, and the encoding
|
||||
* it's in (if known to Alfresco)
|
||||
*
|
||||
* @param file The File that text should be fetched from.
|
||||
* @param encoding The encoding of the input
|
||||
*/
|
||||
public void setURL(File file, String encoding)
|
||||
{
|
||||
String previousURL = getURL();
|
||||
String newURL = file.getAbsolutePath();
|
||||
|
||||
if (previousURL == null || !newURL.equals(previousURL))
|
||||
{
|
||||
try
|
||||
{
|
||||
URLConnection conn = getConnection();
|
||||
|
||||
if (null == mParser)
|
||||
{
|
||||
mParser = new Parser(newURL);
|
||||
}
|
||||
else
|
||||
{
|
||||
mParser.setURL(newURL);
|
||||
}
|
||||
|
||||
if (encoding != null)
|
||||
{
|
||||
mParser.setEncoding(encoding);
|
||||
}
|
||||
|
||||
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
|
||||
getURL());
|
||||
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
|
||||
mParser.getConnection());
|
||||
setStrings();
|
||||
}
|
||||
catch (ParserException pe)
|
||||
{
|
||||
updateStrings(pe.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getEncoding()
|
||||
{
|
||||
return mParser.getEncoding();
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.htmlparser.Parser;
|
||||
import org.htmlparser.beans.StringBean;
|
||||
import org.htmlparser.util.ParserException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
|
||||
/**
|
||||
* Content transformer which wraps the HTML Parser library for
|
||||
* parsing HTML content.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Since HTML Parser was updated from v1.6 to v2.1, META tags
|
||||
* defining an encoding for the content via http-equiv=Content-Type
|
||||
* will ONLY be respected if the encoding of the content item
|
||||
* itself is set to ISO-8859-1.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Tika Note - could be converted to use the Tika HTML parser,
|
||||
* but we'd potentially need a custom text handler to replicate
|
||||
* the current settings around links and non-breaking spaces.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
|
||||
* @see org.htmlparser.beans.StringBean
|
||||
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
|
||||
*/
|
||||
public class HtmlParserContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
HtmlParserContentTransformer.class);
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing HTML to text transform with sourceEncoding=" + sourceEncoding);
|
||||
}
|
||||
|
||||
// Create the extractor
|
||||
EncodingAwareStringBean extractor = new EncodingAwareStringBean();
|
||||
extractor.setCollapse(false);
|
||||
extractor.setLinks(false);
|
||||
extractor.setReplaceNonBreakingSpaces(false);
|
||||
extractor.setURL(sourceFile, sourceEncoding);
|
||||
// get the text
|
||||
String text = extractor.getStrings();
|
||||
|
||||
// write it to the writer
|
||||
try (Writer writer = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile))))
|
||||
{
|
||||
writer.write(text);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String parameterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (encoding != null && !Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
parameterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
parameterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This code is based on a class of the same name, originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* A version of {@link StringBean} which allows control of the
|
||||
* encoding in the underlying HTML Parser.
|
||||
* Unfortunately, StringBean doesn't allow easy over-riding of
|
||||
* this, so we have to duplicate some code to control this.
|
||||
* This allows us to correctly handle HTML files where the encoding
|
||||
* is specified against the content property (rather than in the
|
||||
* HTML Head Meta), see ALF-10466 for details.
|
||||
*/
|
||||
public static class EncodingAwareStringBean extends StringBean
|
||||
{
|
||||
private static final long serialVersionUID = -9033414360428669553L;
|
||||
|
||||
/**
|
||||
* Sets the File to extract strings from, and the encoding
|
||||
* it's in (if known to Alfresco)
|
||||
*
|
||||
* @param file The File that text should be fetched from.
|
||||
* @param encoding The encoding of the input
|
||||
*/
|
||||
public void setURL(File file, String encoding)
|
||||
{
|
||||
String previousURL = getURL();
|
||||
String newURL = file.getAbsolutePath();
|
||||
|
||||
if (previousURL == null || !newURL.equals(previousURL))
|
||||
{
|
||||
try
|
||||
{
|
||||
URLConnection conn = getConnection();
|
||||
|
||||
if (null == mParser)
|
||||
{
|
||||
mParser = new Parser(newURL);
|
||||
}
|
||||
else
|
||||
{
|
||||
mParser.setURL(newURL);
|
||||
}
|
||||
|
||||
if (encoding != null)
|
||||
{
|
||||
mParser.setEncoding(encoding);
|
||||
}
|
||||
|
||||
mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
|
||||
getURL());
|
||||
mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
|
||||
mParser.getConnection());
|
||||
setStrings();
|
||||
}
|
||||
catch (ParserException pe)
|
||||
{
|
||||
updateStrings(pe.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getEncoding()
|
||||
{
|
||||
return mParser.getEncoding();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,130 +1,130 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
|
||||
* This transformer will only work for OOXML files where thumbnailing was enabled,
|
||||
* which isn't on by default on Windows, but is more common on Mac.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Nick Burch
|
||||
* @author eknizat
|
||||
*/
|
||||
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
OOXMLThumbnailContentTransformer.class);
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
|
||||
+ " targetMimetype=" + targetMimetype);
|
||||
}
|
||||
|
||||
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
|
||||
{
|
||||
|
||||
// Does it have a thumbnail?
|
||||
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.THUMBNAIL);
|
||||
if (rels.size() > 0)
|
||||
{
|
||||
// Get the thumbnail part
|
||||
PackageRelationship tRel = rels.getRelationship(0);
|
||||
PackagePart tPart = pkg.getPart(tRel);
|
||||
|
||||
// Write it to the target
|
||||
InputStream tStream = tPart.getInputStream();
|
||||
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
tStream.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("No thumbnail present in file.");
|
||||
throw new Exception(
|
||||
"No thumbnail present in file, unable to generate " + targetMimetype);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to transform file.", e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO Add this back to engine_config.json when the transformer is fixed for java 11
|
||||
{
|
||||
"transformerName": "ooxmlThumbnail",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
|
||||
],
|
||||
"transformOptions": [
|
||||
]
|
||||
}
|
||||
*/
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
|
||||
* This transformer will only work for OOXML files where thumbnailing was enabled,
|
||||
* which isn't on by default on Windows, but is more common on Mac.
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Nick Burch
|
||||
* @author eknizat
|
||||
*/
|
||||
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(
|
||||
OOXMLThumbnailContentTransformer.class);
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing OOXML to jpeg transform with sourceMimetype=" + sourceMimetype
|
||||
+ " targetMimetype=" + targetMimetype);
|
||||
}
|
||||
|
||||
try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
|
||||
{
|
||||
|
||||
// Does it have a thumbnail?
|
||||
PackageRelationshipCollection rels = pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.THUMBNAIL);
|
||||
if (rels.size() > 0)
|
||||
{
|
||||
// Get the thumbnail part
|
||||
PackageRelationship tRel = rels.getRelationship(0);
|
||||
PackagePart tPart = pkg.getPart(tRel);
|
||||
|
||||
// Write it to the target
|
||||
InputStream tStream = tPart.getInputStream();
|
||||
Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
tStream.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("No thumbnail present in file.");
|
||||
throw new Exception(
|
||||
"No thumbnail present in file, unable to generate " + targetMimetype);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
throw new RuntimeException("Unable to transform file.", e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO Add this back to engine_config.json when the transformer is fixed for java 11
|
||||
{
|
||||
"transformerName": "ooxmlThumbnail",
|
||||
"supportedSourceAndTargetList": [
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
|
||||
{"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
|
||||
],
|
||||
"transformOptions": [
|
||||
]
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
@@ -1,53 +1,53 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Contract for the transformers that a {@link SelectingTransformer} works with.
 * Each operation has an empty default body, so an implementation only needs to
 * override the operations it provides.
 *
 * @author eknizat
 */
public interface SelectableTransformer
{
    /**
     * Transforms the source file into the target file. The default does nothing.
     *
     * @param sourceMimetype mimetype of the source
     * @param targetMimetype mimetype of the target
     * @param parameters     transform parameters
     * @param sourceFile     file to read
     * @param targetFile     file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void transform(String sourceMimetype, String targetMimetype,
            Map<String, String> parameters, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }

    /**
     * Extracts metadata from the source file. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source
     * @param targetMimetype   mimetype of the target
     * @param transformOptions transform options
     * @param sourceFile       file to read
     * @param targetFile       file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void extractMetadata(String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }

    /**
     * Embeds metadata. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source
     * @param targetMimetype   mimetype of the target
     * @param transformOptions transform options
     * @param sourceFile       file to read
     * @param targetFile       file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void embedMetadata(String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Contract for the transformers that a {@link SelectingTransformer} works with.
 * Each operation has an empty default body, so an implementation only needs to
 * override the operations it provides.
 *
 * @author eknizat
 */
public interface SelectableTransformer
{
    /**
     * Transforms the source file into the target file. The default does nothing.
     *
     * @param sourceMimetype mimetype of the source
     * @param targetMimetype mimetype of the target
     * @param parameters     transform parameters
     * @param sourceFile     file to read
     * @param targetFile     file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void transform(String sourceMimetype, String targetMimetype,
            Map<String, String> parameters, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }

    /**
     * Extracts metadata from the source file. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source
     * @param targetMimetype   mimetype of the target
     * @param transformOptions transform options
     * @param sourceFile       file to read
     * @param targetFile       file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void extractMetadata(String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }

    /**
     * Embeds metadata. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source
     * @param targetMimetype   mimetype of the target
     * @param transformOptions transform options
     * @param sourceFile       file to read
     * @param targetFile       file to write
     * @throws Exception declared so that implementations may report any failure
     */
    default void embedMetadata(String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile)
            throws Exception
    {
        // intentionally empty
    }
}
|
||||
|
@@ -1,114 +1,114 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.alfresco.transformer.executors.Transformer;
|
||||
import org.alfresco.transformer.logging.LogEntry;
|
||||
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
|
||||
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
||||
|
||||
/**
|
||||
* The SelectingTransformer selects a registered {@link SelectableTransformer}
|
||||
* and delegates the transformation to its implementation.
|
||||
*
|
||||
* @author eknizat
|
||||
*/
|
||||
public class SelectingTransformer implements Transformer
|
||||
{
|
||||
private static final String ID = "misc";
|
||||
|
||||
public static final String LICENCE =
|
||||
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
|
||||
"Additional libraries used:\n" +
|
||||
"* htmlparser http://htmlparser.sourceforge.net/license.html";
|
||||
|
||||
private final Map<String, SelectableTransformer> transformers = ImmutableMap
|
||||
.<String, SelectableTransformer>builder()
|
||||
.put("appleIWorks", new AppleIWorksContentTransformer())
|
||||
.put("html", new HtmlParserContentTransformer())
|
||||
.put("string", new StringExtractingContentTransformer())
|
||||
.put("textToPdf", new TextToPdfContentTransformer())
|
||||
.put("rfc822", new EMLTransformer())
|
||||
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
|
||||
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
|
||||
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
|
||||
.build();
|
||||
|
||||
@Override
|
||||
public String getTransformerId()
|
||||
{
|
||||
return ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile) throws Exception
|
||||
{
|
||||
final SelectableTransformer transformer = transformers.get(transformName);
|
||||
logOptions(sourceFile, targetFile, transformOptions);
|
||||
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
|
||||
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile) throws Exception
|
||||
{
|
||||
final SelectableTransformer transformer = transformers.get(transformName);
|
||||
logOptions(sourceFile, targetFile, transformOptions);
|
||||
transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
|
||||
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
|
||||
{
|
||||
StringJoiner sj = new StringJoiner(" ");
|
||||
parameters.forEach((k, v) ->
|
||||
{
|
||||
if (!TRANSFORM_NAME_PARAMETER.equals(k))
|
||||
{
|
||||
sj.add("--" + k + "=" + v);
|
||||
}
|
||||
}); // keeping the existing style used in other T-Engines
|
||||
sj.add(getExtension(sourceFile));
|
||||
sj.add(getExtension(targetFile));
|
||||
LogEntry.setOptions(sj.toString());
|
||||
}
|
||||
|
||||
private static String getExtension(File file)
|
||||
{
|
||||
final String name = file.getName();
|
||||
int i = name.lastIndexOf('.');
|
||||
return i == -1 ? "???" : name.substring(i + 1);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2020 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.alfresco.transformer.executors.Transformer;
|
||||
import org.alfresco.transformer.logging.LogEntry;
|
||||
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
|
||||
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Map;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
|
||||
|
||||
/**
|
||||
* The SelectingTransformer selects a registered {@link SelectableTransformer}
|
||||
* and delegates the transformation to its implementation.
|
||||
*
|
||||
* @author eknizat
|
||||
*/
|
||||
public class SelectingTransformer implements Transformer
|
||||
{
|
||||
private static final String ID = "misc";
|
||||
|
||||
public static final String LICENCE =
|
||||
"This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
|
||||
"Additional libraries used:\n" +
|
||||
"* htmlparser http://htmlparser.sourceforge.net/license.html";
|
||||
|
||||
private final Map<String, SelectableTransformer> transformers = ImmutableMap
|
||||
.<String, SelectableTransformer>builder()
|
||||
.put("appleIWorks", new AppleIWorksContentTransformer())
|
||||
.put("html", new HtmlParserContentTransformer())
|
||||
.put("string", new StringExtractingContentTransformer())
|
||||
.put("textToPdf", new TextToPdfContentTransformer())
|
||||
.put("rfc822", new EMLTransformer())
|
||||
.put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
|
||||
.put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
|
||||
.put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
|
||||
.build();
|
||||
|
||||
@Override
|
||||
public String getTransformerId()
|
||||
{
|
||||
return ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile) throws Exception
|
||||
{
|
||||
final SelectableTransformer transformer = transformers.get(transformName);
|
||||
logOptions(sourceFile, targetFile, transformOptions);
|
||||
transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
|
||||
public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
|
||||
Map<String, String> transformOptions,
|
||||
File sourceFile, File targetFile) throws Exception
|
||||
{
|
||||
final SelectableTransformer transformer = transformers.get(transformName);
|
||||
logOptions(sourceFile, targetFile, transformOptions);
|
||||
transformer.extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
|
||||
}
|
||||
|
||||
private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
|
||||
{
|
||||
StringJoiner sj = new StringJoiner(" ");
|
||||
parameters.forEach((k, v) ->
|
||||
{
|
||||
if (!TRANSFORM_NAME_PARAMETER.equals(k))
|
||||
{
|
||||
sj.add("--" + k + "=" + v);
|
||||
}
|
||||
}); // keeping the existing style used in other T-Engines
|
||||
sj.add(getExtension(sourceFile));
|
||||
sj.add(getExtension(targetFile));
|
||||
LogEntry.setOptions(sj.toString());
|
||||
}
|
||||
|
||||
private static String getExtension(File file)
|
||||
{
|
||||
final String name = file.getName();
|
||||
int i = name.lastIndexOf('.');
|
||||
return i == -1 ? "???" : name.substring(i + 1);
|
||||
}
|
||||
}
|
||||
|
@@ -1,158 +1,158 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
|
||||
|
||||
/**
|
||||
* Converts any textual format to plain text.
|
||||
* <p>
|
||||
* The transformation is sensitive to the source and target string encodings.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
public class StringExtractingContentTransformer implements SelectableTransformer
|
||||
{
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
|
||||
|
||||
/**
|
||||
* Text to text conversions are done directly using the content reader and writer string
|
||||
* manipulation methods.
|
||||
* <p>
|
||||
* Extraction of text from binary content attempts to take the possible character
|
||||
* encoding into account. The text produced from this will, if the encoding was correct,
|
||||
* be unformatted but valid.
|
||||
*/
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
String targetEncoding = parameters.get(TARGET_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
|
||||
+ " targetEncoding=" + targetEncoding);
|
||||
}
|
||||
|
||||
Reader charReader = null;
|
||||
Writer charWriter = null;
|
||||
try
|
||||
{
|
||||
// Build reader
|
||||
if (sourceEncoding == null)
|
||||
{
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
|
||||
}
|
||||
|
||||
// Build writer
|
||||
if (targetEncoding == null)
|
||||
{
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
|
||||
}
|
||||
|
||||
// copy from the one to the other
|
||||
char[] buffer = new char[8192];
|
||||
int readCount = 0;
|
||||
while (readCount > -1)
|
||||
{
|
||||
// write the last read count number of bytes
|
||||
charWriter.write(buffer, 0, readCount);
|
||||
// fill the buffer again
|
||||
readCount = charReader.read(buffer);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (charReader != null)
|
||||
{
|
||||
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
|
||||
}
|
||||
if (charWriter != null)
|
||||
{
|
||||
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
|
||||
}
|
||||
}
|
||||
// done
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String paramterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
|
||||
|
||||
/**
|
||||
* Converts any textual format to plain text.
|
||||
* <p>
|
||||
* The transformation is sensitive to the source and target string encodings.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
public class StringExtractingContentTransformer implements SelectableTransformer
|
||||
{
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);
|
||||
|
||||
/**
|
||||
* Text to text conversions are done directly using the content reader and writer string
|
||||
* manipulation methods.
|
||||
* <p>
|
||||
* Extraction of text from binary content attempts to take the possible character
|
||||
* encoding into account. The text produced from this will, if the encoding was correct,
|
||||
* be unformatted but valid.
|
||||
*/
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
String targetEncoding = parameters.get(TARGET_ENCODING);
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Performing text to text transform with sourceEncoding=" + sourceEncoding
|
||||
+ " targetEncoding=" + targetEncoding);
|
||||
}
|
||||
|
||||
Reader charReader = null;
|
||||
Writer charWriter = null;
|
||||
try
|
||||
{
|
||||
// Build reader
|
||||
if (sourceEncoding == null)
|
||||
{
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
|
||||
charReader = new BufferedReader(
|
||||
new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
|
||||
}
|
||||
|
||||
// Build writer
|
||||
if (targetEncoding == null)
|
||||
{
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile)));
|
||||
}
|
||||
else
|
||||
{
|
||||
checkEncodingParameter(targetEncoding, TARGET_ENCODING);
|
||||
charWriter = new BufferedWriter(
|
||||
new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
|
||||
}
|
||||
|
||||
// copy from the one to the other
|
||||
char[] buffer = new char[8192];
|
||||
int readCount = 0;
|
||||
while (readCount > -1)
|
||||
{
|
||||
// write the last read count number of bytes
|
||||
charWriter.write(buffer, 0, readCount);
|
||||
// fill the buffer again
|
||||
readCount = charReader.read(buffer);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (charReader != null)
|
||||
{
|
||||
try { charReader.close(); } catch (Throwable e) { logger.error("Failed to close charReader", e); }
|
||||
}
|
||||
if (charWriter != null)
|
||||
{
|
||||
try { charWriter.close(); } catch (Throwable e) { logger.error("Failed to close charWriter", e); }
|
||||
}
|
||||
}
|
||||
// done
|
||||
}
|
||||
|
||||
private void checkEncodingParameter(String encoding, String paramterName)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!Charset.isSupported(encoding))
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not supported by the JVM.");
|
||||
}
|
||||
}
|
||||
catch (IllegalCharsetNameException e)
|
||||
{
|
||||
throw new IllegalArgumentException(
|
||||
paramterName + "=" + encoding + " is not a valid encoding.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,448 +1,448 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.alfresco.transformer.util.RequestParamMap;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.tools.TextToPDF;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
public class TextToPdfContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
|
||||
|
||||
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
|
||||
private static final byte FE = (byte) 0xFE;
|
||||
private static final byte FF = (byte) 0xFF;
|
||||
|
||||
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
|
||||
|
||||
private final PagedTextToPDF transformer;
|
||||
|
||||
/**
 * Creates the transformer together with the page-limited text-to-PDF converter it delegates to.
 */
public TextToPdfContentTransformer()
{
    this.transformer = new PagedTextToPDF();
}
|
||||
|
||||
public void setStandardFont(String fontName)
|
||||
{
|
||||
try
|
||||
{
|
||||
transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to set Standard Font for PDF generation: " + fontName, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void setFontSize(int fontSize)
|
||||
{
|
||||
try
|
||||
{
|
||||
transformer.setFontSize(fontSize);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to set Font Size for PDF generation: " + fontSize);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
String stringPageLimit = parameters.get(PAGE_LIMIT);
|
||||
int pageLimit = -1;
|
||||
if (stringPageLimit != null)
|
||||
{
|
||||
pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
|
||||
}
|
||||
|
||||
PDDocument pdf = null;
|
||||
try (InputStream is = new FileInputStream(sourceFile);
|
||||
Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
|
||||
OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
|
||||
{
|
||||
//TransformationOptionLimits limits = getLimits(reader, writer, options);
|
||||
//TransformationOptionPair pageLimits = limits.getPagesPair();
|
||||
pdf = transformer.createPDFFromText(ir, pageLimit);
|
||||
pdf.save(os);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (pdf != null)
|
||||
{
|
||||
try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected InputStreamReader buildReader(InputStream is, String encoding)
|
||||
{
|
||||
// If they gave an encoding, try to use it
|
||||
if (encoding != null)
|
||||
{
|
||||
Charset charset = null;
|
||||
try
|
||||
{
|
||||
charset = Charset.forName(encoding);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.warn("JVM doesn't understand encoding '" + encoding +
|
||||
"' when transforming text to pdf");
|
||||
}
|
||||
if (charset != null)
|
||||
{
|
||||
// Handles the situation where there is a BOM even though the encoding indicates that normally
|
||||
// there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
|
||||
// which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
|
||||
// in the first few character. XML files even when not in European languages tend to have more
|
||||
// even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
|
||||
// Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
|
||||
// other transformers do.
|
||||
String name = charset.displayName();
|
||||
if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
|
||||
{
|
||||
logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
|
||||
charset = Charset.forName("UTF-16");
|
||||
is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
|
||||
{
|
||||
boolean bomRead;
|
||||
boolean switchByteOrder;
|
||||
boolean evenByte = true;
|
||||
|
||||
@Override
|
||||
public int read(byte[] bytes, int off, int len) throws IOException
|
||||
{
|
||||
int i = 0;
|
||||
int b = 0;
|
||||
for (; i<len; i++)
|
||||
{
|
||||
b = read();
|
||||
if (b == -1)
|
||||
{
|
||||
break;
|
||||
}
|
||||
bytes[off+i] = (byte)b;
|
||||
}
|
||||
return i == 0 && b == -1 ? -1 : i;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException
|
||||
{
|
||||
if (!bomRead)
|
||||
{
|
||||
bomRead = true;
|
||||
boolean switchBom = false;
|
||||
byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
|
||||
int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
|
||||
int evenZeros = countZeros(bytes, 0);
|
||||
int oddZeros = countZeros(bytes, 1);
|
||||
if (evenZeros > oddZeros)
|
||||
{
|
||||
if (bytes[0] == FF && bytes[1] == FE)
|
||||
{
|
||||
switchByteOrder = true;
|
||||
switchBom = true;
|
||||
logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("More even zero bytes, so normal read for big-endian");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bytes[0] == FE && bytes[1] == FF)
|
||||
{
|
||||
switchBom = true;
|
||||
logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
|
||||
}
|
||||
else
|
||||
{
|
||||
switchByteOrder = true;
|
||||
logger.debug("More odd zero bytes, so switch bytes from little-endian");
|
||||
}
|
||||
}
|
||||
|
||||
if (switchBom)
|
||||
{
|
||||
byte b = bytes[0];
|
||||
bytes[0] = bytes[1];
|
||||
bytes[1] = b;
|
||||
}
|
||||
|
||||
for (int i = end-1; i>=0; i--)
|
||||
{
|
||||
unread(bytes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (switchByteOrder)
|
||||
{
|
||||
if (evenByte)
|
||||
{
|
||||
int b1 = super.read();
|
||||
int b2 = super.read();
|
||||
if (b1 != -1)
|
||||
{
|
||||
unread(b1);
|
||||
}
|
||||
if (b2 != -1)
|
||||
{
|
||||
unread(b2);
|
||||
}
|
||||
}
|
||||
evenByte = !evenByte;
|
||||
}
|
||||
|
||||
return super.read();
|
||||
}
|
||||
|
||||
// Counts the number of even or odd 00 bytes
|
||||
private int countZeros(byte[] b, int offset)
|
||||
{
|
||||
int count = 0;
|
||||
for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
|
||||
{
|
||||
if (b[i] == 0)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
};
|
||||
}
|
||||
logger.debug("Processing plain text in encoding " + name);
|
||||
return new InputStreamReader(is, charset);
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back on the system default
|
||||
logger.debug("Processing plain text using system default encoding");
|
||||
return new InputStreamReader(is);
|
||||
}
|
||||
|
||||
private static class PagedTextToPDF extends TextToPDF
|
||||
{
|
||||
// REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
|
||||
// before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
|
||||
static PDType1Font getStandardFont(String name)
|
||||
{
|
||||
return STANDARD_14.get(name);
|
||||
}
|
||||
|
||||
private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
|
||||
|
||||
static
|
||||
{
|
||||
STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
|
||||
STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
|
||||
PDType1Font.TIMES_BOLD_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
|
||||
STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.COURIER_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
|
||||
STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
|
||||
}
|
||||
//duplicating until here
|
||||
|
||||
// The following code is based on the code in TextToPDF with the addition of
|
||||
// checks for page limits.
|
||||
// The calling code must close the PDDocument once finished with it.
|
||||
public PDDocument createPDFFromText(Reader text, int pageLimit)
|
||||
throws IOException
|
||||
{
|
||||
PDDocument doc = null;
|
||||
int pageCount = 0;
|
||||
try
|
||||
{
|
||||
final int margin = 40;
|
||||
float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
|
||||
|
||||
//calculate font height and increase by 5 percent.
|
||||
height = height * getFontSize() * 1.05f;
|
||||
doc = new PDDocument();
|
||||
BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
|
||||
String nextLine;
|
||||
PDPage page = new PDPage();
|
||||
PDPageContentStream contentStream = null;
|
||||
float y = -1;
|
||||
float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
|
||||
|
||||
// There is a special case of creating a PDF document from an empty string.
|
||||
boolean textIsEmpty = true;
|
||||
|
||||
outer:
|
||||
while ((nextLine = data.readLine()) != null)
|
||||
{
|
||||
// The input text is nonEmpty. New pages will be created and added
|
||||
// to the PDF document as they are needed, depending on the length of
|
||||
// the text.
|
||||
textIsEmpty = false;
|
||||
|
||||
String[] lineWords = nextLine.trim().split(" ");
|
||||
int lineIndex = 0;
|
||||
while (lineIndex < lineWords.length)
|
||||
{
|
||||
final StringBuilder nextLineToDraw = new StringBuilder();
|
||||
float lengthIfUsingNextWord = 0;
|
||||
do
|
||||
{
|
||||
nextLineToDraw.append(lineWords[lineIndex]);
|
||||
nextLineToDraw.append(" ");
|
||||
lineIndex++;
|
||||
if (lineIndex < lineWords.length)
|
||||
{
|
||||
String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
|
||||
lengthIfUsingNextWord =
|
||||
(getFont().getStringWidth(
|
||||
lineWithNextWord) / 1000) * getFontSize();
|
||||
}
|
||||
}
|
||||
while (lineIndex < lineWords.length &&
|
||||
lengthIfUsingNextWord < maxStringLength);
|
||||
if (y < margin)
|
||||
{
|
||||
int test = pageCount + 1;
|
||||
if (pageLimit > 0 && (pageCount++ >= pageLimit))
|
||||
{
|
||||
break outer;
|
||||
}
|
||||
|
||||
// We have crossed the end-of-page boundary and need to extend the
|
||||
// document by another page.
|
||||
page = new PDPage();
|
||||
doc.addPage(page);
|
||||
if (contentStream != null)
|
||||
{
|
||||
contentStream.endText();
|
||||
contentStream.close();
|
||||
}
|
||||
contentStream = new PDPageContentStream(doc, page);
|
||||
contentStream.setFont(getFont(), getFontSize());
|
||||
contentStream.beginText();
|
||||
y = page.getMediaBox().getHeight() - margin + height;
|
||||
contentStream.moveTextPositionByAmount(margin, y);
|
||||
}
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new IOException("Error:Expected non-null content stream.");
|
||||
}
|
||||
contentStream.moveTextPositionByAmount(0, -height);
|
||||
y -= height;
|
||||
contentStream.drawString(nextLineToDraw.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// If the input text was the empty string, then the above while loop will have short-circuited
|
||||
// and we will not have added any PDPages to the document.
|
||||
// So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
|
||||
if (textIsEmpty)
|
||||
{
|
||||
doc.addPage(page);
|
||||
}
|
||||
|
||||
if (contentStream != null)
|
||||
{
|
||||
contentStream.endText();
|
||||
contentStream.close();
|
||||
}
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
if (doc != null)
|
||||
{
|
||||
doc.close();
|
||||
}
|
||||
throw io;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
private int parseInt(String s, String paramName)
|
||||
{
|
||||
try
|
||||
{
|
||||
return Integer.valueOf(s);
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
throw new IllegalArgumentException(paramName + " parameter must be an integer.");
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.alfresco.transformer.util.RequestParamMap;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.tools.TextToPDF;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This code is based on a class of the same name originally implemented in alfresco-repository.
|
||||
* </p>
|
||||
*
|
||||
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
|
||||
*
|
||||
* @author Derek Hulley
|
||||
* @author eknizat
|
||||
*/
|
||||
public class TextToPdfContentTransformer implements SelectableTransformer
|
||||
{
|
||||
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
|
||||
|
||||
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
|
||||
private static final byte FE = (byte) 0xFE;
|
||||
private static final byte FF = (byte) 0xFF;
|
||||
|
||||
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
|
||||
|
||||
private final PagedTextToPDF transformer;
|
||||
|
||||
/**
 * Creates a transformer backed by a fresh {@link PagedTextToPDF} instance.
 */
public TextToPdfContentTransformer()
{
    this.transformer = new PagedTextToPDF();
}
|
||||
|
||||
public void setStandardFont(String fontName)
|
||||
{
|
||||
try
|
||||
{
|
||||
transformer.setFont(PagedTextToPDF.getStandardFont(fontName));
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to set Standard Font for PDF generation: " + fontName, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void setFontSize(int fontSize)
|
||||
{
|
||||
try
|
||||
{
|
||||
transformer.setFontSize(fontSize);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new RuntimeException(
|
||||
"Unable to set Font Size for PDF generation: " + fontSize);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
|
||||
final File sourceFile, final File targetFile) throws Exception
|
||||
{
|
||||
String sourceEncoding = parameters.get(SOURCE_ENCODING);
|
||||
String stringPageLimit = parameters.get(PAGE_LIMIT);
|
||||
int pageLimit = -1;
|
||||
if (stringPageLimit != null)
|
||||
{
|
||||
pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
|
||||
}
|
||||
|
||||
PDDocument pdf = null;
|
||||
try (InputStream is = new FileInputStream(sourceFile);
|
||||
Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
|
||||
OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
|
||||
{
|
||||
//TransformationOptionLimits limits = getLimits(reader, writer, options);
|
||||
//TransformationOptionPair pageLimits = limits.getPagesPair();
|
||||
pdf = transformer.createPDFFromText(ir, pageLimit);
|
||||
pdf.save(os);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (pdf != null)
|
||||
{
|
||||
try { pdf.close(); } catch (Throwable e) {e.printStackTrace(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected InputStreamReader buildReader(InputStream is, String encoding)
|
||||
{
|
||||
// If they gave an encoding, try to use it
|
||||
if (encoding != null)
|
||||
{
|
||||
Charset charset = null;
|
||||
try
|
||||
{
|
||||
charset = Charset.forName(encoding);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.warn("JVM doesn't understand encoding '" + encoding +
|
||||
"' when transforming text to pdf");
|
||||
}
|
||||
if (charset != null)
|
||||
{
|
||||
// Handles the situation where there is a BOM even though the encoding indicates that normally
|
||||
// there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
|
||||
// which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
|
||||
// in the first few character. XML files even when not in European languages tend to have more
|
||||
// even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
|
||||
// Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
|
||||
// other transformers do.
|
||||
String name = charset.displayName();
|
||||
if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
|
||||
{
|
||||
logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding " + name);
|
||||
charset = Charset.forName("UTF-16");
|
||||
is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
|
||||
{
|
||||
boolean bomRead;
|
||||
boolean switchByteOrder;
|
||||
boolean evenByte = true;
|
||||
|
||||
@Override
|
||||
public int read(byte[] bytes, int off, int len) throws IOException
|
||||
{
|
||||
int i = 0;
|
||||
int b = 0;
|
||||
for (; i<len; i++)
|
||||
{
|
||||
b = read();
|
||||
if (b == -1)
|
||||
{
|
||||
break;
|
||||
}
|
||||
bytes[off+i] = (byte)b;
|
||||
}
|
||||
return i == 0 && b == -1 ? -1 : i;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException
|
||||
{
|
||||
if (!bomRead)
|
||||
{
|
||||
bomRead = true;
|
||||
boolean switchBom = false;
|
||||
byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
|
||||
int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);
|
||||
int evenZeros = countZeros(bytes, 0);
|
||||
int oddZeros = countZeros(bytes, 1);
|
||||
if (evenZeros > oddZeros)
|
||||
{
|
||||
if (bytes[0] == FF && bytes[1] == FE)
|
||||
{
|
||||
switchByteOrder = true;
|
||||
switchBom = true;
|
||||
logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.debug("More even zero bytes, so normal read for big-endian");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bytes[0] == FE && bytes[1] == FF)
|
||||
{
|
||||
switchBom = true;
|
||||
logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
|
||||
}
|
||||
else
|
||||
{
|
||||
switchByteOrder = true;
|
||||
logger.debug("More odd zero bytes, so switch bytes from little-endian");
|
||||
}
|
||||
}
|
||||
|
||||
if (switchBom)
|
||||
{
|
||||
byte b = bytes[0];
|
||||
bytes[0] = bytes[1];
|
||||
bytes[1] = b;
|
||||
}
|
||||
|
||||
for (int i = end-1; i>=0; i--)
|
||||
{
|
||||
unread(bytes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (switchByteOrder)
|
||||
{
|
||||
if (evenByte)
|
||||
{
|
||||
int b1 = super.read();
|
||||
int b2 = super.read();
|
||||
if (b1 != -1)
|
||||
{
|
||||
unread(b1);
|
||||
}
|
||||
if (b2 != -1)
|
||||
{
|
||||
unread(b2);
|
||||
}
|
||||
}
|
||||
evenByte = !evenByte;
|
||||
}
|
||||
|
||||
return super.read();
|
||||
}
|
||||
|
||||
// Counts the number of even or odd 00 bytes
|
||||
private int countZeros(byte[] b, int offset)
|
||||
{
|
||||
int count = 0;
|
||||
for (int i=offset; i<UTF16_READ_AHEAD_BYTES; i+=2)
|
||||
{
|
||||
if (b[i] == 0)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
};
|
||||
}
|
||||
logger.debug("Processing plain text in encoding " + name);
|
||||
return new InputStreamReader(is, charset);
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back on the system default
|
||||
logger.debug("Processing plain text using system default encoding");
|
||||
return new InputStreamReader(is);
|
||||
}
|
||||
|
||||
private static class PagedTextToPDF extends TextToPDF
|
||||
{
|
||||
// REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
|
||||
// before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
|
||||
static PDType1Font getStandardFont(String name)
|
||||
{
|
||||
return STANDARD_14.get(name);
|
||||
}
|
||||
|
||||
private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
|
||||
|
||||
static
|
||||
{
|
||||
STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
|
||||
STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
|
||||
PDType1Font.TIMES_BOLD_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
|
||||
STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.COURIER_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
|
||||
STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
|
||||
}
|
||||
//duplicating until here
|
||||
|
||||
// The following code is based on the code in TextToPDF with the addition of
|
||||
// checks for page limits.
|
||||
// The calling code must close the PDDocument once finished with it.
|
||||
public PDDocument createPDFFromText(Reader text, int pageLimit)
|
||||
throws IOException
|
||||
{
|
||||
PDDocument doc = null;
|
||||
int pageCount = 0;
|
||||
try
|
||||
{
|
||||
final int margin = 40;
|
||||
float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;
|
||||
|
||||
//calculate font height and increase by 5 percent.
|
||||
height = height * getFontSize() * 1.05f;
|
||||
doc = new PDDocument();
|
||||
BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
|
||||
String nextLine;
|
||||
PDPage page = new PDPage();
|
||||
PDPageContentStream contentStream = null;
|
||||
float y = -1;
|
||||
float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;
|
||||
|
||||
// There is a special case of creating a PDF document from an empty string.
|
||||
boolean textIsEmpty = true;
|
||||
|
||||
outer:
|
||||
while ((nextLine = data.readLine()) != null)
|
||||
{
|
||||
// The input text is nonEmpty. New pages will be created and added
|
||||
// to the PDF document as they are needed, depending on the length of
|
||||
// the text.
|
||||
textIsEmpty = false;
|
||||
|
||||
String[] lineWords = nextLine.trim().split(" ");
|
||||
int lineIndex = 0;
|
||||
while (lineIndex < lineWords.length)
|
||||
{
|
||||
final StringBuilder nextLineToDraw = new StringBuilder();
|
||||
float lengthIfUsingNextWord = 0;
|
||||
do
|
||||
{
|
||||
nextLineToDraw.append(lineWords[lineIndex]);
|
||||
nextLineToDraw.append(" ");
|
||||
lineIndex++;
|
||||
if (lineIndex < lineWords.length)
|
||||
{
|
||||
String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
|
||||
lengthIfUsingNextWord =
|
||||
(getFont().getStringWidth(
|
||||
lineWithNextWord) / 1000) * getFontSize();
|
||||
}
|
||||
}
|
||||
while (lineIndex < lineWords.length &&
|
||||
lengthIfUsingNextWord < maxStringLength);
|
||||
if (y < margin)
|
||||
{
|
||||
int test = pageCount + 1;
|
||||
if (pageLimit > 0 && (pageCount++ >= pageLimit))
|
||||
{
|
||||
break outer;
|
||||
}
|
||||
|
||||
// We have crossed the end-of-page boundary and need to extend the
|
||||
// document by another page.
|
||||
page = new PDPage();
|
||||
doc.addPage(page);
|
||||
if (contentStream != null)
|
||||
{
|
||||
contentStream.endText();
|
||||
contentStream.close();
|
||||
}
|
||||
contentStream = new PDPageContentStream(doc, page);
|
||||
contentStream.setFont(getFont(), getFontSize());
|
||||
contentStream.beginText();
|
||||
y = page.getMediaBox().getHeight() - margin + height;
|
||||
contentStream.moveTextPositionByAmount(margin, y);
|
||||
}
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new IOException("Error:Expected non-null content stream.");
|
||||
}
|
||||
contentStream.moveTextPositionByAmount(0, -height);
|
||||
y -= height;
|
||||
contentStream.drawString(nextLineToDraw.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// If the input text was the empty string, then the above while loop will have short-circuited
|
||||
// and we will not have added any PDPages to the document.
|
||||
// So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
|
||||
if (textIsEmpty)
|
||||
{
|
||||
doc.addPage(page);
|
||||
}
|
||||
|
||||
if (contentStream != null)
|
||||
{
|
||||
contentStream.endText();
|
||||
contentStream.close();
|
||||
}
|
||||
}
|
||||
catch (IOException io)
|
||||
{
|
||||
if (doc != null)
|
||||
{
|
||||
doc.close();
|
||||
}
|
||||
throw io;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
private int parseInt(String s, String paramName)
|
||||
{
|
||||
try
|
||||
{
|
||||
return Integer.valueOf(s);
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
throw new IllegalArgumentException(paramName + " parameter must be an integer.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,12 +1,12 @@
|
||||
#
|
||||
# HtmlMetadataExtractor - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
#
|
||||
# HtmlMetadataExtractor - default mapping
|
||||
#
|
||||
# author: Derek Hulley
|
||||
|
||||
# Namespaces
|
||||
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
|
||||
|
||||
# Mappings
|
||||
author=cm:author
|
||||
title=cm:title
|
||||
description=cm:description
|
||||
|
@@ -1,162 +1,162 @@
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.nio.file.Files;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class HtmlParserContentTransformerTest
|
||||
{
|
||||
private static final String SOURCE_MIMETYPE = "text/html";
|
||||
private static final String TARGET_MIMETYPE = "text/plain";
|
||||
|
||||
HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
||||
|
||||
/**
|
||||
* Checks that we correctly handle text in different encodings,
|
||||
* no matter if the encoding is specified on the Content Property
|
||||
* or in a meta tag within the HTML itself. (ALF-10466)
|
||||
*
|
||||
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
|
||||
* so we must be careful when checking the returned text
|
||||
*/
|
||||
@Test
|
||||
public void testEncodingHandling() throws Exception
|
||||
{
|
||||
final String NEWLINE = System.getProperty("line.separator");
|
||||
final String TITLE = "Testing!";
|
||||
final String TEXT_P1 = "This is some text in English";
|
||||
final String TEXT_P2 = "This is more text in English";
|
||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
||||
String partC = "</body></html>";
|
||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
||||
|
||||
File tmpS = null;
|
||||
File tmpD = null;
|
||||
|
||||
try
|
||||
{
|
||||
// Content set to ISO 8859-1
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Content set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Content set to UTF-16
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "UTF-16");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-16");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Note - since HTML Parser 2.0 META tags specifying the
|
||||
// document encoding will ONLY be respected if the original
|
||||
// content type was set to ISO-8859-1.
|
||||
//
|
||||
// This means there is now only one test which we can perform
|
||||
// to ensure that this now-limited overriding of the encoding
|
||||
// takes effect.
|
||||
|
||||
// Content set to ISO 8859-1, meta set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
String str = partA +
|
||||
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
|
||||
partB + partC;
|
||||
|
||||
writeToFile(tmpS, str, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Note - we can't test UTF-16 with only a meta encoding,
|
||||
// because without that the parser won't know about the
|
||||
// 2 byte format so won't be able to identify the meta tag
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (tmpS != null && tmpS.exists()) tmpS.delete();
|
||||
if (tmpD != null && tmpD.exists()) tmpD.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private void writeToFile(File file, String content, String encoding) throws Exception
|
||||
{
|
||||
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
||||
{
|
||||
ow.append(content);
|
||||
}
|
||||
}
|
||||
|
||||
private String readFromFile(File file, final String encoding) throws Exception
|
||||
{
|
||||
return new String(Files.readAllBytes(file.toPath()), encoding);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* #%L
|
||||
* Alfresco Transform Core
|
||||
* %%
|
||||
* Copyright (C) 2005 - 2022 Alfresco Software Limited
|
||||
* %%
|
||||
* This file is part of the Alfresco software.
|
||||
* -
|
||||
* If the software was purchased under a paid Alfresco license, the terms of
|
||||
* the paid license agreement will prevail. Otherwise, the software is
|
||||
* provided under the following open source license terms:
|
||||
* -
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* -
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
* -
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
* #L%
|
||||
*/
|
||||
package org.alfresco.transformer.transformers;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.nio.file.Files;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class HtmlParserContentTransformerTest
|
||||
{
|
||||
private static final String SOURCE_MIMETYPE = "text/html";
|
||||
private static final String TARGET_MIMETYPE = "text/plain";
|
||||
|
||||
HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();
|
||||
|
||||
/**
|
||||
* Checks that we correctly handle text in different encodings,
|
||||
* no matter if the encoding is specified on the Content Property
|
||||
* or in a meta tag within the HTML itself. (ALF-10466)
|
||||
*
|
||||
* On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a new system dependent new line
|
||||
* so we must be careful when checking the returned text
|
||||
*/
|
||||
@Test
|
||||
public void testEncodingHandling() throws Exception
|
||||
{
|
||||
final String NEWLINE = System.getProperty("line.separator");
|
||||
final String TITLE = "Testing!";
|
||||
final String TEXT_P1 = "This is some text in English";
|
||||
final String TEXT_P2 = "This is more text in English";
|
||||
final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";
|
||||
String partA = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
|
||||
String partB = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P2 + "</p>" + NEWLINE +
|
||||
"<p>" + TEXT_P3 + "</p>" + NEWLINE;
|
||||
String partC = "</body></html>";
|
||||
final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;
|
||||
|
||||
File tmpS = null;
|
||||
File tmpD = null;
|
||||
|
||||
try
|
||||
{
|
||||
// Content set to ISO 8859-1
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "ISO-8859-1");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Content set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-8");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Content set to UTF-16
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
writeToFile(tmpS, partA + partB + partC, "UTF-16");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "UTF-16");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Note - since HTML Parser 2.0 META tags specifying the
|
||||
// document encoding will ONLY be respected if the original
|
||||
// content type was set to ISO-8859-1.
|
||||
//
|
||||
// This means there is now only one test which we can perform
|
||||
// to ensure that this now-limited overriding of the encoding
|
||||
// takes effect.
|
||||
|
||||
// Content set to ISO 8859-1, meta set to UTF-8
|
||||
tmpS = File.createTempFile("AlfrescoTestSource_", ".html");
|
||||
String str = partA +
|
||||
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
|
||||
partB + partC;
|
||||
|
||||
writeToFile(tmpS, str, "UTF-8");
|
||||
|
||||
tmpD = File.createTempFile("AlfrescoTestTarget_", ".txt");
|
||||
|
||||
parameters = new HashMap<>();
|
||||
parameters.put(SOURCE_ENCODING, "ISO-8859-1");
|
||||
transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, parameters, tmpS, tmpD);
|
||||
assertEquals(expected, readFromFile(tmpD, "UTF-8"));
|
||||
tmpS.delete();
|
||||
tmpD.delete();
|
||||
|
||||
// Note - we can't test UTF-16 with only a meta encoding,
|
||||
// because without that the parser won't know about the
|
||||
// 2 byte format so won't be able to identify the meta tag
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (tmpS != null && tmpS.exists()) tmpS.delete();
|
||||
if (tmpD != null && tmpD.exists()) tmpD.delete();
|
||||
}
|
||||
}
|
||||
|
||||
private void writeToFile(File file, String content, String encoding) throws Exception
|
||||
{
|
||||
try (OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(file), encoding))
|
||||
{
|
||||
ow.append(content);
|
||||
}
|
||||
}
|
||||
|
||||
private String readFromFile(File file, final String encoding) throws Exception
|
||||
{
|
||||
return new String(Files.readAllBytes(file.toPath()), encoding);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user