fix mixed-line-ending

This commit is contained in:
Marcello Teodori
2022-02-23 22:40:19 +01:00
parent 157e261dde
commit 4175ac34da
56 changed files with 5160 additions and 5160 deletions

View File

@@ -1,77 +1,77 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
/**
 * Spring Boot entry point of the transformer application.
 * <p>
 * {@link DataSourceAutoConfiguration} is excluded from auto-configuration.
 */
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
    private static final Logger logger = LoggerFactory.getLogger(Application.class);

    /** Value of the {@code container.name} property; attached to meters as a common tag. */
    @Value("${container.name}")
    private String containerName;

    /**
     * Provides a customizer that adds a common {@code containerName} tag to the meter registry.
     *
     * @return the customizer to apply to the {@link MeterRegistry}
     */
    @Bean
    MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
    {
        return meterRegistry -> meterRegistry.config().commonTags("containerName", containerName);
    }

    /**
     * Launches the Spring application.
     *
     * @param args command-line arguments handed over to {@link SpringApplication#run}
     */
    public static void main(String[] args)
    {
        SpringApplication.run(Application.class, args);
    }

    /**
     * Invoked on {@link ApplicationReadyEvent}: logs both licence texts line by line,
     * framed by separator lines, followed by a final "done" message.
     */
    @EventListener(ApplicationReadyEvent.class)
    public void startup()
    {
        final String separator = "--------------------------------------------------------------------------------------------------------------------------------------------------------------";

        logger.info(separator);
        // Each licence text is emitted one log statement per line.
        for (String licenceLine : LICENCE.split("\\n"))
        {
            logger.info(licenceLine);
        }
        for (String licenceLine : SelectingTransformer.LICENCE.split("\\n"))
        {
            logger.info(licenceLine);
        }
        logger.info(separator);

        logger.info("Starting application components... Done");
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import io.micrometer.core.instrument.MeterRegistry;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.annotation.Bean;
import org.springframework.context.event.EventListener;
import java.util.Arrays;
import static org.alfresco.transformer.logging.StandardMessages.LICENCE;
/**
 * Spring Boot entry point of the transformer application.
 * <p>
 * {@link DataSourceAutoConfiguration} is excluded from auto-configuration.
 */
@SpringBootApplication
@EnableAutoConfiguration(exclude = {DataSourceAutoConfiguration.class})
public class Application
{
    private static final Logger logger = LoggerFactory.getLogger(Application.class);

    /** Value of the {@code container.name} property; attached to meters as a common tag. */
    @Value("${container.name}")
    private String containerName;

    /**
     * Provides a customizer that adds a common {@code containerName} tag to the meter registry.
     *
     * @return the customizer to apply to the {@link MeterRegistry}
     */
    @Bean
    MeterRegistryCustomizer<MeterRegistry> metricsCommonTags()
    {
        return meterRegistry -> meterRegistry.config().commonTags("containerName", containerName);
    }

    /**
     * Launches the Spring application.
     *
     * @param args command-line arguments handed over to {@link SpringApplication#run}
     */
    public static void main(String[] args)
    {
        SpringApplication.run(Application.class, args);
    }

    /**
     * Invoked on {@link ApplicationReadyEvent}: logs both licence texts line by line,
     * framed by separator lines, followed by a final "done" message.
     */
    @EventListener(ApplicationReadyEvent.class)
    public void startup()
    {
        final String separator = "--------------------------------------------------------------------------------------------------------------------------------------------------------------";

        logger.info(separator);
        // Each licence text is emitted one log statement per line.
        for (String licenceLine : LICENCE.split("\\n"))
        {
            logger.info(licenceLine);
        }
        for (String licenceLine : SelectingTransformer.LICENCE.split("\\n"))
        {
            logger.info(licenceLine);
        }
        logger.info(separator);

        logger.info("Starting application components... Done");
    }
}

View File

@@ -1,89 +1,89 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
 * Controller that hands every transform request over to a {@link SelectingTransformer}.
 */
@Controller
public class MiscController extends AbstractTransformerController
{
    private static final Logger logger = LoggerFactory.getLogger(MiscController.class);

    private SelectingTransformer transformer = new SelectingTransformer();

    /**
     * @return the display name of this transformer
     */
    @Override
    public String getTransformerName()
    {
        return "Miscellaneous Transformers";
    }

    /**
     * @return the transformer name followed by {@code " available"}
     */
    @Override
    public String version()
    {
        final String name = getTransformerName();
        return name + " available";
    }

    /**
     * Builds the probe transform, which converts {@code quick.html} to {@code quick.txt}
     * by calling {@link #transformImpl} with UTF-8 as the source encoding.
     *
     * @return the probe test transform for this controller
     */
    @Override
    public ProbeTestTransform getProbeTestTransform()
    {
        // HtmlParserContentTransformer html -> text
        // See the Javadoc on this method and Probes.md for the choice of these values.
        return new ProbeTestTransform(this, "quick.html", "quick.txt",
            119, 30, 150, 1024,
            60 * 2 + 1, 60 * 2)
        {
            @Override
            protected void executeTransformCommand(File sourceFile, File targetFile)
            {
                // A fresh, mutable map is required: transformImpl adds the transform name to it.
                Map<String, String> probeOptions = new HashMap<>();
                probeOptions.put(SOURCE_ENCODING, "UTF-8");
                transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, probeOptions, sourceFile, targetFile);
            }
        };
    }

    /**
     * Records the transform name in the options and delegates to the selecting transformer.
     *
     * @param transformName    name stored under {@code TRANSFORM_NAME_PARAMETER}
     * @param sourceMimetype   mimetype of the source file
     * @param targetMimetype   mimetype requested for the target file
     * @param transformOptions options passed on to the transformer; modified by this call
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     */
    @Override
    public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions, File sourceFile, File targetFile)
    {
        transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.SelectingTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import static org.alfresco.transformer.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
 * Controller that hands every transform request over to a {@link SelectingTransformer}.
 */
@Controller
public class MiscController extends AbstractTransformerController
{
    private static final Logger logger = LoggerFactory.getLogger(MiscController.class);

    private SelectingTransformer transformer = new SelectingTransformer();

    /**
     * @return the display name of this transformer
     */
    @Override
    public String getTransformerName()
    {
        return "Miscellaneous Transformers";
    }

    /**
     * @return the transformer name followed by {@code " available"}
     */
    @Override
    public String version()
    {
        final String name = getTransformerName();
        return name + " available";
    }

    /**
     * Builds the probe transform, which converts {@code quick.html} to {@code quick.txt}
     * by calling {@link #transformImpl} with UTF-8 as the source encoding.
     *
     * @return the probe test transform for this controller
     */
    @Override
    public ProbeTestTransform getProbeTestTransform()
    {
        // HtmlParserContentTransformer html -> text
        // See the Javadoc on this method and Probes.md for the choice of these values.
        return new ProbeTestTransform(this, "quick.html", "quick.txt",
            119, 30, 150, 1024,
            60 * 2 + 1, 60 * 2)
        {
            @Override
            protected void executeTransformCommand(File sourceFile, File targetFile)
            {
                // A fresh, mutable map is required: transformImpl adds the transform name to it.
                Map<String, String> probeOptions = new HashMap<>();
                probeOptions.put(SOURCE_ENCODING, "UTF-8");
                transformImpl("html", MIMETYPE_HTML, MIMETYPE_TEXT_PLAIN, probeOptions, sourceFile, targetFile);
            }
        };
    }

    /**
     * Records the transform name in the options and delegates to the selecting transformer.
     *
     * @param transformName    name stored under {@code TRANSFORM_NAME_PARAMETER}
     * @param sourceMimetype   mimetype of the source file
     * @param targetMimetype   mimetype requested for the target file
     * @param transformOptions options passed on to the transformer; modified by this call
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     */
    @Override
    public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions, File sourceFile, File targetFile)
    {
        transformOptions.put(TRANSFORM_NAME_PARAMETER, transformName);
        transformer.transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }
}

View File

@@ -1,55 +1,55 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Queue integration test whose request asks for an HTML to plain text transform.
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
    properties = {"activemq.url=nio://localhost:61616"})
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
    /**
     * Builds a request with random request id and source reference, asking for
     * {@code MIMETYPE_HTML} to be transformed into {@code MIMETYPE_TEXT_PLAIN}.
     *
     * @return the transform request used by the test
     */
    @Override
    protected TransformRequest buildRequest()
    {
        // Both identifiers are independent random UUIDs.
        final String requestId = UUID.randomUUID().toString();
        final String sourceReference = UUID.randomUUID().toString();

        return TransformRequest
            .builder()
            .withRequestId(requestId)
            .withSourceMediaType(MIMETYPE_HTML)
            .withTargetMediaType(MIMETYPE_TEXT_PLAIN)
            .withTargetExtension("txt")
            .withSchema(1)
            .withClientData("ACS")
            .withSourceReference(sourceReference)
            .withSourceSize(32L)
            .build();
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
import java.util.UUID;
import org.alfresco.transform.client.model.TransformRequest;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Queue integration test whose request asks for an HTML to plain text transform.
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
    properties = {"activemq.url=nio://localhost:61616"})
public class MiscQueueTransformServiceIT extends AbstractQueueTransformServiceIT
{
    /**
     * Builds a request with random request id and source reference, asking for
     * {@code MIMETYPE_HTML} to be transformed into {@code MIMETYPE_TEXT_PLAIN}.
     *
     * @return the transform request used by the test
     */
    @Override
    protected TransformRequest buildRequest()
    {
        // Both identifiers are independent random UUIDs.
        final String requestId = UUID.randomUUID().toString();
        final String sourceReference = UUID.randomUUID().toString();

        return TransformRequest
            .builder()
            .withRequestId(requestId)
            .withSourceMediaType(MIMETYPE_HTML)
            .withTargetMediaType(MIMETYPE_TEXT_PLAIN)
            .withTargetExtension("txt")
            .withSchema(1)
            .withClientData("ACS")
            .withSourceReference(sourceReference)
            .withSourceSize(32L)
            .build();
    }
}

View File

@@ -1,48 +1,48 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Exercises MiscController through a server test harness started on a random port.
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
{
    private static final String TRANSFORMER_NAME = "Miscellaneous Transformers";
    private static final String SOURCE_EXTENSION = "html";

    /**
     * @return the name of the transformer under test
     */
    @Override
    protected String getTransformerName()
    {
        return TRANSFORMER_NAME;
    }

    /**
     * @return the file extension of the source used by the test
     */
    @Override
    protected String getSourceExtension()
    {
        return SOURCE_EXTENSION;
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2021 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Exercises MiscController through a server test harness started on a random port.
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class MiscTransformerHttpRequestTest extends AbstractHttpRequestTest
{
    private static final String TRANSFORMER_NAME = "Miscellaneous Transformers";
    private static final String SOURCE_EXTENSION = "html";

    /**
     * @return the name of the transformer under test
     */
    @Override
    protected String getTransformerName()
    {
        return TRANSFORMER_NAME;
    }

    /**
     * @return the file extension of the source used by the test
     */
    @Override
    protected String getSourceExtension()
    {
        return SOURCE_EXTENSION;
    }
}

View File

@@ -1,30 +1,30 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
alternative plain text
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">alternative html text</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
Parts form an multipart/alternative should represent the same content in different formats
In this eml example the content differs with the purpose of determining if right part was used in transformation
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
alternative plain text
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">alternative html text</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
Parts form an multipart/alternative should represent the same content in different formats
In this eml example the content differs with the purpose of determining if right part was used in transformation

View File

@@ -1,44 +1,44 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
This is a multipart message in MIME format.
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
Mail with attachment content
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Mail with attachment content</div>
------=_NextPart_001_0001_01D06C6A.D04F3750--
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: text/plain;
name="alt.txt"
Content-Transfer-Encoding: quoted-printable
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
Content-Disposition: attachment;
filename="alt.txt"
File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0000_01D06C6A.D04F3750"
This is a multipart message in MIME format.
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0001_01D06C6A.D04F3750"
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
Mail with attachment content
------=_NextPart_001_0001_01D06C6A.D04F3750
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Mail with attachment content</div>
------=_NextPart_001_0001_01D06C6A.D04F3750--
------=_NextPart_000_0000_01D06C6A.D04F3750
Content-Type: text/plain;
name="alt.txt"
Content-Transfer-Encoding: quoted-printable
Content-ID: <796B1E07B04ACC41A78199F35721150F@eurprd04.prod.outlook.com>
Content-Disposition: attachment;
filename="alt.txt"
File attachment content
------=_NextPart_000_0000_01D06C6A.D04F3750--

View File

@@ -1,28 +1,28 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
html special characters
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0005_01D06C6A.DBA98EC0"
This is a multipart message in MIME format.
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 7bit
html special characters
------=_NextPart_000_0005_01D06C6A.DBA98EC0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">html&nbsp;special&nbsp;characters</div>
------=_NextPart_000_0005_01D06C6A.DBA98EC0--

View File

@@ -1,41 +1,41 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/related;
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
type="multipart/alternative"
This is a multi-part message in MIME format.
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
nested alternative plain text
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">nested alternative html text</div>
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: image/jpeg; name="image001.jpg"
Content-Transfer-Encoding: base64
Content-ID: <image001.jpg@01D146F0.63006280>
image
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: Attachment test
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/related;
boundary="--_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423";
type="multipart/alternative"
This is a multi-part message in MIME format.
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: multipart/alternative; boundary="--_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362"
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
nested alternative plain text
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">nested alternative html text</div>
----_=_NextPart0_f68fab3d-a986-41a5-9cf0-3a3aefb21362--
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423
Content-Type: image/jpeg; name="image001.jpg"
Content-Transfer-Encoding: base64
Content-ID: <image001.jpg@01D146F0.63006280>
image
----_=_NextPart1_03fb5278-acd0-44a8-88cd-bfd1347fd423--

View File

@@ -1,31 +1,31 @@
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: The quick brown fox jumps over the lazy dog
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
This is a multipart message in MIME format.
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
El rápido zorro marrón salta sobre el perro perezoso
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
perezoso&nbsp;<br></div>
------=_NextPart_000_0009_01D06BC5.14D754D0--
MIME-Version: 1.0
Received: by 10.000.0.000 with HTTP; Thu, 16 Aug 2012 08:13:29 -0700 (PDT)
Date: Thu, 16 Aug 2012 16:13:29 +0100
Delivered-To: jane.doe@alfresco.com
Message-ID: <CAL0uq1f9vPczLRinL3xB5U_oSSd5U0ob=408nBgosCY0OVFyBw@mail.alfresco.com>
Subject: The quick brown fox jumps over the lazy dog
From: <john.doe@alfresco.com>
To: <jane.doe@alfresco.com>
Content-Type: multipart/alternative;
boundary="----=_NextPart_000_0009_01D06BC5.14D754D0"
This is a multipart message in MIME format.
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
El rápido zorro marrón salta sobre el perro perezoso
------=_NextPart_000_0009_01D06BC5.14D754D0
Content-Type: text/html;
charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">El r=C3=A1pido zorro marr=C3=B3n salta sobre el perro =
perezoso&nbsp;<br></div>
------=_NextPart_000_0009_01D06BC5.14D754D0--

View File

@@ -1,115 +1,115 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
* newer one. Both formats have the same mimetype.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Neil Mc Erlean
* @author eknizat
* @since 4.0
*/
public class AppleIWorksContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        AppleIWorksContentTransformer.class);

    // The zip entry names Apple uses for embedded previews differ between iWorks releases.
    private static final List<String> PDF_PATHS = ImmutableList.of(
        "QuickLook/Preview.pdf");      // iWorks 2008/9
    private static final List<String> JPG_PATHS = ImmutableList.of(
        "QuickLook/Thumbnail.jpg",     // iWorks 2008/9
        "preview.jpg");                // iWorks 2013/14 (720 x 552), the best quality image.
                                       // Smaller alternatives that are not used:
                                       //   (225 x 173) preview-web.jpg
                                       //   (53 x 41)   preview-micro.jpg

    /**
     * Copies the preview embedded in an iWorks archive to {@code targetFile}.
     *
     * The JPEG entry names are searched when {@code targetMimetype} is the JPEG mimetype,
     * otherwise the PDF entry names are searched. The first matching entry wins.
     *
     * @param sourceMimetype mimetype of the source, used only in log and error messages
     * @param targetMimetype mimetype requested; selects which entry names are searched
     * @param parameters     not read by this transformer
     * @param sourceFile     the iWorks file, expected to be a zip archive
     * @param targetFile     file that receives the preview; replaced if it already exists
     * @throws RuntimeException if no matching entry exists, or if reading the archive fails
     *                          with an {@link IOException} (which is kept as the cause)
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile)
    {
        logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
            sourceMimetype, targetMimetype);

        final List<String> candidateNames =
            MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;

        // iWorks files are zip (or package) files. Anything that cannot be read as a zip
        // surfaces as an IOException and is reported by the catch block below.
        try (ZipArchiveInputStream archive = new ZipArchiveInputStream(
            new BufferedInputStream(new FileInputStream(sourceFile))))
        {
            if (!copyFirstMatchingEntry(archive, candidateNames, targetFile))
            {
                throw new RuntimeException(
                    "The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
            }
        }
        catch (IOException e)
        {
            throw new RuntimeException(
                "Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
                e);
        }
    }

    /**
     * Walks the archive entries in order and copies the first one whose name is listed in
     * {@code wantedNames} to {@code destination}.
     *
     * @return {@code true} if an entry was copied, {@code false} if the archive was exhausted
     */
    private static boolean copyFirstMatchingEntry(final ZipArchiveInputStream archive,
                                                  final List<String> wantedNames,
                                                  final File destination) throws IOException
    {
        for (ZipArchiveEntry current = archive.getNextZipEntry();
             current != null;
             current = archive.getNextZipEntry())
        {
            if (wantedNames.contains(current.getName()))
            {
                // The stream is positioned on the current entry, so copying it copies that entry's bytes.
                Files.copy(archive, destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
                return true;
            }
        }
        return false;
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableList;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_IMAGE_JPEG;
/**
* Converts Apple iWorks files to JPEGs for thumbnailing and previewing.
* The transformer will only work for iWorks 2013/14 files. Support for iWorks 2008/9 has been dropped as we cannot
* support both, because the newer format does not contain a PDF. If we say this transformer supports PDF, Share will
* assume incorrectly that we can convert to PDF and we would only get a preview for the older format and never the
* newer one. Both formats have the same mimetype.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* @author Neil Mc Erlean
* @author eknizat
* @since 4.0
*/
public class AppleIWorksContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        AppleIWorksContentTransformer.class);

    // The zip entry names Apple uses for embedded previews differ between iWorks releases.
    private static final List<String> PDF_PATHS = ImmutableList.of(
        "QuickLook/Preview.pdf");      // iWorks 2008/9
    private static final List<String> JPG_PATHS = ImmutableList.of(
        "QuickLook/Thumbnail.jpg",     // iWorks 2008/9
        "preview.jpg");                // iWorks 2013/14 (720 x 552), the best quality image.
                                       // Smaller alternatives that are not used:
                                       //   (225 x 173) preview-web.jpg
                                       //   (53 x 41)   preview-micro.jpg

    /**
     * Copies the preview embedded in an iWorks archive to {@code targetFile}.
     *
     * The JPEG entry names are searched when {@code targetMimetype} is the JPEG mimetype,
     * otherwise the PDF entry names are searched. The first matching entry wins.
     *
     * @param sourceMimetype mimetype of the source, used only in log and error messages
     * @param targetMimetype mimetype requested; selects which entry names are searched
     * @param parameters     not read by this transformer
     * @param sourceFile     the iWorks file, expected to be a zip archive
     * @param targetFile     file that receives the preview; replaced if it already exists
     * @throws RuntimeException if no matching entry exists, or if reading the archive fails
     *                          with an {@link IOException} (which is kept as the cause)
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile)
    {
        logger.debug("Performing IWorks to jpeg transform with sourceMimetype={} targetMimetype={}",
            sourceMimetype, targetMimetype);

        final List<String> candidateNames =
            MIMETYPE_IMAGE_JPEG.equals(targetMimetype) ? JPG_PATHS : PDF_PATHS;

        // iWorks files are zip (or package) files. Anything that cannot be read as a zip
        // surfaces as an IOException and is reported by the catch block below.
        try (ZipArchiveInputStream archive = new ZipArchiveInputStream(
            new BufferedInputStream(new FileInputStream(sourceFile))))
        {
            if (!copyFirstMatchingEntry(archive, candidateNames, targetFile))
            {
                throw new RuntimeException(
                    "The source " + sourceMimetype + " file did not contain a " + targetMimetype + " preview");
            }
        }
        catch (IOException e)
        {
            throw new RuntimeException(
                "Unable to transform " + sourceMimetype + " file. It should have been a zip format file.",
                e);
        }
    }

    /**
     * Walks the archive entries in order and copies the first one whose name is listed in
     * {@code wantedNames} to {@code destination}.
     *
     * @return {@code true} if an entry was copied, {@code false} if the archive was exhausted
     */
    private static boolean copyFirstMatchingEntry(final ZipArchiveInputStream archive,
                                                  final List<String> wantedNames,
                                                  final File destination) throws IOException
    {
        for (ZipArchiveEntry current = archive.getNextZipEntry();
             current != null;
             current = archive.getNextZipEntry())
        {
            if (wantedNames.contains(current.getName()))
            {
                // The stream is positioned on the current entry, so copying it copies that entry's bytes.
                Files.copy(archive, destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
                return true;
            }
        }
        return false;
    }
}

View File

@@ -1,232 +1,232 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
* messages. Searches for all text content parts, and returns them. Any
* attachments are ignored. TIKA Note - could be replaced with the Tika email
* parser. Would require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing text and html
* related parsers.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*/
public class EMLTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);

    // Name of the Content-Type parameter that carries the character set of a mail part.
    private static final String CHARSET = "charset";
    // Encoding assumed for an HTML part whose Content-Type carries no usable charset parameter.
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Parses {@code sourceFile} as a MIME message and writes its text content to {@code targetFile}.
     *
     * A multipart message is walked recursively; any other content is written using its
     * {@code toString()} value.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     not read by this transformer
     * @param sourceFile     the RFC822 message to read
     * @param targetFile     the file that receives the extracted text
     * @throws Exception if the message cannot be read or parsed, or the target cannot be written
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        logger.debug("Performing RFC822 to text transform.");

        // NOTE(review): FileWriter encodes with the platform default charset, so non-ASCII output
        // depends on the JVM configuration - confirm whether an explicit charset is wanted here.
        try (InputStream contentInputStream = new BufferedInputStream(
            new FileInputStream(sourceFile));
             Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
        {
            MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
                contentInputStream);

            final StringBuilder sb = new StringBuilder();
            Object content = mimeMessage.getContent();
            if (content instanceof Multipart)
            {
                processMultiPart((Multipart) content, sb);
            }
            else
            {
                sb.append(content.toString());
            }
            bufferedFileWriter.write(sb.toString());
        }
    }

    /**
     * Finds the "text" parts of a multipart recursively and appends them to {@code sb}.
     *
     * A multipart whose content type contains the multipart/alternative mimetype is delegated to
     * {@link #processAlternativeMultipart}; otherwise every body part is visited in order.
     *
     * @param multipart Multipart to process
     * @param sb        StringBuilder that accumulates the extracted text
     * @throws MessagingException if a part cannot be accessed
     * @throws IOException        if part content cannot be read
     */
    private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
        IOException
    {
        boolean isAlternativeMultipart = multipart.getContentType().contains(
            MIMETYPE_MULTIPART_ALTERNATIVE);
        if (isAlternativeMultipart)
        {
            processAlternativeMultipart(multipart, sb);
        }
        else
        {
            for (int i = 0, n = multipart.getCount(); i < n; i++)
            {
                Part part = multipart.getBodyPart(i);
                if (part.getContent() instanceof Multipart)
                {
                    processMultiPart((Multipart) part.getContent(), sb);
                }
                else
                {
                    processPart(part, sb);
                }
            }
        }
    }

    /**
     * Picks one part of a multipart/alternative and appends its text content to {@code sb}.
     *
     * The first text/plain part ends the scan and is used. If none is found, the last text/html
     * part seen is used. Nested multipart/alternative parts met during the scan are processed
     * immediately, so their text is appended before the part chosen at this level.
     *
     * @param multipart the multipart/alternative to process
     * @param sb        StringBuilder that accumulates the extracted text
     * @throws IOException        if part content cannot be read
     * @throws MessagingException if a part cannot be accessed
     */
    private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
        IOException, MessagingException
    {
        Part partToUse = null;
        for (int i = 0, n = multipart.getCount(); i < n; i++)
        {
            Part part = multipart.getBodyPart(i);
            if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
            {
                partToUse = part;
                break;
            }
            else if (part.getContentType().contains(MIMETYPE_HTML))
            {
                partToUse = part;
            }
            else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
            {
                if (part.getContent() instanceof Multipart)
                {
                    processAlternativeMultipart((Multipart) part.getContent(), sb);
                }
            }
        }
        if (partToUse != null)
        {
            processPart(partToUse, sb);
        }
    }

    /**
     * Appends the text of a single mail part to {@code sb}. Accepted part types are text/html and
     * text/plain; attachments and every other type are ignored.
     *
     * HTML content is written to a temporary file and converted to plain text by the HTML
     * extractor. The temporary file is removed even when the conversion fails.
     *
     * @param part the mail part to read
     * @param sb   StringBuilder that accumulates the extracted text
     * @throws IOException        if part content cannot be read or the temporary file cannot be written
     * @throws MessagingException if the part cannot be accessed
     */
    private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
    {
        boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
        if (isAttachment)
        {
            return;
        }

        if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
        {
            sb.append(part.getContent().toString());
        }
        else if (part.getContentType().contains(MIMETYPE_HTML))
        {
            String mailPartContent = part.getContent().toString();

            // create a temporary html file with same mail part content and encoding
            File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
                ".html");
            try
            {
                String encoding = getMailPartContentEncoding(part);
                try (OutputStreamWriter osWriter = new OutputStreamWriter(
                    new FileOutputStream(tempHtmlFile), encoding))
                {
                    osWriter.write(mailPartContent);
                }

                // transform html file's content to plain text
                HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
                extractor.setCollapse(false);
                extractor.setLinks(false);
                extractor.setReplaceNonBreakingSpaces(false);
                extractor.setURL(tempHtmlFile, encoding);
                sb.append(extractor.getStrings());
            }
            finally
            {
                // Always remove the temporary file, including when writing or extraction throws.
                tempHtmlFile.delete();
            }
        }
    }

    /**
     * Reads the charset parameter from the Content-Type of a mail part.
     *
     * Only the charset value itself is returned: parameters that follow it (separated by
     * {@code ;}), surrounding quotes and surrounding whitespace are dropped.
     *
     * @param part the mail part whose Content-Type is inspected
     * @return the declared charset name, or {@link #DEFAULT_ENCODING} when none is declared or
     *         the declared value is empty
     * @throws MessagingException if the Content-Type cannot be read
     */
    private String getMailPartContentEncoding(Part part) throws MessagingException
    {
        String contentType = part.getContentType();
        int startIndex = contentType.indexOf(CHARSET);
        if (startIndex <= 0)
        {
            return DEFAULT_ENCODING;
        }

        // Skip the parameter name and the '=' that follows it.
        int valueStart = startIndex + CHARSET.length() + 1;
        if (valueStart > contentType.length())
        {
            return DEFAULT_ENCODING;
        }

        String encoding = contentType.substring(valueStart);
        // Content-Type parameters are ';' separated: keep only the charset value.
        int valueEnd = encoding.indexOf(';');
        if (valueEnd >= 0)
        {
            encoding = encoding.substring(0, valueEnd);
        }
        encoding = encoding.replaceAll("\"", "").trim();
        return encoding.isEmpty() ? DEFAULT_ENCODING : encoding;
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.fs.FileManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_HTML;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_MULTIPART_ALTERNATIVE;
import static org.alfresco.transform.client.model.Mimetype.MIMETYPE_TEXT_PLAIN;
/**
* Uses javax.mail.MimeMessage to generate plain text versions of RFC822 email
* messages. Searches for all text content parts, and returns them. Any
* attachments are ignored. TIKA Note - could be replaced with the Tika email
* parser. Would require a recursing parser to be specified, but not the full
* Auto one (we don't want attachments), just one containing text and html
* related parsers.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*/
public class EMLTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(EMLTransformer.class);

    // Name of the Content-Type parameter that carries the character set of a mail part.
    private static final String CHARSET = "charset";
    // Encoding assumed for an HTML part whose Content-Type carries no usable charset parameter.
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Parses {@code sourceFile} as a MIME message and writes its text content to {@code targetFile}.
     *
     * A multipart message is walked recursively; any other content is written using its
     * {@code toString()} value.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     not read by this transformer
     * @param sourceFile     the RFC822 message to read
     * @param targetFile     the file that receives the extracted text
     * @throws Exception if the message cannot be read or parsed, or the target cannot be written
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        logger.debug("Performing RFC822 to text transform.");

        // NOTE(review): FileWriter encodes with the platform default charset, so non-ASCII output
        // depends on the JVM configuration - confirm whether an explicit charset is wanted here.
        try (InputStream contentInputStream = new BufferedInputStream(
            new FileInputStream(sourceFile));
             Writer bufferedFileWriter = new BufferedWriter(new FileWriter(targetFile)))
        {
            MimeMessage mimeMessage = new MimeMessage(Session.getDefaultInstance(new Properties()),
                contentInputStream);

            final StringBuilder sb = new StringBuilder();
            Object content = mimeMessage.getContent();
            if (content instanceof Multipart)
            {
                processMultiPart((Multipart) content, sb);
            }
            else
            {
                sb.append(content.toString());
            }
            bufferedFileWriter.write(sb.toString());
        }
    }

    /**
     * Finds the "text" parts of a multipart recursively and appends them to {@code sb}.
     *
     * A multipart whose content type contains the multipart/alternative mimetype is delegated to
     * {@link #processAlternativeMultipart}; otherwise every body part is visited in order.
     *
     * @param multipart Multipart to process
     * @param sb        StringBuilder that accumulates the extracted text
     * @throws MessagingException if a part cannot be accessed
     * @throws IOException        if part content cannot be read
     */
    private void processMultiPart(Multipart multipart, StringBuilder sb) throws MessagingException,
        IOException
    {
        boolean isAlternativeMultipart = multipart.getContentType().contains(
            MIMETYPE_MULTIPART_ALTERNATIVE);
        if (isAlternativeMultipart)
        {
            processAlternativeMultipart(multipart, sb);
        }
        else
        {
            for (int i = 0, n = multipart.getCount(); i < n; i++)
            {
                Part part = multipart.getBodyPart(i);
                if (part.getContent() instanceof Multipart)
                {
                    processMultiPart((Multipart) part.getContent(), sb);
                }
                else
                {
                    processPart(part, sb);
                }
            }
        }
    }

    /**
     * Picks one part of a multipart/alternative and appends its text content to {@code sb}.
     *
     * The first text/plain part ends the scan and is used. If none is found, the last text/html
     * part seen is used. Nested multipart/alternative parts met during the scan are processed
     * immediately, so their text is appended before the part chosen at this level.
     *
     * @param multipart the multipart/alternative to process
     * @param sb        StringBuilder that accumulates the extracted text
     * @throws IOException        if part content cannot be read
     * @throws MessagingException if a part cannot be accessed
     */
    private void processAlternativeMultipart(Multipart multipart, StringBuilder sb) throws
        IOException, MessagingException
    {
        Part partToUse = null;
        for (int i = 0, n = multipart.getCount(); i < n; i++)
        {
            Part part = multipart.getBodyPart(i);
            if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
            {
                partToUse = part;
                break;
            }
            else if (part.getContentType().contains(MIMETYPE_HTML))
            {
                partToUse = part;
            }
            else if (part.getContentType().contains(MIMETYPE_MULTIPART_ALTERNATIVE))
            {
                if (part.getContent() instanceof Multipart)
                {
                    processAlternativeMultipart((Multipart) part.getContent(), sb);
                }
            }
        }
        if (partToUse != null)
        {
            processPart(partToUse, sb);
        }
    }

    /**
     * Appends the text of a single mail part to {@code sb}. Accepted part types are text/html and
     * text/plain; attachments and every other type are ignored.
     *
     * HTML content is written to a temporary file and converted to plain text by the HTML
     * extractor. The temporary file is removed even when the conversion fails.
     *
     * @param part the mail part to read
     * @param sb   StringBuilder that accumulates the extracted text
     * @throws IOException        if part content cannot be read or the temporary file cannot be written
     * @throws MessagingException if the part cannot be accessed
     */
    private void processPart(Part part, StringBuilder sb) throws IOException, MessagingException
    {
        boolean isAttachment = Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition());
        if (isAttachment)
        {
            return;
        }

        if (part.getContentType().contains(MIMETYPE_TEXT_PLAIN))
        {
            sb.append(part.getContent().toString());
        }
        else if (part.getContentType().contains(MIMETYPE_HTML))
        {
            String mailPartContent = part.getContent().toString();

            // create a temporary html file with same mail part content and encoding
            File tempHtmlFile = FileManager.TempFileProvider.createTempFile("EMLTransformer_",
                ".html");
            try
            {
                String encoding = getMailPartContentEncoding(part);
                try (OutputStreamWriter osWriter = new OutputStreamWriter(
                    new FileOutputStream(tempHtmlFile), encoding))
                {
                    osWriter.write(mailPartContent);
                }

                // transform html file's content to plain text
                HtmlParserContentTransformer.EncodingAwareStringBean extractor = new HtmlParserContentTransformer.EncodingAwareStringBean();
                extractor.setCollapse(false);
                extractor.setLinks(false);
                extractor.setReplaceNonBreakingSpaces(false);
                extractor.setURL(tempHtmlFile, encoding);
                sb.append(extractor.getStrings());
            }
            finally
            {
                // Always remove the temporary file, including when writing or extraction throws.
                tempHtmlFile.delete();
            }
        }
    }

    /**
     * Reads the charset parameter from the Content-Type of a mail part.
     *
     * Only the charset value itself is returned: parameters that follow it (separated by
     * {@code ;}), surrounding quotes and surrounding whitespace are dropped.
     *
     * @param part the mail part whose Content-Type is inspected
     * @return the declared charset name, or {@link #DEFAULT_ENCODING} when none is declared or
     *         the declared value is empty
     * @throws MessagingException if the Content-Type cannot be read
     */
    private String getMailPartContentEncoding(Part part) throws MessagingException
    {
        String contentType = part.getContentType();
        int startIndex = contentType.indexOf(CHARSET);
        if (startIndex <= 0)
        {
            return DEFAULT_ENCODING;
        }

        // Skip the parameter name and the '=' that follows it.
        int valueStart = startIndex + CHARSET.length() + 1;
        if (valueStart > contentType.length())
        {
            return DEFAULT_ENCODING;
        }

        String encoding = contentType.substring(valueStart);
        // Content-Type parameters are ';' separated: keep only the charset value.
        int valueEnd = encoding.indexOf(';');
        if (valueEnd >= 0)
        {
            encoding = encoding.substring(0, valueEnd);
        }
        encoding = encoding.replaceAll("\"", "").trim();
        return encoding.isEmpty() ? DEFAULT_ENCODING : encoding;
    }
}

View File

@@ -1,192 +1,192 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* <p>
* Since HTML Parser was updated from v1.6 to v2.1, META tags
* defining an encoding for the content via http-equiv=Content-Type
* will ONLY be respected if the encoding of the content item
* itself is set to ISO-8859-1.
* </p>
*
* <p>
* Tika Note - could be converted to use the Tika HTML parser,
* but we'd potentially need a custom text handler to replicate
* the current settings around links and non-breaking spaces.
* </p>
*
* @author Derek Hulley
* @author eknizat
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
* @see org.htmlparser.beans.StringBean
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
*/
public class HtmlParserContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        HtmlParserContentTransformer.class);

    /**
     * Extracts the text of an HTML file and writes it to {@code targetFile}.
     *
     * The source encoding is taken from the {@code SOURCE_ENCODING} entry of {@code parameters}
     * and validated before the file is parsed. When the entry is absent, no encoding is forced
     * on the parser.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     transform options; only the source encoding entry is read
     * @param sourceFile     the HTML file to read
     * @param targetFile     the file that receives the extracted text
     * @throws IllegalArgumentException if the requested source encoding is invalid or unsupported
     * @throws Exception                if the target file cannot be written
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        String sourceEncoding = parameters.get(SOURCE_ENCODING);
        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);

        logger.debug("Performing HTML to text transform with sourceEncoding={}", sourceEncoding);

        // Create the extractor
        EncodingAwareStringBean extractor = new EncodingAwareStringBean();
        extractor.setCollapse(false);
        extractor.setLinks(false);
        extractor.setReplaceNonBreakingSpaces(false);
        extractor.setURL(sourceFile, sourceEncoding);
        // get the text
        String text = extractor.getStrings();

        // write it to the writer
        // NOTE(review): OutputStreamWriter without a charset uses the platform default encoding -
        // confirm whether an explicit target encoding is wanted here.
        try (Writer writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(targetFile))))
        {
            writer.write(text);
        }
    }

    /**
     * Verifies that an encoding name, when supplied, is one the JVM can use.
     *
     * @param encoding      the charset name to check; {@code null} is accepted and skips the check
     * @param parameterName name of the request parameter, used in the error message
     * @throws IllegalArgumentException if the name is syntactically illegal (the original
     *                                  {@link IllegalCharsetNameException} is kept as the cause)
     *                                  or the charset is not supported by the JVM
     */
    private void checkEncodingParameter(String encoding, String parameterName)
    {
        try
        {
            if (encoding != null && !Charset.isSupported(encoding))
            {
                throw new IllegalArgumentException(
                    parameterName + "=" + encoding + " is not supported by the JVM.");
            }
        }
        catch (IllegalCharsetNameException e)
        {
            // Keep the original exception as the cause so the failure stays diagnosable.
            throw new IllegalArgumentException(
                parameterName + "=" + encoding + " is not a valid encoding.", e);
        }
    }

    /**
     * <p>
     * This code is based on a class of the same name, originally implemented in alfresco-repository.
     * </p>
     *
     * A version of {@link StringBean} which allows control of the
     * encoding in the underlying HTML Parser.
     * Unfortunately, StringBean doesn't allow easy over-riding of
     * this, so we have to duplicate some code to control this.
     * This allows us to correctly handle HTML files where the encoding
     * is specified against the content property (rather than in the
     * HTML Head Meta), see ALF-10466 for details.
     */
    public static class EncodingAwareStringBean extends StringBean
    {
        private static final long serialVersionUID = -9033414360428669553L;

        /**
         * Sets the File to extract strings from, and the encoding
         * it's in (if known to Alfresco).
         *
         * Nothing happens when the file's absolute path equals the URL already set on the bean.
         * A {@link ParserException} is not propagated; its text becomes the extracted strings.
         *
         * @param file     The File that text should be fetched from.
         * @param encoding The encoding of the input, or {@code null} to leave the parser's
         *                 encoding untouched
         */
        public void setURL(File file, String encoding)
        {
            String previousURL = getURL();
            String newURL = file.getAbsolutePath();

            if (previousURL == null || !newURL.equals(previousURL))
            {
                try
                {
                    // Capture the current connection first so the change event can report old and new.
                    URLConnection conn = getConnection();

                    if (null == mParser)
                    {
                        mParser = new Parser(newURL);
                    }
                    else
                    {
                        mParser.setURL(newURL);
                    }

                    if (encoding != null)
                    {
                        mParser.setEncoding(encoding);
                    }

                    mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
                        getURL());
                    mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
                        mParser.getConnection());
                    setStrings();
                }
                catch (ParserException pe)
                {
                    updateStrings(pe.toString());
                }
            }
        }

        /**
         * @return the encoding reported by the underlying parser
         */
        public String getEncoding()
        {
            return mParser.getEncoding();
        }
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* Content transformer which wraps the HTML Parser library for
* parsing HTML content.
*
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* <p>
* Since HTML Parser was updated from v1.6 to v2.1, META tags
* defining an encoding for the content via http-equiv=Content-Type
* will ONLY be respected if the encoding of the content item
* itself is set to ISO-8859-1.
* </p>
*
* <p>
* Tika Note - could be converted to use the Tika HTML parser,
* but we'd potentially need a custom text handler to replicate
* the current settings around links and non-breaking spaces.
* </p>
*
* @author Derek Hulley
* @author eknizat
* @see <a href="http://htmlparser.sourceforge.net/">http://htmlparser.sourceforge.net</a>
* @see org.htmlparser.beans.StringBean
* @see <a href="http://sourceforge.net/tracker/?func=detail&aid=1644504&group_id=24399&atid=381401">HTML Parser</a>
*/
public class HtmlParserContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        HtmlParserContentTransformer.class);

    /**
     * Extracts the text of an HTML file and writes it to {@code targetFile}.
     *
     * The source encoding is taken from the {@code SOURCE_ENCODING} entry of {@code parameters}
     * and validated before the file is parsed. When the entry is absent, no encoding is forced
     * on the parser.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     transform options; only the source encoding entry is read
     * @param sourceFile     the HTML file to read
     * @param targetFile     the file that receives the extracted text
     * @throws IllegalArgumentException if the requested source encoding is invalid or unsupported
     * @throws Exception                if the target file cannot be written
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        String sourceEncoding = parameters.get(SOURCE_ENCODING);
        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);

        logger.debug("Performing HTML to text transform with sourceEncoding={}", sourceEncoding);

        // Create the extractor
        EncodingAwareStringBean extractor = new EncodingAwareStringBean();
        extractor.setCollapse(false);
        extractor.setLinks(false);
        extractor.setReplaceNonBreakingSpaces(false);
        extractor.setURL(sourceFile, sourceEncoding);
        // get the text
        String text = extractor.getStrings();

        // write it to the writer
        // NOTE(review): OutputStreamWriter without a charset uses the platform default encoding -
        // confirm whether an explicit target encoding is wanted here.
        try (Writer writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(targetFile))))
        {
            writer.write(text);
        }
    }

    /**
     * Verifies that an encoding name, when supplied, is one the JVM can use.
     *
     * @param encoding      the charset name to check; {@code null} is accepted and skips the check
     * @param parameterName name of the request parameter, used in the error message
     * @throws IllegalArgumentException if the name is syntactically illegal (the original
     *                                  {@link IllegalCharsetNameException} is kept as the cause)
     *                                  or the charset is not supported by the JVM
     */
    private void checkEncodingParameter(String encoding, String parameterName)
    {
        try
        {
            if (encoding != null && !Charset.isSupported(encoding))
            {
                throw new IllegalArgumentException(
                    parameterName + "=" + encoding + " is not supported by the JVM.");
            }
        }
        catch (IllegalCharsetNameException e)
        {
            // Keep the original exception as the cause so the failure stays diagnosable.
            throw new IllegalArgumentException(
                parameterName + "=" + encoding + " is not a valid encoding.", e);
        }
    }

    /**
     * <p>
     * This code is based on a class of the same name, originally implemented in alfresco-repository.
     * </p>
     *
     * A version of {@link StringBean} which allows control of the
     * encoding in the underlying HTML Parser.
     * Unfortunately, StringBean doesn't allow easy over-riding of
     * this, so we have to duplicate some code to control this.
     * This allows us to correctly handle HTML files where the encoding
     * is specified against the content property (rather than in the
     * HTML Head Meta), see ALF-10466 for details.
     */
    public static class EncodingAwareStringBean extends StringBean
    {
        private static final long serialVersionUID = -9033414360428669553L;

        /**
         * Sets the File to extract strings from, and the encoding
         * it's in (if known to Alfresco).
         *
         * Nothing happens when the file's absolute path equals the URL already set on the bean.
         * A {@link ParserException} is not propagated; its text becomes the extracted strings.
         *
         * @param file     The File that text should be fetched from.
         * @param encoding The encoding of the input, or {@code null} to leave the parser's
         *                 encoding untouched
         */
        public void setURL(File file, String encoding)
        {
            String previousURL = getURL();
            String newURL = file.getAbsolutePath();

            if (previousURL == null || !newURL.equals(previousURL))
            {
                try
                {
                    // Capture the current connection first so the change event can report old and new.
                    URLConnection conn = getConnection();

                    if (null == mParser)
                    {
                        mParser = new Parser(newURL);
                    }
                    else
                    {
                        mParser.setURL(newURL);
                    }

                    if (encoding != null)
                    {
                        mParser.setEncoding(encoding);
                    }

                    mPropertySupport.firePropertyChange(StringBean.PROP_URL_PROPERTY, previousURL,
                        getURL());
                    mPropertySupport.firePropertyChange(StringBean.PROP_CONNECTION_PROPERTY, conn,
                        mParser.getConnection());
                    setStrings();
                }
                catch (ParserException pe)
                {
                    updateStrings(pe.toString());
                }
            }
        }

        /**
         * @return the encoding reported by the underlying parser
         */
        public String getEncoding()
        {
            return mParser.getEncoding();
        }
    }
}

View File

@@ -1,130 +1,130 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
 * Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
 * This transformer will only work for OOXML files where thumbnailing was enabled,
 * which isn't on by default on Windows, but is more common on Mac.
 *
 * <p>
 * This code is based on a class of the same name originally implemented in alfresco-repository.
 * </p>
 *
 * @author Nick Burch
 * @author eknizat
 */
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        OOXMLThumbnailContentTransformer.class);

    /**
     * Copies the first thumbnail part of the source OOXML package to the target file,
     * replacing the target if it already exists.
     *
     * @param sourceMimetype mimetype of the source; only used for logging
     * @param targetMimetype mimetype of the target; used for logging and in the failure message
     * @param parameters     transform options; not read by this transformer
     * @param sourceFile     the OOXML file to read
     * @param targetFile     the file the thumbnail bytes are written to
     * @throws Exception        if the package has no thumbnail relationship, or the package
     *                          cannot be opened for a non-I/O reason
     * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        logger.debug("Performing OOXML to jpeg transform with sourceMimetype={} targetMimetype={}",
            sourceMimetype, targetMimetype);

        // NOTE(review): the package is opened with POI's default access mode; confirm that
        // closing it can never write back to sourceFile.
        try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
        {
            // Does it have a thumbnail?
            PackageRelationshipCollection rels = pkg.getRelationshipsByType(
                PackageRelationshipTypes.THUMBNAIL);
            if (rels.size() > 0)
            {
                // Get the thumbnail part
                PackageRelationship tRel = rels.getRelationship(0);
                PackagePart tPart = pkg.getPart(tRel);

                // Write it to the target. try-with-resources guarantees the part's stream is
                // released even when the copy fails part-way through.
                try (InputStream tStream = tPart.getInputStream())
                {
                    Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
                }
            }
            else
            {
                logger.debug("No thumbnail present in file.");
                throw new Exception(
                    "No thumbnail present in file, unable to generate " + targetMimetype);
            }
        }
        catch (IOException e)
        {
            throw new RuntimeException("Unable to transform file.", e);
        }
    }

    /*
    // TODO Add this back to engine_config.json when the transformer is fixed for java 11
    {
    "transformerName": "ooxmlThumbnail",
    "supportedSourceAndTargetList": [
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
    ],
    "transformOptions": [
    ]
    }
    */
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
/**
 * Extracts out Thumbnail JPEGs from OOXML files for thumbnailing and previewing.
 * This transformer will only work for OOXML files where thumbnailing was enabled,
 * which isn't on by default on Windows, but is more common on Mac.
 *
 * <p>
 * This code is based on a class of the same name originally implemented in alfresco-repository.
 * </p>
 *
 * @author Nick Burch
 * @author eknizat
 */
public class OOXMLThumbnailContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(
        OOXMLThumbnailContentTransformer.class);

    /**
     * Copies the first thumbnail part of the source OOXML package to the target file,
     * replacing the target if it already exists.
     *
     * @param sourceMimetype mimetype of the source; only used for logging
     * @param targetMimetype mimetype of the target; used for logging and in the failure message
     * @param parameters     transform options; not read by this transformer
     * @param sourceFile     the OOXML file to read
     * @param targetFile     the file the thumbnail bytes are written to
     * @throws Exception        if the package has no thumbnail relationship, or the package
     *                          cannot be opened for a non-I/O reason
     * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        logger.debug("Performing OOXML to jpeg transform with sourceMimetype={} targetMimetype={}",
            sourceMimetype, targetMimetype);

        // NOTE(review): the package is opened with POI's default access mode; confirm that
        // closing it can never write back to sourceFile.
        try (OPCPackage pkg = OPCPackage.open(sourceFile.getPath()))
        {
            // Does it have a thumbnail?
            PackageRelationshipCollection rels = pkg.getRelationshipsByType(
                PackageRelationshipTypes.THUMBNAIL);
            if (rels.size() > 0)
            {
                // Get the thumbnail part
                PackageRelationship tRel = rels.getRelationship(0);
                PackagePart tPart = pkg.getPart(tRel);

                // Write it to the target. try-with-resources guarantees the part's stream is
                // released even when the copy fails part-way through.
                try (InputStream tStream = tPart.getInputStream())
                {
                    Files.copy(tStream, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
                }
            }
            else
            {
                logger.debug("No thumbnail present in file.");
                throw new Exception(
                    "No thumbnail present in file, unable to generate " + targetMimetype);
            }
        }
        catch (IOException e)
        {
            throw new RuntimeException("Unable to transform file.", e);
        }
    }

    /*
    // TODO Add this back to engine_config.json when the transformer is fixed for java 11
    {
    "transformerName": "ooxmlThumbnail",
    "supportedSourceAndTargetList": [
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-word.document.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-word.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.presentation.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.presentationml.slide", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-powerpoint.slide.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.sheet.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.template.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.addin.macroenabled.12", "targetMediaType": "image/jpeg"},
    {"sourceMediaType": "application/vnd.ms-excel.sheet.binary.macroenabled.12", "targetMediaType": "image/jpeg"}
    ],
    "transformOptions": [
    ]
    }
    */
}

View File

@@ -1,53 +1,53 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.util.Map;
/**
 * Implemented by transformers used by {@link SelectingTransformer}.
 *
 * <p>
 * Every operation has an empty default body, so an implementation only overrides
 * the operations it actually supports.
 * </p>
 *
 * @author eknizat
 */
public interface SelectableTransformer
{
    /**
     * Transforms {@code sourceFile} into {@code targetFile}. The default does nothing.
     *
     * @param sourceMimetype mimetype of the source content
     * @param targetMimetype mimetype requested for the target content
     * @param parameters     transform options
     * @param sourceFile     file to read from
     * @param targetFile     file to write to
     * @throws Exception if an implementation fails
     */
    default void transform(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> parameters,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }

    /**
     * Extracts metadata from {@code sourceFile}. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source content
     * @param targetMimetype   mimetype requested for the target content
     * @param transformOptions transform options
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     * @throws Exception if an implementation fails
     */
    default void extractMetadata(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> transformOptions,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }

    /**
     * Embeds metadata. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source content
     * @param targetMimetype   mimetype requested for the target content
     * @param transformOptions transform options
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     * @throws Exception if an implementation fails
     */
    default void embedMetadata(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> transformOptions,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import java.io.File;
import java.util.Map;
/**
 * Implemented by transformers used by {@link SelectingTransformer}.
 *
 * <p>
 * Every operation has an empty default body, so an implementation only overrides
 * the operations it actually supports.
 * </p>
 *
 * @author eknizat
 */
public interface SelectableTransformer
{
    /**
     * Transforms {@code sourceFile} into {@code targetFile}. The default does nothing.
     *
     * @param sourceMimetype mimetype of the source content
     * @param targetMimetype mimetype requested for the target content
     * @param parameters     transform options
     * @param sourceFile     file to read from
     * @param targetFile     file to write to
     * @throws Exception if an implementation fails
     */
    default void transform(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> parameters,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }

    /**
     * Extracts metadata from {@code sourceFile}. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source content
     * @param targetMimetype   mimetype requested for the target content
     * @param transformOptions transform options
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     * @throws Exception if an implementation fails
     */
    default void extractMetadata(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> transformOptions,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }

    /**
     * Embeds metadata. The default does nothing.
     *
     * @param sourceMimetype   mimetype of the source content
     * @param targetMimetype   mimetype requested for the target content
     * @param transformOptions transform options
     * @param sourceFile       file to read from
     * @param targetFile       file to write to
     * @throws Exception if an implementation fails
     */
    default void embedMetadata(
        String sourceMimetype,
        String targetMimetype,
        Map<String, String> transformOptions,
        File sourceFile,
        File targetFile) throws Exception
    {
        // intentionally empty
    }
}

View File

@@ -1,114 +1,114 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
 * The SelectingTransformer selects a registered {@link SelectableTransformer}
 * and delegates the transformation to its implementation.
 *
 * @author eknizat
 */
public class SelectingTransformer implements Transformer
{
    private static final String ID = "misc";

    // NOTE(review): the first line of this literal ends in an escaped backslash followed by
    // 'n' rather than a newline escape (unlike the second line) - confirm this is intended.
    public static final String LICENCE =
        "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
        "Additional libraries used:\n" +
        "* htmlparser http://htmlparser.sourceforge.net/license.html";

    /** Registered delegates, keyed by the transform name they are selected with. */
    private final Map<String, SelectableTransformer> transformers = ImmutableMap
        .<String, SelectableTransformer>builder()
        .put("appleIWorks", new AppleIWorksContentTransformer())
        .put("html", new HtmlParserContentTransformer())
        .put("string", new StringExtractingContentTransformer())
        .put("textToPdf", new TextToPdfContentTransformer())
        .put("rfc822", new EMLTransformer())
        .put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
        .put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
        .put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
        .build();

    @Override
    public String getTransformerId()
    {
        return ID;
    }

    /**
     * Records the options for logging and delegates the transform to the transformer
     * registered under {@code transformName}.
     *
     * @param transformName    key of the delegate to use
     * @param sourceMimetype   passed through to the delegate
     * @param targetMimetype   passed through to the delegate
     * @param transformOptions passed through to the delegate and recorded for logging
     * @param sourceFile       passed through to the delegate
     * @param targetFile       passed through to the delegate
     * @throws IllegalArgumentException if no delegate is registered under {@code transformName}
     * @throws Exception                whatever the delegate throws
     */
    @Override
    public void transform(String transformName, String sourceMimetype, String targetMimetype,
                          Map<String, String> transformOptions,
                          File sourceFile, File targetFile) throws Exception
    {
        // Options are recorded before the lookup so that a rejected request is still logged.
        logOptions(sourceFile, targetFile, transformOptions);
        selectTransformer(transformName)
            .transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }

    /**
     * Records the options for logging and delegates metadata extraction to the transformer
     * registered under {@code transformName}.
     *
     * @param transformName    key of the delegate to use
     * @param sourceMimetype   passed through to the delegate
     * @param targetMimetype   passed through to the delegate
     * @param transformOptions passed through to the delegate and recorded for logging
     * @param sourceFile       passed through to the delegate
     * @param targetFile       passed through to the delegate
     * @throws IllegalArgumentException if no delegate is registered under {@code transformName}
     * @throws Exception                whatever the delegate throws
     */
    public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
                                Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        logOptions(sourceFile, targetFile, transformOptions);
        selectTransformer(transformName)
            .extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }

    /**
     * Looks up the delegate for a transform name, failing with a descriptive error instead
     * of letting a missing entry surface later as a bare NullPointerException.
     */
    private SelectableTransformer selectTransformer(String transformName)
    {
        final SelectableTransformer transformer = transformers.get(transformName);
        if (transformer == null)
        {
            throw new IllegalArgumentException(
                "No transformer registered for transformName=" + transformName);
        }
        return transformer;
    }

    /**
     * Builds a space-separated summary of the options (excluding the transform name)
     * followed by the source and target file extensions, and stores it on the LogEntry.
     */
    private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
    {
        StringJoiner sj = new StringJoiner(" ");
        parameters.forEach((k, v) ->
        {
            if (!TRANSFORM_NAME_PARAMETER.equals(k))
            {
                sj.add("--" + k + "=" + v);
            }
        }); // keeping the existing style used in other T-Engines
        sj.add(getExtension(sourceFile));
        sj.add(getExtension(targetFile));
        LogEntry.setOptions(sj.toString());
    }

    /** Returns the text after the last '.' of the file name, or "???" when there is no '.'. */
    private static String getExtension(File file)
    {
        final String name = file.getName();
        int i = name.lastIndexOf('.');
        return i == -1 ? "???" : name.substring(i + 1);
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2020 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import com.google.common.collect.ImmutableMap;
import org.alfresco.transformer.executors.Transformer;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.metadataExtractors.HtmlMetadataExtractor;
import org.alfresco.transformer.metadataExtractors.RFC822MetadataExtractor;
import java.io.File;
import java.util.Map;
import java.util.StringJoiner;
import static org.alfresco.transformer.util.RequestParamMap.TRANSFORM_NAME_PARAMETER;
/**
 * The SelectingTransformer selects a registered {@link SelectableTransformer}
 * and delegates the transformation to its implementation.
 *
 * @author eknizat
 */
public class SelectingTransformer implements Transformer
{
    private static final String ID = "misc";

    // NOTE(review): the first line of this literal ends in an escaped backslash followed by
    // 'n' rather than a newline escape (unlike the second line) - confirm this is intended.
    public static final String LICENCE =
        "This transformer uses libraries from Apache. See the license at http://www.apache.org/licenses/LICENSE-2.0. or in /Apache\\\\ 2.0.txt\\n" +
        "Additional libraries used:\n" +
        "* htmlparser http://htmlparser.sourceforge.net/license.html";

    /** Registered delegates, keyed by the transform name they are selected with. */
    private final Map<String, SelectableTransformer> transformers = ImmutableMap
        .<String, SelectableTransformer>builder()
        .put("appleIWorks", new AppleIWorksContentTransformer())
        .put("html", new HtmlParserContentTransformer())
        .put("string", new StringExtractingContentTransformer())
        .put("textToPdf", new TextToPdfContentTransformer())
        .put("rfc822", new EMLTransformer())
        .put("ooXmlThumbnail", new OOXMLThumbnailContentTransformer())
        .put("HtmlMetadataExtractor", new HtmlMetadataExtractor())
        .put("RFC822MetadataExtractor", new RFC822MetadataExtractor())
        .build();

    @Override
    public String getTransformerId()
    {
        return ID;
    }

    /**
     * Records the options for logging and delegates the transform to the transformer
     * registered under {@code transformName}.
     *
     * @param transformName    key of the delegate to use
     * @param sourceMimetype   passed through to the delegate
     * @param targetMimetype   passed through to the delegate
     * @param transformOptions passed through to the delegate and recorded for logging
     * @param sourceFile       passed through to the delegate
     * @param targetFile       passed through to the delegate
     * @throws IllegalArgumentException if no delegate is registered under {@code transformName}
     * @throws Exception                whatever the delegate throws
     */
    @Override
    public void transform(String transformName, String sourceMimetype, String targetMimetype,
                          Map<String, String> transformOptions,
                          File sourceFile, File targetFile) throws Exception
    {
        // Options are recorded before the lookup so that a rejected request is still logged.
        logOptions(sourceFile, targetFile, transformOptions);
        selectTransformer(transformName)
            .transform(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }

    /**
     * Records the options for logging and delegates metadata extraction to the transformer
     * registered under {@code transformName}.
     *
     * @param transformName    key of the delegate to use
     * @param sourceMimetype   passed through to the delegate
     * @param targetMimetype   passed through to the delegate
     * @param transformOptions passed through to the delegate and recorded for logging
     * @param sourceFile       passed through to the delegate
     * @param targetFile       passed through to the delegate
     * @throws IllegalArgumentException if no delegate is registered under {@code transformName}
     * @throws Exception                whatever the delegate throws
     */
    public void extractMetadata(String transformName, String sourceMimetype, String targetMimetype,
                                Map<String, String> transformOptions,
                                File sourceFile, File targetFile) throws Exception
    {
        logOptions(sourceFile, targetFile, transformOptions);
        selectTransformer(transformName)
            .extractMetadata(sourceMimetype, targetMimetype, transformOptions, sourceFile, targetFile);
    }

    /**
     * Looks up the delegate for a transform name, failing with a descriptive error instead
     * of letting a missing entry surface later as a bare NullPointerException.
     */
    private SelectableTransformer selectTransformer(String transformName)
    {
        final SelectableTransformer transformer = transformers.get(transformName);
        if (transformer == null)
        {
            throw new IllegalArgumentException(
                "No transformer registered for transformName=" + transformName);
        }
        return transformer;
    }

    /**
     * Builds a space-separated summary of the options (excluding the transform name)
     * followed by the source and target file extensions, and stores it on the LogEntry.
     */
    private static void logOptions(File sourceFile, File targetFile, Map<String, String> parameters)
    {
        StringJoiner sj = new StringJoiner(" ");
        parameters.forEach((k, v) ->
        {
            if (!TRANSFORM_NAME_PARAMETER.equals(k))
            {
                sj.add("--" + k + "=" + v);
            }
        }); // keeping the existing style used in other T-Engines
        sj.add(getExtension(sourceFile));
        sj.add(getExtension(targetFile));
        LogEntry.setOptions(sj.toString());
    }

    /** Returns the text after the last '.' of the file name, or "???" when there is no '.'. */
    private static String getExtension(File file)
    {
        final String name = file.getName();
        int i = name.lastIndexOf('.');
        return i == -1 ? "???" : name.substring(i + 1);
    }
}

View File

@@ -1,158 +1,158 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
/**
 * Converts any textual format to plain text.
 * <p>
 * The transformation is sensitive to the source and target string encodings.
 *
 *
 * <p>
 * This code is based on a class of the same name originally implemented in alfresco-repository.
 * </p>
 *
 * @author Derek Hulley
 * @author eknizat
 */
public class StringExtractingContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);

    /**
     * Copies the characters of the source file to the target file, decoding with the
     * {@code SOURCE_ENCODING} option and encoding with the {@code TARGET_ENCODING} option.
     * <p>
     * When an encoding option is absent, the reader or writer is created without an explicit
     * charset and therefore uses the JVM default. The text produced will, if the encoding
     * was correct, be unformatted but valid.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     transform options holding the optional encodings
     * @param sourceFile     file to read characters from
     * @param targetFile     file to write characters to
     * @throws IllegalArgumentException if a supplied encoding is invalid or unsupported
     * @throws Exception                on any read, write or flush failure
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        String sourceEncoding = parameters.get(SOURCE_ENCODING);
        String targetEncoding = parameters.get(TARGET_ENCODING);
        logger.debug("Performing text to text transform with sourceEncoding={} targetEncoding={}",
            sourceEncoding, targetEncoding);

        Reader charReader = null;
        Writer charWriter = null;
        try
        {
            charReader = openReader(sourceFile, sourceEncoding);
            charWriter = openWriter(targetFile, targetEncoding);

            // copy from the one to the other
            char[] buffer = new char[8192];
            int readCount;
            while ((readCount = charReader.read(buffer)) > -1)
            {
                charWriter.write(buffer, 0, readCount);
            }

            // Flush here so that a failure to write the buffered tail propagates to the caller;
            // close() below is best-effort and only logs its errors.
            charWriter.flush();
        }
        finally
        {
            closeAndLog(charReader, "charReader");
            closeAndLog(charWriter, "charWriter");
        }
    }

    /** Opens a buffered reader on the file, validating the encoding first when one is given. */
    private Reader openReader(File sourceFile, String sourceEncoding) throws Exception
    {
        if (sourceEncoding == null)
        {
            return new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile)));
        }
        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
        return new BufferedReader(
            new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
    }

    /** Opens a buffered writer on the file, validating the encoding first when one is given. */
    private Writer openWriter(File targetFile, String targetEncoding) throws Exception
    {
        if (targetEncoding == null)
        {
            return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(targetFile)));
        }
        checkEncodingParameter(targetEncoding, TARGET_ENCODING);
        return new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
    }

    /** Closes the resource if it was opened, logging rather than propagating any failure. */
    private static void closeAndLog(AutoCloseable resource, String name)
    {
        if (resource != null)
        {
            try
            {
                resource.close();
            }
            catch (Throwable e)
            {
                logger.error("Failed to close " + name, e);
            }
        }
    }

    /**
     * Rejects encodings whose name is illegal or that the JVM does not support.
     *
     * @throws IllegalArgumentException describing which parameter carried the bad value
     */
    private void checkEncodingParameter(String encoding, String parameterName)
    {
        try
        {
            if (!Charset.isSupported(encoding))
            {
                throw new IllegalArgumentException(
                    parameterName + "=" + encoding + " is not supported by the JVM.");
            }
        }
        catch (IllegalCharsetNameException e)
        {
            // Keep the original exception as the cause so the offending name is traceable.
            throw new IllegalArgumentException(
                parameterName + "=" + encoding + " is not a valid encoding.", e);
        }
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.alfresco.transform.client.util.RequestParamMap.TARGET_ENCODING;
/**
 * Converts any textual format to plain text.
 * <p>
 * The transformation is sensitive to the source and target string encodings.
 *
 *
 * <p>
 * This code is based on a class of the same name originally implemented in alfresco-repository.
 * </p>
 *
 * @author Derek Hulley
 * @author eknizat
 */
public class StringExtractingContentTransformer implements SelectableTransformer
{
    private static final Logger logger = LoggerFactory.getLogger(StringExtractingContentTransformer.class);

    /**
     * Copies the characters of the source file to the target file, decoding with the
     * {@code SOURCE_ENCODING} option and encoding with the {@code TARGET_ENCODING} option.
     * <p>
     * When an encoding option is absent, the reader or writer is created without an explicit
     * charset and therefore uses the JVM default. The text produced will, if the encoding
     * was correct, be unformatted but valid.
     *
     * @param sourceMimetype not read by this transformer
     * @param targetMimetype not read by this transformer
     * @param parameters     transform options holding the optional encodings
     * @param sourceFile     file to read characters from
     * @param targetFile     file to write characters to
     * @throws IllegalArgumentException if a supplied encoding is invalid or unsupported
     * @throws Exception                on any read, write or flush failure
     */
    @Override
    public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
                          final File sourceFile, final File targetFile) throws Exception
    {
        String sourceEncoding = parameters.get(SOURCE_ENCODING);
        String targetEncoding = parameters.get(TARGET_ENCODING);
        logger.debug("Performing text to text transform with sourceEncoding={} targetEncoding={}",
            sourceEncoding, targetEncoding);

        Reader charReader = null;
        Writer charWriter = null;
        try
        {
            charReader = openReader(sourceFile, sourceEncoding);
            charWriter = openWriter(targetFile, targetEncoding);

            // copy from the one to the other
            char[] buffer = new char[8192];
            int readCount;
            while ((readCount = charReader.read(buffer)) > -1)
            {
                charWriter.write(buffer, 0, readCount);
            }

            // Flush here so that a failure to write the buffered tail propagates to the caller;
            // close() below is best-effort and only logs its errors.
            charWriter.flush();
        }
        finally
        {
            closeAndLog(charReader, "charReader");
            closeAndLog(charWriter, "charWriter");
        }
    }

    /** Opens a buffered reader on the file, validating the encoding first when one is given. */
    private Reader openReader(File sourceFile, String sourceEncoding) throws Exception
    {
        if (sourceEncoding == null)
        {
            return new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile)));
        }
        checkEncodingParameter(sourceEncoding, SOURCE_ENCODING);
        return new BufferedReader(
            new InputStreamReader(new FileInputStream(sourceFile), sourceEncoding));
    }

    /** Opens a buffered writer on the file, validating the encoding first when one is given. */
    private Writer openWriter(File targetFile, String targetEncoding) throws Exception
    {
        if (targetEncoding == null)
        {
            return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(targetFile)));
        }
        checkEncodingParameter(targetEncoding, TARGET_ENCODING);
        return new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(targetFile), targetEncoding));
    }

    /** Closes the resource if it was opened, logging rather than propagating any failure. */
    private static void closeAndLog(AutoCloseable resource, String name)
    {
        if (resource != null)
        {
            try
            {
                resource.close();
            }
            catch (Throwable e)
            {
                logger.error("Failed to close " + name, e);
            }
        }
    }

    /**
     * Rejects encodings whose name is illegal or that the JVM does not support.
     *
     * @throws IllegalArgumentException describing which parameter carried the bad value
     */
    private void checkEncodingParameter(String encoding, String parameterName)
    {
        try
        {
            if (!Charset.isSupported(encoding))
            {
                throw new IllegalArgumentException(
                    parameterName + "=" + encoding + " is not supported by the JVM.");
            }
        }
        catch (IllegalCharsetNameException e)
        {
            // Keep the original exception as the cause so the offending name is traceable.
            throw new IllegalArgumentException(
                parameterName + "=" + encoding + " is not a valid encoding.", e);
        }
    }
}

View File

@@ -1,448 +1,448 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
*
* @author Derek Hulley
* @author eknizat
*/
public class TextToPdfContentTransformer implements SelectableTransformer
{
// Logger for this transformer; also used by the stream wrapper created in buildReader.
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
// Number of leading bytes buildReader reads ahead (and pushes back) to work out the UTF-16 byte order.
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
// The two byte values buildReader compares against the first two bytes to recognise a UTF-16 BOM.
private static final byte FE = (byte) 0xFE;
private static final byte FF = (byte) 0xFF;
// Key of the optional page-limit entry that transform reads from its parameters map.
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
// Converter that produces the PDF; its font and font size are set through the setters of this class.
private final PagedTextToPDF transformer;
/**
 * Creates the transformer backed by a new PagedTextToPDF instance.
 */
public TextToPdfContentTransformer()
{
transformer = new PagedTextToPDF();
}
/**
 * Selects the font used for PDF generation from the fonts registered in {@code PagedTextToPDF}.
 * The lookup result is handed to the transformer as-is, so a name that is not registered passes
 * {@code null} to {@code setFont}.
 *
 * @param fontName base font name to look up
 * @throws RuntimeException wrapping any failure raised while looking up or applying the font
 */
public void setStandardFont(String fontName)
{
    try
    {
        final PDType1Font standardFont = PagedTextToPDF.getStandardFont(fontName);
        transformer.setFont(standardFont);
    }
    catch (Throwable cause)
    {
        final String message = "Unable to set Standard Font for PDF generation: " + fontName;
        throw new RuntimeException(message, cause);
    }
}
/**
 * Sets the font size used for PDF generation.
 *
 * @param fontSize font size handed to the underlying transformer
 * @throws RuntimeException if the transformer rejects the value; the original failure is the cause
 */
public void setFontSize(int fontSize)
{
    try
    {
        transformer.setFontSize(fontSize);
    }
    catch (Throwable e)
    {
        // Chain the cause so the underlying failure is visible in the stack trace.
        throw new RuntimeException(
                "Unable to set Font Size for PDF generation: " + fontSize, e);
    }
}
/**
 * Converts the text in {@code sourceFile} into a PDF written to {@code targetFile}.
 * <p>
 * Two optional entries are read from {@code parameters}: {@code SOURCE_ENCODING}, passed to
 * {@code buildReader} to decode the source, and {@code PAGE_LIMIT}, which is parsed as an integer
 * when present (otherwise -1 is used) and passed to {@code createPDFFromText}.
 *
 * @param sourceMimetype not used by this implementation
 * @param targetMimetype not used by this implementation
 * @param parameters     transform options; must not be {@code null}
 * @param sourceFile     text file to read
 * @param targetFile     file the PDF is saved to
 * @throws IllegalArgumentException if {@code PAGE_LIMIT} is present but not an integer
 * @throws Exception                if reading, converting or saving fails
 */
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
        final File sourceFile, final File targetFile) throws Exception
{
    String sourceEncoding = parameters.get(SOURCE_ENCODING);
    String stringPageLimit = parameters.get(PAGE_LIMIT);
    int pageLimit = -1;
    if (stringPageLimit != null)
    {
        pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
    }

    PDDocument pdf = null;
    try (InputStream is = new FileInputStream(sourceFile);
         Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
         OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
    {
        pdf = transformer.createPDFFromText(ir, pageLimit);
        pdf.save(os);
    }
    finally
    {
        if (pdf != null)
        {
            try
            {
                pdf.close();
            }
            catch (Throwable e)
            {
                // Best-effort close: report through the logger instead of stderr and carry on,
                // so a close failure never hides the outcome of the transformation itself.
                logger.error("Failed to close PDF document", e);
            }
        }
    }
}
/**
 * Creates the reader used to decode the source text.
 * <p>
 * When {@code encoding} names a charset the JVM can resolve, that charset is used. For UTF-16,
 * UTF-16BE and UTF-16LE the stream is additionally wrapped so that the byte order is worked out
 * from the data itself, and the text is then decoded as UTF-16. If {@code encoding} is
 * {@code null} or cannot be resolved, the platform default charset is used.
 *
 * @param is       stream of raw source bytes
 * @param encoding requested charset name, may be {@code null}
 * @return a reader over {@code is}
 */
protected InputStreamReader buildReader(InputStream is, String encoding)
{
    // If they gave an encoding, try to use it
    if (encoding != null)
    {
        Charset charset = null;
        try
        {
            charset = Charset.forName(encoding);
        }
        catch (Exception e)
        {
            // Unknown or illegal name: log it with the cause and fall through to the default below.
            logger.warn("JVM doesn't understand encoding '" + encoding +
                        "' when transforming text to pdf", e);
        }
        if (charset != null)
        {
            // Handles the situation where there is a BOM even though the encoding indicates that normally
            // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
            // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
            // in the first few characters. XML files even when not in European languages tend to have more
            // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
            // Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
            // other transformers do.

            // Compare against the canonical name; displayName() may be localised by a charset implementation.
            String name = charset.name();
            if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
            {
                logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding {}", name);
                charset = Charset.forName("UTF-16");
                is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
                {
                    // Set once the head of the stream has been inspected.
                    boolean bomRead;
                    // True when every byte pair has to be swapped before it reaches the decoder.
                    boolean switchByteOrder;
                    // True when the next byte delivered is the first byte of a pair.
                    boolean evenByte = true;

                    @Override
                    public int read(byte[] bytes, int off, int len) throws IOException
                    {
                        // Route bulk reads through read() so the byte swapping is applied to them too.
                        int i = 0;
                        int b = 0;
                        for (; i < len; i++)
                        {
                            b = read();
                            if (b == -1)
                            {
                                break;
                            }
                            bytes[off + i] = (byte) b;
                        }
                        return i == 0 && b == -1 ? -1 : i;
                    }

                    @Override
                    public int read() throws IOException
                    {
                        if (!bomRead)
                        {
                            // First call only: read ahead, decide the byte order, then push the
                            // (possibly BOM-corrected) bytes back so they are still delivered.
                            bomRead = true;
                            boolean switchBom = false;
                            byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
                            int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);

                            // Only inspect bytes that were actually read: the zero-filled tail of a
                            // short read is not content (end is -1 for an empty stream).
                            int available = Math.max(end, 0);
                            int evenZeros = countZeros(bytes, 0, available);
                            int oddZeros = countZeros(bytes, 1, available);

                            if (evenZeros > oddZeros)
                            {
                                if (bytes[0] == FF && bytes[1] == FE)
                                {
                                    switchByteOrder = true;
                                    switchBom = true;
                                    logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
                                }
                                else
                                {
                                    logger.debug("More even zero bytes, so normal read for big-endian");
                                }
                            }
                            else
                            {
                                if (bytes[0] == FE && bytes[1] == FF)
                                {
                                    switchBom = true;
                                    logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
                                }
                                else
                                {
                                    switchByteOrder = true;
                                    logger.debug("More odd zero bytes, so switch bytes from little-endian");
                                }
                            }

                            if (switchBom)
                            {
                                byte b = bytes[0];
                                bytes[0] = bytes[1];
                                bytes[1] = b;
                            }

                            // Push back in reverse so the bytes come out again in their original order.
                            for (int i = end - 1; i >= 0; i--)
                            {
                                unread(bytes[i]);
                            }
                        }

                        if (switchByteOrder)
                        {
                            if (evenByte)
                            {
                                // At the start of a pair: pull both bytes and push them back first-then-second,
                                // so the second byte is the next one read and the first follows it.
                                int b1 = super.read();
                                int b2 = super.read();
                                if (b1 != -1)
                                {
                                    unread(b1);
                                }
                                if (b2 != -1)
                                {
                                    unread(b2);
                                }
                            }
                            evenByte = !evenByte;
                        }

                        return super.read();
                    }

                    // Counts the 00 bytes at every second index from offset, within the first length bytes
                    private int countZeros(byte[] b, int offset, int length)
                    {
                        int count = 0;
                        for (int i = offset; i < length; i += 2)
                        {
                            if (b[i] == 0)
                            {
                                count++;
                            }
                        }
                        return count;
                    }
                };
            }
            logger.debug("Processing plain text in encoding {}", name);
            return new InputStreamReader(is, charset);
        }
    }

    // Fall back on the system default
    logger.debug("Processing plain text using system default encoding");
    return new InputStreamReader(is);
}
/**
 * {@link TextToPDF} extension that adds a page-limited conversion and a by-name lookup of the
 * fonts registered in {@code STANDARD_14}.
 */
private static class PagedTextToPDF extends TextToPDF
{
    // REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
    // before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font

    /**
     * Looks up a registered font by its base font name.
     *
     * @param name base font name
     * @return the matching font, or {@code null} when the name is not registered
     */
    static PDType1Font getStandardFont(String name)
    {
        return STANDARD_14.get(name);
    }

    private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();

    static
    {
        STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
        STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
        STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
        STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
                PDType1Font.TIMES_BOLD_ITALIC);
        STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
        STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
        STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
                PDType1Font.HELVETICA_OBLIQUE);
        STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
                PDType1Font.HELVETICA_BOLD_OBLIQUE);
        STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
        STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
        STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
        STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
                PDType1Font.COURIER_BOLD_OBLIQUE);
        STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
        STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
    }
    //duplicating until here

    /**
     * Lays the text out on pages, word-wrapping each input line to the page width.
     * Based on the code in TextToPDF with the addition of checks for page limits.
     * The calling code must close the PDDocument once finished with it.
     *
     * @param text      source text
     * @param pageLimit maximum number of pages to generate; zero or a negative value means no limit
     * @return the generated document; an empty input yields a document with one empty page
     * @throws IOException if reading the text or writing page content fails; the partially built
     *                     document is closed before the exception is propagated
     */
    public PDDocument createPDFFromText(Reader text, int pageLimit)
            throws IOException
    {
        PDDocument doc = null;
        int pageCount = 0;
        try
        {
            final int margin = 40;
            float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;

            //calculate font height and increase by 5 percent.
            height = height * getFontSize() * 1.05f;
            doc = new PDDocument();
            BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
            String nextLine;
            PDPage page = new PDPage();
            PDPageContentStream contentStream = null;
            // Starts below the margin so the first line drawn opens the first page.
            float y = -1;
            float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;

            // There is a special case of creating a PDF document from an empty string.
            boolean textIsEmpty = true;

            outer:
            while ((nextLine = data.readLine()) != null)
            {
                // The input text is nonEmpty. New pages will be created and added
                // to the PDF document as they are needed, depending on the length of
                // the text.
                textIsEmpty = false;

                String[] lineWords = nextLine.trim().split(" ");
                int lineIndex = 0;
                while (lineIndex < lineWords.length)
                {
                    // Take words until the next one would make the line wider than the page allows.
                    final StringBuilder nextLineToDraw = new StringBuilder();
                    float lengthIfUsingNextWord = 0;
                    do
                    {
                        nextLineToDraw.append(lineWords[lineIndex]);
                        nextLineToDraw.append(" ");
                        lineIndex++;
                        if (lineIndex < lineWords.length)
                        {
                            String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
                            lengthIfUsingNextWord =
                                    (getFont().getStringWidth(
                                            lineWithNextWord) / 1000) * getFontSize();
                        }
                    }
                    while (lineIndex < lineWords.length &&
                           lengthIfUsingNextWord < maxStringLength);

                    if (y < margin)
                    {
                        // pageCount is only advanced while a positive limit is in force.
                        if (pageLimit > 0 && (pageCount++ >= pageLimit))
                        {
                            break outer;
                        }

                        // We have crossed the end-of-page boundary and need to extend the
                        // document by another page.
                        page = new PDPage();
                        doc.addPage(page);
                        if (contentStream != null)
                        {
                            contentStream.endText();
                            contentStream.close();
                        }
                        contentStream = new PDPageContentStream(doc, page);
                        contentStream.setFont(getFont(), getFontSize());
                        contentStream.beginText();
                        y = page.getMediaBox().getHeight() - margin + height;
                        contentStream.newLineAtOffset(margin, y);
                    }

                    if (contentStream == null)
                    {
                        throw new IOException("Error:Expected non-null content stream.");
                    }
                    contentStream.newLineAtOffset(0, -height);
                    y -= height;
                    contentStream.showText(nextLineToDraw.toString());
                }
            }

            // If the input text was the empty string, then the above while loop will have short-circuited
            // and we will not have added any PDPages to the document.
            // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
            if (textIsEmpty)
            {
                doc.addPage(page);
            }

            if (contentStream != null)
            {
                contentStream.endText();
                contentStream.close();
            }
        }
        catch (IOException | RuntimeException failure)
        {
            // The caller never receives the document when we throw, so release it here. Runtime
            // failures are included so that they do not leave the document open either.
            if (doc != null)
            {
                try
                {
                    doc.close();
                }
                catch (IOException closeFailure)
                {
                    // Keep the original failure as the one that propagates.
                    failure.addSuppressed(closeFailure);
                }
            }
            throw failure;
        }
        return doc;
    }
}
/**
 * Parses a request parameter value as an {@code int}.
 *
 * @param s         text to parse
 * @param paramName parameter name used in the error message
 * @return the parsed value
 * @throws IllegalArgumentException if {@code s} is not a valid integer; the
 *                                  {@link NumberFormatException} is kept as the cause
 */
private int parseInt(String s, String paramName)
{
    try
    {
        // parseInt yields the primitive directly, avoiding the box/unbox round trip of valueOf.
        return Integer.parseInt(s);
    }
    catch (NumberFormatException e)
    {
        throw new IllegalArgumentException(paramName + " parameter must be an integer.", e);
    }
}
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.alfresco.transformer.util.RequestParamMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.tools.TextToPDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
/**
* <p>
* This code is based on a class of the same name originally implemented in alfresco-repository.
* </p>
*
* Makes use of the <a href="http://www.pdfbox.org/">PDFBox</a> library's <code>TextToPDF</code> utility.
*
* @author Derek Hulley
* @author eknizat
*/
public class TextToPdfContentTransformer implements SelectableTransformer
{
// Logger for this transformer; also used by the stream wrapper created in buildReader.
private static final Logger logger = LoggerFactory.getLogger(TextToPdfContentTransformer.class);
// Number of leading bytes buildReader reads ahead (and pushes back) to work out the UTF-16 byte order.
private static final int UTF16_READ_AHEAD_BYTES = 16; // 8 characters including BOM if it exists
// The two byte values buildReader compares against the first two bytes to recognise a UTF-16 BOM.
private static final byte FE = (byte) 0xFE;
private static final byte FF = (byte) 0xFF;
// Key of the optional page-limit entry that transform reads from its parameters map.
public static final String PAGE_LIMIT = RequestParamMap.PAGE_LIMIT;
// Converter that produces the PDF; its font and font size are set through the setters of this class.
private final PagedTextToPDF transformer;
/**
 * Creates the transformer backed by a new PagedTextToPDF instance.
 */
public TextToPdfContentTransformer()
{
transformer = new PagedTextToPDF();
}
/**
 * Selects the font used for PDF generation from the fonts registered in {@code PagedTextToPDF}.
 * The lookup result is handed to the transformer as-is, so a name that is not registered passes
 * {@code null} to {@code setFont}.
 *
 * @param fontName base font name to look up
 * @throws RuntimeException wrapping any failure raised while looking up or applying the font
 */
public void setStandardFont(String fontName)
{
    try
    {
        final PDType1Font standardFont = PagedTextToPDF.getStandardFont(fontName);
        transformer.setFont(standardFont);
    }
    catch (Throwable cause)
    {
        final String message = "Unable to set Standard Font for PDF generation: " + fontName;
        throw new RuntimeException(message, cause);
    }
}
/**
 * Sets the font size used for PDF generation.
 *
 * @param fontSize font size handed to the underlying transformer
 * @throws RuntimeException if the transformer rejects the value; the original failure is the cause
 */
public void setFontSize(int fontSize)
{
    try
    {
        transformer.setFontSize(fontSize);
    }
    catch (Throwable e)
    {
        // Chain the cause so the underlying failure is visible in the stack trace.
        throw new RuntimeException(
                "Unable to set Font Size for PDF generation: " + fontSize, e);
    }
}
/**
 * Converts the text in {@code sourceFile} into a PDF written to {@code targetFile}.
 * <p>
 * Two optional entries are read from {@code parameters}: {@code SOURCE_ENCODING}, passed to
 * {@code buildReader} to decode the source, and {@code PAGE_LIMIT}, which is parsed as an integer
 * when present (otherwise -1 is used) and passed to {@code createPDFFromText}.
 *
 * @param sourceMimetype not used by this implementation
 * @param targetMimetype not used by this implementation
 * @param parameters     transform options; must not be {@code null}
 * @param sourceFile     text file to read
 * @param targetFile     file the PDF is saved to
 * @throws IllegalArgumentException if {@code PAGE_LIMIT} is present but not an integer
 * @throws Exception                if reading, converting or saving fails
 */
@Override
public void transform(final String sourceMimetype, final String targetMimetype, final Map<String, String> parameters,
        final File sourceFile, final File targetFile) throws Exception
{
    String sourceEncoding = parameters.get(SOURCE_ENCODING);
    String stringPageLimit = parameters.get(PAGE_LIMIT);
    int pageLimit = -1;
    if (stringPageLimit != null)
    {
        pageLimit = parseInt(stringPageLimit, PAGE_LIMIT);
    }

    PDDocument pdf = null;
    try (InputStream is = new FileInputStream(sourceFile);
         Reader ir = new BufferedReader(buildReader(is, sourceEncoding));
         OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile)))
    {
        pdf = transformer.createPDFFromText(ir, pageLimit);
        pdf.save(os);
    }
    finally
    {
        if (pdf != null)
        {
            try
            {
                pdf.close();
            }
            catch (Throwable e)
            {
                // Best-effort close: report through the logger instead of stderr and carry on,
                // so a close failure never hides the outcome of the transformation itself.
                logger.error("Failed to close PDF document", e);
            }
        }
    }
}
/**
 * Creates the reader used to decode the source text.
 * <p>
 * When {@code encoding} names a charset the JVM can resolve, that charset is used. For UTF-16,
 * UTF-16BE and UTF-16LE the stream is additionally wrapped so that the byte order is worked out
 * from the data itself, and the text is then decoded as UTF-16. If {@code encoding} is
 * {@code null} or cannot be resolved, the platform default charset is used.
 *
 * @param is       stream of raw source bytes
 * @param encoding requested charset name, may be {@code null}
 * @return a reader over {@code is}
 */
protected InputStreamReader buildReader(InputStream is, String encoding)
{
    // If they gave an encoding, try to use it
    if (encoding != null)
    {
        Charset charset = null;
        try
        {
            charset = Charset.forName(encoding);
        }
        catch (Exception e)
        {
            // Unknown or illegal name: log it with the cause and fall through to the default below.
            logger.warn("JVM doesn't understand encoding '" + encoding +
                        "' when transforming text to pdf", e);
        }
        if (charset != null)
        {
            // Handles the situation where there is a BOM even though the encoding indicates that normally
            // there should not be one for UTF-16BE and UTF-16LE. For extra flexibility includes UTF-16 too
            // which optionally has the BOM. Rather than look at the BOM we look at the number of zero bytes
            // in the first few characters. XML files even when not in European languages tend to have more
            // even zero bytes when big-endian encoded and more odd zero bytes when little-endian.
            // Think of: <?xml version="1.0"?> The normal Java decoder does not have this flexibility but
            // other transformers do.

            // Compare against the canonical name; displayName() may be localised by a charset implementation.
            String name = charset.name();
            if ("UTF-16".equals(name) || "UTF-16BE".equals(name) || "UTF-16LE".equals(name))
            {
                logger.debug("Handle big and little endian UTF-16 text. Using UTF-16 rather than encoding {}", name);
                charset = Charset.forName("UTF-16");
                is = new PushbackInputStream(is, UTF16_READ_AHEAD_BYTES)
                {
                    // Set once the head of the stream has been inspected.
                    boolean bomRead;
                    // True when every byte pair has to be swapped before it reaches the decoder.
                    boolean switchByteOrder;
                    // True when the next byte delivered is the first byte of a pair.
                    boolean evenByte = true;

                    @Override
                    public int read(byte[] bytes, int off, int len) throws IOException
                    {
                        // Route bulk reads through read() so the byte swapping is applied to them too.
                        int i = 0;
                        int b = 0;
                        for (; i < len; i++)
                        {
                            b = read();
                            if (b == -1)
                            {
                                break;
                            }
                            bytes[off + i] = (byte) b;
                        }
                        return i == 0 && b == -1 ? -1 : i;
                    }

                    @Override
                    public int read() throws IOException
                    {
                        if (!bomRead)
                        {
                            // First call only: read ahead, decide the byte order, then push the
                            // (possibly BOM-corrected) bytes back so they are still delivered.
                            bomRead = true;
                            boolean switchBom = false;
                            byte[] bytes = new byte[UTF16_READ_AHEAD_BYTES];
                            int end = in.read(bytes, 0, UTF16_READ_AHEAD_BYTES);

                            // Only inspect bytes that were actually read: the zero-filled tail of a
                            // short read is not content (end is -1 for an empty stream).
                            int available = Math.max(end, 0);
                            int evenZeros = countZeros(bytes, 0, available);
                            int oddZeros = countZeros(bytes, 1, available);

                            if (evenZeros > oddZeros)
                            {
                                if (bytes[0] == FF && bytes[1] == FE)
                                {
                                    switchByteOrder = true;
                                    switchBom = true;
                                    logger.warn("Little-endian BOM FFFE read, but characters are big-endian");
                                }
                                else
                                {
                                    logger.debug("More even zero bytes, so normal read for big-endian");
                                }
                            }
                            else
                            {
                                if (bytes[0] == FE && bytes[1] == FF)
                                {
                                    switchBom = true;
                                    logger.debug("Big-endian BOM FEFF read, but characters are little-endian");
                                }
                                else
                                {
                                    switchByteOrder = true;
                                    logger.debug("More odd zero bytes, so switch bytes from little-endian");
                                }
                            }

                            if (switchBom)
                            {
                                byte b = bytes[0];
                                bytes[0] = bytes[1];
                                bytes[1] = b;
                            }

                            // Push back in reverse so the bytes come out again in their original order.
                            for (int i = end - 1; i >= 0; i--)
                            {
                                unread(bytes[i]);
                            }
                        }

                        if (switchByteOrder)
                        {
                            if (evenByte)
                            {
                                // At the start of a pair: pull both bytes and push them back first-then-second,
                                // so the second byte is the next one read and the first follows it.
                                int b1 = super.read();
                                int b2 = super.read();
                                if (b1 != -1)
                                {
                                    unread(b1);
                                }
                                if (b2 != -1)
                                {
                                    unread(b2);
                                }
                            }
                            evenByte = !evenByte;
                        }

                        return super.read();
                    }

                    // Counts the 00 bytes at every second index from offset, within the first length bytes
                    private int countZeros(byte[] b, int offset, int length)
                    {
                        int count = 0;
                        for (int i = offset; i < length; i += 2)
                        {
                            if (b[i] == 0)
                            {
                                count++;
                            }
                        }
                        return count;
                    }
                };
            }
            logger.debug("Processing plain text in encoding {}", name);
            return new InputStreamReader(is, charset);
        }
    }

    // Fall back on the system default
    logger.debug("Processing plain text using system default encoding");
    return new InputStreamReader(is);
}
/**
 * {@link TextToPDF} extension that adds a page-limited conversion and a by-name lookup of the
 * fonts registered in {@code STANDARD_14}.
 */
private static class PagedTextToPDF extends TextToPDF
{
    // REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
    // before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font

    /**
     * Looks up a registered font by its base font name.
     *
     * @param name base font name
     * @return the matching font, or {@code null} when the name is not registered
     */
    static PDType1Font getStandardFont(String name)
    {
        return STANDARD_14.get(name);
    }

    private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();

    static
    {
        STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
        STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
        STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
        STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
                PDType1Font.TIMES_BOLD_ITALIC);
        STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
        STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
        STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
                PDType1Font.HELVETICA_OBLIQUE);
        STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
                PDType1Font.HELVETICA_BOLD_OBLIQUE);
        STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
        STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
        STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
        STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
                PDType1Font.COURIER_BOLD_OBLIQUE);
        STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
        STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
    }
    //duplicating until here

    /**
     * Lays the text out on pages, word-wrapping each input line to the page width.
     * Based on the code in TextToPDF with the addition of checks for page limits.
     * The calling code must close the PDDocument once finished with it.
     *
     * @param text      source text
     * @param pageLimit maximum number of pages to generate; zero or a negative value means no limit
     * @return the generated document; an empty input yields a document with one empty page
     * @throws IOException if reading the text or writing page content fails; the partially built
     *                     document is closed before the exception is propagated
     */
    public PDDocument createPDFFromText(Reader text, int pageLimit)
            throws IOException
    {
        PDDocument doc = null;
        int pageCount = 0;
        try
        {
            final int margin = 40;
            float height = getFont().getFontDescriptor().getFontBoundingBox().getHeight() / 1000;

            //calculate font height and increase by 5 percent.
            height = height * getFontSize() * 1.05f;
            doc = new PDDocument();
            BufferedReader data = (text instanceof BufferedReader) ? (BufferedReader) text : new BufferedReader(text);
            String nextLine;
            PDPage page = new PDPage();
            PDPageContentStream contentStream = null;
            // Starts below the margin so the first line drawn opens the first page.
            float y = -1;
            float maxStringLength = page.getMediaBox().getWidth() - 2 * margin;

            // There is a special case of creating a PDF document from an empty string.
            boolean textIsEmpty = true;

            outer:
            while ((nextLine = data.readLine()) != null)
            {
                // The input text is nonEmpty. New pages will be created and added
                // to the PDF document as they are needed, depending on the length of
                // the text.
                textIsEmpty = false;

                String[] lineWords = nextLine.trim().split(" ");
                int lineIndex = 0;
                while (lineIndex < lineWords.length)
                {
                    // Take words until the next one would make the line wider than the page allows.
                    final StringBuilder nextLineToDraw = new StringBuilder();
                    float lengthIfUsingNextWord = 0;
                    do
                    {
                        nextLineToDraw.append(lineWords[lineIndex]);
                        nextLineToDraw.append(" ");
                        lineIndex++;
                        if (lineIndex < lineWords.length)
                        {
                            String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
                            lengthIfUsingNextWord =
                                    (getFont().getStringWidth(
                                            lineWithNextWord) / 1000) * getFontSize();
                        }
                    }
                    while (lineIndex < lineWords.length &&
                           lengthIfUsingNextWord < maxStringLength);

                    if (y < margin)
                    {
                        // pageCount is only advanced while a positive limit is in force.
                        if (pageLimit > 0 && (pageCount++ >= pageLimit))
                        {
                            break outer;
                        }

                        // We have crossed the end-of-page boundary and need to extend the
                        // document by another page.
                        page = new PDPage();
                        doc.addPage(page);
                        if (contentStream != null)
                        {
                            contentStream.endText();
                            contentStream.close();
                        }
                        contentStream = new PDPageContentStream(doc, page);
                        contentStream.setFont(getFont(), getFontSize());
                        contentStream.beginText();
                        y = page.getMediaBox().getHeight() - margin + height;
                        contentStream.newLineAtOffset(margin, y);
                    }

                    if (contentStream == null)
                    {
                        throw new IOException("Error:Expected non-null content stream.");
                    }
                    contentStream.newLineAtOffset(0, -height);
                    y -= height;
                    contentStream.showText(nextLineToDraw.toString());
                }
            }

            // If the input text was the empty string, then the above while loop will have short-circuited
            // and we will not have added any PDPages to the document.
            // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
            if (textIsEmpty)
            {
                doc.addPage(page);
            }

            if (contentStream != null)
            {
                contentStream.endText();
                contentStream.close();
            }
        }
        catch (IOException | RuntimeException failure)
        {
            // The caller never receives the document when we throw, so release it here. Runtime
            // failures are included so that they do not leave the document open either.
            if (doc != null)
            {
                try
                {
                    doc.close();
                }
                catch (IOException closeFailure)
                {
                    // Keep the original failure as the one that propagates.
                    failure.addSuppressed(closeFailure);
                }
            }
            throw failure;
        }
        return doc;
    }
}
/**
 * Parses a request parameter value as an {@code int}.
 *
 * @param s         text to parse
 * @param paramName parameter name used in the error message
 * @return the parsed value
 * @throws IllegalArgumentException if {@code s} is not a valid integer; the
 *                                  {@link NumberFormatException} is kept as the cause
 */
private int parseInt(String s, String paramName)
{
    try
    {
        // parseInt yields the primitive directly, avoiding the box/unbox round trip of valueOf.
        return Integer.parseInt(s);
    }
    catch (NumberFormatException e)
    {
        throw new IllegalArgumentException(paramName + " parameter must be an integer.", e);
    }
}
}

View File

@@ -1,12 +1,12 @@
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description
#
# HtmlMetadataExtractor - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
# Mappings
author=cm:author
title=cm:title
description=cm:description

View File

@@ -1,162 +1,162 @@
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class HtmlParserContentTransformerTest
{
    private static final String SOURCE_MIMETYPE = "text/html";
    private static final String TARGET_MIMETYPE = "text/plain";

    HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();

    /**
     * Verifies that HTML is converted to the same plain text whichever encoding the source uses,
     * whether that encoding is supplied as the source-encoding parameter or declared in a meta
     * tag inside the HTML. (ALF-10466)
     *
     * The expected text is built with the platform line separator because, on Windows,
     * org.htmlparser.beans.StringBean.carriageReturn() appends a system dependent new line.
     */
    @Test
    public void testEncodingHandling() throws Exception
    {
        final String NEWLINE = System.getProperty("line.separator");
        final String TITLE = "Testing!";
        final String TEXT_P1 = "This is some text in English";
        final String TEXT_P2 = "This is more text in English";
        final String TEXT_P3 = "C'est en Fran\u00e7ais et Espa\u00f1ol";

        final String head = "<html><head><title>" + TITLE + "</title></head>" + NEWLINE;
        final String body = "<body><p>" + TEXT_P1 + "</p>" + NEWLINE +
                            "<p>" + TEXT_P2 + "</p>" + NEWLINE +
                            "<p>" + TEXT_P3 + "</p>" + NEWLINE;
        final String tail = "</body></html>";

        final String expected = TITLE + NEWLINE + TEXT_P1 + NEWLINE + TEXT_P2 + NEWLINE + TEXT_P3 + NEWLINE;

        // Content set to ISO 8859-1
        assertTransformsTo(expected, head + body + tail, "ISO-8859-1", "ISO-8859-1");

        // Content set to UTF-8
        assertTransformsTo(expected, head + body + tail, "UTF-8", "UTF-8");

        // Content set to UTF-16
        assertTransformsTo(expected, head + body + tail, "UTF-16", "UTF-16");

        // Note - since HTML Parser 2.0 META tags specifying the
        // document encoding will ONLY be respected if the original
        // content type was set to ISO-8859-1.
        //
        // This means there is now only one test which we can perform
        // to ensure that this now-limited overriding of the encoding
        // takes effect.

        // Content set to ISO 8859-1, meta set to UTF-8
        final String withMeta = head +
                "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
                body + tail;
        assertTransformsTo(expected, withMeta, "UTF-8", "ISO-8859-1");

        // Note - we can't test UTF-16 with only a meta encoding,
        // because without that the parser won't know about the
        // 2 byte format so won't be able to identify the meta tag
    }

    /**
     * Writes {@code html} to a temporary source file, runs the transformer on it and checks the
     * text that ends up in the temporary target file. Both files are removed afterwards, also
     * when the transformation or the assertion fails.
     *
     * @param expected         text the target file must contain when read as UTF-8
     * @param html             markup written to the source file
     * @param fileEncoding     encoding used to write the source file
     * @param declaredEncoding value passed to the transformer as the source encoding parameter
     */
    private void assertTransformsTo(String expected, String html, String fileEncoding,
                                    String declaredEncoding) throws Exception
    {
        File source = null;
        File target = null;
        try
        {
            source = File.createTempFile("AlfrescoTestSource_", ".html");
            writeToFile(source, html, fileEncoding);
            target = File.createTempFile("AlfrescoTestTarget_", ".txt");

            final Map<String, String> options = new HashMap<>();
            options.put(SOURCE_ENCODING, declaredEncoding);

            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, options, source, target);

            assertEquals(expected, readFromFile(target, "UTF-8"));
        }
        finally
        {
            if (source != null && source.exists())
            {
                source.delete();
            }
            if (target != null && target.exists())
            {
                target.delete();
            }
        }
    }

    /**
     * Writes {@code content} to {@code file} using the given encoding.
     */
    private void writeToFile(File file, String content, String encoding) throws Exception
    {
        try (FileOutputStream bytesOut = new FileOutputStream(file);
             OutputStreamWriter textOut = new OutputStreamWriter(bytesOut, encoding))
        {
            textOut.append(content);
        }
    }

    /**
     * Reads the whole of {@code file} and decodes it with the given encoding.
     */
    private String readFromFile(File file, final String encoding) throws Exception
    {
        final byte[] raw = Files.readAllBytes(file.toPath());
        return new String(raw, encoding);
    }
}
/*
* #%L
* Alfresco Transform Core
* %%
* Copyright (C) 2005 - 2022 Alfresco Software Limited
* %%
* This file is part of the Alfresco software.
* -
* If the software was purchased under a paid Alfresco license, the terms of
* the paid license agreement will prevail. Otherwise, the software is
* provided under the following open source license terms:
* -
* Alfresco is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* -
* Alfresco is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* -
* You should have received a copy of the GNU Lesser General Public License
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
package org.alfresco.transformer.transformers;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import static org.alfresco.transform.client.util.RequestParamMap.SOURCE_ENCODING;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@link HtmlParserContentTransformer}: converts small HTML documents
 * written in several character encodings to plain text and compares the result
 * with the expected text.
 */
public class HtmlParserContentTransformerTest
{
    private static final String SOURCE_MIMETYPE = "text/html";
    private static final String TARGET_MIMETYPE = "text/plain";

    HtmlParserContentTransformer transformer = new HtmlParserContentTransformer();

    /**
     * Verifies that text is extracted correctly whether the encoding is supplied
     * through the source-encoding transform option or through a meta tag inside
     * the HTML itself. (ALF-10466)
     *
     * On Windows, org.htmlparser.beans.StringBean.carriageReturn() appends a
     * system dependent new line, so the expected text is built with the
     * platform line separator rather than a hard-coded "\n".
     */
    @Test
    public void testEncodingHandling() throws Exception
    {
        final String eol = System.getProperty("line.separator");
        final String title = "Testing!";
        final String firstParagraph = "This is some text in English";
        final String secondParagraph = "This is more text in English";
        final String thirdParagraph = "C'est en Fran\u00e7ais et Espa\u00f1ol";

        final String head = "<html><head><title>" + title + "</title></head>" + eol;
        final String body = "<body><p>" + firstParagraph + "</p>" + eol +
                "<p>" + secondParagraph + "</p>" + eol +
                "<p>" + thirdParagraph + "</p>" + eol;
        final String tail = "</body></html>";

        final String plainHtml = head + body + tail;
        final String expected = title + eol + firstParagraph + eol + secondParagraph + eol + thirdParagraph + eol;

        // Content set to ISO 8859-1
        assertTransformsTo(expected, plainHtml, "ISO-8859-1", "ISO-8859-1");

        // Content set to UTF-8
        assertTransformsTo(expected, plainHtml, "UTF-8", "UTF-8");

        // Content set to UTF-16
        assertTransformsTo(expected, plainHtml, "UTF-16", "UTF-16");

        // Note - since HTML Parser 2.0 META tags specifying the
        // document encoding will ONLY be respected if the original
        // content type was set to ISO-8859-1.
        //
        // This means there is now only one test which we can perform
        // to ensure that this now-limited overriding of the encoding
        // takes effect.

        // Content set to ISO 8859-1, meta set to UTF-8
        final String htmlWithMeta = head +
                "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" +
                body + tail;
        assertTransformsTo(expected, htmlWithMeta, "UTF-8", "ISO-8859-1");

        // Note - we can't test UTF-16 with only a meta encoding,
        // because without that the parser won't know about the
        // 2 byte format so won't be able to identify the meta tag
    }

    /**
     * Runs one transformation scenario: writes {@code html} to a temporary source
     * file using {@code fileEncoding}, transforms it with the source-encoding
     * option set to {@code declaredEncoding}, and asserts that the target file,
     * read as UTF-8, equals {@code expectedText}.
     *
     * Both temporary files are removed afterwards, whether or not the assertion
     * passes.
     *
     * @param expectedText     the plain text the transformer should produce
     * @param html             the HTML document to transform
     * @param fileEncoding     the charset name used to write the source file
     * @param declaredEncoding the charset name passed to the transformer
     */
    private void assertTransformsTo(String expectedText, String html, String fileEncoding,
            String declaredEncoding) throws Exception
    {
        File source = null;
        File target = null;
        try
        {
            source = File.createTempFile("AlfrescoTestSource_", ".html");
            writeToFile(source, html, fileEncoding);
            target = File.createTempFile("AlfrescoTestTarget_", ".txt");

            Map<String, String> options = new HashMap<>();
            options.put(SOURCE_ENCODING, declaredEncoding);

            transformer.transform(SOURCE_MIMETYPE, TARGET_MIMETYPE, options, source, target);

            assertEquals(expectedText, readFromFile(target, "UTF-8"));
        }
        finally
        {
            if (source != null && source.exists())
            {
                source.delete();
            }
            if (target != null && target.exists())
            {
                target.delete();
            }
        }
    }

    /**
     * Writes {@code content} to {@code file} using the named charset; the writer
     * is closed before this method returns.
     */
    private void writeToFile(File file, String content, String encoding) throws Exception
    {
        try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(file), encoding))
        {
            writer.append(content);
        }
    }

    /**
     * Reads the whole of {@code file} and decodes its bytes with the named charset.
     */
    private String readFromFile(File file, final String encoding) throws Exception
    {
        final byte[] raw = Files.readAllBytes(file.toPath());
        return new String(raw, encoding);
    }
}