mirror of
https://github.com/Alfresco/alfresco-transform-core.git
synced 2025-10-01 14:41:17 +00:00
ACS-10169 Bump tika to 3.x (#1140)
* ACS-10169 Bump tika libraries - to fix vulnerability CVE-2025-54988
* ACS-10169 Bump tika libraries - adjusted MailMetadataExtractor to correctly parse the e-mail sent date/time with the new library
* ACS-10169 Bump tika libraries - fixed PDF loading in tests
* ACS-10169 Code improvements
This commit is contained in:
@@ -44,9 +44,9 @@ import java.io.PushbackInputStream;
|
||||
import java.io.Reader;
|
||||
import java.net.URI;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
|
||||
import org.apache.fontbox.ttf.TrueTypeFont;
|
||||
@@ -59,6 +59,7 @@ import org.apache.pdfbox.pdmodel.font.FontMapping;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||
import org.apache.pdfbox.tools.TextToPDF;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -252,41 +253,24 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
|
||||
|
||||
private static class PagedTextToPDF extends TextToPDF
|
||||
{
|
||||
// REPO-1066: duplicating the following lines from org.apache.pdfbox.tools.TextToPDF because they made them private
|
||||
// before the upgrade to pdfbox 2.0.8, in pdfbox 1.8, this piece of code was public in org.apache.pdfbox.pdmodel.font.PDType1Font
|
||||
private static final PDType1Font DEFAULT_FONT = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
|
||||
private static final Map<String, PDType1Font> STANDARD_14 = Standard14Fonts.getNames().stream()
|
||||
.collect(Collectors.toMap(name -> name, name -> new PDType1Font(Standard14Fonts.getMappedFontName(name))));
|
||||
|
||||
private String fontName = null;
|
||||
private String defaultFont = null;
|
||||
|
||||
PagedTextToPDF()
|
||||
{
|
||||
super();
|
||||
setFont(DEFAULT_FONT);
|
||||
}
|
||||
|
||||
static PDType1Font getStandardFont(String name)
|
||||
{
|
||||
return STANDARD_14.get(name);
|
||||
}
|
||||
|
||||
private static final Map<String, PDType1Font> STANDARD_14 = new HashMap<>();
|
||||
|
||||
static
|
||||
{
|
||||
STANDARD_14.put(PDType1Font.TIMES_ROMAN.getBaseFont(), PDType1Font.TIMES_ROMAN);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD.getBaseFont(), PDType1Font.TIMES_BOLD);
|
||||
STANDARD_14.put(PDType1Font.TIMES_ITALIC.getBaseFont(), PDType1Font.TIMES_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.TIMES_BOLD_ITALIC.getBaseFont(),
|
||||
PDType1Font.TIMES_BOLD_ITALIC);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA.getBaseFont(), PDType1Font.HELVETICA);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD.getBaseFont(), PDType1Font.HELVETICA_BOLD);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.HELVETICA_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.HELVETICA_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER.getBaseFont(), PDType1Font.COURIER);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD.getBaseFont(), PDType1Font.COURIER_BOLD);
|
||||
STANDARD_14.put(PDType1Font.COURIER_OBLIQUE.getBaseFont(), PDType1Font.COURIER_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.COURIER_BOLD_OBLIQUE.getBaseFont(),
|
||||
PDType1Font.COURIER_BOLD_OBLIQUE);
|
||||
STANDARD_14.put(PDType1Font.SYMBOL.getBaseFont(), PDType1Font.SYMBOL);
|
||||
STANDARD_14.put(PDType1Font.ZAPF_DINGBATS.getBaseFont(), PDType1Font.ZAPF_DINGBATS);
|
||||
}
|
||||
// duplicating until here
|
||||
|
||||
private String fontName = null;
|
||||
private String defaultFont = null;
|
||||
|
||||
// The following code is based on the code in TextToPDF with the addition of
|
||||
// checks for page limits.
|
||||
// The calling code must close the PDDocument once finished with it.
|
||||
@@ -369,16 +353,16 @@ public class TextToPdfContentTransformer implements CustomTransformerFileAdaptor
|
||||
contentStream.setFont(font, fontSize);
|
||||
contentStream.beginText();
|
||||
y = page.getMediaBox().getHeight() - margin + height;
|
||||
contentStream.moveTextPositionByAmount(margin, y);
|
||||
contentStream.newLineAtOffset(margin, y);
|
||||
}
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new IOException("Error:Expected non-null content stream.");
|
||||
}
|
||||
contentStream.moveTextPositionByAmount(0, -height);
|
||||
contentStream.newLineAtOffset(0, -height);
|
||||
y -= height;
|
||||
contentStream.drawString(nextLineToDraw.toString());
|
||||
contentStream.showText(nextLineToDraw.toString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -50,6 +50,7 @@ import java.io.StringWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@@ -388,7 +389,7 @@ public class MiscTest extends AbstractBaseTest
|
||||
expected.getBytes());
|
||||
|
||||
// Read back in the PDF and check it
|
||||
PDDocument doc = PDDocument.load(result.getResponse().getContentAsByteArray());
|
||||
PDDocument doc = Loader.loadPDF(result.getResponse().getContentAsByteArray());
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
StringWriter textWriter = new StringWriter();
|
||||
textStripper.writeText(doc, textWriter);
|
||||
|
@@ -67,6 +67,7 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
@@ -178,7 +179,7 @@ public class MiscTransformsIT
|
||||
if (MIMETYPE_PDF.equals(targetMimetype))
|
||||
{
|
||||
// verify if PDF isn't corrupted
|
||||
final PDDocument pdfFile = PDDocument.load(Objects.requireNonNull(response.getBody()).getInputStream());
|
||||
final PDDocument pdfFile = Loader.loadPDF(Objects.requireNonNull(response.getBody()).getContentAsByteArray());
|
||||
assertNotNull(pdfFile);
|
||||
}
|
||||
}
|
||||
|
@@ -57,6 +57,7 @@ import javax.imageio.ImageIO;
|
||||
import javax.imageio.ImageReader;
|
||||
import javax.imageio.stream.ImageInputStream;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
@@ -144,7 +145,7 @@ class ImageToPdfTransformerTest
|
||||
transformer.transform(imageFile.mimetype, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager);
|
||||
|
||||
then(transformManager).shouldHaveNoInteractions();
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
try (PDDocument actualPdfDocument = Loader.loadPDF(targetFile))
|
||||
{
|
||||
int expectedNumberOfPages = calculateExpectedNumberOfPages(transformOptions, imageFile.firstPage(), imageFile.lastPage());
|
||||
assertNotNull(actualPdfDocument);
|
||||
@@ -245,7 +246,7 @@ class ImageToPdfTransformerTest
|
||||
// when
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager);
|
||||
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
try (PDDocument actualPdfDocument = Loader.loadPDF(targetFile))
|
||||
{
|
||||
PDRectangle finalExpectedPdfFormat = expectedPdfFormatRotator.apply(expectedPdfFormat.getWidth(), expectedPdfFormat.getHeight());
|
||||
assertNotNull(actualPdfDocument);
|
||||
@@ -262,7 +263,7 @@ class ImageToPdfTransformerTest
|
||||
// when
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager);
|
||||
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
try (PDDocument actualPdfDocument = Loader.loadPDF(targetFile))
|
||||
{
|
||||
BufferedImage actualImage = ImageIO.read(sourceFile);
|
||||
assertNotNull(actualPdfDocument);
|
||||
@@ -279,7 +280,7 @@ class ImageToPdfTransformerTest
|
||||
// when
|
||||
transformer.transform(MIMETYPE_IMAGE_TIFF, MIMETYPE_PDF, transformOptions.toMap(), sourceFile, targetFile, transformManager);
|
||||
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
try (PDDocument actualPdfDocument = Loader.loadPDF(targetFile))
|
||||
{
|
||||
BufferedImage actualImage = ImageIO.read(sourceFile);
|
||||
assertNotNull(actualPdfDocument);
|
||||
@@ -314,7 +315,7 @@ class ImageToPdfTransformerTest
|
||||
// when
|
||||
transformer.transform(imageFile.mimetype, MIMETYPE_PDF, transformOptions.toMap(), source, targetFile, transformManager);
|
||||
|
||||
try (PDDocument actualPdfDocument = PDDocument.load(targetFile))
|
||||
try (PDDocument actualPdfDocument = Loader.loadPDF(targetFile))
|
||||
{
|
||||
assertNotNull(actualPdfDocument);
|
||||
assertEquals(expectedWidth, actualPdfDocument.getPage(0).getMediaBox().getWidth(), "Pdf width");
|
||||
|
@@ -48,8 +48,9 @@ import java.io.StringWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@@ -171,13 +172,13 @@ public class TextToPdfContentTransformerTest
|
||||
writeToFile(sourceFile, TEXT_WITHOUT_A_BREVE, encoding, null, null);
|
||||
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());
|
||||
parameters.put(PDF_FONT, Standard14Fonts.FontName.TIMES_BOLD.getName());
|
||||
parameters.put(PDF_FONT_SIZE, "30");
|
||||
|
||||
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
|
||||
parameters, false);
|
||||
|
||||
assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName());
|
||||
assertEquals(result.getUsedFont(), Standard14Fonts.FontName.TIMES_BOLD.getName());
|
||||
assertNull(result.getErrorMessage());
|
||||
}
|
||||
|
||||
@@ -200,7 +201,7 @@ public class TextToPdfContentTransformerTest
|
||||
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITHOUT_A_BREVE, String.valueOf(-1), true,
|
||||
parameters, false);
|
||||
|
||||
assertEquals(result.getUsedFont(), PDType1Font.TIMES_ROMAN.getName());
|
||||
assertEquals(result.getUsedFont(), Standard14Fonts.FontName.TIMES_ROMAN.getName());
|
||||
assertNull(result.getErrorMessage());
|
||||
}
|
||||
|
||||
@@ -218,14 +219,14 @@ public class TextToPdfContentTransformerTest
|
||||
writeToFile(sourceFile, TEXT_WITH_A_BREVE, encoding, null, null);
|
||||
|
||||
Map<String, String> parameters = new HashMap<>();
|
||||
parameters.put(PDF_FONT, PDType1Font.TIMES_BOLD.getName());
|
||||
parameters.put(PDF_FONT, Standard14Fonts.FontName.TIMES_BOLD.getName());
|
||||
|
||||
TransformCheckResult result = transformTextAndCheck(sourceFile, encoding, TEXT_WITH_A_BREVE, String.valueOf(-1), true,
|
||||
parameters, true);
|
||||
|
||||
assertEquals(result.getUsedFont(), PDType1Font.TIMES_BOLD.getName());
|
||||
assertEquals(result.getUsedFont(), Standard14Fonts.FontName.TIMES_BOLD.getName());
|
||||
assertNotNull(result.getErrorMessage());
|
||||
assertTrue(result.getErrorMessage().contains(PDType1Font.TIMES_BOLD.getName()));
|
||||
assertTrue(result.getErrorMessage().contains(Standard14Fonts.FontName.TIMES_BOLD.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -326,7 +327,7 @@ public class TextToPdfContentTransformerTest
|
||||
if (!failed)
|
||||
{
|
||||
// Read back in the PDF and check it
|
||||
PDDocument doc = PDDocument.load(targetFile);
|
||||
PDDocument doc = Loader.loadPDF(targetFile);
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
StringWriter textWriter = new StringWriter();
|
||||
textStripper.writeText(doc, textWriter);
|
||||
|
@@ -96,7 +96,7 @@ public class MailMetadataExtractor extends AbstractTikaMetadataExtractorEmbeddor
|
||||
putRawValue(KEY_ORIGINATOR, metadata.get(TikaCoreProperties.CREATOR), properties);
|
||||
putRawValue(KEY_SUBJECT, metadata.get(TikaCoreProperties.TITLE), properties);
|
||||
putRawValue(KEY_DESCRIPTION, metadata.get(TikaCoreProperties.SUBJECT), properties);
|
||||
putRawValue(KEY_SENT_DATE, metadata.get(TikaCoreProperties.MODIFIED), properties);
|
||||
putRawValue(KEY_SENT_DATE, metadata.get(TikaCoreProperties.CREATED), properties);
|
||||
|
||||
// Store the TO, but not cc/bcc in the addressee field
|
||||
putRawValue(KEY_ADDRESSEE, metadata.get(Message.MESSAGE_TO), properties);
|
||||
|
@@ -197,7 +197,7 @@ public class ExifToolParser extends ExternalParser
|
||||
TemporaryResources tmp = new TemporaryResources();
|
||||
try
|
||||
{
|
||||
TikaInputStream tis = TikaInputStream.get(stream, tmp);
|
||||
TikaInputStream tis = TikaInputStream.get(stream, tmp, metadata);
|
||||
|
||||
if (this.getSupportedTypes().contains(mediaType))
|
||||
{
|
||||
|
10
pom.xml
10
pom.xml
@@ -21,11 +21,11 @@
|
||||
<acs-compatible.java.version>11</acs-compatible.java.version>
|
||||
<image.tag>latest</image.tag>
|
||||
<image.registry>quay.io</image.registry>
|
||||
<dependency.pdfbox.version>2.0.30</dependency.pdfbox.version>
|
||||
<dependency.pdfbox.version>3.0.5</dependency.pdfbox.version>
|
||||
<dependency.alfresco-jodconverter-core.version>3.0.1.20</dependency.alfresco-jodconverter-core.version>
|
||||
<env.project_version>${project.version}</env.project_version>
|
||||
<dependency.jackson.version>2.19.2</dependency.jackson.version>
|
||||
<dependency.tika.version>2.9.2</dependency.tika.version>
|
||||
<dependency.tika.version>3.2.3</dependency.tika.version>
|
||||
<dependency.poi.version>5.4.1</dependency.poi.version>
|
||||
<dependency.commons-io.version>2.20.0</dependency.commons-io.version>
|
||||
<dependency.imaging.version>1.0.0-alpha6</dependency.imaging.version>
|
||||
@@ -135,6 +135,12 @@
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<!-- xz v1.10 is released under the 0BSD license, which must be consulted with Legal; version 1.9 is managed here -->
|
||||
<dependency>
|
||||
<groupId>org.tukaani</groupId>
|
||||
<artifactId>xz</artifactId>
|
||||
<version>1.9</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
|
Reference in New Issue
Block a user