MNT-22082 transformation of pdf to text hang (#367)

A new constructor has been added to the TikaController to provide
the new Spring config.
The creation of the TikaJavaExecutor has been moved to a "singleton pattern"
because the injection of the @Value happens after the TikaJavaExecutor would
otherwise be instantiated, so the value was not passed to it correctly. The
executor is now instantiated once, on the first transform request.
The parameter has also been added to the AIO beans.
This commit is contained in:
David Edwards
2021-04-13 09:59:42 +01:00
committed by GitHub
parent bc2306d2bb
commit 03d08d0c9e
10 changed files with 187 additions and 12 deletions

View File

@@ -76,6 +76,9 @@ public class AIOCustomConfig
@Value("${transform.core.imagemagick.config}")
private String imageMagickConfigPath;
@Value("${transform.core.tika.pdfBox.notExtractBookmarksTextDefault:false}")
private boolean notExtractBookmarksTextDefault;
/**
*
* @return Override the TransformRegistryImpl used in {@link AbstractTransformerController}
@@ -86,7 +89,7 @@ public class AIOCustomConfig
{
AIOTransformRegistry aioTransformRegistry = new AIOTransformRegistry();
aioTransformRegistry.registerTransformer(new SelectingTransformer());
aioTransformRegistry.registerTransformer(new TikaJavaExecutor());
aioTransformRegistry.registerTransformer(new TikaJavaExecutor(notExtractBookmarksTextDefault));
aioTransformRegistry.registerTransformer(new ImageMagickCommandExecutor(imageMagickExePath, imageMagickDynPath, imageMagickRootPath, imageMagickCodersPath, imageMagickConfigPath));
aioTransformRegistry.registerTransformer(new LibreOfficeJavaExecutor(libreofficePath, libreofficeMaxTasksPerProcess, libreofficeTimeout, libreofficePortNumbers, libreofficeTemplateProfileDir, libreofficeIsEnabled));
aioTransformRegistry.registerTransformer(new PdfRendererCommandExecutor(pdfRendererPath));

View File

@@ -17,3 +17,6 @@ transform:
exe: ${IMAGEMAGICK_EXE:/usr/bin/convert}
coders: ${IMAGEMAGICK_CODERS:}
config: ${IMAGEMAGICK_CONFIG:}
tika:
pdfBox:
notExtractBookmarksTextDefault: ${PDFBOX_NOTEXTRACTBOOKMARKS_DEFAULT:false}