From 735fee253a1248fc13193c261bf949d1fe340b0d Mon Sep 17 00:00:00 2001 From: Alan Davis Date: Tue, 13 Sep 2016 15:28:03 +0000 Subject: [PATCH] MNT-16181 transformations for application/dita+xml - StringExtractor is now able to extract text so searching is now working, which was the main reason for this Customer issue. - PdfBox.TextToPdf is now able to transform .dita files to .pdf. As a result it is also possible to preview the files (although the xml tags are visible) and to generate .png thumbnails using the complex.Text.Image transformer (which has an intermediate .pdf stage). - As expected TikaAuto (or our patched 1.6 version) does not know about the DITA mimetype application/dita+xml so is not claiming to transform this type even though it is just an XML file. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/DEV/5.2.N/root@130609 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../Transformers/default/transformers.properties | 9 +++++---- .../transform/StringExtractingContentTransformer.java | 5 +++-- .../content/transform/TextToPdfContentTransformer.java | 4 +++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/config/alfresco/subsystems/Transformers/default/transformers.properties b/config/alfresco/subsystems/Transformers/default/transformers.properties index 3436636ab7..042adb6acb 100644 --- a/config/alfresco/subsystems/Transformers/default/transformers.properties +++ b/config/alfresco/subsystems/Transformers/default/transformers.properties @@ -62,14 +62,14 @@ content.transformer.complex.Any.Image.priority=400 content.transformer.complex.Text.Image.pipeline=*|pdf|complex.PDF.Image content.transformer.complex.Text.Image.priority=350 +content.transformer.complex.Text.Image.extensions.csv.*.supported=true +content.transformer.complex.Text.Image.extensions.dita.*.supported=true +content.transformer.complex.Text.Image.extensions.xml.*.supported=true +content.transformer.complex.Text.Image.extensions.txt.*.supported=true 
content.transformer.OOXMLThumbnail.extensions.dotx.jpg.priority=50 content.transformer.OOXMLThumbnail.extensions.potx.jpg.priority=50 -content.transformer.complex.Text.Image.extensions.csv.*.supported=true -content.transformer.complex.Text.Image.extensions.xml.*.supported=true -content.transformer.complex.Text.Image.extensions.txt.*.supported=true - content.transformer.iWorksQuicklooks.extensions.key.jpg.priority=50 content.transformer.iWorksQuicklooks.extensions.pages.jpg.priority=50 content.transformer.iWorksQuicklooks.extensions.numbers.jpg.priority=50 @@ -94,6 +94,7 @@ content.transformer.PdfBox.priority=110 content.transformer.PdfBox.extensions.pdf.txt.priority=50 content.transformer.PdfBox.TextToPdf.extensions.csv.pdf.supported=true +content.transformer.PdfBox.TextToPdf.extensions.dita.pdf.supported=true content.transformer.PdfBox.TextToPdf.extensions.xml.pdf.supported=true content.transformer.PdfBox.TextToPdf.maxSourceSizeKBytes=10240 diff --git a/source/java/org/alfresco/repo/content/transform/StringExtractingContentTransformer.java b/source/java/org/alfresco/repo/content/transform/StringExtractingContentTransformer.java index bdbb81aae1..a4479ca616 100644 --- a/source/java/org/alfresco/repo/content/transform/StringExtractingContentTransformer.java +++ b/source/java/org/alfresco/repo/content/transform/StringExtractingContentTransformer.java @@ -71,7 +71,8 @@ public class StringExtractingContentTransformer extends AbstractContentTransform // conversions from any plain text format are very reliable return true; } - else if (sourceMimetype.startsWith(PREFIX_TEXT)) + else if (sourceMimetype.startsWith(PREFIX_TEXT) || + sourceMimetype.equals(MimetypeMap.MIMETYPE_DITA)) { // the source is text, but probably with some kind of markup return true; @@ -88,7 +89,7 @@ public class StringExtractingContentTransformer extends AbstractContentTransform { StringBuilder sb = new StringBuilder(); sb.append(super.getComments(available)); - sb.append("# Only supports 
transformation of js and mimetypes starting with \""); + sb.append("# Only supports transformation of js, dita and mimetypes starting with \""); sb.append(PREFIX_TEXT); sb.append("\" to txt.\n"); return sb.toString(); diff --git a/source/java/org/alfresco/repo/content/transform/TextToPdfContentTransformer.java b/source/java/org/alfresco/repo/content/transform/TextToPdfContentTransformer.java index a0a25ca5de..57bcb69d8c 100644 --- a/source/java/org/alfresco/repo/content/transform/TextToPdfContentTransformer.java +++ b/source/java/org/alfresco/repo/content/transform/TextToPdfContentTransformer.java @@ -113,6 +113,7 @@ public class TextToPdfContentTransformer extends AbstractContentTransformer2 { if ( (!MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(sourceMimetype) && !MimetypeMap.MIMETYPE_TEXT_CSV.equals(sourceMimetype) && + !MimetypeMap.MIMETYPE_DITA.equals(sourceMimetype) && !MimetypeMap.MIMETYPE_XML.equals(sourceMimetype) ) || !MimetypeMap.MIMETYPE_PDF.equals(targetMimetype)) { @@ -129,7 +130,8 @@ public class TextToPdfContentTransformer extends AbstractContentTransformer2 public String getComments(boolean available) { return getCommentsOnlySupports( - Arrays.asList(new String[] {MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_TEXT_CSV, MimetypeMap.MIMETYPE_XML}), + Arrays.asList(new String[] {MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_TEXT_CSV, + MimetypeMap.MIMETYPE_DITA, MimetypeMap.MIMETYPE_XML}), Arrays.asList(new String[] {MimetypeMap.MIMETYPE_PDF}), available); }