diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml index 6b14146146..9aead1462d 100644 --- a/config/alfresco/content-services-context.xml +++ b/config/alfresco/content-services-context.xml @@ -165,7 +165,7 @@ - + @@ -471,4 +471,18 @@ + + + + + + application/zip + text/plain + + + + + diff --git a/config/alfresco/swf-transform-context.xml b/config/alfresco/swf-transform-context.xml index bc0a8f759c..436e26ca44 100644 --- a/config/alfresco/swf-transform-context.xml +++ b/config/alfresco/swf-transform-context.xml @@ -57,4 +57,23 @@ + + + + + + + + + + + + text/plain + application/pdf + + + + diff --git a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java index 57fb2317b9..5e7dad0c6b 100644 --- a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java +++ b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java @@ -229,7 +229,7 @@ public abstract class AbstractContentTransformerTest extends TestCase transformer.transform(sourceReader.getReader(), targetWriter); // if the target format is any type of text, then it must contain the 'quick' phrase - if (targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN)) + if (isQuickPhraseExpected(targetMimetype)) { ContentReader targetReader = targetWriter.getReader(); String checkContent = targetReader.getContentString(); @@ -239,7 +239,7 @@ public abstract class AbstractContentTransformerTest extends TestCase " target: " + targetWriter, checkContent.contains(QUICK_CONTENT)); } - else if (targetMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT)) + else if (isQuickWordsExpected(targetMimetype)) { ContentReader targetReader = targetWriter.getReader(); String checkContent = targetReader.getContentString(); @@ -280,7 +280,33 @@ public abstract class AbstractContentTransformerTest extends 
TestCase outputWriter.setEncoding("UTF8"); outputWriter.putContent(sb.toString()); } - + + /** + * This method is an extension point for enabling/disabling an assertion that the "quick brown fox" + * phrase is present in the transformed content. + * By default, the phrase is expected in all text/plain outputs. + * + * @param targetMimetype mimetype of the target of the transformation + * @return true if phrase is expected else false. + */ + protected boolean isQuickPhraseExpected(String targetMimetype) + { + return targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN); + } + + /** + * This method is an extension point for enabling/disabling an assertion that the "quick brown fox" + * words are each present in the transformed content. + * By default, the words in the phrase are expected in all text/* outputs. + * + * @param targetMimetype mimetype of the target of the transformation + * @return true if each word is expected else false. + */ + protected boolean isQuickWordsExpected(String targetMimetype) + { + return targetMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT); + } + /** * This method is an extension point for excluding certain transformations in a subclass. * The default implementation returns false for all mime type pairs. diff --git a/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java new file mode 100644 index 0000000000..5396939664 --- /dev/null +++ b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2005-2010 Alfresco Software Limited. + * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.repo.content.transform; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.service.cmr.repository.ContentReader; +import org.alfresco.service.cmr.repository.ContentWriter; +import org.alfresco.service.cmr.repository.TransformationOptions; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This class transforms archive files (currently only ZIPs) to text, which enables indexing + * and searching of archives as well as webpreviewing. + * The transformation simply lists the names of the entries within the zip file and does not consider their content. + * + * @author Neil McErlean + * @since Swift + */ +public class ArchiveContentTransformer extends AbstractContentTransformer2 +{ + /** + * The logger + */ + private static Log logger = LogFactory.getLog(ArchiveContentTransformer.class); + + /** + * Currently the only transformation performed is ZIP archive to plain text (a listing of the archive's entry names). + */ + public boolean isTransformable(String sourceMimetype, String targetMimetype, TransformationOptions options) + { + // TODO: Expand to other archive types e.g. tar. 
+ if (!MimetypeMap.MIMETYPE_ZIP.equals(sourceMimetype) || + !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype)) + { + // Currently only support ZIP -> Text + return false; + } + else + { + return true; + } + } + + protected void transformInternal( + ContentReader reader, + ContentWriter writer, + TransformationOptions options) throws Exception + { + InputStream is = null; + try + { + is = reader.getContentInputStream(); + + List zipEntryNames = new ArrayList(); + ZipInputStream zin = new ZipInputStream(is); + + // Enumerate each entry + ZipEntry nextZipEntry = null; + while ((nextZipEntry = zin.getNextEntry()) != null) + { + String entryName = nextZipEntry.getName(); + zipEntryNames.add(entryName); + + // Currently we do not recurse into 'zips within zips'. + } + + if (logger.isDebugEnabled()) + { + StringBuilder msg = new StringBuilder(); + msg.append("Transformed ") + .append(zipEntryNames.size()) + .append(zipEntryNames.size() == 1 ? " zip entry" : " zip entries"); + logger.debug(msg.toString()); + } + + String text = createTextContentFrom(zipEntryNames); + + // dump it all to the writer + writer.putContent(text); + } + finally + { + if (is != null) + { + try { is.close(); } catch (Throwable e) {e.printStackTrace(); } + } + } + } + + private String createTextContentFrom(List zipEntryNames) + { + StringBuilder result = new StringBuilder(); + for (String entryName : zipEntryNames) + { + result.append(entryName) + .append('\n'); + } + return result.toString(); + } +} diff --git a/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java new file mode 100644 index 0000000000..0bfc128a3b --- /dev/null +++ b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2005-2010 Alfresco Software Limited. 
+ * + * This file is part of Alfresco + * + * Alfresco is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Alfresco is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Alfresco. If not, see . + */ +package org.alfresco.repo.content.transform; + +import org.alfresco.repo.content.MimetypeMap; +import org.alfresco.service.cmr.repository.TransformationOptions; + +/** + * Test class for ArchiveContentTransformer. + * + * @see org.alfresco.repo.content.transform.ArchiveContentTransformer + * + * @author Neil McErlean + */ +public class ArchiveContentTransformerTest extends AbstractContentTransformerTest +{ + private ContentTransformer transformer; + + @Override + public void setUp() throws Exception + { + super.setUp(); + + transformer = new ArchiveContentTransformer(); + } + + protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype) + { + return transformer; + } + + public void testIsTransformable() throws Exception + { + assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_ZIP, MimetypeMap.MIMETYPE_TEXT_PLAIN, new TransformationOptions())); + } + + @Override + protected boolean isQuickPhraseExpected(String targetMimetype) + { + // The Zip transformer produces names of the entries, not their contents. + return false; + } + + @Override + protected boolean isQuickWordsExpected(String targetMimetype) + { + // The Zip transformer produces names of the entries, not their contents. 
+ return false; + } +} diff --git a/source/test-resources/quick/quick.zip b/source/test-resources/quick/quick.zip new file mode 100644 index 0000000000..168109f4a3 Binary files /dev/null and b/source/test-resources/quick/quick.zip differ