diff --git a/config/alfresco/content-services-context.xml b/config/alfresco/content-services-context.xml
index 6b14146146..9aead1462d 100644
--- a/config/alfresco/content-services-context.xml
+++ b/config/alfresco/content-services-context.xml
@@ -165,7 +165,7 @@
-
+
@@ -471,4 +471,18 @@
+
+
+
+
+
+ application/zip
+ text/plain
+
+
+
+
+
diff --git a/config/alfresco/swf-transform-context.xml b/config/alfresco/swf-transform-context.xml
index bc0a8f759c..436e26ca44 100644
--- a/config/alfresco/swf-transform-context.xml
+++ b/config/alfresco/swf-transform-context.xml
@@ -57,4 +57,23 @@
+
+
+
+
+
+
+
+
+
+
+
+ text/plain
+ application/pdf
+
+
+
+
diff --git a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java
index 57fb2317b9..5e7dad0c6b 100644
--- a/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java
+++ b/source/java/org/alfresco/repo/content/transform/AbstractContentTransformerTest.java
@@ -229,7 +229,7 @@ public abstract class AbstractContentTransformerTest extends TestCase
transformer.transform(sourceReader.getReader(), targetWriter);
// if the target format is any type of text, then it must contain the 'quick' phrase
- if (targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
+ if (isQuickPhraseExpected(targetMimetype))
{
ContentReader targetReader = targetWriter.getReader();
String checkContent = targetReader.getContentString();
@@ -239,7 +239,7 @@ public abstract class AbstractContentTransformerTest extends TestCase
" target: " + targetWriter,
checkContent.contains(QUICK_CONTENT));
}
- else if (targetMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT))
+ else if (isQuickWordsExpected(targetMimetype))
{
ContentReader targetReader = targetWriter.getReader();
String checkContent = targetReader.getContentString();
@@ -280,7 +280,33 @@ public abstract class AbstractContentTransformerTest extends TestCase
outputWriter.setEncoding("UTF8");
outputWriter.putContent(sb.toString());
}
-
+
+    /**
+     * This method is an extension point for enabling/disabling an assertion that the "quick brown fox"
+     * phrase is present in the transformed content.
+     * By default, the phrase is expected in all text/plain outputs and never for a <code>null</code> mimetype.
+     *
+     * @param targetMimetype mimetype of the target of the transformation
+     * @return <code>true</code> if phrase is expected else <code>false</code>.
+     */
+    protected boolean isQuickPhraseExpected(String targetMimetype)
+    {
+        return MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype);
+    }
+
+    /**
+     * This method is an extension point for enabling/disabling an assertion that the "quick brown fox"
+     * words are each present in the transformed content.
+     * By default, the words are expected in all text/* outputs and never for a <code>null</code> mimetype.
+     *
+     * @param targetMimetype mimetype of the target of the transformation
+     * @return <code>true</code> if each word is expected else <code>false</code>.
+     */
+    protected boolean isQuickWordsExpected(String targetMimetype)
+    {
+        return targetMimetype != null && targetMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT);
+    }
+
/**
* This method is an extension point for excluding certain transformations in a subclass.
* The default implementation returns <code>false</code> for all mime type pairs.
diff --git a/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java
new file mode 100644
index 0000000000..5396939664
--- /dev/null
+++ b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformer.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2005-2010 Alfresco Software Limited.
+ *
+ * This file is part of Alfresco
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.alfresco.repo.content.transform;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.ContentReader;
+import org.alfresco.service.cmr.repository.ContentWriter;
+import org.alfresco.service.cmr.repository.TransformationOptions;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * This class transforms archive files (currently only ZIPs) to text, which enables indexing
+ * and searching of archives as well as webpreviewing.
+ * The transformation simply lists the names of the entries within the zip file and does not consider their content.
+ *
+ * @author Neil McErlean
+ * @since Swift
+ */
+public class ArchiveContentTransformer extends AbstractContentTransformer2
+{
+    /**
+     * The logger
+     */
+    private static final Log logger = LogFactory.getLog(ArchiveContentTransformer.class);
+
+    /**
+     * Currently the only transformation performed is that of listing the entry names of ZIP archives as plain text.
+     *
+     * @return <code>true</code> only for ZIP to plain text, else <code>false</code>
+     */
+    public boolean isTransformable(String sourceMimetype, String targetMimetype, TransformationOptions options)
+    {
+        // TODO: Expand to other archive types e.g. tar.
+        // Currently only support ZIP -> Text
+        return MimetypeMap.MIMETYPE_ZIP.equals(sourceMimetype) && MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype);
+    }
+
+    /**
+     * Writes the name of every entry in the zip content to the writer, one name per line.
+     *
+     * @param reader supplies the zip content
+     * @param writer receives the newline-terminated entry names
+     * @param options transformation options (not consulted by this transformer)
+     */
+    protected void transformInternal(
+            ContentReader reader,
+            ContentWriter writer,
+            TransformationOptions options) throws Exception
+    {
+        InputStream is = null;
+        ZipInputStream zin = null;
+        try
+        {
+            is = reader.getContentInputStream();
+            zin = new ZipInputStream(is);
+
+            // Enumerate each entry. Currently we do not recurse into 'zips within zips'.
+            List<String> zipEntryNames = new ArrayList<String>();
+            for (ZipEntry entry = zin.getNextEntry(); entry != null; entry = zin.getNextEntry())
+            {
+                zipEntryNames.add(entry.getName());
+            }
+
+            if (logger.isDebugEnabled())
+            {
+                int count = zipEntryNames.size();
+                logger.debug("Transformed " + count + (count == 1 ? " zip entry" : " zip entries"));
+            }
+
+            // dump it all to the writer
+            writer.putContent(createTextContentFrom(zipEntryNames));
+        }
+        finally
+        {
+            // Closing the zip stream also closes the wrapped content stream.
+            InputStream toClose = (zin != null) ? zin : is;
+            if (toClose != null)
+            {
+                // Best effort: log a failed close instead of masking a transformation failure.
+                try { toClose.close(); } catch (Throwable e) { logger.warn("Failed to close zip content stream", e); }
+            }
+        }
+    }
+
+    /**
+     * Builds the text output from the entry names.
+     *
+     * @param zipEntryNames names of the zip entries, in the order they were read
+     * @return the names concatenated into one string, each followed by a newline character
+     */
+    private String createTextContentFrom(List<String> zipEntryNames)
+    {
+        StringBuilder result = new StringBuilder();
+        for (String entryName : zipEntryNames)
+        {
+            result.append(entryName).append('\n');
+        }
+        return result.toString();
+    }
+}
diff --git a/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java
new file mode 100644
index 0000000000..0bfc128a3b
--- /dev/null
+++ b/source/java/org/alfresco/repo/content/transform/ArchiveContentTransformerTest.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2005-2010 Alfresco Software Limited.
+ *
+ * This file is part of Alfresco
+ *
+ * Alfresco is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Alfresco is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.alfresco.repo.content.transform;
+
+import org.alfresco.repo.content.MimetypeMap;
+import org.alfresco.service.cmr.repository.TransformationOptions;
+
+/**
+ * Test class for ArchiveContentTransformer.
+ *
+ * @see org.alfresco.repo.content.transform.ArchiveContentTransformer
+ * @author Neil McErlean
+ */
+public class ArchiveContentTransformerTest extends AbstractContentTransformerTest
+{
+    // Created afresh in setUp and handed out for every mimetype pair.
+    private ContentTransformer transformer;
+
+    @Override
+    public void setUp() throws Exception
+    {
+        super.setUp();
+        transformer = new ArchiveContentTransformer();
+    }
+
+    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
+    {
+        return transformer;
+    }
+
+    /** Only ZIP to plain text is supported; the reverse direction and ZIP to ZIP must be rejected. */
+    public void testIsTransformable() throws Exception
+    {
+        assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_ZIP, MimetypeMap.MIMETYPE_TEXT_PLAIN, new TransformationOptions()));
+        assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_ZIP, new TransformationOptions()));
+        assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_ZIP, MimetypeMap.MIMETYPE_ZIP, new TransformationOptions()));
+    }
+
+    @Override
+    protected boolean isQuickPhraseExpected(String targetMimetype)
+    {
+        // The Zip transformer produces names of the entries, not their contents.
+        return false;
+    }
+
+    @Override
+    protected boolean isQuickWordsExpected(String targetMimetype)
+    {
+        // The Zip transformer produces names of the entries, not their contents.
+        return false;
+    }
+}
diff --git a/source/test-resources/quick/quick.zip b/source/test-resources/quick/quick.zip
new file mode 100644
index 0000000000..168109f4a3
Binary files /dev/null and b/source/test-resources/quick/quick.zip differ