Moving to root below branch label

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@2005 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2005-12-08 07:13:07 +00:00
commit e1e6508fec
1095 changed files with 230566 additions and 0 deletions

View File

@@ -0,0 +1,242 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.Collections;
import java.util.Map;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentAccessor;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Provides basic services for {@link org.alfresco.repo.content.transform.ContentTransformer}
* implementations.
* <p>
* This class maintains the performance measures for the transformers as well, making sure that
* there is an extra penalty for transformers that fail regularly.
*
* @author Derek Hulley
*/
public abstract class AbstractContentTransformer implements ContentTransformer
{
    private static final Log logger = LogFactory.getLog(AbstractContentTransformer.class);

    /**
     * Time, in milliseconds, recorded against this transformer whenever a transformation fails.
     * It is deliberately large so that transformers that tend to fail lose out to competitors.
     */
    private static final long FAILURE_PENALTY_MS = 10000L;

    private MimetypeService mimetypeService;
    /** Running average of the recorded transformation times, in ms. Guarded by <tt>this</tt>. */
    private double averageTime = 0.0;
    /** Number of samples folded into the running average. Guarded by <tt>this</tt>. */
    private long count = 0L;

    /**
     * All transformers start with an average transformation time of 0.0ms.
     */
    protected AbstractContentTransformer()
    {
        // the field initializers already establish the 0.0ms starting average
    }

    /**
     * Helper setter of the mimetype service.  This is not always required.
     *
     * @param mimetypeService the service that subclasses can fetch via {@link #getMimetypeService()}
     */
    public void setMimetypeService(MimetypeService mimetypeService)
    {
        this.mimetypeService = mimetypeService;
    }

    /**
     * @return Returns the mimetype helper, or <tt>null</tt> if it was never set
     */
    protected MimetypeService getMimetypeService()
    {
        return mimetypeService;
    }

    /**
     * @return Returns the simple class name along with the current average transformation time
     */
    @Override
    public String toString()
    {
        // Go through the synchronized getter: the average is a double that is only ever
        // written under this object's monitor, so reading it without the lock could
        // observe a stale or torn value.
        long average = getTransformationTime();
        StringBuilder sb = new StringBuilder();
        sb.append(this.getClass().getSimpleName())
          .append("[ average=").append(average).append("ms")
          .append("]");
        return sb.toString();
    }

    /**
     * Convenience to fetch and check the mimetype for the given content
     *
     * @param content the reader/writer for the content
     * @return Returns the mimetype for the content
     * @throws AlfrescoRuntimeException if the content doesn't have a mimetype
     */
    protected String getMimetype(ContentAccessor content)
    {
        String mimetype = content.getMimetype();
        if (mimetype == null)
        {
            throw new AlfrescoRuntimeException("Mimetype is mandatory for transformation: " + content);
        }
        // done
        return mimetype;
    }

    /**
     * Convenience method to check the reliability of a transformation
     *
     * @param reader the source of the content, which must carry a mimetype
     * @param writer the target of the content, which must carry a mimetype
     * @throws AlfrescoRuntimeException if either mimetype is missing or
     *      if the reliability isn't > 0
     */
    protected void checkReliability(ContentReader reader, ContentWriter writer)
    {
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);
        if (getReliability(sourceMimetype, targetMimetype) <= 0.0)
        {
            throw new AlfrescoRuntimeException("Zero scoring transformation attempted: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer);
        }
        // it all checks out OK
    }

    /**
     * Method to be implemented by subclasses wishing to make use of the common infrastructural code
     * provided by this class.
     *
     * @param reader the source of the content to transform
     * @param writer the target to which to write the transformed content
     * @param options a map of options to use when performing the transformation.  The map
     *      will never be null.
     * @throws Exception exceptions will be handled by this class - subclasses can throw anything
     */
    protected abstract void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception;

    /**
     * Performs the transformation without any options.
     *
     * @see #transform(ContentReader, ContentWriter, Map)
     * @see #transformInternal(ContentReader, ContentWriter, Map)
     */
    public final void transform(ContentReader reader, ContentWriter writer) throws ContentIOException
    {
        transform(reader, writer, null);
    }

    /**
     * Performs the following:
     * <ul>
     *   <li>Times the transformation</li>
     *   <li>Ensures that the transformation is allowed</li>
     *   <li>Calls the subclass implementation of {@link #transformInternal(ContentReader, ContentWriter, Map)}</li>
     *   <li>Transforms any exceptions generated</li>
     *   <li>Logs a successful transformation</li>
     * </ul>
     * Subclass need only be concerned with performing the transformation.
     * <p>
     * If the options provided are null, then an empty map is substituted.
     *
     * @param reader the source of the content to transform
     * @param writer the target to which to write the transformed content
     * @param options the transformation options, which may be <tt>null</tt>
     * @throws AlfrescoRuntimeException if the transformation is not allowed
     * @throws ContentIOException if the subclass implementation throws anything at all
     */
    public final void transform(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws ContentIOException
    {
        // begin timing
        long before = System.currentTimeMillis();

        // check the reliability
        checkReliability(reader, writer);

        // check options map
        if (options == null)
        {
            options = Collections.emptyMap();
        }

        try
        {
            transformInternal(reader, writer, options);
        }
        catch (Throwable e)
        {
            // Make sure that this transformation gets set back i.t.o. time taken.
            // This will ensure that transformers that compete for the same transformation
            // will be prejudiced against transformers that tend to fail
            recordTime(FAILURE_PENALTY_MS);   // 10 seconds, i.e. rubbish

            throw new ContentIOException("Content conversion failed: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer + "\n" +
                    " options: " + options,
                    e);
        }

        // record time
        long after = System.currentTimeMillis();
        recordTime(after - before);

        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Completed transformation: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer + "\n" +
                    " options: " + options + "\n" +
                    " transformer: " + this);
        }
    }

    /**
     * @return Returns the calculated running average of the current transformations,
     *      truncated to whole milliseconds
     */
    public synchronized long getTransformationTime()
    {
        return (long) averageTime;
    }

    /**
     * Records and updates the average transformation time for this transformer.
     * <p>
     * Subclasses should call this after every transformation in order to keep
     * the running average of the transformation times up to date.
     * <p>
     * This method is thread-safe.  The time spent in this method is negligible
     * so the impact will be minor.
     *
     * @param transformationTime the time it took to perform the transformation.
     *      The value may be 0.
     */
    protected final synchronized void recordTime(long transformationTime)
    {
        if (count == Long.MAX_VALUE)
        {
            // we have reached the max count - reduce it by half
            // the average fluctuation won't be extreme
            count /= 2L;
        }
        // adjust the average incrementally: new = old + (sample - old) / n
        count++;
        double diffTime = ((double) transformationTime) - averageTime;
        averageTime += diffTime / (double) count;
    }
}

View File

@@ -0,0 +1,214 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.BaseSpringTest;
import org.alfresco.util.TempFileProvider;
/**
* Provides a base set of tests for testing
* {@link org.alfresco.repo.content.transform.ContentTransformer}
* implementations.
*
* @author Derek Hulley
*/
public abstract class AbstractContentTransformerTest extends BaseSpringTest
{
    /** The phrase that every "quick" test file contains */
    private static final String QUICK_CONTENT = "The quick brown fox jumps over the lazy dog";
    /** The individual words that must survive any conversion to a text format */
    private static final String[] QUICK_WORDS = new String[] {
            "quick", "brown", "fox", "jumps", "lazy", "dog"};

    protected MimetypeMap mimetypeMap;
    protected ContentTransformer transformer;

    public final void setMimetypeMap(MimetypeMap mimetypeMap)
    {
        this.mimetypeMap = mimetypeMap;
    }

    /**
     * Fetches a transformer to test for a given transformation.  The transformer
     * does not <b>have</b> to be reliable for the given format - if it isn't
     * then it will be ignored.
     *
     * @param sourceMimetype the sourceMimetype to be tested
     * @param targetMimetype the targetMimetype to be tested
     * @return Returns the <tt>ContentTransformer</tt> that will be tested by
     *      the methods implemented in this class.  A null return value is
     *      acceptable if the source and target mimetypes are not of interest.
     */
    protected abstract ContentTransformer getTransformer(String sourceMimetype, String targetMimetype);

    /**
     * Ensures that the temp locations are cleaned out before the tests start
     */
    @Override
    protected void onSetUpInTransaction() throws Exception
    {
        // perform a little cleaning up
        long now = System.currentTimeMillis();
        TempFileProvider.TempFileCleanerJob.removeFiles(now);
    }

    /**
     * Check that all objects are present
     */
    public void testSetUp() throws Exception
    {
        assertNotNull("MimetypeMap not present", mimetypeMap);
        // check that the quick resources are available
        File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
        assertNotNull(sourceFile);
    }

    /**
     * Helper method to load one of the "The quick brown fox" files from the
     * classpath.
     * <p>
     * The resource URL is converted to a file via its URI so that escaped characters
     * in the classpath location (e.g. <tt>%20</tt> for a space) are decoded properly.
     *
     * @param extension the extension of the file required
     * @return Returns a test resource loaded from the classpath or <tt>null</tt> if
     *      no resource could be found on the file system.
     * @throws IOException
     */
    public static File loadQuickTestFile(String extension) throws IOException
    {
        URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/quick." + extension);
        if (url == null)
        {
            return null;
        }
        File file = null;
        try
        {
            // decodes any URL escaping present in the resource location
            file = new File(url.toURI());
        }
        catch (java.net.URISyntaxException e)
        {
            // the URL was not properly escaped, so its path can be used as is
            file = new File(url.getFile());
        }
        catch (IllegalArgumentException e)
        {
            // not a plain file URI (e.g. a resource inside a jar) - fall back to the raw path
            file = new File(url.getFile());
        }
        if (!file.exists())
        {
            return null;
        }
        return file;
    }

    /**
     * Tests the full range of transformations available on the
     * {@link #getTransformer(String, String) transformer} subject to the
     * {@link org.alfresco.util.test.QuickFileTest available test files}
     * and the {@link ContentTransformer#getReliability(String, String) reliability} of
     * the {@link #getTransformer(String, String) transformer} itself.
     * <p>
     * Each transformation is repeated several times, with a transformer being
     * {@link #getTransformer(String, String) requested} for each transformation.  In the
     * case where optimizations are being done around the selection of the most
     * appropriate transformer, different transformers could be used during the iteration
     * process.
     */
    public void testAllConversions() throws Exception
    {
        // get all mimetypes
        List<String> mimetypes = mimetypeMap.getMimetypes();
        for (String sourceMimetype : mimetypes)
        {
            // attempt to get a source file for each mimetype
            String sourceExtension = mimetypeMap.getExtension(sourceMimetype);
            File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(sourceExtension);
            if (sourceFile == null)
            {
                continue;  // no test file available for that extension
            }

            // attempt to convert to every other mimetype
            for (String targetMimetype : mimetypes)
            {
                ContentWriter targetWriter = null;
                // remember the transformer that was actually used so that it can be logged
                ContentTransformer lastTransformer = null;
                // construct a reader onto the source file
                ContentReader sourceReader = new FileContentReader(sourceFile);

                // perform the transformation several times so that we get a good idea of performance
                int count = 0;
                for (int i = 0; i < 5; i++)
                {
                    // must we test the transformation?
                    ContentTransformer currentTransformer = getTransformer(sourceMimetype, targetMimetype);
                    if (currentTransformer == null)
                    {
                        break;  // test is not required
                    }
                    else if (currentTransformer.getReliability(sourceMimetype, targetMimetype) <= 0.0)
                    {
                        break;  // not reliable for this transformation
                    }
                    lastTransformer = currentTransformer;

                    // make a writer for the target file
                    String targetExtension = mimetypeMap.getExtension(targetMimetype);
                    File targetFile = TempFileProvider.createTempFile(
                            getClass().getSimpleName() + "_" + getName() + "_" + sourceExtension + "_",
                            "." + targetExtension);
                    targetWriter = new FileContentWriter(targetFile);

                    // do the transformation
                    sourceReader.setMimetype(sourceMimetype);
                    targetWriter.setMimetype(targetMimetype);
                    currentTransformer.transform(sourceReader.getReader(), targetWriter);

                    // if the target format is any type of text, then it must contain the 'quick' phrase
                    checkQuickContent(currentTransformer, sourceReader, targetWriter, targetMimetype);

                    // increment count
                    count++;
                }

                if (logger.isDebugEnabled())
                {
                    logger.debug("Transformation performed " + count + " time: " +
                            sourceMimetype + " --> " + targetMimetype + "\n" +
                            " source: " + sourceReader + "\n" +
                            " target: " + targetWriter + "\n" +
                            " transformer: " + lastTransformer);
                }
            }
        }
    }

    /**
     * Checks the written content for the "quick" phrase when the target is plain text, or for
     * each of the individual "quick" words when the target is any other kind of text.  Targets
     * that are not text are not checked.
     *
     * @param usedTransformer the transformer that produced the content (for failure messages)
     * @param sourceReader the reader onto the source file (for failure messages)
     * @param targetWriter the writer that received the transformed content
     * @param targetMimetype the mimetype that the content was transformed to
     */
    private void checkQuickContent(
            ContentTransformer usedTransformer,
            ContentReader sourceReader,
            ContentWriter targetWriter,
            String targetMimetype)
    {
        if (targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
        {
            ContentReader targetReader = targetWriter.getReader();
            String checkContent = targetReader.getContentString();
            assertTrue("Quick phrase not present in document converted to text: \n" +
                    " transformer: " + usedTransformer + "\n" +
                    " source: " + sourceReader + "\n" +
                    " target: " + targetWriter,
                    checkContent.contains(QUICK_CONTENT));
        }
        else if (targetMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT))
        {
            ContentReader targetReader = targetWriter.getReader();
            String checkContent = targetReader.getContentString();
            // essentially check that FTS indexing can use the conversion properly
            for (int word = 0; word < QUICK_WORDS.length; word++)
            {
                // report the missing word itself rather than its index in the array
                assertTrue("Quick phrase word not present in document converted to text: \n" +
                        " transformer: " + usedTransformer + "\n" +
                        " source: " + sourceReader + "\n" +
                        " target: " + targetWriter + "\n" +
                        " word: " + QUICK_WORDS[word],
                        checkContent.contains(QUICK_WORDS[word]));
            }
        }
    }
}

View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Allows direct streaming from source to target when the respective mimetypes
* are identical, except where the mimetype is text.
* <p>
* Text has to be transformed based on the encoding even if the mimetypes don't
* reflect it.
*
* @see org.alfresco.repo.content.transform.StringExtractingContentTransformer
*
* @author Derek Hulley
*/
public class BinaryPassThroughContentTransformer extends AbstractContentTransformer
{
    private static final Log logger = LogFactory.getLog(BinaryPassThroughContentTransformer.class);

    /**
     * Scores the transformation: text sources are never handled here, and any other
     * source is only handled when the target mimetype is exactly the same.
     *
     * @param sourceMimetype the mimetype of the content to stream from
     * @param targetMimetype the mimetype of the content to stream to
     * @return Returns 1.0 if the formats are identical and not text, otherwise 0.0
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        boolean sourceIsText = sourceMimetype.startsWith(StringExtractingContentTransformer.PREFIX_TEXT);
        if (sourceIsText)
        {
            // only binary content can be streamed straight through
            return 0.0;
        }
        // nothing is converted, so the formats have to match exactly
        return sourceMimetype.equals(targetMimetype) ? 1.0 : 0.0;
    }

    /**
     * Copies the source content stream directly into the writer.  The options are not used.
     *
     * @param reader the source of the content
     * @param writer the target that receives the unmodified content
     * @param options ignored
     */
    public void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception
    {
        // no conversion at all - hand the raw stream to the writer
        writer.putContent(reader.getContentInputStream());
    }
}

View File

@@ -0,0 +1,59 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* @see org.alfresco.repo.content.transform.BinaryPassThroughContentTransformer
*
* @author Derek Hulley
*/
public class BinaryPassThroughContentTransformerTest extends AbstractContentTransformerTest
{
    private static final Log logger = LogFactory.getLog(BinaryPassThroughContentTransformerTest.class);

    /** The transformer under test, created afresh during set up */
    private ContentTransformer transformer;

    /**
     * Creates the pass-through transformer that the tests run against
     */
    public void onSetUpInTransaction() throws Exception
    {
        transformer = new BinaryPassThroughContentTransformer();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Text-based formats must score 0.0 and binary formats must score 1.0 when the
     * source and target mimetypes are the same.
     */
    public void testReliability() throws Exception
    {
        checkSameTypeReliability("Mimetype should not be supported", 0.0, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        checkSameTypeReliability("Mimetype should not be supported", 0.0, MimetypeMap.MIMETYPE_XML);
        checkSameTypeReliability("Mimetype should be supported", 1.0, MimetypeMap.MIMETYPE_WORD);
        checkSameTypeReliability("Mimetype should be supported", 1.0, MimetypeMap.MIMETYPE_EXCEL);
    }

    /**
     * Asserts the reliability reported for a transformation from a mimetype to itself.
     *
     * @param message the assertion failure message
     * @param expected the reliability that the transformer must report
     * @param mimetype used as both the source and the target mimetype
     */
    private void checkSameTypeReliability(String message, double expected, String mimetype)
    {
        double reliability = transformer.getReliability(mimetype, mimetype);
        assertEquals(message, expected, reliability);
    }
}

View File

@@ -0,0 +1,149 @@
package org.alfresco.repo.content.transform;
import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
import org.springframework.beans.factory.InitializingBean;
/**
* Transformer that passes a document through several nested transformations
* in order to accomplish its goal.
*
* @author Derek Hulley
*/
public class ComplexContentTransformer extends AbstractContentTransformer implements InitializingBean
{
    /** The transformers to apply, in order */
    private List<ContentTransformer> transformers;
    /** The mimetypes that the content passes through between consecutive transformers */
    private List<String> intermediateMimetypes;

    public ComplexContentTransformer()
    {
    }

    /**
     * The list of transformers to use.
     * <p>
     * A list holding just one transformer is acceptable; that transformer is then
     * used on its own.
     *
     * @param transformers list of <b>at least one</b> transformer
     */
    public void setTransformers(List<ContentTransformer> transformers)
    {
        this.transformers = transformers;
    }

    /**
     * Set the intermediate mimetypes that the transformer must take the content
     * through.  When <b>A..B..C</b> is performed in order to simulate <b>A..C</b>,
     * <b>B</b> is the intermediate mimetype.  For <b>n</b>
     * {@link #setTransformers(List) transformers} there must always be <b>n-1</b>
     * intermediate mimetypes.
     *
     * @param intermediateMimetypes intermediate mimetypes to transition the content
     *      through.
     */
    public void setIntermediateMimetypes(List<String> intermediateMimetypes)
    {
        this.intermediateMimetypes = intermediateMimetypes;
    }

    /**
     * Ensures that required properties have been set
     *
     * @throws AlfrescoRuntimeException if there are no transformers, if the number of
     *      intermediate mimetypes is not one less than the number of transformers or
     *      if the mimetype service is missing
     */
    public void afterPropertiesSet() throws Exception
    {
        if (transformers == null || transformers.isEmpty())
        {
            throw new AlfrescoRuntimeException("At least one inner transformer must be supplied: " + this);
        }
        int requiredIntermediates = transformers.size() - 1;
        if (intermediateMimetypes == null || intermediateMimetypes.size() != requiredIntermediates)
        {
            throw new AlfrescoRuntimeException(
                    "There must be n-1 intermediate mimetypes, where n is the number of transformers");
        }
        if (getMimetypeService() == null)
        {
            throw new AlfrescoRuntimeException("'mimetypeService' is a required property");
        }
    }

    /**
     * @return Returns the multiple of the reliabilities of the chain of transformers
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        Iterator<ContentTransformer> steps = transformers.iterator();
        Iterator<String> intermediates = intermediateMimetypes.iterator();

        double product = 1.0;
        String stepSource = sourceMimetype;
        while (steps.hasNext())
        {
            ContentTransformer step = steps.next();
            // the last step goes to the final target; every other step goes to an intermediate
            String stepTarget = steps.hasNext() ? intermediates.next() : targetMimetype;
            product *= step.getReliability(stepSource, stepTarget);
            // this step's target is the next step's source
            stepSource = stepTarget;
        }
        return product;
    }

    /**
     * Runs the content through each transformer in turn.  Every step except the last
     * writes to a temp file, which then becomes the source for the following step; the
     * last step writes to the writer supplied.
     */
    @Override
    public void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception
    {
        Iterator<ContentTransformer> steps = transformers.iterator();
        Iterator<String> intermediates = intermediateMimetypes.iterator();

        ContentReader stepReader = reader;
        while (steps.hasNext())
        {
            ContentTransformer step = steps.next();
            ContentWriter stepWriter;
            if (steps.hasNext())
            {
                stepWriter = createIntermediateWriter(stepReader, intermediates.next());
            }
            else
            {
                // final step - write to the real target
                stepWriter = writer;
            }
            step.transform(stepReader, stepWriter, options);
            // what was just written is what gets read next
            stepReader = stepWriter.getReader();
        }
    }

    /**
     * Makes a temp file writer for an intermediate step.  The temp file name carries the
     * extensions of both the current source mimetype and the intermediate mimetype.
     *
     * @param source the reader that the step will read from
     * @param mimetype the intermediate mimetype that the step must produce
     * @return Returns a new writer with the mimetype already set
     */
    private ContentWriter createIntermediateWriter(ContentReader source, String mimetype)
    {
        String sourceExt = getMimetypeService().getExtension(source.getMimetype());
        String targetExt = getMimetypeService().getExtension(mimetype);
        File tempFile = TempFileProvider.createTempFile(
                "ComplextTransformer_intermediate_" + sourceExt + "_",
                "." + targetExt);
        ContentWriter intermediateWriter = new FileContentWriter(tempFile);
        intermediateWriter.setMimetype(mimetype);
        return intermediateWriter;
    }
}

View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.alfresco.repo.content.MimetypeMap;
/**
* Tests a transformation from Powerpoint->PDF->Text.
*
* @see org.alfresco.repo.content.transform.ComplexContentTransformer
*
* @author Derek Hulley
*/
public class ComplexContentTransformerTest extends AbstractContentTransformerTest
{
    /** The transformer under test; only created when both inner transformers are usable */
    private ComplexContentTransformer transformer;
    /** <tt>true</tt> if both inner transformers reported a non-zero reliability during set up */
    private boolean isAvailable;

    /**
     * Builds a PPT to PDF to plain text transformer from the <tt>transformer.OpenOffice</tt>
     * and <tt>transformer.PdfBox</tt> beans, provided that both of them report a non-zero
     * reliability for their part of the transformation.
     */
    public void onSetUpInTransaction() throws Exception
    {
        ContentTransformer openOffice = (ContentTransformer) applicationContext.getBean("transformer.OpenOffice");
        ContentTransformer pdfBox = (ContentTransformer) applicationContext.getBean("transformer.PdfBox");

        // both steps have to be working for this test to mean anything
        boolean firstStepWorks =
                openOffice.getReliability(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_PDF) != 0.0;
        boolean secondStepWorks = firstStepWorks &&
                pdfBox.getReliability(MimetypeMap.MIMETYPE_PDF, MimetypeMap.MIMETYPE_TEXT_PLAIN) != 0.0;
        isAvailable = firstStepWorks && secondStepWorks;
        if (!isAvailable)
        {
            return;
        }

        // the chain of transformers
        List<ContentTransformer> chain = new ArrayList<ContentTransformer>(2);
        chain.add(openOffice);
        chain.add(pdfBox);

        transformer = new ComplexContentTransformer();
        transformer.setMimetypeService(mimetypeMap);
        transformer.setTransformers(chain);
        // PDF is the only intermediate format
        transformer.setIntermediateMimetypes(Collections.singletonList(MimetypeMap.MIMETYPE_PDF));
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Checks the scores for PPT to PDF (expected 0.0) and PPT to plain text (expected 1.0).
     * Nothing is checked when the inner transformers were not available during set up.
     */
    public void testReliability() throws Exception
    {
        if (!isAvailable)
        {
            return;
        }
        double pptToPdf = transformer.getReliability(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_PDF);
        assertEquals("Mimetype should not be supported", 0.0, pptToPdf);

        double pptToText = transformer.getReliability(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 1.0, pptToText);
    }
}

View File

@@ -0,0 +1,248 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.util.LinkedList;
import java.util.Map;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* A chain of transformations that is able to produce non-zero reliability
* transformation from one mimetype to another.
* <p>
* The reliability of the chain is the product of all the individual
* transformations.
*
* @author Derek Hulley
*/
public class CompoundContentTransformer implements ContentTransformer
{
    private static final Log logger = LogFactory.getLog(CompoundContentTransformer.class);

    /** a sequence of transformers to apply */
    private LinkedList<Transformation> chain;
    /** the combined reliability of all the transformations in the chain */
    private double reliability;

    public CompoundContentTransformer()
    {
        chain = new LinkedList<Transformation>();
        reliability = 1.0;
    }

    /**
     * Adds a transformation to the chain.  The reliability of each transformation
     * added must be greater than 0.0 and no more than 1.0.
     * <p>
     * The reliability is validated <b>before</b> the chain is touched, so a rejected
     * transformation leaves the chain and its combined reliability unchanged.
     *
     * @param sourceMimetype the mimetype that the transformation reads
     * @param targetMimetype the mimetype that the transformation writes
     * @param transformer the transformer that will transform from the source to
     *      the target mimetype
     * @throws AlfrescoRuntimeException if the transformer's reliability for the
     *      given mimetypes is not in the range (0.0, 1.0]
     */
    public void addTransformation(String sourceMimetype, String targetMimetype, ContentTransformer transformer)
    {
        // validate first; the negated form also rejects NaN
        double transformerReliability = transformer.getReliability(sourceMimetype, targetMimetype);
        if (!(transformerReliability > 0.0 && transformerReliability <= 1.0))
        {
            throw new AlfrescoRuntimeException(
                    "Reliability of transformer must be between 0.0 and 1.0: \n" +
                    " transformer: " + transformer + "\n" +
                    " source: " + sourceMimetype + "\n" +
                    " target: " + targetMimetype + "\n" +
                    " reliability: " + transformerReliability);
        }
        // create a transformation that aggregates the transform info
        Transformation transformation = new Transformation(
                transformer,
                sourceMimetype,
                targetMimetype);
        // add to the chain
        chain.add(transformation);
        // recalculate combined reliability
        this.reliability *= transformerReliability;
    }

    /**
     * In order to score anything, the source mimetype must match the source
     * mimetype of the first transformer and the target mimetype must match
     * the target mimetype of the last transformer in the chain.
     *
     * @return Returns the product of the individual reliability scores of the
     *      transformations in the chain, or 0.0 if the chain is empty or does
     *      not start and end with the given mimetypes
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        if (chain.isEmpty())
        {
            // no transformers therefore no transformation possible
            return 0.0;
        }
        if (!isChainApplicable(sourceMimetype, targetMimetype))
        {
            // the source type of the first transformation must match the source
            // the target type of the last transformation must match the target
            return 0.0;
        }
        return reliability;
    }

    /**
     * Checks both ends of the chain against the given mimetypes.  The chain must not be empty.
     *
     * @return Returns <tt>true</tt> only if the first transformation reads the source
     *      mimetype <b>and</b> the last transformation writes the target mimetype
     */
    private boolean isChainApplicable(String sourceMimetype, String targetMimetype)
    {
        Transformation first = chain.getFirst();
        Transformation last = chain.getLast();
        return first.getSourceMimetype().equals(sourceMimetype)
                && last.getTargetMimetype().equals(targetMimetype);
    }

    /**
     * @return Returns 0 if there are no transformers in the chain otherwise
     *      returns the sum of all the individual transformation times
     */
    public long getTransformationTime()
    {
        long transformationTime = 0L;
        for (Transformation transformation : chain)
        {
            ContentTransformer transformer = transformation.transformer;
            transformationTime += transformer.getTransformationTime();
        }
        return transformationTime;
    }

    /**
     * Executes the chain without any options.
     *
     * @see #transform(ContentReader, ContentWriter, Map)
     */
    public void transform(ContentReader reader, ContentWriter writer) throws ContentIOException
    {
        transform(reader, writer, null);
    }

    /**
     * Executes each transformer in the chain, passing the content between them.
     * Every transformation except the last writes to a temp file that is then read
     * by the following transformation; the last one writes to the writer supplied.
     *
     * @param reader the source content, with a mimetype matching the start of the chain
     * @param writer the final target, with a mimetype matching the end of the chain
     * @param options passed on, as is, to every transformation in the chain
     * @throws AlfrescoRuntimeException if the chain is empty, if the reader or writer
     *      mimetypes don't match the ends of the chain or if a transformation leaves
     *      its writer open
     */
    public void transform(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws ContentIOException
    {
        if (chain.isEmpty())
        {
            throw new AlfrescoRuntimeException("No transformations present in chain");
        }

        // check that the mimetypes of the transformation are valid for the chain
        String sourceMimetype = reader.getMimetype();
        String targetMimetype = writer.getMimetype();
        if (!isChainApplicable(sourceMimetype, targetMimetype))
        {
            throw new AlfrescoRuntimeException("Attempting to perform unreliable transformation: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer);
        }

        ContentReader currentReader = reader;
        ContentWriter currentWriter = null;
        int lastIndex = chain.size() - 1;
        int currentIndex = 0;
        for (Transformation transformation : chain)
        {
            boolean last = (currentIndex == lastIndex);
            if (last)
            {
                // we are on the last transformation so use the final output writer
                currentWriter = writer;
            }
            else
            {
                // have to create an intermediate writer - just use a file writer
                File tempFile = TempFileProvider.createTempFile("transform", ".tmp");
                currentWriter = new FileContentWriter(tempFile);
                // set the writer's mimetype to conform to the transformation we are using
                currentWriter.setMimetype(transformation.getTargetMimetype());
            }

            // transform from the current reader to the current writer
            transformation.execute(currentReader, currentWriter, options);
            if (!currentWriter.isClosed())
            {
                throw new AlfrescoRuntimeException("Writer not closed by transformation: \n" +
                        " transformation: " + transformation + "\n" +
                        " writer: " + currentWriter);
            }

            // if we have more transformations, then use the written content
            // as the next source
            if (!last)
            {
                currentReader = currentWriter.getReader();
            }
            // move on, otherwise the last transformation is never recognised as such
            currentIndex++;
        }

        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Executed complex transformation: \n" +
                    " chain: " + chain + "\n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer);
        }
    }

    /**
     * A transformation that contains the transformer as well as the
     * transformation mimetypes to be used
     */
    public static class Transformation extends ContentTransformerRegistry.TransformationKey
    {
        private ContentTransformer transformer;

        public Transformation(ContentTransformer transformer, String sourceMimetype, String targetMimetype)
        {
            super(sourceMimetype, targetMimetype);
            this.transformer = transformer;
        }

        /**
         * Executes the transformation
         *
         * @param reader the reader from which to read the content
         * @param writer the writer to write content to
         * @param options the options to execute with
         * @throws AlfrescoRuntimeException if the mimetype of the reader or the writer
         *      doesn't match the mimetypes of this transformation
         * @throws ContentIOException if the transformation fails
         */
        public void execute(ContentReader reader, ContentWriter writer, Map<String, Object> options)
                throws ContentIOException
        {
            String sourceMimetype = getSourceMimetype();
            String targetMimetype = getTargetMimetype();
            // check that the source and target mimetypes of the reader and writer match
            if (!sourceMimetype.equals(reader.getMimetype()))
            {
                throw new AlfrescoRuntimeException("The source mimetype doesn't match the reader's mimetype: \n" +
                        " source mimetype: " + sourceMimetype + "\n" +
                        " reader: " + reader);
            }
            if (!targetMimetype.equals(writer.getMimetype()))
            {
                throw new AlfrescoRuntimeException("The target mimetype doesn't match the writer's mimetype: \n" +
                        " target mimetype: " + targetMimetype + "\n" +
                        " writer: " + writer);
            }
            transformer.transform(reader, writer, options);
        }
    }
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
/**
 * Contract for components that convert content from one mimetype into another.
 *
 * @author Derek Hulley
 */
public interface ContentTransformer
{
    /**
     * Reports, approximately, how accurately this transformer converts between
     * the given mimetypes.
     * <p>
     * The value is consulted up front when choosing which transformer, out of
     * several candidates, should carry out a specific transformation.
     *
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns a score from 0.0 to 1.0, where 0.0 means that the
     *      transformation cannot be performed at all and 1.0 means that
     *      it can be performed perfectly.
     */
    double getReliability(String sourceMimetype, String targetMimetype);

    /**
     * Estimates the duration of a transformation; usually a worst case guess.
     * <p>
     * The value is consulted up front to choose between transformers that are
     * equally reliable for a specific transformation.
     *
     * @return Returns the approximate number of milliseconds per transformation
     */
    long getTransformationTime();

    /**
     * @see #transform(ContentReader, ContentWriter, Map)
     */
    void transform(ContentReader reader, ContentWriter writer) throws ContentIOException;

    /**
     * Converts the content supplied by the reader, in the source mimetype, and
     * hands the result to the writer in the target mimetype.
     * <p>
     * Whether the transformation is viable can be established beforehand by
     * calling {@link #getReliability(String, String)}.
     * <p>
     * Both the reader and the writer <b>must</b> expose their mimetypes through
     * {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()}.
     *
     * @param reader the source of the content
     * @param writer the destination of the transformed content
     * @param options transformer-dependent options, which may be null
     * @throws ContentIOException if an IO exception occurs
     */
    void transform(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws ContentIOException;
}

View File

@@ -0,0 +1,362 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.util.Assert;
/**
 * Holds and provides the most appropriate content transformer for
 * a particular source and target mimetype transformation request.
 * <p>
 * The transformers themselves are used to determine the applicability
 * of a particular transformation.  Explicitly configured transformations
 * bypass that discovery and are retained when the cache is reset.
 *
 * @see org.alfresco.repo.content.transform.ContentTransformer
 *
 * @author Derek Hulley
 */
public class ContentTransformerRegistry
{
    private static final Log logger = LogFactory.getLog(ContentTransformerRegistry.class);

    private static final String MSG_EXPLICIT_FORMAT =
            "Explicit transformation is 'from-mimetype', 'to-mimetype' and 'transformer': \n" +
            " list: ";

    private List<ContentTransformer> transformers;
    private MimetypeMap mimetypeMap;
    /** Explicitly configured transformations, kept so that they survive a cache reset */
    private Map<TransformationKey, List<ContentTransformer>> explicitTransformations;
    /** Cache of previously used transformations */
    private Map<TransformationKey, List<ContentTransformer>> transformationCache;
    /** Only ever reset to zero; it is not read anywhere in this class */
    private short accessCount;
    /** Controls read access to the transformation cache */
    private Lock transformationCacheReadLock;
    /** controls write access to the transformation cache */
    private Lock transformationCacheWriteLock;

    /**
     * @param mimetypeMap all the mimetypes available to the system
     */
    public ContentTransformerRegistry(MimetypeMap mimetypeMap)
    {
        Assert.notNull(mimetypeMap, "The MimetypeMap is mandatory");
        this.mimetypeMap = mimetypeMap;

        this.transformers = Collections.emptyList();     // just in case it isn't set
        explicitTransformations = new HashMap<TransformationKey, List<ContentTransformer>>(17);
        transformationCache = new HashMap<TransformationKey, List<ContentTransformer>>(17);

        accessCount = 0;
        // create lock objects for access to the cache
        ReadWriteLock transformationCacheLock = new ReentrantReadWriteLock();
        transformationCacheReadLock = transformationCacheLock.readLock();
        transformationCacheWriteLock = transformationCacheLock.writeLock();
    }

    /**
     * Provides a list of explicit transformers to use.  These bypass all discovery:
     * they are plugged directly into the cache and are put back whenever the
     * cache is {@link #resetCache() reset}.
     *
     * @param transformations list of ( list of ( (from-mimetype)(to-mimetype)(transformer) ) )
     * @throws AlfrescoRuntimeException if an entry does not consist of exactly a
     *      <tt>String</tt>, a <tt>String</tt> and a {@link ContentTransformer}
     */
    public void setExplicitTransformations(List<List<Object>> transformations)
    {
        // the cache is modified, so the write lock is required
        transformationCacheWriteLock.lock();
        try
        {
            for (List<Object> list : transformations)
            {
                // validate the shape and types up front instead of relying on a ClassCastException
                if (list.size() != 3
                        || !(list.get(0) instanceof String)
                        || !(list.get(1) instanceof String)
                        || !(list.get(2) instanceof ContentTransformer))
                {
                    throw new AlfrescoRuntimeException(MSG_EXPLICIT_FORMAT + list);
                }
                String sourceMimetype = (String) list.get(0);
                String targetMimetype = (String) list.get(1);
                ContentTransformer transformer = (ContentTransformer) list.get(2);
                // create the transformation
                TransformationKey key = new TransformationKey(sourceMimetype, targetMimetype);
                List<ContentTransformer> explicit = Collections.singletonList(transformer);
                // remember it so that a cache reset doesn't lose it
                explicitTransformations.put(key, explicit);
                // bypass all discovery and plug this directly into the cache
                transformationCache.put(key, explicit);
            }
        }
        finally
        {
            transformationCacheWriteLock.unlock();
        }
    }

    /**
     * Provides a list of self-discovering transformers that the registry will fall
     * back on if a transformation is not available from the explicitly set
     * transformations.
     *
     * @param transformers all the available transformers that the registry can
     *      work with
     */
    public void setTransformers(List<ContentTransformer> transformers)
    {
        this.transformers = transformers;
    }

    /**
     * Resets the transformation cache.  This allows a fresh analysis of the best
     * conversions based on actual average performance of the transformers.
     * <p>
     * Explicitly set transformations are not discoverable, so they are put
     * straight back into the cache.
     */
    public void resetCache()
    {
        // get a write lock on the cache
        transformationCacheWriteLock.lock();
        try
        {
            transformationCache.clear();
            transformationCache.putAll(explicitTransformations);
            accessCount = 0;
        }
        finally
        {
            transformationCacheWriteLock.unlock();
        }
        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Content transformation cache reset");
        }
    }

    /**
     * Gets the best transformer possible.  This is a combination of the most reliable
     * and the most performant transformer.
     * <p>
     * The list of candidate transformers is cached for quicker access next time; the
     * fastest candidate is selected on every call.
     *
     * @param sourceMimetype the source mimetype of the transformation
     * @param targetMimetype the target mimetype of the transformation
     * @return Returns a content transformer that can perform the desired
     *      transformation or null if no transformer could be found that would do it.
     * @throws AlfrescoRuntimeException if either mimetype is not present in the mimetype map
     */
    public ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        // check that the mimetypes are valid
        if (!mimetypeMap.getMimetypes().contains(sourceMimetype))
        {
            throw new AlfrescoRuntimeException("Unknown source mimetype: " + sourceMimetype);
        }
        if (!mimetypeMap.getMimetypes().contains(targetMimetype))
        {
            throw new AlfrescoRuntimeException("Unknown target mimetype: " + targetMimetype);
        }

        TransformationKey key = new TransformationKey(sourceMimetype, targetMimetype);
        List<ContentTransformer> transformers = null;
        transformationCacheReadLock.lock();
        try
        {
            // null means that the translation has not been requested before
            transformers = transformationCache.get(key);
        }
        finally
        {
            transformationCacheReadLock.unlock();
        }

        if (transformers == null)
        {
            // the translation has not been requested before
            // get a write lock on the cache
            // no double check done as it is not an expensive task
            transformationCacheWriteLock.lock();
            try
            {
                // find the most suitable transformers - may be empty list
                transformers = findTransformers(sourceMimetype, targetMimetype);
                // store the result even if it is empty
                transformationCache.put(key, transformers);
            }
            finally
            {
                transformationCacheWriteLock.unlock();
            }
        }

        // select the most performant transformer
        long bestTime = -1L;
        ContentTransformer bestTransformer = null;
        for (ContentTransformer transformer : transformers)
        {
            long transformationTime = transformer.getTransformationTime();
            // is it better?
            if (bestTransformer == null || transformationTime < bestTime)
            {
                bestTransformer = transformer;
                bestTime = transformationTime;
            }
        }
        // done
        return bestTransformer;
    }

    /**
     * Gets all transformers, of equal reliability, that can perform the requested transformation.
     *
     * @return Returns the candidate transformers for the translation - an empty list
     *      if all score 0.0 on reliability
     */
    private List<ContentTransformer> findTransformers(String sourceMimetype, String targetMimetype)
    {
        // search for a simple transformer that can do the job
        List<ContentTransformer> transformers = findDirectTransformers(sourceMimetype, targetMimetype);
        // get the complex transformers that can do the job
        List<ContentTransformer> complexTransformers = findComplexTransformer(sourceMimetype, targetMimetype);
        transformers.addAll(complexTransformers);
        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Searched for transformer: \n" +
                    " source mimetype: " + sourceMimetype + "\n" +
                    " target mimetype: " + targetMimetype + "\n" +
                    " transformers: " + transformers);
        }
        return transformers;
    }

    /**
     * Loops through the content transformers and picks the ones with the highest reliabilities.
     * <p>
     * Where there are several transformers that are equally reliable, they are all returned.
     *
     * @return Returns the most reliable transformers for the translation - empty list if there
     *      are none.  The list is modifiable.
     */
    private List<ContentTransformer> findDirectTransformers(String sourceMimetype, String targetMimetype)
    {
        double maxReliability = 0.0;
        List<ContentTransformer> bestTransformers = new ArrayList<ContentTransformer>(2);
        // loop through transformers
        for (ContentTransformer transformer : this.transformers)
        {
            double reliability = transformer.getReliability(sourceMimetype, targetMimetype);
            if (reliability <= 0.0)
            {
                // it is unusable
                continue;
            }
            else if (reliability < maxReliability)
            {
                // it is not the best one to use
                continue;
            }
            else if (reliability == maxReliability)
            {
                // it is as reliable as a previous transformer
            }
            else
            {
                // it is better than any previous transformer - wipe them
                bestTransformers.clear();
                maxReliability = reliability;
            }
            // add the transformer to the list
            bestTransformers.add(transformer);
        }
        // done
        return bestTransformers;
    }

    /**
     * Uses a list of known mimetypes to build transformations from several direct transformations.
     * <p>
     * Not implemented yet: an empty list is always returned.
     */
    private List<ContentTransformer> findComplexTransformer(String sourceMimetype, String targetMimetype)
    {
        // get a complete list of mimetypes
        // TODO: Build complex transformers by searching for transformations by mimetype
        return Collections.emptyList();
    }

    /**
     * Recursive method to build up a list of content transformers.
     * <p>
     * Placeholder only: it is not called from this class and always throws.
     */
    private void buildTransformer(List<ContentTransformer> transformers,
            double reliability,
            List<String> touchedMimetypes,
            String currentMimetype,
            String targetMimetype)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * A key for a combination of a source and target mimetype
     */
    public static class TransformationKey
    {
        private final String sourceMimetype;
        private final String targetMimetype;
        /** concatenated form, used for hashing only */
        private final String key;

        public TransformationKey(String sourceMimetype, String targetMimetype)
        {
            this.key = (sourceMimetype + "_" + targetMimetype);
            this.sourceMimetype = sourceMimetype;
            this.targetMimetype = targetMimetype;
        }

        public String getSourceMimetype()
        {
            return sourceMimetype;
        }

        public String getTargetMimetype()
        {
            return targetMimetype;
        }

        /**
         * Keys are equal when both their source and their target mimetypes are equal.
         * The two values are compared separately because the concatenated form is
         * ambiguous for mimetypes that themselves contain an underscore.
         */
        @Override
        public boolean equals(Object obj)
        {
            if (this == obj)
            {
                return true;
            }
            else if (!(obj instanceof TransformationKey))
            {
                // also covers null
                return false;
            }
            TransformationKey that = (TransformationKey) obj;
            return nullSafeEquals(this.sourceMimetype, that.sourceMimetype)
                    && nullSafeEquals(this.targetMimetype, that.targetMimetype);
        }

        /**
         * Equal keys have equal source and target mimetypes and therefore the same
         * concatenated form, which keeps this consistent with {@link #equals(Object)}.
         */
        @Override
        public int hashCode()
        {
            return key.hashCode();
        }

        private static boolean nullSafeEquals(String left, String right)
        {
            return (left == null) ? (right == null) : left.equals(right);
        }
    }
}

View File

@@ -0,0 +1,237 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
/**
 * Tests transformer selection by reliability, by speed and by explicit configuration.
 *
 * @see org.alfresco.repo.content.transform.ContentTransformerRegistry
 *
 * @author Derek Hulley
 */
public class ContentTransformerRegistryTest extends AbstractContentTransformerTest
{
    private static final String A = MimetypeMap.MIMETYPE_TEXT_PLAIN;
    private static final String B = MimetypeMap.MIMETYPE_XML;
    private static final String C = MimetypeMap.MIMETYPE_WORD;
    private static final String D = MimetypeMap.MIMETYPE_HTML;

    /** a real registry with real transformers */
    private ContentTransformerRegistry registry;
    /** a fake registry with fake transformers */
    private ContentTransformerRegistry dummyRegistry;
    private ContentReader reader;
    private ContentWriter writer;

    /**
     * Allows dependency injection
     */
    public void setContentTransformerRegistry(ContentTransformerRegistry registry)
    {
        this.registry = registry;
    }

    /**
     * Creates a reader (A) and writer (D) over temp files and builds the dummy
     * registry from transformers with fixed reliabilities and times.
     */
    @Override
    public void onSetUpInTransaction() throws Exception
    {
        reader = new FileContentReader(TempFileProvider.createTempFile(getName(), ".txt"));
        reader.setMimetype(A);
        writer = new FileContentWriter(TempFileProvider.createTempFile(getName(), ".txt"));
        writer.setMimetype(D);

        List<ContentTransformer> transformers = new ArrayList<ContentTransformer>(10);
        // create some dummy transformers for reliability tests
        transformers.add(new DummyTransformer(A, B, 0.3, 10L));
        transformers.add(new DummyTransformer(A, B, 0.6, 10L));
        transformers.add(new DummyTransformer(A, C, 0.5, 10L));
        transformers.add(new DummyTransformer(A, C, 1.0, 10L));
        transformers.add(new DummyTransformer(B, C, 0.2, 10L));
        // create some dummy transformers for speed tests
        transformers.add(new DummyTransformer(A, D, 1.0, 20L));
        transformers.add(new DummyTransformer(A, D, 1.0, 20L));
        transformers.add(new DummyTransformer(A, D, 1.0, 10L));  // the fast one
        transformers.add(new DummyTransformer(A, D, 1.0, 20L));
        transformers.add(new DummyTransformer(A, D, 1.0, 20L));
        // create the dummyRegistry
        dummyRegistry = new ContentTransformerRegistry(mimetypeMap);
        dummyRegistry.setTransformers(transformers);
    }

    /**
     * Checks that required objects are present
     */
    public void testSetUp() throws Exception
    {
        super.testSetUp();
        assertNotNull(registry);
    }

    /**
     * @return Returns the transformer provided by the <b>real</b> registry
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return registry.getTransformer(sourceMimetype, targetMimetype);
    }

    /**
     * None of the dummy transformers handle these combinations, so nothing must be found.
     */
    public void testNullRetrieval() throws Exception
    {
        ContentTransformer transformer = null;
        transformer = dummyRegistry.getTransformer(C, B);
        assertNull("No transformer expected", transformer);
        transformer = dummyRegistry.getTransformer(C, A);
        assertNull("No transformer expected", transformer);
        transformer = dummyRegistry.getTransformer(B, A);
        assertNull("No transformer expected", transformer);
    }

    public void testSimpleRetrieval() throws Exception
    {
        ContentTransformer transformer = null;
        // B -> C expect 0.2
        // requested twice: the second request is answered from the registry's cache
        transformer = dummyRegistry.getTransformer(B, C);
        transformer = dummyRegistry.getTransformer(B, C);
        assertNotNull("No transformer found", transformer);
        assertEquals("Incorrect reliability", 0.2, transformer.getReliability(B, C));
        assertEquals("Incorrect reliability", 0.0, transformer.getReliability(C, B));
    }

    /**
     * Several equally reliable transformers are available for A -> D.  Check that
     * the registry selects the one that reports the lowest transformation time.
     */
    public void testPerformanceRetrieval() throws Exception
    {
        // A -> D expect 1.0, 10ms
        ContentTransformer transformer1 = dummyRegistry.getTransformer(A, D);
        assertNotNull("No transformer found", transformer1);
        assertEquals("Incorrect reliability", 1.0, transformer1.getReliability(A, D));
        assertEquals("Incorrect reliability", 0.0, transformer1.getReliability(D, A));
        assertEquals("Incorrect transformation time", 10L, transformer1.getTransformationTime());
    }

    /**
     * Where transformers of differing reliability compete, the most reliable must win.
     */
    public void testScoredRetrieval() throws Exception
    {
        ContentTransformer transformer = null;
        // A -> B expect 0.6
        transformer = dummyRegistry.getTransformer(A, B);
        assertNotNull("No transformer found", transformer);
        assertEquals("Incorrect reliability", 0.6, transformer.getReliability(A, B));
        assertEquals("Incorrect reliability", 0.0, transformer.getReliability(B, A));
        // A -> C expect 1.0
        transformer = dummyRegistry.getTransformer(A, C);
        assertNotNull("No transformer found", transformer);
        assertEquals("Incorrect reliability", 1.0, transformer.getReliability(A, C));
        assertEquals("Incorrect reliability", 0.0, transformer.getReliability(C, A));
    }

    /**
     * Set an explicit, and bizarre, transformation.  Check that it is used.
     */
    public void testExplicitTransformation()
    {
        ContentTransformer dummyTransformer = new DummyTransformer(
                MimetypeMap.MIMETYPE_FLASH, MimetypeMap.MIMETYPE_EXCEL, 1.0, 12345);
        List<Object> transform = new ArrayList<Object>(3);
        transform.add(MimetypeMap.MIMETYPE_FLASH);
        transform.add(MimetypeMap.MIMETYPE_EXCEL);
        transform.add(dummyTransformer);
        List<List<Object>> explicitTransformers = Collections.singletonList(transform);
        // add it to the registry
        dummyRegistry.setExplicitTransformations(explicitTransformers);
        // get the appropriate transformer for the bizarre mapping
        ContentTransformer checkTransformer = dummyRegistry.getTransformer(
                MimetypeMap.MIMETYPE_FLASH, MimetypeMap.MIMETYPE_EXCEL);
        assertNotNull("No explicit transformer found", checkTransformer);
        assertTrue("Expected explicit transformer", dummyTransformer == checkTransformer);
    }

    /**
     * Dummy transformer that does no transformation and scores exactly as it is
     * told to in the constructor.  It enables the tests to be sure of what to expect.
     */
    private static class DummyTransformer extends AbstractContentTransformer
    {
        private final String sourceMimetype;
        private final String targetMimetype;
        private final double reliability;
        private final long transformationTime;

        public DummyTransformer(String sourceMimetype, String targetMimetype,
                double reliability, long transformationTime)
        {
            this.sourceMimetype = sourceMimetype;
            this.targetMimetype = targetMimetype;
            this.reliability = reliability;
            this.transformationTime = transformationTime;
        }

        /**
         * @return Returns the configured reliability for exactly the configured
         *      source and target mimetypes, otherwise 0.0
         */
        public double getReliability(String sourceMimetype, String targetMimetype)
        {
            if (this.sourceMimetype.equals(sourceMimetype)
                    && this.targetMimetype.equals(targetMimetype))
            {
                return reliability;
            }
            else
            {
                return 0.0;
            }
        }

        /**
         * Just notches up some average times
         */
        public void transformInternal(
                ContentReader reader,
                ContentWriter writer,
                Map<String, Object> options) throws Exception
        {
            // just update the transformation time
            super.recordTime(transformationTime);
        }

        /**
         * @return Returns the fixed dummy average transformation time
         */
        public synchronized long getTransformationTime()
        {
            return transformationTime;
        }
    }
}

View File

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.htmlparser.beans.StringBean;
/**
 * Extracts plain text from HTML by means of the HTML Parser library's string bean.
 *
 * @see http://htmlparser.sourceforge.net/
 * @see org.htmlparser.beans.StringBean
 *
 * @author Derek Hulley
 */
public class HtmlParserContentTransformer extends AbstractContentTransformer
{
    private static final Log logger = LogFactory.getLog(HtmlParserContentTransformer.class);

    /**
     * HTML to plain text is the one and only supported transformation.
     *
     * @return Returns 1.0 for HTML to plain text and 0.0 for anything else
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        boolean htmlToText = MimetypeMap.MIMETYPE_HTML.equals(sourceMimetype)
                && MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype);
        return htmlToText ? 1.0 : 0.0;
    }

    /**
     * Copies the source content into a temporary file, points the string bean at
     * that file and writes the strings that it extracts to the writer.
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws Exception
    {
        // the extractor is given a location rather than a stream, so spool to a file first
        File sourceFile = TempFileProvider.createTempFile("HtmlParserContentTransformer_", ".html");
        reader.getContent(sourceFile);

        // configure the extractor
        StringBean stringBean = new StringBean();
        stringBean.setCollapse(false);
        stringBean.setLinks(false);
        stringBean.setReplaceNonBreakingSpaces(false);
        stringBean.setURL(sourceFile.getAbsolutePath());

        // pull out the text and pass it straight on to the writer
        writer.putContent(stringBean.getStrings());
    }
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.MimetypeMap;
/**
 * @see org.alfresco.repo.content.transform.HtmlParserContentTransformer
 *
 * @author Derek Hulley
 */
public class HtmlParserContentTransformerTest extends AbstractContentTransformerTest
{
    private static final String SOME_CONTENT = "azAz10!<21>$%^&*()\t\r\n";

    private ContentTransformer transformer;

    @Override
    public void onSetUpInTransaction() throws Exception
    {
        transformer = new HtmlParserContentTransformer();
    }

    /**
     * @return Returns the same transformer regardless of the mimetypes requested
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    public void testSetUp() throws Exception
    {
        assertNotNull(transformer);
    }

    /**
     * Checks that only HTML to plain text is supported.
     * <p>
     * The name carries the <tt>test</tt> prefix so that name-based test
     * discovery actually runs the check.
     */
    public void testReliability() throws Exception
    {
        // HTML to plain text is fully supported
        double reliability = transformer.getReliability(MimetypeMap.MIMETYPE_HTML, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Reliability incorrect", 1.0, reliability);
        // the other way around, plain text to HTML, is not supported at all
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_HTML);
        assertEquals("Reliability incorrect", 0.0, reliability);
    }

    /**
     * Retained under its original name for any existing callers.
     *
     * @see #testReliability()
     */
    public void checkReliability() throws Exception
    {
        testReliability();
    }
}

View File

@@ -0,0 +1,87 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.IOException;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
 * Makes use of the {@link http://www.pdfbox.org/ PDFBox} library to
 * perform conversions from PDF files to text.
 *
 * @author Derek Hulley
 */
public class PdfBoxContentTransformer extends AbstractContentTransformer
{
    private static final Log logger = LogFactory.getLog(PdfBoxContentTransformer.class);

    /**
     * Currently the only transformation performed is that of text extraction from PDF documents.
     *
     * @return Returns 1.0 for PDF to plain text and 0.0 for anything else
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        // TODO: Expand PDFBox usage to convert images to PDF and investigate other conversions
        if (!MimetypeMap.MIMETYPE_PDF.equals(sourceMimetype) ||
                !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
        {
            // only support PDF -> Text
            return 0.0;
        }
        else
        {
            return 1.0;
        }
    }

    /**
     * Loads the PDF from the reader's stream, strips the text out of it and writes
     * that text to the writer.  The document is always closed afterwards.
     *
     * @throws ContentIOException if the document cannot be read or stripped; the
     *      underlying <tt>IOException</tt> is kept as the cause
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
    {
        PDDocument pdf = null;
        try
        {
            // stream the document in
            pdf = PDDocument.load(reader.getContentInputStream());
            // strip the text out
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(pdf);
            // dump it all to the writer
            writer.putContent(text);
        }
        catch (IOException e)
        {
            // keep the cause so that the real reason for the failure isn't lost
            throw new ContentIOException("PDF text stripping failed: \n" +
                    " reader: " + reader,
                    e);
        }
        finally
        {
            if (pdf != null)
            {
                try
                {
                    pdf.close();
                }
                catch (Throwable e)
                {
                    // best effort only: a failure to close must not mask the outcome of the transformation
                    logger.warn("Failed to close PDF document: \n" +
                            " reader: " + reader,
                            e);
                }
            }
        }
    }
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Checks which mimetype combinations the PDFBox transformer claims to support.
 *
 * @see org.alfresco.repo.content.transform.PdfBoxContentTransformer
 *
 * @author Derek Hulley
 */
public class PdfBoxContentTransformerTest extends AbstractContentTransformerTest
{
    private static final Log logger = LogFactory.getLog(PdfBoxContentTransformerTest.class);

    private ContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        transformer = new PdfBoxContentTransformer();
    }

    /**
     * @return Returns the one transformer under test, whatever mimetypes are asked for
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Text to PDF must score 0.0, whereas PDF to text must score 1.0.
     */
    public void testReliability() throws Exception
    {
        double textToPdf = transformer.getReliability(
                MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_PDF);
        assertEquals("Mimetype should not be supported", 0.0, textToPdf);

        double pdfToText = transformer.getReliability(
                MimetypeMap.MIMETYPE_PDF, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 1.0, pdfToText);
    }
}

View File

@@ -0,0 +1,251 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Makes use of the <a href="http://jakarta.apache.org/poi/">POI</a> library to
 * perform conversions from Excel spreadsheets to text (comma separated).
 * <p>
 * While most text extraction from spreadsheets only extract the first sheet of
 * the workbook, the method used here extracts the text from <b>all the sheets</b>.
 * This is more useful, especially when it comes to indexing spreadsheets.
 * <p>
 * In the case where there is only one sheet in the document, the results will be
 * exactly the same as most extractors.  Where there are multiple sheets, the results
 * will differ, but meaningful reimporting of the text document is not possible
 * anyway.
 *
 * @author Derek Hulley
 */
public class PoiHssfContentTransformer extends AbstractContentTransformer
{
    /**
     * Windows carriage return line feed pair.
     */
    private static final String LINE_BREAK = "\r\n";

    private static final Log logger = LogFactory.getLog(PoiHssfContentTransformer.class);

    /**
     * Currently the only transformation performed is that of text extraction from XLS documents.
     *
     * @param sourceMimetype the mimetype of the content to read
     * @param targetMimetype the mimetype of the content to produce
     * @return Returns 1.0 for Excel to plain text and 0.0 for every other combination
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        if (!MimetypeMap.MIMETYPE_EXCEL.equals(sourceMimetype) ||
                !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
        {
            // only support XLS -> Text
            return 0.0;
        }
        else
        {
            return 1.0;
        }
    }

    /**
     * Writes every sheet of the workbook to the writer's output stream as CSV text.  Each
     * sheet is followed by a line break, an <i>End of sheet</i> marker carrying the sheet name
     * and two further line breaks.
     * <p>
     * Both the workbook input stream and the output stream are closed before this method
     * returns, whether or not the transformation succeeded.
     *
     * @param reader the source of the Excel content
     * @param writer the destination of the text; its encoding, if any, is used for the output
     * @param options not used by this transformer
     * @throws Exception if the workbook cannot be read or the output cannot be written
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws Exception
    {
        OutputStream os = writer.getContentOutputStream();
        InputStream is = null;
        try
        {
            String encoding = writer.getEncoding();
            // open the workbook
            is = reader.getContentInputStream();
            HSSFWorkbook workbook = new HSSFWorkbook(is);
            // how many sheets are there?
            int sheetCount = workbook.getNumberOfSheets();
            // transform each sheet
            for (int i = 0; i < sheetCount; i++)
            {
                HSSFSheet sheet = workbook.getSheetAt(i);
                String sheetName = workbook.getSheetName(i);
                writeSheet(os, sheet, encoding);
                // write the sheet trailer, which includes the sheet name
                PoiHssfContentTransformer.writeString(os, encoding, LINE_BREAK, false);
                PoiHssfContentTransformer.writeString(os, encoding, "End of sheet: " + sheetName, true);
                PoiHssfContentTransformer.writeString(os, encoding, LINE_BREAK, false);
                PoiHssfContentTransformer.writeString(os, encoding, LINE_BREAK, false);
            }
        }
        finally
        {
            // closing is best-effort, but failures are reported rather than silently dropped
            if (is != null)
            {
                try { is.close(); } catch (Throwable e) { logger.error("Failed to close workbook input stream", e); }
            }
            if (os != null)
            {
                try { os.close(); } catch (Throwable e) { logger.error("Failed to close text output stream", e); }
            }
        }
    }

    /**
     * Dumps the text from the sheet to the stream in CSV format.  Rows are separated by
     * line breaks; rows that do not exist in the sheet produce an empty line.
     *
     * @param os the stream to write to
     * @param sheet the sheet to dump
     * @param encoding the encoding to use, or null to use the default encoding
     */
    private void writeSheet(OutputStream os, HSSFSheet sheet, String encoding) throws Exception
    {
        int rows = sheet.getLastRowNum();
        // transform each row
        for (int i = 0; i <= rows; i++)
        {
            HSSFRow row = sheet.getRow(i);
            if (row != null)
            {
                writeRow(os, row, encoding);
            }
            // break between rows
            if (i < rows)
            {
                PoiHssfContentTransformer.writeString(os, encoding, LINE_BREAK, false);
            }
        }
    }

    /**
     * Writes a single row as one CSV line, without a trailing line break.
     * <p>
     * The loop starts at column 0 and emits a separator after every column before the last
     * one, including columns without a cell.  That alone keeps the values aligned with their
     * spreadsheet columns, so no additional padding up to the first cell is written.
     *
     * @param os the stream to write to
     * @param row the row to dump
     * @param encoding the encoding to use, or null to use the default encoding
     */
    private void writeRow(OutputStream os, HSSFRow row, String encoding) throws Exception
    {
        // NOTE(review): this treats the value as the index of the last cell; some POI releases
        // document it as "last index plus one", which would add one trailing comma - confirm
        // against the POI version in use
        short lastCellNum = row.getLastCellNum();
        // write each cell
        for (short i = 0; i <= lastCellNum; i++)
        {
            HSSFCell cell = row.getCell(i);
            if (cell != null)
            {
                // blank cells yield an empty string, for which nothing is written
                PoiHssfContentTransformer.writeString(os, encoding, getCellText(cell), true);
            }
            // comma separate if required
            if (i < lastCellNum)
            {
                PoiHssfContentTransformer.writeString(os, encoding, ",", false);
            }
        }
    }

    /**
     * Converts the value of a cell to its textual form.
     *
     * @param cell the cell to read
     * @return Returns the text of the cell, which is empty for blank cells and
     *      <tt>ERROR</tt> for error cells
     * @throws RuntimeException if the cell type is not one of the known HSSF cell types
     */
    private String getCellText(HSSFCell cell)
    {
        StringBuilder sb = new StringBuilder(10);
        switch (cell.getCellType())
        {
            case HSSFCell.CELL_TYPE_BLANK:
                // ignore
                break;
            case HSSFCell.CELL_TYPE_BOOLEAN:
                sb.append(cell.getBooleanCellValue());
                break;
            case HSSFCell.CELL_TYPE_ERROR:
                sb.append("ERROR");
                break;
            case HSSFCell.CELL_TYPE_FORMULA:
                double dataNumber = cell.getNumericCellValue();
                if (Double.isNaN(dataNumber))
                {
                    // no numeric result: treat it as a string
                    sb.append(cell.getStringCellValue());
                }
                else
                {
                    // treat it as a number
                    sb.append(dataNumber);
                }
                break;
            case HSSFCell.CELL_TYPE_NUMERIC:
                sb.append(cell.getNumericCellValue());
                break;
            case HSSFCell.CELL_TYPE_STRING:
                sb.append(cell.getStringCellValue());
                break;
            default:
                throw new RuntimeException("Unknown HSSF cell type: " + cell);
        }
        return sb.toString();
    }

    /**
     * Writes the given data to the stream using the encoding specified.  If the encoding
     * is not given, the default <tt>String</tt> to <tt>byte[]</tt> conversion will be
     * used.
     * <p>
     * Every inverted comma in the value is doubled, which is the CSV way of escaping it.
     * Values flagged as data are additionally enclosed in inverted commas.  Null and empty
     * values result in nothing being written at all.
     *
     * @param os the stream to write to
     * @param encoding the encoding to use, or null if the default encoding is acceptable
     * @param value the string to write
     * @param isData true if the value represents a human-readable string, false if the
     *      value represents formatting characters, separating characters, etc.
     * @throws Exception if the encoding is not supported or the stream cannot be written to
     */
    public static void writeString(OutputStream os, String encoding, String value, boolean isData) throws Exception
    {
        if (value == null || value.length() == 0)
        {
            // nothing to do
            return;
        }
        int dataLength = value.length();
        StringBuilder sb = new StringBuilder(dataLength + 5);   // slightly longer than the data
        // enclose in inverted commas for safety
        if (isData)
        {
            sb.append('"');
        }
        for (int i = 0; i < dataLength; i++)
        {
            char currentChar = value.charAt(i);
            if (currentChar == '"')
            {
                // CSV escaping of inverted commas: write the character twice
                sb.append('"');
            }
            sb.append(currentChar);
        }
        if (isData)
        {
            sb.append('"');
        }
        // escaping complete
        String escaped = sb.toString();
        byte[] bytes = (encoding == null) ? escaped.getBytes() : escaped.getBytes(encoding);
        // write to the stream
        os.write(bytes);
    }
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.InputStream;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * @see org.alfresco.repo.content.transform.PoiHssfContentTransformer
 *
 * @author Derek Hulley
 */
public class PoiHssfContentTransformerTest extends AbstractContentTransformerTest
{
    private static final Log logger = LogFactory.getLog(PoiHssfContentTransformerTest.class);

    /** The transformer under test; assigned in {@link #onSetUpInTransaction()}. */
    private ContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        transformer = new PoiHssfContentTransformer();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Checks that only the Excel to plain text direction is reported as supported.
     */
    public void testReliability() throws Exception
    {
        double reliability = 0.0;
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_EXCEL);
        assertEquals("Mimetype should not be supported", 0.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 1.0, reliability);
    }

    /**
     * Tests a specific failure in the library by transforming the <tt>Plan270904b.xls</tt>
     * test resource to plain text.
     * <p>
     * NOTE(review): the method name does not start with <tt>test</tt>, so it is presumably
     * not picked up by the test runner - confirm whether it is meant to stay disabled.
     */
    public void xtestBugFixAR114() throws Exception
    {
        File tempFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_" + getName() + "_",
                ".xls");
        FileContentWriter writer = new FileContentWriter(tempFile);
        writer.setMimetype(MimetypeMap.MIMETYPE_EXCEL);
        // get the test resource and write it (Excel)
        InputStream is = getClass().getClassLoader().getResourceAsStream("Plan270904b.xls");
        // the stream itself must be checked: a missing resource yields a null stream
        assertNotNull("Test resource not found: Plan270904b.xls", is);
        writer.putContent(is);
        // get the source of the transformation
        ContentReader reader = writer.getReader();
        // make a new location of the transform output (plain text)
        tempFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_" + getName() + "_",
                ".txt");
        writer = new FileContentWriter(tempFile);
        writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
        // transform it
        transformer.transform(reader, writer);
    }
}

View File

@@ -0,0 +1,287 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.util.TempFileProvider;
import org.alfresco.util.exec.RuntimeExec;
import org.alfresco.util.exec.RuntimeExec.ExecutionResult;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* This configurable wrapper is able to execute any command line transformation that
* accepts an input and an output file on the command line.
* <p>
* The following parameters are use:
* <ul>
* <li><b>{@link #VAR_SOURCE target}</b> - full path to the source file</li>
* <li><b>{@link #VAR_TARGET source}</b> - full path to the target file</li>
* </ul>
* Provided that the command executed ultimately transforms the source file
* and leaves the result in the target file, the transformation should be
* successful.
* <p>
* <b>NOTE</b>: It is only the contents of the files that can be transformed.
* Any attempt to modify the source or target file metadata will, at best, have
* no effect, but may ultimately lead to the transformation failing. This is
* because the files provided are both temporary files that reside in a location
* outside the system's content store.
*
* @see org.alfresco.util.exec.RuntimeExec
*
* @since 1.1
* @author Derek Hulley
*/
public class RuntimeExecutableContentTransformer extends AbstractContentTransformer
{
public static final String VAR_SOURCE = "source";
public static final String VAR_TARGET = "target";
private static Log logger = LogFactory.getLog(RuntimeExecutableContentTransformer.class);
private boolean available;
private MimetypeService mimetypeService;
private RuntimeExec checkCommand;
private RuntimeExec transformCommand;
private Set<Integer> errCodes;
public RuntimeExecutableContentTransformer()
{
this.errCodes = new HashSet<Integer>(2);
errCodes.add(1);
errCodes.add(2);
}
/**
* @param mimetypeService the mapping from mimetype to extensions
*/
public void setMimetypeService(MimetypeService mimetypeService)
{
this.mimetypeService = mimetypeService;
}
/**
* Set the runtime executer that will be called as part of the initialisation
* to determine if the transformer is able to function. This is optional, but allows
* the transformer registry to detect and avoid using this instance if it is not working.
* <p>
* The command will be considered to have failed if the
*
* @param checkCommand the initialisation check command
*/
public void setCheckCommand(RuntimeExec checkCommand)
{
this.checkCommand = checkCommand;
}
/**
* Set the runtime executer that will called to perform the actual transformation.
*
* @param transformCommand the runtime transform command
*/
public void setTransformCommand(RuntimeExec transformCommand)
{
this.transformCommand = transformCommand;
}
/**
* A comma or space separated list of values that, if returned by the executed command,
* indicate an error value. This defaults to <b>"1, 2"</b>.
*
* @param erroCodesStr
*/
public void setErrorCodes(String errCodesStr)
{
StringTokenizer tokenizer = new StringTokenizer(errCodesStr, " ,");
while(tokenizer.hasMoreElements())
{
String errCodeStr = tokenizer.nextToken();
// attempt to convert it to an integer
try
{
int errCode = Integer.parseInt(errCodeStr);
this.errCodes.add(errCode);
}
catch (NumberFormatException e)
{
throw new AlfrescoRuntimeException("Error codes string must be integers: " + errCodesStr);
}
}
}
/**
* @param exitValue the command exit value
* @return Returns true if the code is a listed failure code
*
* @see #setErrorCodes(String)
*/
private boolean isFailureCode(int exitValue)
{
return errCodes.contains((Integer)exitValue);
}
/**
* Executes the check command, if present. Any errors will result in this component
* being rendered unusable within the transformer registry, but may still be called
* directly.
*/
public void init()
{
if (transformCommand == null)
{
throw new AlfrescoRuntimeException("Mandatory property 'transformCommand' not set");
}
else if (mimetypeService == null)
{
throw new AlfrescoRuntimeException("Mandatory property 'mimetypeService' not set");
}
// execute the command
if (checkCommand != null)
{
ExecutionResult result = checkCommand.execute();
// check the return code
available = !isFailureCode(result.getExitValue());
}
else
{
// no check - just assume it is available
available = true;
}
}
/**
* Unless otherwise configured, this component supports all mimetypes.
* If the {@link #init() initialization} failed,
*/
public double getReliability(String sourceMimetype, String targetMimetype)
{
if (!available)
{
return 0.0;
}
else
{
return 1.0;
}
}
/**
* Converts the source and target content to temporary files with the
* correct extensions for the mimetype that they map to.
*
* @see #transformInternal(File, File)
*/
protected final void transformInternal(
ContentReader reader,
ContentWriter writer,
Map<String, Object> options) throws Exception
{
// get mimetypes
String sourceMimetype = getMimetype(reader);
String targetMimetype = getMimetype(writer);
// get the extensions to use
String sourceExtension = mimetypeService.getExtension(sourceMimetype);
String targetExtension = mimetypeService.getExtension(targetMimetype);
if (sourceExtension == null || targetExtension == null)
{
throw new AlfrescoRuntimeException("Unknown extensions for mimetypes: \n" +
" source mimetype: " + sourceMimetype + "\n" +
" source extension: " + sourceExtension + "\n" +
" target mimetype: " + targetMimetype + "\n" +
" target extension: " + targetExtension);
}
// if the source mimetype is the same as the target's then just stream it
if (sourceMimetype.equals(targetMimetype))
{
writer.putContent(reader.getContentInputStream());
return;
}
// create required temp files
File sourceFile = TempFileProvider.createTempFile(
getClass().getSimpleName() + "_source_",
"." + sourceExtension);
File targetFile = TempFileProvider.createTempFile(
getClass().getSimpleName() + "_target_",
"." + targetExtension);
Map<String, String> properties = new HashMap<String, String>(5);
// copy options over
for (Map.Entry<String, Object> entry : options.entrySet())
{
String key = entry.getKey();
Object value = entry.getValue();
properties.put(key, (value == null ? null : value.toString()));
}
// add the source and target properties
properties.put(VAR_SOURCE, sourceFile.getAbsolutePath());
properties.put(VAR_TARGET, targetFile.getAbsolutePath());
// pull reader file into source temp file
reader.getContent(sourceFile);
// execute the transformation command
ExecutionResult result = null;
try
{
result = transformCommand.execute(properties);
}
catch (Throwable e)
{
throw new ContentIOException("Transformation failed during command execution: \n" + transformCommand, e);
}
// check
if (isFailureCode(result.getExitValue()))
{
throw new ContentIOException("Transformation failed - status indicates an error: \n" + result);
}
// check that the file was created
if (!targetFile.exists())
{
throw new ContentIOException("Transformation failed - target file doesn't exist: \n" + result);
}
// copy the target file back into the repo
writer.putContent(targetFile);
// done
if (logger.isDebugEnabled())
{
logger.debug("Transformation completed: \n" +
" source: " + reader + "\n" +
" target: " + writer + "\n" +
" options: " + options + "\n" +
" result: \n" + result);
}
}
}

View File

@@ -0,0 +1,82 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.BaseAlfrescoTestCase;
import org.alfresco.util.TempFileProvider;
import org.alfresco.util.exec.RuntimeExec;
/**
 * Exercises the command-line transformer wrapper with a plain file copy command.
 *
 * @see org.alfresco.repo.content.transform.RuntimeExecutableContentTransformer
 *
 * @author Derek Hulley
 */
public class RuntimeExecutableContentTransformerTest extends BaseAlfrescoTestCase
{
    private RuntimeExecutableContentTransformer transformer;

    /**
     * Builds a transformer whose "transformation" is an operating system command that
     * places the source file's content at the target file's location.
     */
    @Override
    protected void setUp() throws Exception
    {
        super.setUp();

        this.transformer = new RuntimeExecutableContentTransformer();

        // per-OS commands, keyed by OS name with "*" as the catch-all entry
        RuntimeExec copyCommand = new RuntimeExec();
        Map<String, String> commandsByOs = new HashMap<String, String>(5);
        commandsByOs.put("Linux", "mv -f ${source} ${target}");
        commandsByOs.put("*", "cmd /c copy /Y \"${source}\" \"${target}\"");
        copyCommand.setCommandMap(commandsByOs);

        // wire up the transformer
        this.transformer.setTransformCommand(copyCommand);
        this.transformer.setMimetypeService(serviceRegistry.getMimetypeService());
        this.transformer.setErrorCodes("1, 2");

        // initialise so that it doesn't score 0
        this.transformer.init();
    }

    /**
     * Pushes a small piece of text through the transformer and checks that it arrives
     * unchanged at the target.
     */
    public void testCopyCommand() throws Exception
    {
        final String expected = "<A><B></B></A>";

        // source: a plain text temp file holding the content
        File textFile = TempFileProvider.createTempFile(getName() + "_", ".txt");
        ContentWriter sourceWriter = new FileContentWriter(textFile);
        sourceWriter.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
        sourceWriter.putContent(expected);
        ContentReader sourceReader = sourceWriter.getReader();

        // target: an XML temp file
        File xmlFile = TempFileProvider.createTempFile(getName() + "_", ".xml");
        ContentWriter targetWriter = new FileContentWriter(xmlFile);
        targetWriter.setMimetype(MimetypeMap.MIMETYPE_XML);

        // run the transformation - no options on the copy
        transformer.transform(sourceReader, targetWriter);

        // the target must now hold exactly what the source held
        String actual = targetWriter.getReader().getContentString();
        assertEquals("Content not copied", expected, actual);
    }
}

View File

@@ -0,0 +1,141 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Converts any textual format to plain text.
* <p>
* The transformation is sensitive to the source and target string encodings.
*
* @author Derek Hulley
*/
public class StringExtractingContentTransformer extends AbstractContentTransformer
{
public static final String PREFIX_TEXT = "text/";
private static final Log logger = LogFactory.getLog(StringExtractingContentTransformer.class);
/**
* Gives a high reliability for all translations from <i>text/sometype</i> to
* <i>text/plain</i>. As the text formats are already text, the characters
* are preserved and no actual conversion takes place.
* <p>
* Extraction of text from binary data is wholly unreliable.
*/
public double getReliability(String sourceMimetype, String targetMimetype)
{
if (!targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
{
// can only convert to plain text
return 0.0;
}
else if (sourceMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
{
// conversions from any plain text format are very reliable
return 1.0;
}
else if (sourceMimetype.startsWith(PREFIX_TEXT))
{
// the source is text, but probably with some kind of markup
return 0.1;
}
else
{
// extracting text from binary is not useful
return 0.0;
}
}
/**
* Text to text conversions are done directly using the content reader and writer string
* manipulation methods.
* <p>
* Extraction of text from binary content attempts to take the possible character
* encoding into account. The text produced from this will, if the encoding was correct,
* be unformatted but valid.
*/
@Override
public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
throws Exception
{
// is this a straight text-text transformation
transformText(reader, writer);
}
/**
* Transformation optimized for text-to-text conversion
*/
private void transformText(ContentReader reader, ContentWriter writer) throws Exception
{
// get a char reader and writer
Reader charReader = null;
Writer charWriter = null;
try
{
if (reader.getEncoding() == null)
{
charReader = new InputStreamReader(reader.getContentInputStream());
}
else
{
charReader = new InputStreamReader(reader.getContentInputStream(), reader.getEncoding());
}
if (writer.getEncoding() == null)
{
charWriter = new OutputStreamWriter(writer.getContentOutputStream());
}
else
{
charWriter = new OutputStreamWriter(writer.getContentOutputStream(), writer.getEncoding());
}
// copy from the one to the other
char[] buffer = new char[1024];
int readCount = 0;
while (readCount > -1)
{
// write the last read count number of bytes
charWriter.write(buffer, 0, readCount);
// fill the buffer again
readCount = charReader.read(buffer);
}
}
finally
{
if (charReader != null)
{
try { charReader.close(); } catch (Throwable e) { logger.error(e); }
}
if (charWriter != null)
{
try { charWriter.close(); } catch (Throwable e) { logger.error(e); }
}
}
// done
}
}

View File

@@ -0,0 +1,162 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Random;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
/**
 * @see org.alfresco.repo.content.transform.StringExtractingContentTransformer
 *
 * @author Derek Hulley
 */
public class StringExtractingContentTransformerTest extends AbstractContentTransformerTest
{
    /** Sample text with letters, digits, punctuation and whitespace control characters. */
    private static final String SOME_CONTENT = "azAz10!<21>$%^&*()\t\r\n";

    private ContentTransformer transformer;
    /** the final destination of transformations */
    private ContentWriter targetWriter;

    /**
     * Creates the transformer and a UTF-8 plain text writer onto a new temp file.
     */
    @Override
    public void onSetUpInTransaction() throws Exception
    {
        transformer = new StringExtractingContentTransformer();
        targetWriter = new FileContentWriter(getTempFile());
        targetWriter.setMimetype("text/plain");
        targetWriter.setEncoding("UTF-8");
    }

    /**
     * @return Returns the same transformer regardless of the mimetypes
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    public void testSetUp() throws Exception
    {
        assertNotNull(transformer);
    }

    /**
     * @return Returns a new temp file
     */
    private File getTempFile()
    {
        return TempFileProvider.createTempFile(getName(), ".txt");
    }

    /**
     * Writes some content using the mimetype and encoding specified.
     *
     * @param mimetype the mimetype to set on the content
     * @param encoding the character encoding to write the content with
     * @return Returns a reader onto the newly written content
     */
    private ContentReader writeContent(String mimetype, String encoding)
    {
        ContentWriter writer = new FileContentWriter(getTempFile());
        writer.setMimetype(mimetype);
        writer.setEncoding(encoding);
        // put content
        writer.putContent(SOME_CONTENT);
        // return a reader onto the new content
        return writer.getReader();
    }

    /**
     * Plain text in one encoding must come out as identical plain text in the target encoding.
     */
    public void testDirectTransform() throws Exception
    {
        ContentReader reader = writeContent("text/plain", "latin1");

        // check reliability
        double reliability = transformer.getReliability(reader.getMimetype(), targetWriter.getMimetype());
        assertEquals("Reliability incorrect", 1.0, reliability);   // plain text to plain text is 100%

        // transform
        transformer.transform(reader, targetWriter);

        // get a reader onto the transformed content and check
        ContentReader checkReader = targetWriter.getReader();
        String checkContent = checkReader.getContentString();
        assertEquals("Content check failed", SOME_CONTENT, checkContent);
    }

    /**
     * Marked-up text must be copied across unchanged, but with a lower reliability score.
     */
    public void testInterTextTransform() throws Exception
    {
        ContentReader reader = writeContent("text/xml", "UTF-16");

        // check reliability
        double reliability = transformer.getReliability(reader.getMimetype(), targetWriter.getMimetype());
        assertEquals("Reliability incorrect", 0.1, reliability);   // markup to plain text not 100%

        // transform
        transformer.transform(reader, targetWriter);

        // get a reader onto the transformed content and check
        ContentReader checkReader = targetWriter.getReader();
        String checkContent = checkReader.getContentString();
        assertEquals("Content check failed", SOME_CONTENT, checkContent);
    }

    /**
     * Generate a large file and then transform it using the text extractor.
     * We are not creating super-large file (1GB) in order to test the transform
     * as it takes too long to create the file in the first place.  Rather,
     * this test can be used during profiling to ensure that memory is not
     * being consumed.
     * <p>
     * The file writer is always closed and both temp files are always deleted, even when
     * generating the file or transforming it fails.
     */
    public void testLargeFileStreaming() throws Exception
    {
        File sourceFile = TempFileProvider.createTempFile(getName(), ".txt");
        File outputFile = null;
        try
        {
            int chars = 1000000;  // a million characters should do the trick
            Random random = new Random();

            Writer charWriter = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(sourceFile)));
            try
            {
                for (int i = 0; i < chars; i++)
                {
                    // characters in the range 32 to 124
                    char next = (char)(random.nextDouble() * 93D + 32D);
                    charWriter.write(next);
                }
            }
            finally
            {
                charWriter.close();
            }

            // get a reader and a writer
            ContentReader reader = new FileContentReader(sourceFile);
            reader.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);

            outputFile = TempFileProvider.createTempFile(getName(), ".txt");
            ContentWriter writer = new FileContentWriter(outputFile);
            writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);

            // transform
            transformer.transform(reader, writer);
        }
        finally
        {
            // delete files
            sourceFile.delete();
            if (outputFile != null)
            {
                outputFile.delete();
            }
        }
    }
}

View File

@@ -0,0 +1,86 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.textmining.text.extraction.WordExtractor;
/**
 * Makes use of the <a href="http://www.textmining.org/">TextMining</a> library to
 * perform conversions from MSWord documents to text.
 *
 * @author Derek Hulley
 */
public class TextMiningContentTransformer extends AbstractContentTransformer
{
    private static final Log logger = LogFactory.getLog(TextMiningContentTransformer.class);

    /** The extractor that all transformations by this instance go through. */
    private WordExtractor wordExtractor;

    public TextMiningContentTransformer()
    {
        this.wordExtractor = new WordExtractor();
    }

    /**
     * Currently the only transformation performed is that of text extraction from Word documents.
     *
     * @param sourceMimetype the mimetype of the content to read
     * @param targetMimetype the mimetype of the content to produce
     * @return Returns 1.0 for Word to plain text and 0.0 for every other combination
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        if (!MimetypeMap.MIMETYPE_WORD.equals(sourceMimetype) ||
                !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
        {
            // only support DOC -> Text
            return 0.0;
        }
        else
        {
            return 1.0;
        }
    }

    /**
     * Extracts the text of the Word document and writes it to the writer.
     * <p>
     * A document that fails with an <i>Unable to read entire header</i> error is treated
     * as one of Word's temp non-documents and produces empty text.  Any other read failure
     * is thrown to the caller.  The input stream is closed in all cases.
     *
     * @param reader the source of the Word document
     * @param writer the destination of the extracted text
     * @param options not used by this transformer
     * @throws Exception if the text cannot be extracted or written
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws Exception
    {
        InputStream is = reader.getContentInputStream();
        String text = null;
        try
        {
            text = wordExtractor.extractText(is);
        }
        catch (IOException e)
        {
            // check if this is an error caused by the fact that the .doc is in fact
            // one of Word's temp non-documents
            String message = e.getMessage();
            if (message != null && message.contains("Unable to read entire header"))
            {
                // just assign an empty string
                text = "";
            }
            else
            {
                // a genuine failure: don't carry on and write out null text
                throw e;
            }
        }
        finally
        {
            if (is != null)
            {
                try { is.close(); } catch (Throwable e) { logger.error(e); }
            }
        }
        // dump the text out
        writer.putContent(text);
    }
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.InputStream;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * @see org.alfresco.repo.content.transform.TextMiningContentTransformer
 *
 * @author Derek Hulley
 */
public class TextMiningContentTransformerTest extends AbstractContentTransformerTest
{
    private static final Log logger = LogFactory.getLog(TextMiningContentTransformerTest.class);

    /** The transformer under test; assigned in {@link #onSetUpInTransaction()}. */
    private ContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        transformer = new TextMiningContentTransformer();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Checks that only the Word to plain text direction is reported as supported.
     */
    public void testReliability() throws Exception
    {
        double reliability = 0.0;
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD);
        assertEquals("Mimetype should not be supported", 0.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 1.0, reliability);
    }

    /**
     * Tests a specific failure in the library by transforming the
     * <tt>farmers_markets_list_2003.doc</tt> test resource to plain text.
     */
    public void testBugFixAR1() throws Exception
    {
        File tempFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_" + getName() + "_",
                ".doc");
        FileContentWriter writer = new FileContentWriter(tempFile);
        writer.setMimetype(MimetypeMap.MIMETYPE_WORD);
        // get the test resource and write it (MS Word)
        InputStream is = getClass().getClassLoader().getResourceAsStream("farmers_markets_list_2003.doc");
        // the stream itself must be checked: a missing resource yields a null stream
        assertNotNull("Test resource not found: farmers_markets_list_2003.doc", is);
        writer.putContent(is);
        // get the source of the transformation
        ContentReader reader = writer.getReader();
        // make a new location of the transform output (plain text)
        tempFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_" + getName() + "_",
                ".txt");
        writer = new FileContentWriter(tempFile);
        writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
        // transform it
        transformer.transform(reader, writer);
    }
}

View File

@@ -0,0 +1,279 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.net.ConnectException;
import java.util.HashMap;
import java.util.Map;
import net.sf.joott.uno.DocumentConverter;
import net.sf.joott.uno.DocumentFormat;
import net.sf.joott.uno.UnoConnection;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
/**
 * Makes use of the OpenOffice Uno interfaces to convert the content.
 * <p>
 * The conversions are slow but reliable.
 * <p>
 * The transformer reports a reliability of 0.0 for every conversion until
 * {@link #init()} has successfully connected to the Uno server.
 *
 * @author Derek Hulley
 */
public class UnoContentTransformer extends AbstractContentTransformer
{
    /** map of <tt>DocumentFormat</tt> instances keyed by mimetype conversion */
    private static final Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> formatsByConversion;
    static
    {
        // Build the map of known Uno document formats and store by conversion key
        formatsByConversion = new HashMap<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper>(17);
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_HTML),
                new DocumentFormatWrapper(DocumentFormat.HTML_WRITER, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_PDF),
                new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
        // NOTE(review): the TEXT format is used for a Word target here; MS_WORD_97 may have been
        //               intended - confirm against the Uno output before changing
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD),
                new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN),
                new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_PDF),
                new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_TEXT_PLAIN),
                new DocumentFormatWrapper(DocumentFormat.TEXT_CALC, 0.8));      // only first sheet extracted
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_PDF),
                new DocumentFormatWrapper(DocumentFormat.PDF_CALC, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_FLASH),
                new DocumentFormatWrapper(DocumentFormat.FLASH_IMPRESS, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_PDF),
                new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_HTML),
                new DocumentFormatWrapper(DocumentFormat.HTML_WRITER, 1.0));
        formatsByConversion.put(
                new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_HTML, MimetypeMap.MIMETYPE_PDF),
                new DocumentFormatWrapper(DocumentFormat.PDF_WRITER_WEB, 1.0));
        // there are many more formats available and therefore many more transformation combinations possible
        // DocumentFormat.FLASH_IMPRESS
        // DocumentFormat.HTML_CALC
        // DocumentFormat.HTML_WRITER
        // DocumentFormat.MS_EXCEL_97
        // DocumentFormat.MS_POWERPOINT_97
        // DocumentFormat.MS_WORD_97
        // DocumentFormat.PDF_CALC
        // DocumentFormat.PDF_IMPRESS
        // DocumentFormat.PDF_WRITER
        // DocumentFormat.PDF_WRITER_WEB
        // DocumentFormat.RTF
        // DocumentFormat.TEXT
        // DocumentFormat.TEXT_CALC
        // DocumentFormat.XML_CALC
        // DocumentFormat.XML_IMPRESS
        // DocumentFormat.XML_WRITER
        // DocumentFormat.XML_WRITER_WEB
    }

    /** the connection string used when {@link #init()} creates the connection */
    private String connectionUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
    /** the Uno connection, created by {@link #init()} */
    private UnoConnection connection;
    /** set by {@link #init()}: true if the connection attempt succeeded */
    private boolean isConnected;

    /**
     * Constructs the default transformer that will attempt to connect to the
     * Uno server using the default connect string.
     *
     * @see UnoConnection#DEFAULT_CONNECTION_STRING
     */
    public UnoContentTransformer()
    {
    }

    /**
     * Override the default connection URL with a new one.
     *
     * @param connectionUrl the connection string
     *
     * @see UnoConnection#DEFAULT_CONNECTION_STRING
     */
    public void setConnectionUrl(String connectionUrl)
    {
        this.connectionUrl = connectionUrl;
    }

    /**
     * Perform bean initialization: creates the connection and attempts to connect.
     * A failure to connect is not an error; it is recorded and reported by
     * {@link #isConnected()}.
     */
    public synchronized void init()
    {
        connection = new UnoConnection(connectionUrl);
        // attempt to make a connection
        try
        {
            connection.connect();
            isConnected = true;
        }
        catch (ConnectException e)
        {
            isConnected = false;
        }
    }

    /**
     * @return Returns true if a connection to the Uno server could be established
     */
    public boolean isConnected()
    {
        return isConnected;
    }

    /**
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns a document format wrapper that is valid for the given source and target mimetypes,
     *      or <tt>null</tt> if the conversion is not one of the known ones
     */
    private static DocumentFormatWrapper getDocumentFormatWrapper(String sourceMimetype, String targetMimetype)
    {
        // get the well-known document format for the specific conversion
        ContentTransformerRegistry.TransformationKey key =
                new ContentTransformerRegistry.TransformationKey(sourceMimetype, targetMimetype);
        DocumentFormatWrapper wrapper = UnoContentTransformer.formatsByConversion.get(key);
        return wrapper;
    }

    /**
     * Checks how reliable the conversion will be when performed by the Uno server.
     * <p>
     * The connection for the Uno server is checked in order to have any chance of
     * being reliable.
     * <p>
     * The conversions' reliabilities are set up statically based on prior tests that
     * included checking performance as well as accuracy.
     *
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns 0.0 if there is no connection or the conversion is unknown,
     *      otherwise the statically configured reliability
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        // check if a connection to the Uno server can be established
        if (!isConnected())
        {
            // no connection means that conversion is not possible
            return 0.0;
        }
        // check if the source and target mimetypes are supported
        DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
        if (docFormatWrapper == null)
        {
            return 0.0;
        }
        else
        {
            return docFormatWrapper.getReliability();
        }
    }

    /**
     * Converts the reader's content by copying it to a temporary file, getting the Uno
     * server to convert that file into a second temporary file and then uploading
     * the result to the writer.
     *
     * @param reader the source of the content
     * @param writer the destination of the converted content
     * @param options the transformation options (not used by this method)
     * @throws ContentIOException if the conversion is not one of the known ones, the
     *      connection to the Uno server fails or the conversion itself fails
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws Exception
    {
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);
        // get the document format that should be used
        DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
        if (docFormatWrapper == null)
        {
            // fail with a meaningful message, before any temp files are created, instead of a NullPointerException
            throw new ContentIOException("Uno server conversion not supported: \n" +
                    "   source mimetype: " + sourceMimetype + "\n" +
                    "   target mimetype: " + targetMimetype + "\n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer);
        }
        // create temporary files to convert from and to
        File tempFromFile = TempFileProvider.createTempFile(
                "UnoContentTransformer",
                "." + getMimetypeService().getExtension(sourceMimetype));
        File tempToFile = TempFileProvider.createTempFile(
                "UnoContentTransformer",
                "." + getMimetypeService().getExtension(targetMimetype));
        // download the content from the source reader
        reader.getContent(tempFromFile);
        try
        {
            docFormatWrapper.execute(tempFromFile, tempToFile, connection);
            // conversion success
        }
        catch (ConnectException e)
        {
            throw new ContentIOException("Connection to Uno server failed: \n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer,
                    e);
        }
        catch (IOException e)
        {
            throw new ContentIOException("Uno server conversion failed: \n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer + "\n" +
                    "   from file: " + tempFromFile + "\n" +
                    "   to file: " + tempToFile,
                    e);
        }
        // upload the temp output to the writer given us
        writer.putContent(tempToFile);
    }

    /**
     * Wraps a document format as well the reliability.  The source and target mimetypes
     * are not kept, but will probably be closely associated with the reliability.
     */
    private static class DocumentFormatWrapper
    {
        /*
         * Source and target mimetypes not kept -> class is private as it doesn't keep
         * enough info to be used safely externally
         */
        private DocumentFormat documentFormat;
        private double reliability;

        public DocumentFormatWrapper(DocumentFormat documentFormat, double reliability)
        {
            this.documentFormat = documentFormat;
            this.reliability = reliability;
        }

        public double getReliability()
        {
            return reliability;
        }

        /**
         * Executes the transformation
         *
         * @param fromFile the file to convert
         * @param toFile the file to write the converted content to
         * @param connection the Uno connection to perform the conversion with
         */
        public void execute(File fromFile, File toFile, UnoConnection connection) throws ConnectException, IOException
        {
            DocumentConverter converter = new DocumentConverter(connection);
            converter.convert(fromFile, toFile, documentFormat);
        }
    }
}

View File

@@ -0,0 +1,71 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import org.alfresco.repo.content.MimetypeMap;
/**
 * Tests the Uno-based transformer.  The reliability checks are skipped when
 * the transformer could not connect to a Uno server.
 *
 * @see org.alfresco.repo.content.transform.UnoContentTransformer
 *
 * @author Derek Hulley
 */
public class UnoContentTransformerTest extends AbstractContentTransformerTest
{
    /** a mimetype that the transformer has no conversion for */
    private static final String MIMETYPE_RUBBISH = "text/rubbish";

    private UnoContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        transformer = new UnoContentTransformer();
        transformer.setMimetypeService(mimetypeMap);
        // without initialization no connection is ever attempted and every test would be silently skipped
        transformer.init();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    public void testSetUp() throws Exception
    {
        super.testSetUp();
        assertNotNull(mimetypeMap);
    }

    /**
     * Checks the reliabilities reported for unsupported and supported conversions.
     */
    public void testReliability() throws Exception
    {
        if (!transformer.isConnected())
        {
            // no connection
            return;
        }
        double reliability = 0.0;
        reliability = transformer.getReliability(MIMETYPE_RUBBISH, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should not be supported", 0.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MIMETYPE_RUBBISH);
        assertEquals("Mimetype should not be supported", 0.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD);
        assertEquals("Mimetype should be supported", 1.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 1.0, reliability);
        reliability = transformer.getReliability(MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_TEXT_PLAIN);
        assertEquals("Mimetype should be supported", 0.8, reliability);
    }
}

View File

@@ -0,0 +1,256 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform.magick;
import java.io.File;
import java.io.InputStream;
import java.util.Collections;
import java.util.Map;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.content.transform.AbstractContentTransformer;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Abstract helper for transformations based on <b>ImageMagick</b>
 * <p>
 * The transformer only becomes {@link #isAvailable() available} once {@link #init()}
 * has managed to convert a sample image.
 *
 * @author Derek Hulley
 */
public abstract class AbstractImageMagickContentTransformer extends AbstractContentTransformer
{
    /** the prefix for mimetypes supported by the transformer */
    public static final String MIMETYPE_IMAGE_PREFIX = "image/";

    private static final Log logger = LogFactory.getLog(AbstractImageMagickContentTransformer.class);

    private MimetypeMap mimetypeMap;
    private boolean available;

    public AbstractImageMagickContentTransformer()
    {
        this.available = false;
    }

    /**
     * Set the mimetype map to resolve mimetypes to file extensions.
     *
     * @param mimetypeMap the map used to look up file extensions
     */
    public void setMimetypeMap(MimetypeMap mimetypeMap)
    {
        this.mimetypeMap = mimetypeMap;
    }

    /**
     * @return Returns true if the transformer is functioning otherwise false
     */
    public boolean isAvailable()
    {
        return available;
    }

    /**
     * Make the transformer available
     *
     * @param available true if the transformer is able to perform transformations
     */
    protected void setAvailable(boolean available)
    {
        this.available = available;
    }

    /**
     * Checks that the transformer works, using the common
     * {@link #transformInternal(File, File, Map) transformation method} to check
     * that the sample image can be converted.  Any failure is logged and leaves
     * the transformer unavailable.
     *
     * @throws AlfrescoRuntimeException if the mimetype map has not been set
     */
    public void init()
    {
        if (mimetypeMap == null)
        {
            throw new AlfrescoRuntimeException("MimetypeMap not present");
        }
        try
        {
            // load, into memory the sample gif
            String resourcePath = "org/alfresco/repo/content/transform/magick/alfresco.gif";
            InputStream imageStream = getClass().getClassLoader().getResourceAsStream(resourcePath);
            if (imageStream == null)
            {
                throw new AlfrescoRuntimeException("Sample image not found: " + resourcePath);
            }
            // dump to a temp file
            File inputFile = TempFileProvider.createTempFile(
                    getClass().getSimpleName() + "_init_source_",
                    ".gif");
            FileContentWriter writer = new FileContentWriter(inputFile);
            writer.putContent(imageStream);
            // create the output file
            File outputFile = TempFileProvider.createTempFile(
                    getClass().getSimpleName() + "_init_target_",
                    ".png");
            // execute it
            Map<String, Object> options = Collections.emptyMap();
            transformInternal(inputFile, outputFile, options);
            // check that the file exists
            if (!outputFile.exists())
            {
                throw new Exception("Image conversion failed: \n" +
                        "   from: " + inputFile + "\n" +
                        "   to: " + outputFile);
            }
            // we can be sure that it works
            setAvailable(true);
        }
        catch (Throwable e)
        {
            // a failed (re)initialization must never leave the transformer marked as available
            setAvailable(false);
            logger.error(
                    getClass().getSimpleName() + " not available: " +
                    (e.getMessage() != null ? e.getMessage() : ""));
            // debug so that we can trace the issue if required: the throwable is passed
            // separately so that the stack trace is logged and not just its toString()
            logger.debug(getClass().getSimpleName() + " initialization failure", e);
        }
    }

    /**
     * Some image formats are not supported by ImageMagick, or at least appear not to work.
     *
     * @param mimetype the mimetype to check (<tt>null</tt> is never supported)
     * @return Returns true if ImageMagic can handle the given image format
     */
    public static boolean isSupported(String mimetype)
    {
        if (mimetype == null)
        {
            return false;   // nothing to convert
        }
        else if (!mimetype.startsWith(MIMETYPE_IMAGE_PREFIX))
        {
            return false;   // not an image
        }
        else if (mimetype.equals(MimetypeMap.MIMETYPE_IMAGE_RGB))
        {
            return false;   // rgb extension doesn't work
        }
        else
        {
            return true;
        }
    }

    /**
     * Supports image to image conversion, but only if the transformer was found
     * to be working during {@link #init() initialization}.
     *
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns 1.0 if the transformer is available and both mimetypes are
     *      {@link #isSupported(String) supported}, otherwise 0.0
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        if (!available)
        {
            return 0.0;
        }
        if (!AbstractImageMagickContentTransformer.isSupported(sourceMimetype) ||
                !AbstractImageMagickContentTransformer.isSupported(targetMimetype))
        {
            // only support IMAGE -> IMAGE (excl. RGB)
            return 0.0;
        }
        else
        {
            return 1.0;
        }
    }

    /**
     * Copies the source content to a temporary file, transforms that into a second
     * temporary file and uploads the result.  Content that already has the target
     * mimetype is streamed straight through.
     *
     * @see #transformInternal(File, File, Map)
     */
    protected final void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception
    {
        // get mimetypes
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);
        // get the extensions to use
        String sourceExtension = mimetypeMap.getExtension(sourceMimetype);
        String targetExtension = mimetypeMap.getExtension(targetMimetype);
        if (sourceExtension == null || targetExtension == null)
        {
            throw new AlfrescoRuntimeException("Unknown extensions for mimetypes: \n" +
                    "   source mimetype: " + sourceMimetype + "\n" +
                    "   source extension: " + sourceExtension + "\n" +
                    "   target mimetype: " + targetMimetype + "\n" +
                    "   target extension: " + targetExtension);
        }
        // if the source mimetype is the same as the target's then just stream it
        if (sourceMimetype.equals(targetMimetype))
        {
            writer.putContent(reader.getContentInputStream());
            return;
        }
        // create required temp files
        File sourceFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_source_",
                "." + sourceExtension);
        File targetFile = TempFileProvider.createTempFile(
                getClass().getSimpleName() + "_target_",
                "." + targetExtension);
        // pull reader file into source temp file
        reader.getContent(sourceFile);
        // transform the source temp file to the target temp file
        transformInternal(sourceFile, targetFile, options);
        // check that the file was created
        if (!targetFile.exists())
        {
            throw new ContentIOException("JMagick transformation failed to write output file");
        }
        // upload the output image
        writer.putContent(targetFile);
        // done
        if (logger.isDebugEnabled())
        {
            logger.debug("Transformation completed: \n" +
                    "   source: " + reader + "\n" +
                    "   target: " + writer + "\n" +
                    "   options: " + options);
        }
    }

    /**
     * Transform the image content from the source file to the target file
     *
     * @param sourceFile the source of the transformation
     * @param targetFile the target of the transformation
     * @param options the transformation options supported by ImageMagick
     * @throws Exception
     */
    protected abstract void transformInternal(
            File sourceFile,
            File targetFile,
            Map<String, Object> options) throws Exception;
}

View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform.magick;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.util.exec.RuntimeExec;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Performs image transformations by executing a system command through a
 * {@link RuntimeExec}.
 *
 * @author Derek Hulley
 */
public class ImageMagickContentTransformer extends AbstractImageMagickContentTransformer
{
    /** key of the transformation option holding the command options, such as <b>--resize</b>, etc. */
    public static final String KEY_OPTIONS = "options";
    /** options variable name */
    public static final String VAR_OPTIONS = "options";
    /** source variable name */
    public static final String VAR_SOURCE = "source";
    /** target variable name */
    public static final String VAR_TARGET = "target";

    private static final Log logger = LogFactory.getLog(ImageMagickContentTransformer.class);

    /** the system command executer */
    private RuntimeExec executer;

    public ImageMagickContentTransformer()
    {
    }

    /**
     * Set the runtime command executer that must be executed in order to run
     * <b>ImageMagick</b>.  Whether or not this is the full path to the convertCommand
     * or just the convertCommand itself depends the environment setup.
     * <p>
     * The command must contain the variables <code>${source}</code> and
     * <code>${target}</code>, which will be replaced by the names of the file to
     * be transformed and the name of the output file respectively.
     * <pre>
     *    convert ${source} ${target}
     * </pre>
     *
     * @param executer the system command executer
     */
    public void setExecuter(RuntimeExec executer)
    {
        this.executer = executer;
    }

    /**
     * Checks that the command executer has been set before handing over to the
     * common initialization, which uses the
     * {@link #transformInternal(File, File, Map) transformation method} to check
     * that the sample image can be converted.
     */
    public void init()
    {
        if (executer != null)
        {
            super.init();
            return;
        }
        throw new AlfrescoRuntimeException("System runtime executer not set");
    }

    /**
     * Transform the image content from the source file to the target file by
     * executing the system command with the source, target and options variables.
     *
     * @param sourceFile the file to read the image from
     * @param targetFile the file to write the image to
     * @param options the transformation options; the value stored against
     *      {@link #KEY_OPTIONS} is passed to the command as the options variable
     * @throws ContentIOException if the command gives a non-zero exit value along with error output
     */
    protected void transformInternal(File sourceFile, File targetFile, Map<String, Object> options) throws Exception
    {
        // the values that get substituted into the command
        String commandOptions = (String) options.get(KEY_OPTIONS);
        Map<String, String> commandVariables = new HashMap<String, String>(5);
        commandVariables.put(VAR_OPTIONS, commandOptions);
        commandVariables.put(VAR_SOURCE, sourceFile.getAbsolutePath());
        commandVariables.put(VAR_TARGET, targetFile.getAbsolutePath());

        // run the command
        RuntimeExec.ExecutionResult executionResult = executer.execute(commandVariables);
        if (executionResult.getExitValue() != 0)
        {
            // a non-zero exit value only counts as a failure when there is error output as well
            String stdErr = executionResult.getStdErr();
            if (stdErr != null && stdErr.length() > 0)
            {
                throw new ContentIOException("Failed to perform ImageMagick transformation: \n" + executionResult);
            }
        }

        // success
        if (logger.isDebugEnabled())
        {
            logger.debug("ImageMagic executed successfully: \n" + executer);
        }
    }
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform.magick;
import java.util.Collections;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.util.exec.RuntimeExec;
/**
 * Tests the transformer that executes a system command.  The reliability
 * checks are skipped when the transformer is not available.
 *
 * @see org.alfresco.repo.content.transform.magick.ImageMagickContentTransformer
 *
 * @author Derek Hulley
 */
public class ImageMagickContentTransformerTest extends AbstractContentTransformerTest
{
    private ImageMagickContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        // the command to execute, with no options by default
        RuntimeExec commandExecuter = new RuntimeExec();
        commandExecuter.setCommand("imconvert.exe ${source} ${options} ${target}");
        commandExecuter.setDefaultProperties(Collections.singletonMap("options", ""));

        // build and initialize the transformer
        transformer = new ImageMagickContentTransformer();
        transformer.setMimetypeMap(mimetypeMap);
        transformer.setExecuter(commandExecuter);
        transformer.init();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Checks that image to text is rejected and that image to image is accepted.
     */
    public void testReliability() throws Exception
    {
        if (transformer.isAvailable())
        {
            double imageToText = transformer.getReliability(
                    MimetypeMap.MIMETYPE_IMAGE_GIF,
                    MimetypeMap.MIMETYPE_TEXT_PLAIN);
            assertEquals("Mimetype should not be supported", 0.0, imageToText);

            double imageToImage = transformer.getReliability(
                    MimetypeMap.MIMETYPE_IMAGE_GIF,
                    MimetypeMap.MIMETYPE_IMAGE_JPEG);
            assertEquals("Mimetype should be supported", 1.0, imageToImage);
        }
    }
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform.magick;
import java.io.File;
import java.util.Map;
import magick.ImageInfo;
import magick.MagickImage;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Makes use of the <b>JMagick</b> library to perform image conversions.
 *
 * @author Derek Hulley
 */
public class JMagickContentTransformer extends AbstractImageMagickContentTransformer
{
    private static final Log logger = LogFactory.getLog(JMagickContentTransformer.class);

    public JMagickContentTransformer()
    {
    }

    /**
     * Uses the <b>JMagick</b> library to perform the transformation: the image is
     * loaded from the source file, pointed at the target file and written out.
     *
     * @param sourceFile the file to read the image from
     * @param targetFile the file to write the image to
     * @param options the transformation options (not used by this method)
     * @throws Exception
     */
    @Override
    protected void transformInternal(File sourceFile, File targetFile, Map<String, Object> options) throws Exception
    {
        // load the image from the source location
        String sourcePath = sourceFile.getAbsolutePath();
        ImageInfo sourceInfo = new ImageInfo(sourcePath);
        MagickImage loadedImage = new MagickImage(sourceInfo);

        // redirect the image to the target location and write it
        String targetPath = targetFile.getAbsolutePath();
        loadedImage.setFileName(targetPath);
        loadedImage.writeImage(sourceInfo);
    }
}

View File

@@ -0,0 +1,63 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform.magick;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Tests the JMagick-based transformer.  The reliability checks are skipped
 * when the transformer is not available.
 *
 * @see org.alfresco.repo.content.transform.magick.JMagickContentTransformer
 *
 * @author Derek Hulley
 */
public class JMagickContentTransformerTest extends AbstractContentTransformerTest
{
    private static final Log logger = LogFactory.getLog(JMagickContentTransformerTest.class);

    private JMagickContentTransformer transformer;

    public void onSetUpInTransaction() throws Exception
    {
        // build and initialize the transformer
        transformer = new JMagickContentTransformer();
        transformer.setMimetypeMap(mimetypeMap);
        transformer.init();
    }

    /**
     * @return Returns the same transformer regardless - it is allowed
     */
    protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
    {
        return transformer;
    }

    /**
     * Checks that image to text is rejected and that image to image is accepted.
     */
    public void testReliability() throws Exception
    {
        if (transformer.isAvailable())
        {
            double imageToText = transformer.getReliability(
                    MimetypeMap.MIMETYPE_IMAGE_GIF,
                    MimetypeMap.MIMETYPE_TEXT_PLAIN);
            assertEquals("Mimetype should not be supported", 0.0, imageToText);

            double imageToImage = transformer.getReliability(
                    MimetypeMap.MIMETYPE_IMAGE_GIF,
                    MimetypeMap.MIMETYPE_IMAGE_JPEG);
            assertEquals("Mimetype should be supported", 1.0, imageToImage);
        }
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.1 KiB