mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-31 17:39:05 +00:00
Tika content transformer support for OOXML office
Enable explicit Tika content transform for OOXML files. Allow the Excel transformer (which does CSV as well as text/html) to handle .xlsx as well as .xls. Also update the .doc parser test to ensure that the older Word 6 and Word 95 files are correctly handled too. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@20781 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -377,6 +377,11 @@
|
||||
class="org.alfresco.repo.content.transform.PoiContentTransformer"
|
||||
parent="baseContentTransformer" />
|
||||
|
||||
<!-- This one handles the newer OOXML office formats, such as .xlsx and .docx -->
|
||||
<bean id="transformer.OOXML"
|
||||
class="org.alfresco.repo.content.transform.PoiOOXMLContentTransformer"
|
||||
parent="baseContentTransformer" />
|
||||
|
||||
<bean id="transformer.TextMining"
|
||||
class="org.alfresco.repo.content.transform.TextMiningContentTransformer"
|
||||
parent="baseContentTransformer" >
|
||||
|
@@ -39,6 +39,7 @@ import org.alfresco.repo.content.transform.OpenOfficeContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.PdfBoxContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.PoiContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.PoiHssfContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.PoiOOXMLContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.StringExtractingContentTransformerTest;
|
||||
import org.alfresco.repo.content.transform.TextMiningContentTransformerTest;
|
||||
@@ -107,6 +108,7 @@ public class ContentMinimalContextTestSuite extends TestSuite
|
||||
suite.addTestSuite(PdfBoxContentTransformerTest.class);
|
||||
suite.addTestSuite(PoiContentTransformerTest.class);
|
||||
suite.addTestSuite(PoiHssfContentTransformerTest.class);
|
||||
suite.addTestSuite(PoiOOXMLContentTransformerTest.class);
|
||||
suite.addTestSuite(RuntimeExecutableContentTransformerTest.class);
|
||||
suite.addTestSuite(StringExtractingContentTransformerTest.class);
|
||||
suite.addTestSuite(TextMiningContentTransformerTest.class);
|
||||
|
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* {@link http://tika.apache.org/ Apache Tika} assumes that
|
||||
* you either know exactly what your content is, or that
|
||||
* you'll leave it to auto-detection.
|
||||
* Within Alfresco, we usually do know. However, from time
|
||||
* to time, we don't know if we have one of the old or one
|
||||
* of the new office files (eg .xls and .xlsx).
|
||||
* This class allows automatically selects the appropriate
|
||||
* old (OLE2) or new (OOXML) Tika parser as required.
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class TikaOfficeDetectParser implements Parser {
|
||||
private Parser ole2Parser = new OfficeParser();
|
||||
private Parser ooxmlParser = new OOXMLParser();
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
|
||||
Set<MediaType> types = new HashSet<MediaType>();
|
||||
types.addAll(ole2Parser.getSupportedTypes(parseContext));
|
||||
types.addAll(ooxmlParser.getSupportedTypes(parseContext));
|
||||
return types;
|
||||
}
|
||||
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata,
|
||||
ParseContext parseContext) throws IOException, SAXException,
|
||||
TikaException
|
||||
{
|
||||
PushbackInputStream inp = new PushbackInputStream(stream, 4);
|
||||
byte[] initial4 = new byte[4];
|
||||
IOUtils.readFully(inp, initial4);
|
||||
inp.unread(initial4);
|
||||
|
||||
// Which is it?
|
||||
if(initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
|
||||
initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
|
||||
initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
|
||||
initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
|
||||
{
|
||||
ooxmlParser.parse(inp, handler, metadata, parseContext);
|
||||
}
|
||||
else
|
||||
{
|
||||
ole2Parser.parse(inp, handler, metadata, parseContext);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated This method will be removed in Apache Tika 1.0.
|
||||
*/
|
||||
public void parse(InputStream stream,
|
||||
ContentHandler handler, Metadata metadata)
|
||||
throws IOException, SAXException, TikaException
|
||||
{
|
||||
parse(stream, handler, metadata, new ParseContext());
|
||||
}
|
||||
}
|
@@ -111,14 +111,14 @@ public abstract class AbstractContentTransformerTest extends TestCase
|
||||
* Helper method to load one of the "The quick brown fox" files from the
|
||||
* classpath.
|
||||
*
|
||||
* @param extension the extension of the file required, e.g. <b>txt</b>
|
||||
* @param quickname the file required, eg <b>quick.txt</b>
|
||||
* @return Returns a test resource loaded from the classpath or <tt>null</tt> if
|
||||
* no resource could be found.
|
||||
* @throws IOException
|
||||
*/
|
||||
public static File loadQuickTestFile(String extension) throws IOException
|
||||
public static File loadNamedQuickTestFile(String quickname) throws IOException
|
||||
{
|
||||
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/quick." + extension);
|
||||
URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + quickname);
|
||||
if (url == null)
|
||||
{
|
||||
return null;
|
||||
@@ -130,6 +130,34 @@ public abstract class AbstractContentTransformerTest extends TestCase
|
||||
}
|
||||
return file;
|
||||
}
|
||||
/**
|
||||
* Helper method to load one of the "The quick brown fox" files from the
|
||||
* classpath.
|
||||
*
|
||||
* @param the file extension required, eg <b>txt</b> for the file quick.txt
|
||||
* @return Returns a test resource loaded from the classpath or <tt>null</tt> if
|
||||
* no resource could be found.
|
||||
* @throws IOException
|
||||
*/
|
||||
public static File loadQuickTestFile(String extension) throws IOException
|
||||
{
|
||||
return loadNamedQuickTestFile("quick."+extension);
|
||||
}
|
||||
|
||||
/**
|
||||
* For the given mime type, returns one or more quick*
|
||||
* files to be tested.
|
||||
* By default this is just quick + the default extension.
|
||||
* However, you can override this if you need special
|
||||
* rules, eg quickOld.foo, quickMid.foo and quickNew.foo
|
||||
* for differing versions of the file format.
|
||||
*/
|
||||
protected String[] getQuickFilenames(String sourceMimetype) {
|
||||
String sourceExtension = mimetypeService.getExtension(sourceMimetype);
|
||||
return new String[] {
|
||||
"quick." + sourceExtension
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the full range of transformations available on the
|
||||
@@ -160,120 +188,124 @@ public abstract class AbstractContentTransformerTest extends TestCase
|
||||
for (String sourceMimetype : mimetypes)
|
||||
{
|
||||
// attempt to get a source file for each mimetype
|
||||
String sourceExtension = mimetypeService.getExtension(sourceMimetype);
|
||||
String[] quickFiles = getQuickFilenames(sourceMimetype);
|
||||
sb.append(" Source Files: ").append(quickFiles).append("\n");
|
||||
|
||||
sb.append(" Source Extension: ").append(sourceExtension).append("\n");
|
||||
|
||||
// attempt to convert to every other mimetype
|
||||
for (String targetMimetype : mimetypes)
|
||||
for (String quickFile : quickFiles)
|
||||
{
|
||||
if (sourceMimetype.equals(targetMimetype))
|
||||
{
|
||||
// Don't test like-to-like transformations
|
||||
continue;
|
||||
}
|
||||
ContentWriter targetWriter = null;
|
||||
// construct a reader onto the source file
|
||||
String targetExtension = mimetypeService.getExtension(targetMimetype);
|
||||
String sourceExtension = quickFile.substring(quickFile.lastIndexOf('.')+1);
|
||||
|
||||
// must we test the transformation?
|
||||
ContentTransformer transformer = getTransformer(sourceMimetype, targetMimetype);
|
||||
if (transformer == null || transformer.isTransformable(sourceMimetype, targetMimetype, null) == false)
|
||||
{
|
||||
// no transformer
|
||||
continue;
|
||||
}
|
||||
// attempt to convert to every other mimetype
|
||||
for (String targetMimetype : mimetypes)
|
||||
{
|
||||
if (sourceMimetype.equals(targetMimetype))
|
||||
{
|
||||
// Don't test like-to-like transformations
|
||||
continue;
|
||||
}
|
||||
ContentWriter targetWriter = null;
|
||||
// construct a reader onto the source file
|
||||
String targetExtension = mimetypeService.getExtension(targetMimetype);
|
||||
|
||||
if (isTransformationExcluded(sourceExtension, targetExtension))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// must we test the transformation?
|
||||
ContentTransformer transformer = getTransformer(sourceMimetype, targetMimetype);
|
||||
if (transformer == null || transformer.isTransformable(sourceMimetype, targetMimetype, null) == false)
|
||||
{
|
||||
// no transformer
|
||||
continue;
|
||||
}
|
||||
|
||||
// dump
|
||||
sb.append(" Target Extension: ").append(targetExtension);
|
||||
sb.append(" <").append(transformer.getClass().getSimpleName()).append(">");
|
||||
if (isTransformationExcluded(sourceExtension, targetExtension))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// is there a test file for this conversion?
|
||||
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(sourceExtension);
|
||||
if (sourceFile == null)
|
||||
{
|
||||
sb.append(" <no source test file>\n");
|
||||
continue; // no test file available for that extension
|
||||
}
|
||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||
// dump
|
||||
sb.append(" Target Extension: ").append(targetExtension);
|
||||
sb.append(" <").append(transformer.getClass().getSimpleName()).append(">");
|
||||
|
||||
// perform the transformation several times so that we get a good idea of performance
|
||||
int count = 0;
|
||||
long before = System.currentTimeMillis();
|
||||
Set<String> transformerClasses = new HashSet<String>(2);
|
||||
for (int i = 0; i < 5; i++)
|
||||
{
|
||||
// get the transformer repeatedly as it might be different each time around
|
||||
transformer = getTransformer(sourceMimetype, targetMimetype);
|
||||
// must we report on this class?
|
||||
if (!transformerClasses.contains(transformer.getClass().getName()))
|
||||
{
|
||||
transformerClasses.add(transformer.getClass().getName());
|
||||
sb.append(" <").append(transformer.getClass().getSimpleName()).append(">");
|
||||
}
|
||||
// is there a test file for this conversion?
|
||||
File sourceFile = AbstractContentTransformerTest.loadNamedQuickTestFile(quickFile);
|
||||
if (sourceFile == null)
|
||||
{
|
||||
sb.append(" <no source test file>\n");
|
||||
continue; // no test file available for that extension
|
||||
}
|
||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||
|
||||
// make a writer for the target file
|
||||
File targetFile = TempFileProvider.createTempFile(
|
||||
getClass().getSimpleName() + "_" + getName() + "_" + sourceExtension + "_",
|
||||
"." + targetExtension);
|
||||
targetWriter = new FileContentWriter(targetFile);
|
||||
// perform the transformation several times so that we get a good idea of performance
|
||||
int count = 0;
|
||||
long before = System.currentTimeMillis();
|
||||
Set<String> transformerClasses = new HashSet<String>(2);
|
||||
for (int i = 0; i < 5; i++)
|
||||
{
|
||||
// get the transformer repeatedly as it might be different each time around
|
||||
transformer = getTransformer(sourceMimetype, targetMimetype);
|
||||
// must we report on this class?
|
||||
if (!transformerClasses.contains(transformer.getClass().getName()))
|
||||
{
|
||||
transformerClasses.add(transformer.getClass().getName());
|
||||
sb.append(" <").append(transformer.getClass().getSimpleName()).append(">");
|
||||
}
|
||||
|
||||
// do the transformation
|
||||
sourceReader.setMimetype(sourceMimetype);
|
||||
targetWriter.setMimetype(targetMimetype);
|
||||
transformer.transform(sourceReader.getReader(), targetWriter);
|
||||
// make a writer for the target file
|
||||
File targetFile = TempFileProvider.createTempFile(
|
||||
getClass().getSimpleName() + "_" + getName() + "_" + sourceExtension + "_",
|
||||
"." + targetExtension);
|
||||
targetWriter = new FileContentWriter(targetFile);
|
||||
|
||||
// if the target format is any type of text, then it must contain the 'quick' phrase
|
||||
if (isQuickPhraseExpected(targetMimetype))
|
||||
{
|
||||
ContentReader targetReader = targetWriter.getReader();
|
||||
String checkContent = targetReader.getContentString();
|
||||
assertTrue("Quick phrase not present in document converted to text: \n" +
|
||||
" transformer: " + transformer + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter,
|
||||
checkContent.contains(QUICK_CONTENT));
|
||||
// do the transformation
|
||||
sourceReader.setMimetype(sourceMimetype);
|
||||
targetWriter.setMimetype(targetMimetype);
|
||||
transformer.transform(sourceReader.getReader(), targetWriter);
|
||||
|
||||
// Let subclasses do extra checks if they want
|
||||
additionalContentCheck(sourceMimetype, targetMimetype, checkContent);
|
||||
}
|
||||
else if (isQuickWordsExpected(targetMimetype))
|
||||
{
|
||||
ContentReader targetReader = targetWriter.getReader();
|
||||
String checkContent = targetReader.getContentString();
|
||||
// essentially check that FTS indexing can use the conversion properly
|
||||
for (int word = 0; word < QUICK_WORDS.length; word++)
|
||||
{
|
||||
assertTrue("Quick phrase word not present in document converted to text: \n" +
|
||||
" transformer: " + transformer + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter + "\n" +
|
||||
" word: " + word,
|
||||
checkContent.contains(QUICK_WORDS[word]));
|
||||
}
|
||||
}
|
||||
// increment count
|
||||
count++;
|
||||
}
|
||||
long after = System.currentTimeMillis();
|
||||
double average = (double) (after - before) / (double) count;
|
||||
// if the target format is any type of text, then it must contain the 'quick' phrase
|
||||
if (isQuickPhraseExpected(targetMimetype))
|
||||
{
|
||||
ContentReader targetReader = targetWriter.getReader();
|
||||
String checkContent = targetReader.getContentString();
|
||||
assertTrue("Quick phrase not present in document converted to text: \n" +
|
||||
" transformer: " + transformer + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter,
|
||||
checkContent.contains(QUICK_CONTENT));
|
||||
|
||||
// dump
|
||||
sb.append(String.format(" average %10.0f ms", average)).append("\n");
|
||||
// Let subclasses do extra checks if they want
|
||||
additionalContentCheck(sourceMimetype, targetMimetype, checkContent);
|
||||
}
|
||||
else if (isQuickWordsExpected(targetMimetype))
|
||||
{
|
||||
ContentReader targetReader = targetWriter.getReader();
|
||||
String checkContent = targetReader.getContentString();
|
||||
// essentially check that FTS indexing can use the conversion properly
|
||||
for (int word = 0; word < QUICK_WORDS.length; word++)
|
||||
{
|
||||
assertTrue("Quick phrase word not present in document converted to text: \n" +
|
||||
" transformer: " + transformer + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter + "\n" +
|
||||
" word: " + word,
|
||||
checkContent.contains(QUICK_WORDS[word]));
|
||||
}
|
||||
}
|
||||
// increment count
|
||||
count++;
|
||||
}
|
||||
long after = System.currentTimeMillis();
|
||||
double average = (double) (after - before) / (double) count;
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Transformation performed " + count + " time: " +
|
||||
sourceMimetype + " --> " + targetMimetype + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter + "\n" +
|
||||
" transformer: " + getTransformer(sourceMimetype, targetMimetype));
|
||||
}
|
||||
// dump
|
||||
sb.append(String.format(" average %10.0f ms", average)).append("\n");
|
||||
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Transformation performed " + count + " time: " +
|
||||
sourceMimetype + " --> " + targetMimetype + "\n" +
|
||||
" source: " + sourceReader + "\n" +
|
||||
" target: " + targetWriter + "\n" +
|
||||
" transformer: " + getTransformer(sourceMimetype, targetMimetype));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -46,7 +46,7 @@ public class PoiContentTransformer extends TikaPoweredContentTransformer
|
||||
public static ArrayList<String> SUPPORTED_MIMETYPES;
|
||||
static {
|
||||
SUPPORTED_MIMETYPES = new ArrayList<String>();
|
||||
OfficeParser p = new OfficeParser();
|
||||
Parser p = new OfficeParser();
|
||||
for(MediaType mt : p.getSupportedTypes(null)) {
|
||||
if(mt.toString().equals(MimetypeMap.MIMETYPE_EXCEL))
|
||||
{
|
||||
|
@@ -24,11 +24,11 @@ import java.util.regex.Pattern;
|
||||
import javax.xml.transform.TransformerConfigurationException;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.TikaOfficeDetectParser;
|
||||
import org.alfresco.service.cmr.repository.TransformationOptions;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.microsoft.OfficeParser;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
@@ -56,14 +56,15 @@ public class PoiHssfContentTransformer extends TikaPoweredContentTransformer
|
||||
public PoiHssfContentTransformer()
|
||||
{
|
||||
super(new String[] {
|
||||
MimetypeMap.MIMETYPE_EXCEL
|
||||
MimetypeMap.MIMETYPE_EXCEL,
|
||||
MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser()
|
||||
{
|
||||
return new OfficeParser();
|
||||
return new TikaOfficeDetectParser();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -46,7 +46,14 @@ public class PoiHssfContentTransformerTest extends TikaPoweredContentTransformer
|
||||
transformer = new PoiHssfContentTransformer();
|
||||
}
|
||||
|
||||
/**
|
||||
@Override
|
||||
protected String[] getQuickFilenames(String sourceMimetype) {
|
||||
return new String[] {
|
||||
"quick.xls", "quick.xlsx"
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
|
||||
|
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.transform;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
|
||||
|
||||
/**
|
||||
* Uses {@link http://tika.apache.org/ Apache Tika} and
|
||||
* {@link http://poi.apache.org/ Apache POI} to perform
|
||||
* conversions from the newer OOXML Office documents.
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class PoiOOXMLContentTransformer extends TikaPoweredContentTransformer
|
||||
{
|
||||
/**
|
||||
* We support all the office mimetypes that the Tika
|
||||
* office parser can handle
|
||||
*/
|
||||
public static ArrayList<String> SUPPORTED_MIMETYPES;
|
||||
static {
|
||||
SUPPORTED_MIMETYPES = new ArrayList<String>();
|
||||
Parser p = new OOXMLParser();
|
||||
for(MediaType mt : p.getSupportedTypes(null)) {
|
||||
SUPPORTED_MIMETYPES.add( mt.toString() );
|
||||
}
|
||||
}
|
||||
|
||||
public PoiOOXMLContentTransformer() {
|
||||
super(SUPPORTED_MIMETYPES);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Parser getParser() {
|
||||
return new OOXMLParser();
|
||||
}
|
||||
}
|
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2005-2010 Alfresco Software Limited.
|
||||
*
|
||||
* This file is part of Alfresco
|
||||
*
|
||||
* Alfresco is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Alfresco is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.alfresco.repo.content.transform;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.TransformationOptions;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.transform.PoiOOXMLContentTransformer
|
||||
*
|
||||
* @author Nick Burch
|
||||
*/
|
||||
public class PoiOOXMLContentTransformerTest extends AbstractContentTransformerTest
|
||||
{
|
||||
private ContentTransformer transformer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
|
||||
transformer = new PoiOOXMLContentTransformer();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
|
||||
{
|
||||
return transformer;
|
||||
}
|
||||
|
||||
public void testIsTransformable() throws Exception
|
||||
{
|
||||
assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING, MimetypeMap.MIMETYPE_TEXT_PLAIN, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING, MimetypeMap.MIMETYPE_HTML, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_WORDPROCESSING, MimetypeMap.MIMETYPE_XML, new TransformationOptions()));
|
||||
|
||||
assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_OPENXML_PRESENTATION, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_PRESENTATION, MimetypeMap.MIMETYPE_TEXT_PLAIN, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_PRESENTATION, MimetypeMap.MIMETYPE_HTML, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_PRESENTATION, MimetypeMap.MIMETYPE_XML, new TransformationOptions()));
|
||||
|
||||
assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET, MimetypeMap.MIMETYPE_TEXT_PLAIN, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET, MimetypeMap.MIMETYPE_HTML, new TransformationOptions()));
|
||||
assertTrue(transformer.isTransformable(MimetypeMap.MIMETYPE_OPENXML_SPREADSHEET, MimetypeMap.MIMETYPE_XML, new TransformationOptions()));
|
||||
}
|
||||
}
|
@@ -52,6 +52,13 @@ public class TextMiningContentTransformerTest extends AbstractContentTransformer
|
||||
return transformer;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String[] getQuickFilenames(String sourceMimetype) {
|
||||
return new String[] {
|
||||
"quick.doc", "quick95.doc", "quick6.doc"
|
||||
};
|
||||
}
|
||||
|
||||
public void testIsTransformable() throws Exception
|
||||
{
|
||||
assertFalse(transformer.isTransformable(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD, new TransformationOptions()));
|
||||
|
Reference in New Issue
Block a user