Upgraded JOOConverter to V2.0.0

- Fixes AR-505
 - OpenOffice transformations are config driven
 - Incorporated WordPerfect transformations


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3367 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley 2006-07-21 14:14:11 +00:00
parent eda985bb31
commit 2c2816a39b
11 changed files with 485 additions and 401 deletions

View File

@ -87,6 +87,8 @@
</constructor-arg>
</bean>
<bean id="openOfficeConnection" class="net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection" />
<!-- Metadata Extraction Regisitry -->
<bean id="metadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry" >
<property name="mimetypeMap">
@ -113,7 +115,11 @@
<bean class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
<bean class="org.alfresco.repo.content.metadata.UnoMetadataExtracter" parent="baseMetadataExtracter" init-method="init" />
<bean class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter" parent="baseMetadataExtracter" init-method="init" >
<property name="connection">
<ref bean="openOfficeConnection" />
</property>
</bean>
<!-- Content Transformation Regisitry -->
@ -180,9 +186,15 @@
parent="baseContentTransformer" />
<bean id="transformer.OpenOffice"
class="org.alfresco.repo.content.transform.UnoContentTransformer"
parent="baseContentTransformer"
init-method="init" />
class="org.alfresco.repo.content.transform.OpenOfficeContentTransformer"
parent="baseContentTransformer" >
<property name="connection">
<ref bean="openOfficeConnection" />
</property>
<property name="documentFormatsConfiguration">
<value>classpath:alfresco/mimetype/openoffice-document-formats.xml</value>
</property>
</bean>
<bean id="transformer.complex.OpenOffice.PdfBox"
class="org.alfresco.repo.content.transform.ComplexContentTransformer"

View File

@ -0,0 +1,154 @@
<?xml version="1.0"?>
<document-formats>
<!-- Export-Only Formats (no family attribute) -->
<document-format><name>Portable Document Format</name>
<mime-type>application/pdf</mime-type>
<file-extension>pdf</file-extension>
<export-filters>
<entry><family>Presentation</family><string>impress_pdf_Export</string></entry>
<entry><family>Spreadsheet</family><string>calc_pdf_Export</string></entry>
<entry><family>Text</family><string>writer_pdf_Export</string></entry>
</export-filters>
</document-format>
<document-format><name>Macromedia Flash</name>
<mime-type>application/x-shockwave-flash</mime-type>
<file-extension>swf</file-extension>
<export-filters>
<entry><family>Presentation</family><string>impress_flash_Export</string></entry>
</export-filters>
</document-format>
<!--
- Note: (X)HTML formats are here for completeness but they are currently unsupported because
- 1. additional files may be generated for images and this would require extra care in a servlet environment
- 2. output quality does not seem to be very good in many cases
-->
<document-format><name>HTML</name>
<mime-type>text/html</mime-type>
<file-extension>html</file-extension>
<export-filters>
<entry><family>Presentation</family><string>impress_html_Export</string></entry>
<entry><family>Spreadsheet</family><string>HTML (StarCalc)</string></entry>
<entry><family>Text</family><string>HTML (StarWriter)</string></entry>
</export-filters>
</document-format>
<!-- Text (Word Processor) Formats -->
<document-format><name>OpenDocument Text</name>
<family>Text</family>
<mime-type>application/vnd.oasis.opendocument.text</mime-type>
<file-extension>odt</file-extension>
<export-filters>
<entry><family>Text</family><string>writer8</string></entry>
</export-filters>
</document-format>
<document-format><name>OpenOffice.org 1.0 Text Document</name>
<family>Text</family>
<mime-type>application/vnd.sun.xml.writer</mime-type>
<file-extension>sxw</file-extension>
<export-filters>
<entry><family>Text</family><string>StarOffice XML (Writer)</string></entry>
</export-filters>
</document-format>
<document-format><name>Microsoft Word</name>
<family>Text</family>
<mime-type>application/msword</mime-type>
<file-extension>doc</file-extension>
<export-filters>
<entry><family>Text</family><string>MS Word 97</string></entry>
</export-filters>
</document-format>
<document-format><name>WordPerfect</name>
<family>Text</family>
<mime-type>application/wordperfect</mime-type>
<file-extension>wpd</file-extension>
<export-filters>
<entry><family>Text</family><string>WordPerfect</string></entry>
</export-filters>
</document-format>
<document-format><name>Rich Text Format</name>
<family>Text</family>
<mime-type>text/rtf</mime-type>
<file-extension>rtf</file-extension>
<export-filters>
<entry><family>Text</family><string>Rich Text Format</string></entry>
</export-filters>
</document-format>
<document-format><name>Plain Text</name>
<family>Text</family>
<mime-type>text/plain</mime-type>
<file-extension>txt</file-extension>
<export-filters>
<entry><family>Text</family><string>Text</string></entry>
</export-filters>
</document-format>
<!-- Spreadsheet Formats -->
<document-format><name>OpenDocument Spreadsheet</name>
<family>Spreadsheet</family>
<mime-type>application/vnd.oasis.opendocument.spreadsheet</mime-type>
<file-extension>ods</file-extension>
<export-filters>
<entry><family>Spreadsheet</family><string>calc8</string></entry>
</export-filters>
</document-format>
<document-format><name>OpenOffice.org 1.0 Spreadsheet</name>
<family>Spreadsheet</family>
<mime-type>application/vnd.sun.xml.calc</mime-type>
<file-extension>sxc</file-extension>
<export-filters>
<entry><family>Spreadsheet</family><string>StarOffice XML (Calc)</string></entry>
</export-filters>
</document-format>
<document-format><name>Microsoft Excel</name>
<family>Spreadsheet</family>
<mime-type>application/application/vnd.ms-excel</mime-type>
<file-extension>xls</file-extension>
<export-filters>
<entry><family>Spreadsheet</family><string>MS Excel 97</string></entry>
</export-filters>
</document-format>
<!-- Presentation Formats -->
<document-format><name>OpenDocument Presentation</name>
<family>Presentation</family>
<mime-type>application/vnd.oasis.opendocument.presentation</mime-type>
<file-extension>odp</file-extension>
<export-filters>
<entry><family>Presentation</family><string>impress8</string></entry>
</export-filters>
</document-format>
<document-format><name>OpenOffice.org 1.0 Presentation</name>
<family>Presentation</family>
<mime-type>application/vnd.sun.xml.impress</mime-type>
<file-extension>sxi</file-extension>
<export-filters>
<entry><family>Presentation</family><string>StarOffice XML (Impress)</string></entry>
</export-filters>
</document-format>
<document-format><name>Microsoft PowerPoint</name>
<family>Presentation</family>
<mime-type>application/vnd.ms-powerpoint</mime-type>
<file-extension>ppt</file-extension>
<export-filters>
<entry><family>Presentation</family><string>MS PowerPoint 97</string></entry>
</export-filters>
</document-format>
</document-formats>

View File

@ -67,8 +67,6 @@ import org.alfresco.service.transaction.TransactionService;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.sun.star.uno.RuntimeException;
/**
* FTP Server Session Class
*

View File

@ -40,6 +40,8 @@ import org.apache.commons.logging.LogFactory;
*/
public class MimetypeMap implements MimetypeService
{
public static final String EXTENSION_BINARY = "bin";
public static final String MIMETYPE_TEXT_PLAIN = "text/plain";
public static final String MIMETYPE_TEXT_CSS = "text/css";
public static final String MIMETYPE_XML = "text/xml";
@ -87,6 +89,8 @@ public class MimetypeMap implements MimetypeService
public static final String MIMETYPE_STAROFFICE5_WRITER = "application/vnd.stardivision.writer";
public static final String MIMETYPE_STAROFFICE5_WRITER_GLOBAL = "application/vnd.stardivision.writer-global";
public static final String MIMETYPE_STAROFFICE5_MATH = "application/vnd.stardivision.math";
// WordPerfect
public static final String MIMETYPE_WORDPERFECT = "application/wordperfect";
// Audio
public static final String MIMETYPE_MP3 = "audio/x-mpeg";
// Alfresco
@ -207,18 +211,26 @@ public class MimetypeMap implements MimetypeService
}
/**
* Get the file extension associated with the mimetype.
*
* @param mimetype a valid mimetype
* @return Returns the default extension for the mimetype
* @throws AlfrescoRuntimeException if the mimetype doesn't exist
* @return Returns the default extension for the mimetype. Returns the {@link #MIMETYPE_BINARY binary}
* mimetype extension.
*
* @see #MIMETYPE_BINARY
* @see #EXTENSION_BINARY
*/
public String getExtension(String mimetype)
{
String extension = extensionsByMimetype.get(mimetype);
if (extension == null)
{
throw new AlfrescoRuntimeException("No extension available for mimetype: " + mimetype);
return EXTENSION_BINARY;
}
else
{
return extension;
}
return extension;
}
public Map<String, String> getDisplaysByExtension()

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@ -24,12 +24,13 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import net.sf.joott.uno.UnoConnection;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
import com.sun.star.beans.PropertyValue;
@ -41,9 +42,9 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
* @author Jesper Steen Møller
* @author Jesper Steen M<EFBFBD>ller
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
@ -55,31 +56,26 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// quality since they involve conversion.
};
private String contentUrl;
private MyUnoConnection connection;
private OpenOfficeConnection connection;
private boolean isConnected;
public UnoMetadataExtracter()
public OpenOfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
}
/**
*
* @param contentUrl the URL to connect to
*/
public void setContentUrl(String contentUrl)
public void setConnection(OpenOfficeConnection connection)
{
this.contentUrl = contentUrl;
this.connection = connection;
}
/**
* Initialises the bean by establishing an UNO connection
*/
public synchronized void init()
{
connection = new MyUnoConnection(contentUrl);
PropertyCheck.mandatory("OpenOfficeMetadataExtracter", "connection", connection);
// attempt to make an connection
try
{
@ -109,7 +105,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "."
"OpenOfficeMetadataExtracter-", "."
+ getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
@ -158,9 +154,9 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
}
}
public String toUrl(File file, MyUnoConnection connection) throws ConnectException
public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException
{
Object fcp = connection.getFileContentService();
Object fcp = connection.getFileContentProvider();
XFileIdentifierConverter fic = (XFileIdentifierConverter) UnoRuntime.queryInterface(
XFileIdentifierConverter.class, fcp);
return fic.getFileURLFromSystemPath("", file.getAbsolutePath());
@ -181,17 +177,4 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
property.Value = value;
return property;
}
static class MyUnoConnection extends UnoConnection
{
public MyUnoConnection(String url)
{
super(url);
}
public Object getFileContentService() throws ConnectException
{
return getService("com.sun.star.ucb.FileContentProvider");
}
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@ -16,20 +16,27 @@
*/
package org.alfresco.repo.content.metadata;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection;
/**
* @author Jesper Steen Møller
* @author Jesper Steen M<EFBFBD>ller
*/
public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
public class OpenOfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private UnoMetadataExtracter extracter;
private OpenOfficeMetadataExtracter extracter;
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new UnoMetadataExtracter();
OpenOfficeConnection connection = new SocketOpenOfficeConnection();
extracter = new OpenOfficeMetadataExtracter();
extracter.setMimetypeService(mimetypeMap);
extracter.setConnection(connection);
extracter.init();
}
@ -48,7 +55,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
@ -61,7 +68,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
testExtractFromMimetype(mimetype);
}

View File

@ -123,12 +123,8 @@ public abstract class AbstractContentTransformer implements ContentTransformer
{
if (registry == null)
{
if (registry == null)
{
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
}
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
}
// first register any explicit transformations

View File

@ -50,7 +50,6 @@ public class ContentTransformerRegistry
private MimetypeMap mimetypeMap;
/** Cache of previously used transactions */
private Map<TransformationKey, List<ContentTransformer>> transformationCache;
private short accessCount;
/** Controls read access to the transformation cache */
private Lock transformationCacheReadLock;
/** controls write access to the transformation cache */
@ -67,7 +66,6 @@ public class ContentTransformerRegistry
this.transformers = new ArrayList<ContentTransformer>(10);
transformationCache = new HashMap<TransformationKey, List<ContentTransformer>>(17);
accessCount = 0;
// create lock objects for access to the cache
ReadWriteLock transformationCacheLock = new ReentrantReadWriteLock();
transformationCacheReadLock = transformationCacheLock.readLock();
@ -120,7 +118,6 @@ public class ContentTransformerRegistry
try
{
transformationCache.clear();
accessCount = 0;
}
finally
{
@ -243,7 +240,6 @@ public class ContentTransformerRegistry
private List<ContentTransformer> findDirectTransformers(String sourceMimetype, String targetMimetype)
{
double maxReliability = 0.0;
long leastTime = 100000L; // 100 seconds - longer than anyone would think of waiting
List<ContentTransformer> bestTransformers = new ArrayList<ContentTransformer>(2);
// loop through transformers
for (ContentTransformer transformer : this.transformers)
@ -289,6 +285,7 @@ public class ContentTransformerRegistry
/**
* Recursive method to build up a list of content transformers
*/
@SuppressWarnings("unused")
private void buildTransformer(List<ContentTransformer> transformers,
double reliability,
List<String> touchedMimetypes,

View File

@ -0,0 +1,254 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.util.Map;
import net.sf.jooreports.converter.DocumentFamily;
import net.sf.jooreports.converter.DocumentFormat;
import net.sf.jooreports.converter.DocumentFormatRegistry;
import net.sf.jooreports.converter.XmlDocumentFormatRegistry;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import net.sf.jooreports.openoffice.connection.OpenOfficeException;
import net.sf.jooreports.openoffice.converter.OpenOfficeDocumentConverter;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.core.io.DefaultResourceLoader;
/**
* Makes use of the {@link http://sourceforge.net/projects/joott/ JOOConverter} library to
* perform OpenOffice-drive conversions.
*
* @author Derek Hulley
*/
public class OpenOfficeContentTransformer extends AbstractContentTransformer
{
private static Log logger = LogFactory.getLog(OpenOfficeContentTransformer.class);
private OpenOfficeConnection connection;
private boolean connected;
private OpenOfficeDocumentConverter converter;
private String documentFormatsConfiguration;
private DocumentFormatRegistry formatRegistry;
public OpenOfficeContentTransformer()
{
this.connected = false;
}
public void setConnection(OpenOfficeConnection connection)
{
this.connection = connection;
}
/**
* Set a non-default location from which to load the document format mappings.
*
* @param path a resource location supporting the <b>file:</b> or <b>classpath:</b> prefixes
*/
public void setDocumentFormatsConfiguration(String path)
{
this.documentFormatsConfiguration = path;
}
public boolean isConnected()
{
return connected;
}
private synchronized void connect()
{
try
{
connection.connect();
connected = true;
}
catch (ConnectException e)
{
logger.warn(e.getMessage());
connected = false;
}
}
@Override
public void register()
{
PropertyCheck.mandatory("OpenOfficeContentTransformer", "connection", connection);
// attempt to establish a connection
connect();
// set up the converter
converter = new OpenOfficeDocumentConverter(connection);
// load the document conversion configuration
if (documentFormatsConfiguration != null)
{
DefaultResourceLoader resourceLoader = new DefaultResourceLoader();
try
{
InputStream is = resourceLoader.getResource(documentFormatsConfiguration).getInputStream();
formatRegistry = new XmlDocumentFormatRegistry(is);
}
catch (IOException e)
{
throw new AlfrescoRuntimeException(
"Unable to load document formats configuration file: " + documentFormatsConfiguration);
}
}
else
{
formatRegistry = new XmlDocumentFormatRegistry();
}
if (connected)
{
// register
super.register();
}
}
/**
* @see DocumentFormatRegistry
*/
public double getReliability(String sourceMimetype, String targetMimetype)
{
if (!connected)
{
return 0.0;
}
// there are some conversions that fail, despite the converter believing them possible
if (targetMimetype.equals(MimetypeMap.MIMETYPE_XHTML))
{
return 0.0;
}
else if (targetMimetype.equals(MimetypeMap.MIMETYPE_WORDPERFECT))
{
return 0.0;
}
MimetypeService mimetypeService = getMimetypeService();
String sourceExtension = mimetypeService.getExtension(sourceMimetype);
String targetExtension = mimetypeService.getExtension(targetMimetype);
// query the registry for the source format
DocumentFormat sourceFormat = formatRegistry.getFormatByFileExtension(sourceExtension);
if (sourceFormat == null)
{
// no document format
return 0.0;
}
// query the registry for the target format
DocumentFormat targetFormat = formatRegistry.getFormatByFileExtension(targetExtension);
if (targetFormat == null)
{
// no document format
return 0.0;
}
// get the family of the target document
DocumentFamily sourceFamily = sourceFormat.getFamily();
// does the format support the conversion
if (!targetFormat.isExportableFrom(sourceFamily))
{
// unable to export from source family of documents to the target format
return 0.0;
}
else
{
return 1.0;
}
}
protected void transformInternal(
ContentReader reader,
ContentWriter writer,
Map<String, Object> options) throws Exception
{
String sourceMimetype = getMimetype(reader);
String targetMimetype = getMimetype(writer);
MimetypeService mimetypeService = getMimetypeService();
String sourceExtension = mimetypeService.getExtension(sourceMimetype);
String targetExtension = mimetypeService.getExtension(targetMimetype);
// query the registry for the source format
DocumentFormat sourceFormat = formatRegistry.getFormatByFileExtension(sourceExtension);
if (sourceFormat == null)
{
// source format is not recognised
throw new ContentIOException("No OpenOffice document format for source extension: " + sourceExtension);
}
// query the registry for the target format
DocumentFormat targetFormat = formatRegistry.getFormatByFileExtension(targetExtension);
if (targetFormat == null)
{
// target format is not recognised
throw new ContentIOException("No OpenOffice document format for target extension: " + sourceExtension);
}
// get the family of the target document
DocumentFamily sourceFamily = sourceFormat.getFamily();
// does the format support the conversion
if (!targetFormat.isExportableFrom(sourceFamily))
{
throw new ContentIOException(
"OpenOffice conversion not supported: \n" +
" reader: " + reader + "\n" +
" writer: " + writer);
}
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"OpenOfficeContentTransformer-source-",
"." + sourceExtension);
File tempToFile = TempFileProvider.createTempFile(
"OpenOfficeContentTransformer-target-",
"." + targetExtension);
// download the content from the source reader
reader.getContent(tempFromFile);
try
{
converter.convert(tempFromFile, sourceFormat, tempToFile, targetFormat);
// conversion success
}
catch (OpenOfficeException e)
{
throw new ContentIOException("OpenOffice server conversion failed: \n" +
" reader: " + reader + "\n" +
" writer: " + writer + "\n" +
" from file: " + tempFromFile + "\n" +
" to file: " + tempToFile,
e);
}
// upload the temp output to the writer given us
writer.putContent(tempToFile);
}
}

View File

@ -16,24 +16,30 @@
*/
package org.alfresco.repo.content.transform;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.repo.content.MimetypeMap;
/**
* @see org.alfresco.repo.content.transform.UnoContentTransformer
* @see org.alfresco.repo.content.transform.OpenOfficeContentTransformer
*
* @author Derek Hulley
*/
public class UnoContentTransformerTest extends AbstractContentTransformerTest
public class OpenOfficeContentTransformerTest extends AbstractContentTransformerTest
{
private static String MIMETYPE_RUBBISH = "text/rubbish";
private UnoContentTransformer transformer;
private OpenOfficeContentTransformer transformer;
public void onSetUpInTransaction() throws Exception
{
transformer = new UnoContentTransformer();
OpenOfficeConnection connection = (OpenOfficeConnection) applicationContext.getBean("openOfficeConnection");
transformer = new OpenOfficeContentTransformer();
transformer.setMimetypeService(mimetypeMap);
transformer.init();
transformer.setConnection(connection);
transformer.setDocumentFormatsConfiguration("classpath:alfresco/mimetype/openoffice-document-formats.xml");
transformer.register();
}
/**
@ -62,6 +68,8 @@ public class UnoContentTransformerTest extends AbstractContentTransformerTest
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MIMETYPE_RUBBISH);
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_XHTML);
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD);
assertEquals("Mimetype should be supported", 1.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN);

View File

@ -1,337 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.net.ConnectException;
import java.util.HashMap;
import java.util.Map;
import net.sf.joott.uno.DocumentConverter;
import net.sf.joott.uno.DocumentFormat;
import net.sf.joott.uno.UnoConnection;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
/**
* Makes use of the OpenOffice Uno interfaces to convert the content.
* <p>
* The conversions are slow but reliable. Not <b>all</b> possible combinations of transformations
* have been enabled because they don't necessarily work and need to be specifically tested before
* being made available generally. As the conversion process is mostly automated, the introduction
* of faulty transformations can lead to unnecessary bugs. Feel free to experiment and, assuming
* that the unit test works, report any interesting conversions that can be enabled.
*
* @author Derek Hulley
*/
public class UnoContentTransformer extends AbstractContentTransformer
{
/** map of <tt>DocumentFormat</tt> instances keyed by mimetype conversion */
private static Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> formatsByConversion;
static
{
// Build the map of known Uno document formats and store by conversion key
formatsByConversion = new HashMap<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper>(17);
// Open Office 2.0 / Open Document
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, MimetypeMap.MIMETYPE_TEXT_PLAIN),
new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_CALC, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
// Open Office
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_TEXT_PLAIN),
new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENOFFICE1_CALC, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENOFFICE1_DRAW, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
// Star Office 5.x
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_DRAW, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_CALC, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_CALC, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_CHART, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS_PACKED, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_WRITER, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_STAROFFICE5_WRITER_GLOBAL, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
// MS Office
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN),
new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_CALC, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_HTML),
new DocumentFormatWrapper(DocumentFormat.HTML_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_FLASH),
new DocumentFormatWrapper(DocumentFormat.FLASH_IMPRESS, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_IMPRESS, 1.0));
// Other
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_HTML),
new DocumentFormatWrapper(DocumentFormat.HTML_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD),
new DocumentFormatWrapper(DocumentFormat.TEXT, 1.0));
formatsByConversion.put(
new ContentTransformerRegistry.TransformationKey(MimetypeMap.MIMETYPE_HTML, MimetypeMap.MIMETYPE_PDF),
new DocumentFormatWrapper(DocumentFormat.PDF_WRITER_WEB, 1.0));
// there are many more formats available and therefore many more transformation combinations possible
// DocumentFormat.FLASH_IMPRESS
// DocumentFormat.HTML_CALC
// DocumentFormat.HTML_WRITER
// DocumentFormat.MS_EXCEL_97
// DocumentFormat.MS_POWERPOINT_97
// DocumentFormat.MS_WORD_97
// DocumentFormat.PDF_CALC
// DocumentFormat.PDF_IMPRESS
// DocumentFormat.PDF_WRITER
// DocumentFormat.PDF_WRITER_WEB
// DocumentFormat.RTF
// DocumentFormat.TEXT
// DocumentFormat.TEXT_CALC
// DocumentFormat.XML_CALC
// DocumentFormat.XML_IMPRESS
// DocumentFormat.XML_WRITER
// DocumentFormat.XML_WRITER_WEB
}
private String connectionUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
private UnoConnection connection;
private boolean isConnected;
/**
* Constructs the default transformer that will attempt to connect to the
* Uno server using the default connect string.
*
* @see UnoConnection#DEFAULT_CONNECTION_STRING
*/
public UnoContentTransformer()
{
isConnected = false;
}
/**
* Override the default connection URL with a new one.
*
* @param connectionUrl the connection string
*
* @see UnoConnection#DEFAULT_CONNECTION_STRING
*/
public void setConnectionUrl(String connectionUrl)
{
this.connectionUrl = connectionUrl;
}
/**
* Connects to the OpenOffice server. If successful, then
* {@link AbstractContentTransformer#register() auto-registers}.
*/
public synchronized void init()
{
connection = new UnoConnection(connectionUrl);
// attempt to make an connection
try
{
connection.connect();
isConnected = true;
// register
super.register();
}
catch (ConnectException e)
{
isConnected = false;
}
}
/**
* @return Returns true if a connection to the Uno server could be established
*/
public boolean isConnected()
{
return isConnected;
}
/**
* @param sourceMimetype
* @param targetMimetype
* @return Returns a document format wrapper that is valid for the given source and target mimetypes
*/
private static DocumentFormatWrapper getDocumentFormatWrapper(String sourceMimetype, String targetMimetype)
{
// get the well-known document format for the specific conversion
ContentTransformerRegistry.TransformationKey key =
new ContentTransformerRegistry.TransformationKey(sourceMimetype, targetMimetype);
DocumentFormatWrapper wrapper = UnoContentTransformer.formatsByConversion.get(key);
return wrapper;
}
/**
* Checks how reliable the conversion will be when performed by the Uno server.
* <p>
* The connection for the Uno server is checked in order to have any chance of
* being reliable.
* <p>
* The conversions' reliabilities are set up statically based on prior tests that
* included checking performance as well as accuracy.
*/
public double getReliability(String sourceMimetype, String targetMimetype)
{
// check if a connection to the Uno server can be established
if (!isConnected())
{
// no connection means that conversion is not possible
return 0.0;
}
// check if the source and target mimetypes are supported
DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
if (docFormatWrapper == null)
{
return 0.0;
}
else
{
return docFormatWrapper.getReliability();
}
}
public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
throws Exception
{
String sourceMimetype = getMimetype(reader);
String targetMimetype = getMimetype(writer);
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer",
"." + getMimetypeService().getExtension(sourceMimetype));
File tempToFile = TempFileProvider.createTempFile(
"UnoContentTransformer",
"." + getMimetypeService().getExtension(targetMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
// get the document format that should be used
DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
try
{
docFormatWrapper.execute(tempFromFile, tempToFile, connection);
// conversion success
}
catch (ConnectException e)
{
throw new ContentIOException("Connection to Uno server failed: \n" +
" reader: " + reader + "\n" +
" writer: " + writer,
e);
}
catch (IOException e)
{
throw new ContentIOException("Uno server conversion failed: \n" +
" reader: " + reader + "\n" +
" writer: " + writer + "\n" +
" from file: " + tempFromFile + "\n" +
" to file: " + tempToFile,
e);
}
// upload the temp output to the writer given us
writer.putContent(tempToFile);
}
/**
* Wraps a document format as well the reliability. The source and target mimetypes
* are not kept, but will probably be closely associated with the reliability.
*/
private static class DocumentFormatWrapper
{
/*
* Source and target mimetypes not kept -> class is private as it doesn't keep
* enough info to be used safely externally
*/
private DocumentFormat documentFormat;
private double reliability;
public DocumentFormatWrapper(DocumentFormat documentFormat, double reliability)
{
this.documentFormat = documentFormat;
this.reliability = reliability;
}
public double getReliability()
{
return reliability;
}
/**
* Executs the transformation
*/
public void execute(File fromFile, File toFile, UnoConnection connection) throws ConnectException, IOException
{
DocumentConverter converter = new DocumentConverter(connection);
converter.convert(fromFile, toFile, documentFormat);
}
}
}