Merge from HEAD into WCM-DEV2. Also fixes build breakage in

jndi-client and catalina-virtual that I introduced earlier. 


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/WCM-DEV2/root@3393 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Britt Park
2006-07-24 18:27:41 +00:00
parent c50a4aa669
commit f7d9d83036
83 changed files with 4469 additions and 1999 deletions

View File

@@ -40,6 +40,8 @@ import org.apache.commons.logging.LogFactory;
*/
public class MimetypeMap implements MimetypeService
{
public static final String EXTENSION_BINARY = "bin";
public static final String MIMETYPE_TEXT_PLAIN = "text/plain";
public static final String MIMETYPE_TEXT_CSS = "text/css";
public static final String MIMETYPE_XML = "text/xml";
@@ -87,6 +89,8 @@ public class MimetypeMap implements MimetypeService
public static final String MIMETYPE_STAROFFICE5_WRITER = "application/vnd.stardivision.writer";
public static final String MIMETYPE_STAROFFICE5_WRITER_GLOBAL = "application/vnd.stardivision.writer-global";
public static final String MIMETYPE_STAROFFICE5_MATH = "application/vnd.stardivision.math";
// WordPerfect
public static final String MIMETYPE_WORDPERFECT = "application/wordperfect";
// Audio
public static final String MIMETYPE_MP3 = "audio/x-mpeg";
// Alfresco
@@ -207,18 +211,26 @@ public class MimetypeMap implements MimetypeService
}
/**
* Get the file extension associated with the mimetype.
*
* @param mimetype a valid mimetype
* @return Returns the default extension for the mimetype
* @throws AlfrescoRuntimeException if the mimetype doesn't exist
* @return Returns the default extension for the mimetype. Returns the {@link #MIMETYPE_BINARY binary}
* mimetype extension.
*
* @see #MIMETYPE_BINARY
* @see #EXTENSION_BINARY
*/
public String getExtension(String mimetype)
{
String extension = extensionsByMimetype.get(mimetype);
if (extension == null)
{
throw new AlfrescoRuntimeException("No extension available for mimetype: " + mimetype);
return EXTENSION_BINARY;
}
else
{
return extension;
}
return extension;
}
public Map<String, String> getDisplaysByExtension()

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,11 +31,11 @@ import org.apache.commons.logging.LogFactory;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.springframework.context.ApplicationContext;
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public abstract class AbstractMetadataExtracterTest extends TestCase
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.alfresco.service.namespace.QName;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -19,7 +19,7 @@ package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/**
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{

View File

@@ -0,0 +1,180 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
/**
 * Outlook format email meta-data extractor.
 * <p>
 * Walks the POIFS property streams of the content and copies the sender,
 * receiver, recipient list and sent date into the destination property map.
 * Content that cannot be read as a POIFS document is logged and otherwise
 * ignored.
 *
 * @author Kevin Roast
 */
public class MailMetadataExtracter extends AbstractMetadataExtracter
{
    public static String[] SUPPORTED_MIMETYPES = new String[] {
        "message/rfc822"};

    private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E";
    private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E";
    private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E";
    private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E";
    private static final String SUBSTG_DATE = "__substg1.0_00470102";

    private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
    private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
    private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
    private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
    private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");

    public MailMetadataExtracter()
    {
        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
    }

    /**
     * Extracts the mail properties from the given content.
     *
     * @param reader      the source of the message content
     * @param destination the map that receives the extracted properties
     * @throws Throwable  a {@link ContentIOException} is raised if one of the
     *                    recognised property streams cannot be processed
     */
    public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
    {
        // Recipient addresses collected for this call only.  A local list (rather than
        // per-thread instance state) means nothing is left behind on the calling thread
        // once extraction finishes.
        // NOTE(review): relies on POIFSReader delivering all events before read() returns,
        // which the previous implementation also required.
        final List<String> recipientEmails = new ArrayList<String>();

        POIFSReaderListener readerListener = new POIFSReaderListener()
        {
            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
            {
                try
                {
                    String name = event.getName();
                    if (SUBSTG_RECIPIENTEMAIL.equals(name))     // a recipient email address
                    {
                        String emailAddress = readPlainTextStream(event.getStream());
                        recipientEmails.add(convertExchangeAddress(emailAddress));
                    }
                    else if (SUBSTG_RECEIVEDEMAIL.equals(name)) // receiver email address
                    {
                        String emailAddress = readPlainTextStream(event.getStream());
                        destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress));
                    }
                    else if (SUBSTG_SENDEREMAIL.equals(name))
                    {
                        // sender email - NOTE either email OR full Exchange data
                        // e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN
                        String emailAddress = readPlainTextStream(event.getStream());
                        destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress));
                    }
                    else if (SUBSTG_DATE.equals(name))
                    {
                        // the date is not really plain text - but it's easier to parse as such
                        Date sentDate = parseSentDate(readPlainTextStream(event.getStream()));
                        if (sentDate != null)
                        {
                            destination.put(PROP_SENTDATE, sentDate);
                        }
                    }
                }
                catch (Exception ex)
                {
                    throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
                }
            }
        };

        InputStream is = null;
        try
        {
            is = reader.getContentInputStream();
            POIFSReader poiFSReader = new POIFSReader();
            poiFSReader.registerListener(readerListener);

            try
            {
                poiFSReader.read(is);
            }
            catch (IOException err)
            {
                // probably not an Outlook format MSG - ignore for now
                logger.warn("Unable to extract meta-data from message: " + err.getMessage());
            }

            // store multi-value extracted property
            if (recipientEmails.size() != 0)
            {
                destination.put(PROP_ADDRESSEES, (Serializable) recipientEmails);
            }
        }
        finally
        {
            if (is != null)
            {
                try
                {
                    is.close();
                }
                catch (IOException ignored)
                {
                    // best effort: a failure to close must not mask the extraction outcome
                }
            }
        }
    }

    /**
     * Parses the sent date out of the raw text of the date property stream.
     * <p>
     * The value is located by finding <tt>l=</tt>, then the first <tt>-</tt> after it;
     * fixed-width two-digit fields are read from the characters that follow.
     *
     * @param date the raw stream content read as text
     * @return the parsed date, or <tt>null</tt> if the expected markers are absent
     */
    @SuppressWarnings("deprecation")
    private static Date parseSentDate(String date)
    {
        int valueIndex = date.indexOf("l=");
        if (valueIndex == -1)
        {
            return null;
        }
        int dateIndex = date.indexOf('-', valueIndex);
        if (dateIndex == -1)
        {
            return null;
        }
        dateIndex++;

        // two-digit year is taken to be 20xx; java.util.Date counts years from 1900
        int year = Integer.parseInt(date.substring(dateIndex, dateIndex + 2)) + (2000 - 1900);
        // java.util.Date months are zero-based
        int month = Integer.parseInt(date.substring(dateIndex + 2, dateIndex + 4)) - 1;
        int day = Integer.parseInt(date.substring(dateIndex + 4, dateIndex + 6));
        int hour = Integer.parseInt(date.substring(dateIndex + 6, dateIndex + 8));
        // NOTE(review): the minute is read from offset +10..+12, skipping +8..+10.  If the
        // field layout is YYMMDDHHMMSS this picks up the seconds - confirm against real data.
        int minute = Integer.parseInt(date.substring(dateIndex + 10, dateIndex + 12));

        return new Date(year, month, day, hour, minute);
    }

    /**
     * Reads the remaining content of the stream as text using the platform default charset.
     *
     * @param stream the property stream to drain
     * @return the bytes actually read, decoded as a string
     * @throws IOException if the stream cannot be read
     */
    private static String readPlainTextStream(DocumentInputStream stream)
        throws IOException
    {
        byte[] data = new byte[stream.available()];
        int total = 0;
        // read() may return fewer bytes than requested, so keep going until the buffer is full
        while (total < data.length)
        {
            int read = stream.read(data, total, data.length - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        // only decode what was really read, never the unfilled tail of the buffer
        return new String(data, 0, total);
    }

    /**
     * Reduces a full Exchange-style address to the value of its last <tt>/CN=</tt> element.
     *
     * @param email a plain address or an Exchange-style path
     * @return the text after the last <tt>/CN=</tt>, or the input unchanged if there is none
     */
    private static String convertExchangeAddress(String email)
    {
        int cnIndex = email.lastIndexOf("/CN=");
        if (cnIndex == -1)
        {
            return email;
        }
        // found a full Exchange format To header
        return email.substring(cnIndex + 4);
    }
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -25,7 +25,7 @@ import org.alfresco.service.namespace.QName;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public interface MetadataExtracter
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -36,7 +36,7 @@ import org.apache.commons.logging.LogFactory;
* The extracters themselves know how well they are able to extract metadata.
*
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class MetadataExtracterRegistry
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -28,7 +28,6 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
@@ -37,15 +36,16 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
/**
* Office file format Metadata Extracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT };
MimetypeMap.MIMETYPE_PPT};
public OfficeMetadataExtracter()
{
@@ -64,6 +64,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
if (ps instanceof SummaryInformation)
{
SummaryInformation si = (SummaryInformation) ps;
// Titled aspect
trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
@@ -73,16 +74,6 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
}
else if (ps instanceof DocumentSummaryInformation)
{
// DocumentSummaryInformation dsi = (DocumentSummaryInformation) ps;
// These are not really interesting to any aspect:
// trimPut(ContentModel.PROP_xxx, dsi.getCompany(),
// destination);
// trimPut(ContentModel.PROP_yyy, dsi.getManager(),
// destination);
}
}
catch (Exception ex)
{
@@ -90,6 +81,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
}
}
};
InputStream is = null;
try
{

View File

@@ -4,7 +4,7 @@ package org.alfresco.repo.content.metadata;
/**
* @see org.alfresco.repo.content.metadata.OfficeMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -24,12 +24,13 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import net.sf.joott.uno.UnoConnection;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
import com.sun.star.beans.PropertyValue;
@@ -41,9 +42,9 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
* @author Jesper Steen Møller
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
@@ -55,31 +56,26 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// quality since they involve conversion.
};
private String contentUrl;
private MyUnoConnection connection;
private OpenOfficeConnection connection;
private boolean isConnected;
public UnoMetadataExtracter()
public OpenOfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
}
/**
*
* @param contentUrl the URL to connect to
*/
public void setContentUrl(String contentUrl)
public void setConnection(OpenOfficeConnection connection)
{
this.contentUrl = contentUrl;
this.connection = connection;
}
/**
* Initialises the bean by establishing an UNO connection
*/
public synchronized void init()
{
connection = new MyUnoConnection(contentUrl);
PropertyCheck.mandatory("OpenOfficeMetadataExtracter", "connection", connection);
// attempt to make an connection
try
{
@@ -109,7 +105,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "."
"OpenOfficeMetadataExtracter-", "."
+ getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
@@ -158,9 +154,9 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
}
}
public String toUrl(File file, MyUnoConnection connection) throws ConnectException
public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException
{
Object fcp = connection.getFileContentService();
Object fcp = connection.getFileContentProvider();
XFileIdentifierConverter fic = (XFileIdentifierConverter) UnoRuntime.queryInterface(
XFileIdentifierConverter.class, fcp);
return fic.getFileURLFromSystemPath("", file.getAbsolutePath());
@@ -181,17 +177,4 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
property.Value = value;
return property;
}
static class MyUnoConnection extends UnoConnection
{
public MyUnoConnection(String url)
{
super(url);
}
public Object getFileContentService() throws ConnectException
{
return getService("com.sun.star.ucb.FileContentProvider");
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -16,20 +16,27 @@
*/
package org.alfresco.repo.content.metadata;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection;
/**
* @author Jesper Steen Møller
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
public class OpenOfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private UnoMetadataExtracter extracter;
private OpenOfficeMetadataExtracter extracter;
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new UnoMetadataExtracter();
OpenOfficeConnection connection = new SocketOpenOfficeConnection();
extracter = new OpenOfficeMetadataExtracter();
extracter.setMimetypeService(mimetypeMap);
extracter.setConnection(connection);
extracter.init();
}
@@ -48,7 +55,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
@@ -61,7 +68,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
testExtractFromMimetype(mimetype);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,7 +31,7 @@ import org.pdfbox.pdmodel.PDDocumentInformation;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
{

View File

@@ -5,7 +5,7 @@ import org.alfresco.repo.content.MimetypeMap;
/**
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{

View File

@@ -123,12 +123,8 @@ public abstract class AbstractContentTransformer implements ContentTransformer
{
if (registry == null)
{
if (registry == null)
{
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
}
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
" transformer: " + this);
return;
}
// first register any explicit transformations

View File

@@ -50,7 +50,6 @@ public class ContentTransformerRegistry
private MimetypeMap mimetypeMap;
/** Cache of previously used transactions */
private Map<TransformationKey, List<ContentTransformer>> transformationCache;
private short accessCount;
/** Controls read access to the transformation cache */
private Lock transformationCacheReadLock;
/** controls write access to the transformation cache */
@@ -67,7 +66,6 @@ public class ContentTransformerRegistry
this.transformers = new ArrayList<ContentTransformer>(10);
transformationCache = new HashMap<TransformationKey, List<ContentTransformer>>(17);
accessCount = 0;
// create lock objects for access to the cache
ReadWriteLock transformationCacheLock = new ReentrantReadWriteLock();
transformationCacheReadLock = transformationCacheLock.readLock();
@@ -120,7 +118,6 @@ public class ContentTransformerRegistry
try
{
transformationCache.clear();
accessCount = 0;
}
finally
{
@@ -243,7 +240,6 @@ public class ContentTransformerRegistry
private List<ContentTransformer> findDirectTransformers(String sourceMimetype, String targetMimetype)
{
double maxReliability = 0.0;
long leastTime = 100000L; // 100 seconds - longer than anyone would think of waiting
List<ContentTransformer> bestTransformers = new ArrayList<ContentTransformer>(2);
// loop through transformers
for (ContentTransformer transformer : this.transformers)
@@ -289,6 +285,7 @@ public class ContentTransformerRegistry
/**
* Recursive method to build up a list of content transformers
*/
@SuppressWarnings("unused")
private void buildTransformer(List<ContentTransformer> transformers,
double reliability,
List<String> touchedMimetypes,

View File

@@ -0,0 +1,254 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.util.Map;
import net.sf.jooreports.converter.DocumentFamily;
import net.sf.jooreports.converter.DocumentFormat;
import net.sf.jooreports.converter.DocumentFormatRegistry;
import net.sf.jooreports.converter.XmlDocumentFormatRegistry;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import net.sf.jooreports.openoffice.connection.OpenOfficeException;
import net.sf.jooreports.openoffice.converter.OpenOfficeDocumentConverter;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.core.io.DefaultResourceLoader;
/**
 * Makes use of the <a href="http://sourceforge.net/projects/joott/">JOOConverter</a> library to
 * perform OpenOffice-driven conversions.
 * <p>
 * The transformer only registers itself, and only reports a non-zero reliability,
 * when the connection supplied via {@link #setConnection(OpenOfficeConnection)} could
 * be established during {@link #register()}.
 *
 * @author Derek Hulley
 */
public class OpenOfficeContentTransformer extends AbstractContentTransformer
{
    private static Log logger = LogFactory.getLog(OpenOfficeContentTransformer.class);

    private OpenOfficeConnection connection;
    private boolean connected;
    private OpenOfficeDocumentConverter converter;
    private String documentFormatsConfiguration;
    private DocumentFormatRegistry formatRegistry;

    public OpenOfficeContentTransformer()
    {
        this.connected = false;
    }

    /**
     * @param connection the OpenOffice connection to convert through (mandatory)
     */
    public void setConnection(OpenOfficeConnection connection)
    {
        this.connection = connection;
    }

    /**
     * Set a non-default location from which to load the document format mappings.
     *
     * @param path a resource location supporting the <b>file:</b> or <b>classpath:</b> prefixes
     */
    public void setDocumentFormatsConfiguration(String path)
    {
        this.documentFormatsConfiguration = path;
    }

    /**
     * @return Returns true if the last connection attempt succeeded
     */
    public boolean isConnected()
    {
        return connected;
    }

    /**
     * Attempts to open the connection, recording the outcome in the <tt>connected</tt> flag.
     * A failure is logged as a warning and is not propagated.
     */
    private synchronized void connect()
    {
        try
        {
            connection.connect();
            connected = true;
        }
        catch (ConnectException e)
        {
            logger.warn(e.getMessage());
            connected = false;
        }
    }

    /**
     * Connects, builds the converter and format registry, and registers this
     * transformer only if the connection was established.
     *
     * @throws AlfrescoRuntimeException if a custom document formats configuration
     *      was specified but could not be loaded
     */
    @Override
    public void register()
    {
        PropertyCheck.mandatory("OpenOfficeContentTransformer", "connection", connection);

        // attempt to establish a connection
        connect();

        // set up the converter
        converter = new OpenOfficeDocumentConverter(connection);

        // load the document conversion configuration
        if (documentFormatsConfiguration != null)
        {
            DefaultResourceLoader resourceLoader = new DefaultResourceLoader();
            try
            {
                // NOTE(review): the stream is not closed here - confirm whether the
                // registry consumes it in its constructor before adding a close()
                InputStream is = resourceLoader.getResource(documentFormatsConfiguration).getInputStream();
                formatRegistry = new XmlDocumentFormatRegistry(is);
            }
            catch (IOException e)
            {
                // keep the underlying cause so that the reason for the failure is not lost
                throw new AlfrescoRuntimeException(
                        "Unable to load document formats configuration file: " + documentFormatsConfiguration,
                        e);
            }
        }
        else
        {
            formatRegistry = new XmlDocumentFormatRegistry();
        }

        if (connected)
        {
            // register
            super.register();
        }
    }

    /**
     * Reports 1.0 when the format registry can export the source document family to
     * the target format, and 0.0 otherwise (including when not connected).
     *
     * @see DocumentFormatRegistry
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        if (!connected)
        {
            return 0.0;
        }

        // there are some conversions that fail, despite the converter believing them possible
        if (targetMimetype.equals(MimetypeMap.MIMETYPE_XHTML))
        {
            return 0.0;
        }
        else if (targetMimetype.equals(MimetypeMap.MIMETYPE_WORDPERFECT))
        {
            return 0.0;
        }

        MimetypeService mimetypeService = getMimetypeService();
        String sourceExtension = mimetypeService.getExtension(sourceMimetype);
        String targetExtension = mimetypeService.getExtension(targetMimetype);
        // query the registry for the source format
        DocumentFormat sourceFormat = formatRegistry.getFormatByFileExtension(sourceExtension);
        if (sourceFormat == null)
        {
            // no document format
            return 0.0;
        }
        // query the registry for the target format
        DocumentFormat targetFormat = formatRegistry.getFormatByFileExtension(targetExtension);
        if (targetFormat == null)
        {
            // no document format
            return 0.0;
        }

        // get the family of the source document
        DocumentFamily sourceFamily = sourceFormat.getFamily();
        // does the format support the conversion
        if (!targetFormat.isExportableFrom(sourceFamily))
        {
            // unable to export from source family of documents to the target format
            return 0.0;
        }
        else
        {
            return 1.0;
        }
    }

    /**
     * Converts the reader's content to the writer's mimetype by way of two temporary files.
     *
     * @param reader  the source content
     * @param writer  the target for the converted content
     * @param options transformation options (not used by this implementation)
     * @throws ContentIOException if either format is unknown to the registry, the
     *      conversion is not supported, or the OpenOffice conversion fails
     */
    protected void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception
    {
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);

        MimetypeService mimetypeService = getMimetypeService();
        String sourceExtension = mimetypeService.getExtension(sourceMimetype);
        String targetExtension = mimetypeService.getExtension(targetMimetype);
        // query the registry for the source format
        DocumentFormat sourceFormat = formatRegistry.getFormatByFileExtension(sourceExtension);
        if (sourceFormat == null)
        {
            // source format is not recognised
            throw new ContentIOException("No OpenOffice document format for source extension: " + sourceExtension);
        }
        // query the registry for the target format
        DocumentFormat targetFormat = formatRegistry.getFormatByFileExtension(targetExtension);
        if (targetFormat == null)
        {
            // target format is not recognised - report the target extension, not the source
            throw new ContentIOException("No OpenOffice document format for target extension: " + targetExtension);
        }
        // get the family of the source document
        DocumentFamily sourceFamily = sourceFormat.getFamily();
        // does the format support the conversion
        if (!targetFormat.isExportableFrom(sourceFamily))
        {
            throw new ContentIOException(
                    "OpenOffice conversion not supported: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer);
        }

        // create temporary files to convert from and to
        File tempFromFile = TempFileProvider.createTempFile(
                "OpenOfficeContentTransformer-source-",
                "." + sourceExtension);
        File tempToFile = TempFileProvider.createTempFile(
                "OpenOfficeContentTransformer-target-",
                "." + targetExtension);
        // download the content from the source reader
        reader.getContent(tempFromFile);

        try
        {
            converter.convert(tempFromFile, sourceFormat, tempToFile, targetFormat);
            // conversion success
        }
        catch (OpenOfficeException e)
        {
            throw new ContentIOException("OpenOffice server conversion failed: \n" +
                    " reader: " + reader + "\n" +
                    " writer: " + writer + "\n" +
                    " from file: " + tempFromFile + "\n" +
                    " to file: " + tempToFile,
                    e);
        }

        // upload the temp output to the writer given us
        writer.putContent(tempToFile);
    }
}

View File

@@ -16,24 +16,30 @@
*/
package org.alfresco.repo.content.transform;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.repo.content.MimetypeMap;
/**
* @see org.alfresco.repo.content.transform.UnoContentTransformer
* @see org.alfresco.repo.content.transform.OpenOfficeContentTransformer
*
* @author Derek Hulley
*/
public class UnoContentTransformerTest extends AbstractContentTransformerTest
public class OpenOfficeContentTransformerTest extends AbstractContentTransformerTest
{
private static String MIMETYPE_RUBBISH = "text/rubbish";
private UnoContentTransformer transformer;
private OpenOfficeContentTransformer transformer;
public void onSetUpInTransaction() throws Exception
{
transformer = new UnoContentTransformer();
OpenOfficeConnection connection = (OpenOfficeConnection) applicationContext.getBean("openOfficeConnection");
transformer = new OpenOfficeContentTransformer();
transformer.setMimetypeService(mimetypeMap);
transformer.init();
transformer.setConnection(connection);
transformer.setDocumentFormatsConfiguration("classpath:alfresco/mimetype/openoffice-document-formats.xml");
transformer.register();
}
/**
@@ -62,6 +68,8 @@ public class UnoContentTransformerTest extends AbstractContentTransformerTest
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MIMETYPE_RUBBISH);
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_XHTML);
assertEquals("Mimetype should not be supported", 0.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD);
assertEquals("Mimetype should be supported", 1.0, reliability);
reliability = transformer.getReliability(MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN);

View File

@@ -1,337 +0,0 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.transform;
import java.io.File;
import java.io.IOException;
import java.net.ConnectException;
import java.util.HashMap;
import java.util.Map;
import net.sf.joott.uno.DocumentConverter;
import net.sf.joott.uno.DocumentFormat;
import net.sf.joott.uno.UnoConnection;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.util.TempFileProvider;
/**
 * Makes use of the OpenOffice Uno interfaces to convert the content.
 * <p>
 * The conversions are slow but reliable. Not <b>all</b> possible combinations of transformations
 * have been enabled because they don't necessarily work and need to be specifically tested before
 * being made available generally. As the conversion process is mostly automated, the introduction
 * of faulty transformations can lead to unnecessary bugs. Feel free to experiment and, assuming
 * that the unit test works, report any interesting conversions that can be enabled.
 * <p>
 * Lifecycle: construct, optionally {@link #setConnectionUrl(String) override the connection URL},
 * then call {@link #init()}. Until <tt>init()</tt> has connected successfully,
 * {@link #getReliability(String, String)} reports <tt>0.0</tt> for every conversion.
 * <p>
 * The connection state is written by the synchronized {@link #init()} and held in volatile
 * fields so that unsynchronized readers ({@link #isConnected()}, {@link #getReliability(String, String)},
 * {@link #transformInternal(ContentReader, ContentWriter, Map)}) observe the latest values.
 *
 * @author Derek Hulley
 */
public class UnoContentTransformer extends AbstractContentTransformer
{
    /** map of <tt>DocumentFormat</tt> instances keyed by mimetype conversion; populated once at class load */
    private static final Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> formatsByConversion =
            buildFormatsByConversion();

    private String connectionUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
    /** created by {@link #init()}; <tt>null</tt> until then */
    private volatile UnoConnection connection;
    /** <tt>true</tt> only after {@link #init()} managed to connect */
    private volatile boolean isConnected;

    /**
     * Builds the map of known Uno document formats, stored by conversion key.
     * <p>
     * Every enabled conversion currently carries a reliability of <tt>1.0</tt>.
     *
     * @return Returns the populated conversion map
     */
    private static Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> buildFormatsByConversion()
    {
        Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> formats =
                new HashMap<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper>(64);

        // Open Office 2.0 / Open Document
        addConversion(formats, MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, MimetypeMap.MIMETYPE_TEXT_PLAIN, DocumentFormat.TEXT, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_CALC, 1.0);
        // NOTE(review): a presentation mapped to PDF_WRITER rather than PDF_IMPRESS - confirm this is intentional
        addConversion(formats, MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);

        // Open Office
        addConversion(formats, MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_TEXT_PLAIN, DocumentFormat.TEXT, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_OPENOFFICE1_WRITER, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        // NOTE(review): a spreadsheet mapped to PDF_WRITER rather than PDF_CALC - confirm this is intentional
        addConversion(formats, MimetypeMap.MIMETYPE_OPENOFFICE1_CALC, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_OPENOFFICE1_DRAW, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_IMPRESS, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_OPENOFFICE1_IMPRESS, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_IMPRESS, 1.0);

        // Star Office 5.x
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_DRAW, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_IMPRESS, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_CALC, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_CALC, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_CHART, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        // NOTE(review): the packed variant below uses PDF_IMPRESS while this one uses PDF_WRITER - confirm
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_IMPRESS_PACKED, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_IMPRESS, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_WRITER, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_STAROFFICE5_WRITER_GLOBAL, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);

        // MS Office
        addConversion(formats, MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_TEXT_PLAIN, DocumentFormat.TEXT, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_EXCEL, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_CALC, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_WORD, MimetypeMap.MIMETYPE_HTML, DocumentFormat.HTML_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_FLASH, DocumentFormat.FLASH_IMPRESS, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_PPT, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_IMPRESS, 1.0);

        // Other
        addConversion(formats, MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_HTML, DocumentFormat.HTML_WRITER, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER, 1.0);
        // NOTE(review): the target is MS Word but the TEXT format is used; MS_WORD_97 may have been intended - confirm
        addConversion(formats, MimetypeMap.MIMETYPE_TEXT_PLAIN, MimetypeMap.MIMETYPE_WORD, DocumentFormat.TEXT, 1.0);
        addConversion(formats, MimetypeMap.MIMETYPE_HTML, MimetypeMap.MIMETYPE_PDF, DocumentFormat.PDF_WRITER_WEB, 1.0);

        // There are many more formats available and therefore many more transformation
        // combinations possible, but they have not been enabled here:
        //    FLASH_IMPRESS, HTML_CALC, HTML_WRITER, MS_EXCEL_97, MS_POWERPOINT_97, MS_WORD_97,
        //    PDF_CALC, PDF_IMPRESS, PDF_WRITER, PDF_WRITER_WEB, RTF, TEXT, TEXT_CALC,
        //    XML_CALC, XML_IMPRESS, XML_WRITER, XML_WRITER_WEB
        return formats;
    }

    /**
     * Registers a single conversion in the given map.
     *
     * @param formats the map being populated
     * @param sourceMimetype the mimetype being converted from
     * @param targetMimetype the mimetype being converted to
     * @param documentFormat the Uno document format used to write the target
     * @param reliability the reliability to report for the conversion
     */
    private static void addConversion(
            Map<ContentTransformerRegistry.TransformationKey, DocumentFormatWrapper> formats,
            String sourceMimetype,
            String targetMimetype,
            DocumentFormat documentFormat,
            double reliability)
    {
        formats.put(
                new ContentTransformerRegistry.TransformationKey(sourceMimetype, targetMimetype),
                new DocumentFormatWrapper(documentFormat, reliability));
    }

    /**
     * Constructs the default transformer that will attempt to connect to the
     * Uno server using the default connect string.
     *
     * @see UnoConnection#DEFAULT_CONNECTION_STRING
     */
    public UnoContentTransformer()
    {
        isConnected = false;
    }

    /**
     * Override the default connection URL with a new one. Only takes effect
     * if called before {@link #init()}, which is where the URL is read.
     *
     * @param connectionUrl the connection string
     *
     * @see UnoConnection#DEFAULT_CONNECTION_STRING
     */
    public void setConnectionUrl(String connectionUrl)
    {
        this.connectionUrl = connectionUrl;
    }

    /**
     * Connects to the OpenOffice server. If successful, then
     * {@link AbstractContentTransformer#register() auto-registers}.
     * <p>
     * A failure to connect is not propagated: the transformer is simply left
     * unregistered and {@link #isConnected()} returns <tt>false</tt>.
     */
    public synchronized void init()
    {
        connection = new UnoConnection(connectionUrl);
        // attempt to make a connection
        try
        {
            connection.connect();
            isConnected = true;
            // register
            super.register();
        }
        catch (ConnectException e)
        {
            // Deliberately not rethrown: connecting is best effort, and a transformer that
            // could not connect reports zero reliability instead of failing initialisation.
            isConnected = false;
        }
    }

    /**
     * @return Returns true if a connection to the Uno server could be established
     */
    public boolean isConnected()
    {
        return isConnected;
    }

    /**
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns a document format wrapper that is valid for the given source and target mimetypes,
     *      or <tt>null</tt> if the conversion has not been enabled
     */
    private static DocumentFormatWrapper getDocumentFormatWrapper(String sourceMimetype, String targetMimetype)
    {
        // get the well-known document format for the specific conversion
        ContentTransformerRegistry.TransformationKey key =
                new ContentTransformerRegistry.TransformationKey(sourceMimetype, targetMimetype);
        return formatsByConversion.get(key);
    }

    /**
     * Checks how reliable the conversion will be when performed by the Uno server.
     * <p>
     * The connection for the Uno server is checked in order to have any chance of
     * being reliable.
     * <p>
     * The conversions' reliabilities are set up statically based on prior tests that
     * included checking performance as well as accuracy.
     *
     * @param sourceMimetype the mimetype to convert from
     * @param targetMimetype the mimetype to convert to
     * @return Returns <tt>0.0</tt> if there is no connection or the conversion is not enabled,
     *      otherwise the reliability configured for the conversion
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        // no connection means that conversion is not possible
        if (!isConnected())
        {
            return 0.0;
        }
        // check if the source and target mimetypes are supported
        DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
        return (docFormatWrapper == null) ? 0.0 : docFormatWrapper.getReliability();
    }

    /**
     * Converts the reader's content by copying it to a temp file, having the Uno server write a
     * converted temp file, and then uploading that file to the writer.
     *
     * @param reader the source of the content
     * @param writer the destination of the converted content
     * @param options not used by this implementation
     * @throws ContentIOException if the conversion is not enabled, {@link #init()} has not been
     *      called, the Uno server connection fails or the conversion itself fails
     */
    public void transformInternal(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws Exception
    {
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);

        // Resolve the document format first so that an unsupported conversion fails with a
        // meaningful message before any temp files are created or content is copied.
        DocumentFormatWrapper docFormatWrapper = getDocumentFormatWrapper(sourceMimetype, targetMimetype);
        if (docFormatWrapper == null)
        {
            throw new ContentIOException("Uno server conversion not supported: \n" +
                    "   source mimetype: " + sourceMimetype + "\n" +
                    "   target mimetype: " + targetMimetype + "\n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer);
        }
        // take a local copy: the field is only assigned by init()
        UnoConnection unoConnection = connection;
        if (unoConnection == null)
        {
            throw new ContentIOException("Uno connection has not been initialised (init() not called): \n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer);
        }

        // create temporary files to convert from and to
        File tempFromFile = TempFileProvider.createTempFile(
                "UnoContentTransformer",
                "." + getMimetypeService().getExtension(sourceMimetype));
        File tempToFile = TempFileProvider.createTempFile(
                "UnoContentTransformer",
                "." + getMimetypeService().getExtension(targetMimetype));
        // download the content from the source reader
        reader.getContent(tempFromFile);

        try
        {
            docFormatWrapper.execute(tempFromFile, tempToFile, unoConnection);
            // conversion success
        }
        catch (ConnectException e)
        {
            throw new ContentIOException("Connection to Uno server failed: \n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer,
                    e);
        }
        catch (IOException e)
        {
            throw new ContentIOException("Uno server conversion failed: \n" +
                    "   reader: " + reader + "\n" +
                    "   writer: " + writer + "\n" +
                    "   from file: " + tempFromFile + "\n" +
                    "   to file: " + tempToFile,
                    e);
        }

        // upload the temp output to the writer given us
        writer.putContent(tempToFile);
    }

    /**
     * Wraps a document format as well the reliability. The source and target mimetypes
     * are not kept, but will probably be closely associated with the reliability.
     */
    private static class DocumentFormatWrapper
    {
        /*
         * Source and target mimetypes not kept -> class is private as it doesn't keep
         * enough info to be used safely externally
         */
        private final DocumentFormat documentFormat;
        private final double reliability;

        /**
         * @param documentFormat the Uno format used to write the converted file
         * @param reliability the reliability to report for conversions using this wrapper
         */
        public DocumentFormatWrapper(DocumentFormat documentFormat, double reliability)
        {
            this.documentFormat = documentFormat;
            this.reliability = reliability;
        }

        /**
         * @return Returns the reliability given at construction
         */
        public double getReliability()
        {
            return reliability;
        }

        /**
         * Executes the transformation
         *
         * @param fromFile the file holding the source content
         * @param toFile the file that the converted content is written to
         * @param connection the Uno connection handed to the converter
         * @throws ConnectException declared by the underlying converter call
         * @throws IOException declared by the underlying converter call
         */
        public void execute(File fromFile, File toFile, UnoConnection connection) throws ConnectException, IOException
        {
            DocumentConverter converter = new DocumentConverter(connection);
            converter.convert(fromFile, toFile, documentFormat);
        }
    }
}