Merge from HEAD into WCM-DEV2. Also fixes build breakage in

jndi-client and catalina-virtual that I introduced earlier. 


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/WCM-DEV2/root@3393 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Britt Park
2006-07-24 18:27:41 +00:00
parent c50a4aa669
commit f7d9d83036
83 changed files with 4469 additions and 1999 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,11 +31,11 @@ import org.apache.commons.logging.LogFactory;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
abstract public class AbstractMetadataExtracter implements MetadataExtracter
{
private static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
private MimetypeService mimetypeService;
private MetadataExtracterRegistry registry;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.springframework.context.ApplicationContext;
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public abstract class AbstractMetadataExtracterTest extends TestCase
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -38,7 +38,7 @@ import org.alfresco.service.namespace.QName;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -19,7 +19,7 @@ package org.alfresco.repo.content.metadata;
import org.alfresco.repo.content.MimetypeMap;
/**
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
{

View File

@@ -0,0 +1,180 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
* copy of the License at
*
* http://www.alfresco.org/legal/license.txt
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the
* License.
*/
package org.alfresco.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.service.namespace.QName;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
/**
* Outlook format email meta-data extractor
*
* @author Kevin Roast
*/
public class MailMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
"message/rfc822"};
private static final String SUBSTG_MESSAGEBODY = "__substg1.0_1000001E";
private static final String SUBSTG_RECIPIENTEMAIL = "__substg1.0_39FE001E";
private static final String SUBSTG_RECEIVEDEMAIL = "__substg1.0_0076001E";
private static final String SUBSTG_SENDEREMAIL = "__substg1.0_0C1F001E";
private static final String SUBSTG_DATE = "__substg1.0_00470102";
private static final QName ASPECT_MAILED = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "emailed");
private static final QName PROP_SENTDATE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "sentdate");
private static final QName PROP_ORIGINATOR = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "originator");
private static final QName PROP_ADDRESSEE = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressee");
private static final QName PROP_ADDRESSEES = QName.createQName(NamespaceService.CONTENT_MODEL_1_0_URI, "addressees");
// the CC: email addresses
private ThreadLocal<List<String>> receipientEmails = new ThreadLocal<List<String>>();
public MailMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
}
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
{
POIFSReaderListener readerListener = new POIFSReaderListener()
{
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
{
try
{
String name = event.getName();
if (name.equals(SUBSTG_RECIPIENTEMAIL)) // a recipient email address
{
String emailAddress = readPlainTextStream(event.getStream());
receipientEmails.get().add(convertExchangeAddress(emailAddress));
}
else if (name.equals(SUBSTG_RECEIVEDEMAIL)) // receiver email address
{
String emailAddress = readPlainTextStream(event.getStream());
destination.put(PROP_ADDRESSEE, convertExchangeAddress(emailAddress));
}
else if (name.equals(SUBSTG_SENDEREMAIL)) // sender email - NOTE either email OR full Exchange data e.g. : /O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=MIKE.FARMAN@BEN
{
String emailAddress = readPlainTextStream(event.getStream());
destination.put(PROP_ORIGINATOR, convertExchangeAddress(emailAddress));
}
else if (name.equals(SUBSTG_DATE))
{
// the date is not really plain text - but it's easier to parse as such
String date = readPlainTextStream(event.getStream());
int valueIndex = date.indexOf("l=");
if (valueIndex != -1)
{
int dateIndex = date.indexOf('-', valueIndex);
if (dateIndex != -1)
{
dateIndex++;
String strYear = date.substring(dateIndex, dateIndex + 2);
int year = Integer.parseInt(strYear) + (2000 - 1900);
String strMonth = date.substring(dateIndex + 2, dateIndex + 4);
int month = Integer.parseInt(strMonth) - 1;
String strDay = date.substring(dateIndex + 4, dateIndex + 6);
int day = Integer.parseInt(strDay);
String strHour = date.substring(dateIndex + 6, dateIndex + 8);
int hour = Integer.parseInt(strHour);
String strMinute = date.substring(dateIndex + 10, dateIndex + 12);
int minute = Integer.parseInt(strMinute);
destination.put(PROP_SENTDATE, new Date(year, month, day, hour, minute));
}
}
}
}
catch (Exception ex)
{
throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
}
}
};
InputStream is = null;
try
{
this.receipientEmails.set(new ArrayList<String>());
is = reader.getContentInputStream();
POIFSReader poiFSReader = new POIFSReader();
poiFSReader.registerListener(readerListener);
try
{
poiFSReader.read(is);
}
catch (IOException err)
{
// probably not an Outlook format MSG - ignore for now
logger.warn("Unable to extract meta-data from message: " + err.getMessage());
}
// store multi-value extracted property
if (receipientEmails.get().size() != 0)
{
destination.put(PROP_ADDRESSEES, (Serializable)receipientEmails.get());
}
}
finally
{
if (is != null)
{
try { is.close(); } catch (IOException e) {}
}
}
}
private static String readPlainTextStream(DocumentInputStream stream)
throws IOException
{
byte[] data = new byte[stream.available()];
int read = stream.read(data);
return new String(data);
}
private static String convertExchangeAddress(String email)
{
if (email.lastIndexOf("/CN=") == -1)
{
return email;
}
else
{
// found a full Exchange format To header
return email.substring(email.lastIndexOf("/CN=") + 4);
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -25,7 +25,7 @@ import org.alfresco.service.namespace.QName;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public interface MetadataExtracter
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -36,7 +36,7 @@ import org.apache.commons.logging.LogFactory;
* The extracters themselves know how well they are able to extract metadata.
*
* @see org.alfresco.repo.content.metadata.MetadataExtracter
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class MetadataExtracterRegistry
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -28,7 +28,6 @@ import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
@@ -37,15 +36,16 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
/**
* Office file format Metadata Extracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT };
MimetypeMap.MIMETYPE_PPT};
public OfficeMetadataExtracter()
{
@@ -64,6 +64,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
if (ps instanceof SummaryInformation)
{
SummaryInformation si = (SummaryInformation) ps;
// Titled aspect
trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
@@ -73,16 +74,6 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
}
else if (ps instanceof DocumentSummaryInformation)
{
// DocumentSummaryInformation dsi = (DocumentSummaryInformation) ps;
// These are not really interesting to any aspect:
// trimPut(ContentModel.PROP_xxx, dsi.getCompany(),
// destination);
// trimPut(ContentModel.PROP_yyy, dsi.getManager(),
// destination);
}
}
catch (Exception ex)
{
@@ -90,6 +81,7 @@ public class OfficeMetadataExtracter extends AbstractMetadataExtracter
}
}
};
InputStream is = null;
try
{

View File

@@ -4,7 +4,7 @@ package org.alfresco.repo.content.metadata;
/**
* @see org.alfresco.repo.content.transform.OfficeMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class OfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -24,12 +24,13 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import net.sf.joott.uno.UnoConnection;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.alfresco.util.PropertyCheck;
import org.alfresco.util.TempFileProvider;
import com.sun.star.beans.PropertyValue;
@@ -41,9 +42,9 @@ import com.sun.star.ucb.XFileIdentifierConverter;
import com.sun.star.uno.UnoRuntime;
/**
* @author Jesper Steen Møller
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracter extends AbstractMetadataExtracter
public class OpenOfficeMetadataExtracter extends AbstractMetadataExtracter
{
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_STAROFFICE5_WRITER,
@@ -55,31 +56,26 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// quality since they involve conversion.
};
private String contentUrl;
private MyUnoConnection connection;
private OpenOfficeConnection connection;
private boolean isConnected;
public UnoMetadataExtracter()
public OpenOfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.00, 10000);
this.contentUrl = UnoConnection.DEFAULT_CONNECTION_STRING;
}
/**
*
* @param contentUrl the URL to connect to
*/
public void setContentUrl(String contentUrl)
public void setConnection(OpenOfficeConnection connection)
{
this.contentUrl = contentUrl;
this.connection = connection;
}
/**
* Initialises the bean by establishing an UNO connection
*/
public synchronized void init()
{
connection = new MyUnoConnection(contentUrl);
PropertyCheck.mandatory("OpenOfficeMetadataExtracter", "connection", connection);
// attempt to make an connection
try
{
@@ -109,7 +105,7 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
// create temporary files to convert from and to
File tempFromFile = TempFileProvider.createTempFile(
"UnoContentTransformer_", "."
"OpenOfficeMetadataExtracter-", "."
+ getMimetypeService().getExtension(sourceMimetype));
// download the content from the source reader
reader.getContent(tempFromFile);
@@ -158,9 +154,9 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
}
}
public String toUrl(File file, MyUnoConnection connection) throws ConnectException
public String toUrl(File file, OpenOfficeConnection connection) throws ConnectException
{
Object fcp = connection.getFileContentService();
Object fcp = connection.getFileContentProvider();
XFileIdentifierConverter fic = (XFileIdentifierConverter) UnoRuntime.queryInterface(
XFileIdentifierConverter.class, fcp);
return fic.getFileURLFromSystemPath("", file.getAbsolutePath());
@@ -181,17 +177,4 @@ public class UnoMetadataExtracter extends AbstractMetadataExtracter
property.Value = value;
return property;
}
static class MyUnoConnection extends UnoConnection
{
public MyUnoConnection(String url)
{
super(url);
}
public Object getFileContentService() throws ConnectException
{
return getService("com.sun.star.ucb.FileContentProvider");
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen Møller
* Copyright (C) 2005 Jesper Steen M<EFBFBD>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -16,20 +16,27 @@
*/
package org.alfresco.repo.content.metadata;
import net.sf.jooreports.openoffice.connection.OpenOfficeConnection;
import net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection;
/**
* @author Jesper Steen Møller
* @author Jesper Steen Møller
*/
public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
public class OpenOfficeMetadataExtracterTest extends AbstractMetadataExtracterTest
{
private UnoMetadataExtracter extracter;
private OpenOfficeMetadataExtracter extracter;
@Override
public void setUp() throws Exception
{
super.setUp();
extracter = new UnoMetadataExtracter();
OpenOfficeConnection connection = new SocketOpenOfficeConnection();
extracter = new OpenOfficeMetadataExtracter();
extracter.setMimetypeService(mimetypeMap);
extracter.setConnection(connection);
extracter.init();
}
@@ -48,7 +55,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
double reliability = extracter.getReliability(mimetype);
assertTrue("Expected above zero reliability", reliability > 0.0);
@@ -61,7 +68,7 @@ public class UnoMetadataExtracterTest extends AbstractMetadataExtracterTest
{
return;
}
for (String mimetype : UnoMetadataExtracter.SUPPORTED_MIMETYPES)
for (String mimetype : OpenOfficeMetadataExtracter.SUPPORTED_MIMETYPES)
{
testExtractFromMimetype(mimetype);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005 Jesper Steen M<>ller
* Copyright (C) 2005 Jesper Steen M<>ller
*
* Licensed under the Mozilla Public License version 1.1
* with a permitted attribution clause. You may obtain a
@@ -31,7 +31,7 @@ import org.pdfbox.pdmodel.PDDocumentInformation;
/**
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
{

View File

@@ -5,7 +5,7 @@ import org.alfresco.repo.content.MimetypeMap;
/**
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
*
* @author Jesper Steen M<EFBFBD>ller
* @author Jesper Steen Møller
*/
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
{