mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Merged V1.3 to HEAD(3161:3179)
svn merge svn://www.alfresco.org:3691/alfresco/BRANCHES/V1.3@3161 svn://www.alfresco.org:3691/alfresco/BRANCHES/V1.3@3179 . git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3406 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -150,14 +150,6 @@ public abstract class AbstractContentAccessor implements ContentAccessor
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Derived classes must implement this to help determine if the underlying
|
||||
* IO Channel is still open.
|
||||
*
|
||||
* @return Returns true if the underlying IO Channel is open
|
||||
*/
|
||||
protected abstract boolean isChannelOpen();
|
||||
|
||||
public String getContentUrl()
|
||||
{
|
||||
return contentUrl;
|
||||
|
@@ -141,8 +141,7 @@ public abstract class AbstractContentReader extends AbstractContentAccessor impl
|
||||
}
|
||||
}
|
||||
|
||||
/** helper implementation for base class */
|
||||
protected boolean isChannelOpen()
|
||||
public synchronized boolean isChannelOpen()
|
||||
{
|
||||
if (channel != null)
|
||||
{
|
||||
|
@@ -156,8 +156,7 @@ public abstract class AbstractContentWriter extends AbstractContentAccessor impl
|
||||
}
|
||||
}
|
||||
|
||||
/** helper implementation for base class */
|
||||
protected boolean isChannelOpen()
|
||||
public synchronized boolean isChannelOpen()
|
||||
{
|
||||
if (channel != null)
|
||||
{
|
||||
|
@@ -23,6 +23,8 @@ import java.io.OutputStream;
|
||||
import javax.transaction.RollbackException;
|
||||
import javax.transaction.UserTransaction;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.filestore.FileContentWriter;
|
||||
import org.alfresco.repo.content.transform.ContentTransformer;
|
||||
@@ -43,18 +45,21 @@ import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.namespace.NamespaceService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.service.transaction.TransactionService;
|
||||
import org.alfresco.util.BaseSpringTest;
|
||||
import org.alfresco.util.ApplicationContextHelper;
|
||||
import org.alfresco.util.GUID;
|
||||
import org.alfresco.util.PropertyMap;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.RoutingContentService
|
||||
*
|
||||
* @author Derek Hulley
|
||||
*/
|
||||
public class RoutingContentServiceTest extends BaseSpringTest
|
||||
public class RoutingContentServiceTest extends TestCase
|
||||
{
|
||||
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
|
||||
|
||||
private static final String SOME_CONTENT = "ABC";
|
||||
|
||||
private static final String TEST_NAMESPACE = "http://www.alfresco.org/test/RoutingContentServiceTest";
|
||||
@@ -62,24 +67,30 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
private ContentService contentService;
|
||||
private PolicyComponent policyComponent;
|
||||
private NodeService nodeService;
|
||||
private AuthenticationComponent authenticationComponent;
|
||||
private UserTransaction txn;
|
||||
private NodeRef rootNodeRef;
|
||||
private NodeRef contentNodeRef;
|
||||
private AuthenticationComponent authenticationComponent;
|
||||
|
||||
public RoutingContentServiceTest()
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSetUpInTransaction() throws Exception
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.onSetUpInTransaction();
|
||||
nodeService = (NodeService) applicationContext.getBean("dbNodeService");
|
||||
contentService = (ContentService) applicationContext.getBean(ServiceRegistry.CONTENT_SERVICE.getLocalName());
|
||||
this.policyComponent = (PolicyComponent)this.applicationContext.getBean("policyComponent");
|
||||
this.authenticationComponent = (AuthenticationComponent)this.applicationContext.getBean("authenticationComponent");
|
||||
nodeService = (NodeService) ctx.getBean("dbNodeService");
|
||||
contentService = (ContentService) ctx.getBean(ServiceRegistry.CONTENT_SERVICE.getLocalName());
|
||||
this.policyComponent = (PolicyComponent) ctx.getBean("policyComponent");
|
||||
this.authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
|
||||
|
||||
// authenticate
|
||||
this.authenticationComponent.setSystemUserAsCurrentUser();
|
||||
|
||||
// start the transaction
|
||||
txn = getUserTransaction();
|
||||
txn.begin();
|
||||
|
||||
// create a store and get the root node
|
||||
StoreRef storeRef = new StoreRef(StoreRef.PROTOCOL_WORKSPACE, getName());
|
||||
if (!nodeService.exists(storeRef))
|
||||
@@ -103,7 +114,7 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onTearDownInTransaction() throws Exception
|
||||
public void tearDown() throws Exception
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -113,12 +124,22 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
{
|
||||
// ignore
|
||||
}
|
||||
super.onTearDownInTransaction();
|
||||
try
|
||||
{
|
||||
if (txn != null)
|
||||
{
|
||||
txn.rollback();
|
||||
}
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
private UserTransaction getUserTransaction()
|
||||
{
|
||||
TransactionService transactionService = (TransactionService)applicationContext.getBean("transactionComponent");
|
||||
TransactionService transactionService = (TransactionService) ctx.getBean("transactionComponent");
|
||||
return (UserTransaction) transactionService.getUserTransaction();
|
||||
}
|
||||
|
||||
@@ -236,8 +257,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
assertFalse("Reader should indicate that content is missing", reader.exists());
|
||||
|
||||
// check the indexing doesn't spank everything
|
||||
setComplete();
|
||||
endTransaction();
|
||||
txn.commit();
|
||||
txn = null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -405,8 +426,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
public void testConcurrentWritesNoTxn() throws Exception
|
||||
{
|
||||
// ensure that the transaction is ended - of course, we need to force a commit
|
||||
setComplete();
|
||||
endTransaction();
|
||||
txn.commit();
|
||||
txn = null;
|
||||
|
||||
ContentWriter writer1 = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
|
||||
ContentWriter writer2 = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
|
||||
@@ -425,8 +446,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
public void testConcurrentWritesWithSingleTxn() throws Exception
|
||||
{
|
||||
// want to operate in a user transaction
|
||||
setComplete();
|
||||
endTransaction();
|
||||
txn.commit();
|
||||
txn = null;
|
||||
|
||||
UserTransaction txn = getUserTransaction();
|
||||
txn.begin();
|
||||
@@ -472,8 +493,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
public synchronized void testConcurrentWritesWithMultipleTxns() throws Exception
|
||||
{
|
||||
// commit node so that threads can see node
|
||||
setComplete();
|
||||
endTransaction();
|
||||
txn.commit();
|
||||
txn = null;
|
||||
|
||||
UserTransaction txn = getUserTransaction();
|
||||
txn.begin();
|
||||
@@ -527,8 +548,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
public void testTransformation() throws Exception
|
||||
{
|
||||
// commit node so that threads can see node
|
||||
setComplete();
|
||||
endTransaction();
|
||||
txn.commit();
|
||||
txn = null;
|
||||
|
||||
UserTransaction txn = getUserTransaction();
|
||||
txn.begin();
|
||||
@@ -655,4 +676,27 @@ public class RoutingContentServiceTest extends BaseSpringTest
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the system is able to handle the uploading of content with an unknown mimetype.
|
||||
* The unknown mimetype should be preserved, but treated just like an octet stream.
|
||||
*/
|
||||
public void testUnknownMimetype() throws Exception
|
||||
{
|
||||
String bogusMimetype = "text/bamboozle";
|
||||
// get a writer onto the node
|
||||
ContentWriter writer = contentService.getWriter(contentNodeRef, ContentModel.PROP_CONTENT, true);
|
||||
writer.setMimetype(bogusMimetype);
|
||||
|
||||
// write something in
|
||||
writer.putContent(SOME_CONTENT);
|
||||
|
||||
// commit the transaction to ensure that it goes in OK
|
||||
txn.commit();
|
||||
|
||||
// so far, so good
|
||||
ContentReader reader = contentService.getReader(contentNodeRef, ContentModel.PROP_CONTENT);
|
||||
assertNotNull("Should be able to get reader", reader);
|
||||
assertEquals("Unknown mimetype was changed", bogusMimetype, reader.getMimetype());
|
||||
}
|
||||
}
|
||||
|
@@ -1,220 +1,220 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
||||
{
|
||||
protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
|
||||
|
||||
private MimetypeService mimetypeService;
|
||||
private MetadataExtracterRegistry registry;
|
||||
private Set<String> supportedMimetypes;
|
||||
private double reliability;
|
||||
private long extractionTime;
|
||||
|
||||
protected AbstractMetadataExtracter(String supportedMimetype, double reliability, long extractionTime)
|
||||
{
|
||||
this.supportedMimetypes = Collections.singleton(supportedMimetype);
|
||||
this.reliability = reliability;
|
||||
this.extractionTime = extractionTime;
|
||||
}
|
||||
|
||||
protected AbstractMetadataExtracter(Set<String> supportedMimetypes, double reliability, long extractionTime)
|
||||
{
|
||||
this.supportedMimetypes = supportedMimetypes;
|
||||
this.reliability = reliability;
|
||||
this.extractionTime = extractionTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the registry to register with
|
||||
*
|
||||
* @param registry a metadata extracter registry
|
||||
*/
|
||||
public void setRegistry(MetadataExtracterRegistry registry)
|
||||
{
|
||||
this.registry = registry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper setter of the mimetype service. This is not always required.
|
||||
*
|
||||
* @param mimetypeService
|
||||
*/
|
||||
public void setMimetypeService(MimetypeService mimetypeService)
|
||||
{
|
||||
this.mimetypeService = mimetypeService;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the mimetype helper
|
||||
*/
|
||||
protected MimetypeService getMimetypeService()
|
||||
{
|
||||
return mimetypeService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers this instance of the extracter with the registry.
|
||||
*
|
||||
* @see #setRegistry(MetadataExtracterRegistry)
|
||||
*/
|
||||
public void register()
|
||||
{
|
||||
if (registry == null)
|
||||
{
|
||||
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
|
||||
" extracter: " + this);
|
||||
return;
|
||||
}
|
||||
registry.register(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default reliability check that returns the reliability as configured by the constructor
|
||||
* if the mimetype is in the list of supported mimetypes.
|
||||
*
|
||||
* @param mimetype the mimetype to check
|
||||
*/
|
||||
public double getReliability(String mimetype)
|
||||
{
|
||||
if (supportedMimetypes.contains(mimetype))
|
||||
return reliability;
|
||||
else
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public long getExtractionTime()
|
||||
{
|
||||
return extractionTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the mimetype is supported.
|
||||
*
|
||||
* @param reader the reader to check
|
||||
* @throws AlfrescoRuntimeException if the mimetype is not supported
|
||||
*/
|
||||
protected void checkReliability(ContentReader reader)
|
||||
{
|
||||
String mimetype = reader.getMimetype();
|
||||
if (getReliability(mimetype) <= 0.0)
|
||||
{
|
||||
throw new AlfrescoRuntimeException(
|
||||
"Metadata extracter does not support mimetype: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" supported: " + supportedMimetypes + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
public final void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
|
||||
{
|
||||
// check the reliability
|
||||
checkReliability(reader);
|
||||
|
||||
try
|
||||
{
|
||||
extractInternal(reader, destination);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new ContentIOException("Metadata extraction failed: \n" +
|
||||
" reader: " + reader,
|
||||
e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
// check that the reader was closed
|
||||
if (!reader.isClosed())
|
||||
{
|
||||
logger.error("Content reader not closed by metadata extracter: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
// done
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Completed metadata extraction: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Override to provide the necessary extraction logic. Implementations must ensure that the reader
|
||||
* is closed before the method exits.
|
||||
*
|
||||
* @param reader the source of the content
|
||||
* @param destination the property map to fill
|
||||
* @throws Throwable an exception
|
||||
*/
|
||||
protected abstract void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable;
|
||||
|
||||
/**
|
||||
* Examines a value or string for nulls and adds it to the map (if
|
||||
* non-empty)
|
||||
*
|
||||
* @param prop Alfresco's <code>ContentModel.PROP_</code> to set.
|
||||
* @param value Value to set it to
|
||||
* @param destination Map into which to set it
|
||||
* @return true, if set, false otherwise
|
||||
*/
|
||||
protected boolean trimPut(QName prop, Object value, Map<QName, Serializable> destination)
|
||||
{
|
||||
if (value == null)
|
||||
return false;
|
||||
if (value instanceof String)
|
||||
{
|
||||
String svalue = ((String) value).trim();
|
||||
if (svalue.length() > 0)
|
||||
{
|
||||
destination.put(prop, svalue);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
else if (value instanceof Serializable)
|
||||
{
|
||||
destination.put(prop, (Serializable) value);
|
||||
}
|
||||
else
|
||||
{
|
||||
destination.put(prop, value.toString());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.cmr.repository.MimetypeService;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
abstract public class AbstractMetadataExtracter implements MetadataExtracter
|
||||
{
|
||||
protected static Log logger = LogFactory.getLog(AbstractMetadataExtracter.class);
|
||||
|
||||
private MimetypeService mimetypeService;
|
||||
private MetadataExtracterRegistry registry;
|
||||
private Set<String> supportedMimetypes;
|
||||
private double reliability;
|
||||
private long extractionTime;
|
||||
|
||||
protected AbstractMetadataExtracter(String supportedMimetype, double reliability, long extractionTime)
|
||||
{
|
||||
this.supportedMimetypes = Collections.singleton(supportedMimetype);
|
||||
this.reliability = reliability;
|
||||
this.extractionTime = extractionTime;
|
||||
}
|
||||
|
||||
protected AbstractMetadataExtracter(Set<String> supportedMimetypes, double reliability, long extractionTime)
|
||||
{
|
||||
this.supportedMimetypes = supportedMimetypes;
|
||||
this.reliability = reliability;
|
||||
this.extractionTime = extractionTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the registry to register with
|
||||
*
|
||||
* @param registry a metadata extracter registry
|
||||
*/
|
||||
public void setRegistry(MetadataExtracterRegistry registry)
|
||||
{
|
||||
this.registry = registry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper setter of the mimetype service. This is not always required.
|
||||
*
|
||||
* @param mimetypeService
|
||||
*/
|
||||
public void setMimetypeService(MimetypeService mimetypeService)
|
||||
{
|
||||
this.mimetypeService = mimetypeService;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the mimetype helper
|
||||
*/
|
||||
protected MimetypeService getMimetypeService()
|
||||
{
|
||||
return mimetypeService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers this instance of the extracter with the registry.
|
||||
*
|
||||
* @see #setRegistry(MetadataExtracterRegistry)
|
||||
*/
|
||||
public void register()
|
||||
{
|
||||
if (registry == null)
|
||||
{
|
||||
logger.warn("Property 'registry' has not been set. Ignoring auto-registration: \n" +
|
||||
" extracter: " + this);
|
||||
return;
|
||||
}
|
||||
registry.register(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default reliability check that returns the reliability as configured by the constructor
|
||||
* if the mimetype is in the list of supported mimetypes.
|
||||
*
|
||||
* @param mimetype the mimetype to check
|
||||
*/
|
||||
public double getReliability(String mimetype)
|
||||
{
|
||||
if (supportedMimetypes.contains(mimetype))
|
||||
return reliability;
|
||||
else
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public long getExtractionTime()
|
||||
{
|
||||
return extractionTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the mimetype is supported.
|
||||
*
|
||||
* @param reader the reader to check
|
||||
* @throws AlfrescoRuntimeException if the mimetype is not supported
|
||||
*/
|
||||
protected void checkReliability(ContentReader reader)
|
||||
{
|
||||
String mimetype = reader.getMimetype();
|
||||
if (getReliability(mimetype) <= 0.0)
|
||||
{
|
||||
throw new AlfrescoRuntimeException(
|
||||
"Metadata extracter does not support mimetype: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" supported: " + supportedMimetypes + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
public final void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException
|
||||
{
|
||||
// check the reliability
|
||||
checkReliability(reader);
|
||||
|
||||
try
|
||||
{
|
||||
extractInternal(reader, destination);
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
throw new ContentIOException("Metadata extraction failed: \n" +
|
||||
" reader: " + reader,
|
||||
e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
// check that the reader was closed
|
||||
if (!reader.isClosed())
|
||||
{
|
||||
logger.error("Content reader not closed by metadata extracter: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
// done
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Completed metadata extraction: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" extracter: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Override to provide the necessary extraction logic. Implementations must ensure that the reader
|
||||
* is closed before the method exits.
|
||||
*
|
||||
* @param reader the source of the content
|
||||
* @param destination the property map to fill
|
||||
* @throws Throwable an exception
|
||||
*/
|
||||
protected abstract void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable;
|
||||
|
||||
/**
|
||||
* Examines a value or string for nulls and adds it to the map (if
|
||||
* non-empty)
|
||||
*
|
||||
* @param prop Alfresco's <code>ContentModel.PROP_</code> to set.
|
||||
* @param value Value to set it to
|
||||
* @param destination Map into which to set it
|
||||
* @return true, if set, false otherwise
|
||||
*/
|
||||
protected boolean trimPut(QName prop, Object value, Map<QName, Serializable> destination)
|
||||
{
|
||||
if (value == null)
|
||||
return false;
|
||||
if (value instanceof String)
|
||||
{
|
||||
String svalue = ((String) value).trim();
|
||||
if (svalue.length() > 0)
|
||||
{
|
||||
destination.put(prop, svalue);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
else if (value instanceof Serializable)
|
||||
{
|
||||
destination.put(prop, (Serializable) value);
|
||||
}
|
||||
else
|
||||
{
|
||||
destination.put(prop, value.toString());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@@ -1,116 +1,116 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.ApplicationContextHelper;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.metadata.MetadataExtracter
|
||||
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public abstract class AbstractMetadataExtracterTest extends TestCase
|
||||
{
|
||||
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
|
||||
|
||||
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
|
||||
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
|
||||
protected static final String QUICK_CREATOR = "Nevin Nollop";
|
||||
|
||||
protected MimetypeMap mimetypeMap;
|
||||
|
||||
protected abstract MetadataExtracter getExtracter();
|
||||
|
||||
/**
|
||||
* Ensures that the temp locations are cleaned out before the tests start
|
||||
*/
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
|
||||
|
||||
// perform a little cleaning up
|
||||
long now = System.currentTimeMillis();
|
||||
TempFileProvider.TempFileCleanerJob.removeFiles(now);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that all objects are present
|
||||
*/
|
||||
public void testSetUp() throws Exception
|
||||
{
|
||||
assertNotNull("MimetypeMap not present", mimetypeMap);
|
||||
// check that the quick resources are available
|
||||
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
|
||||
assertNotNull("quick.* files should be available from Tests", sourceFile);
|
||||
}
|
||||
|
||||
protected void testExtractFromMimetype(String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = extractFromMimetype(mimetype);
|
||||
// check
|
||||
testCommonMetadata(mimetype, properties);
|
||||
}
|
||||
|
||||
protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
|
||||
// get the extension for the mimetype
|
||||
String ext = mimetypeMap.getExtension(mimetype);
|
||||
|
||||
// attempt to get a source file for each mimetype
|
||||
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
|
||||
if (sourceFile == null)
|
||||
{
|
||||
throw new FileNotFoundException("No quick." + ext + " file found for test");
|
||||
}
|
||||
|
||||
// construct a reader onto the source file
|
||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||
sourceReader.setMimetype(mimetype);
|
||||
getExtracter().extract(sourceReader, properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
|
||||
{
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
|
||||
QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
|
||||
QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.repo.content.filestore.FileContentReader;
|
||||
import org.alfresco.repo.content.transform.AbstractContentTransformerTest;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.alfresco.util.ApplicationContextHelper;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.metadata.MetadataExtracter
|
||||
* @see org.alfresco.repo.content.metadata.AbstractMetadataExtracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public abstract class AbstractMetadataExtracterTest extends TestCase
|
||||
{
|
||||
private static ApplicationContext ctx = ApplicationContextHelper.getApplicationContext();
|
||||
|
||||
protected static final String QUICK_TITLE = "The quick brown fox jumps over the lazy dog";
|
||||
protected static final String QUICK_DESCRIPTION = "Gym class featuring a brown fox and lazy dog";
|
||||
protected static final String QUICK_CREATOR = "Nevin Nollop";
|
||||
|
||||
protected MimetypeMap mimetypeMap;
|
||||
|
||||
protected abstract MetadataExtracter getExtracter();
|
||||
|
||||
/**
|
||||
* Ensures that the temp locations are cleaned out before the tests start
|
||||
*/
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
this.mimetypeMap = (MimetypeMap) ctx.getBean("mimetypeService");
|
||||
|
||||
// perform a little cleaning up
|
||||
long now = System.currentTimeMillis();
|
||||
TempFileProvider.TempFileCleanerJob.removeFiles(now);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that all objects are present
|
||||
*/
|
||||
public void testSetUp() throws Exception
|
||||
{
|
||||
assertNotNull("MimetypeMap not present", mimetypeMap);
|
||||
// check that the quick resources are available
|
||||
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile("txt");
|
||||
assertNotNull("quick.* files should be available from Tests", sourceFile);
|
||||
}
|
||||
|
||||
protected void testExtractFromMimetype(String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = extractFromMimetype(mimetype);
|
||||
// check
|
||||
testCommonMetadata(mimetype, properties);
|
||||
}
|
||||
|
||||
protected Map<QName, Serializable> extractFromMimetype(String mimetype) throws Exception
|
||||
{
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
|
||||
// get the extension for the mimetype
|
||||
String ext = mimetypeMap.getExtension(mimetype);
|
||||
|
||||
// attempt to get a source file for each mimetype
|
||||
File sourceFile = AbstractContentTransformerTest.loadQuickTestFile(ext);
|
||||
if (sourceFile == null)
|
||||
{
|
||||
throw new FileNotFoundException("No quick." + ext + " file found for test");
|
||||
}
|
||||
|
||||
// construct a reader onto the source file
|
||||
ContentReader sourceReader = new FileContentReader(sourceFile);
|
||||
sourceReader.setMimetype(mimetype);
|
||||
getExtracter().extract(sourceReader, properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
protected void testCommonMetadata(String mimetype, Map<QName, Serializable> properties)
|
||||
{
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_TITLE + " not found for mimetype " + mimetype,
|
||||
QUICK_TITLE, properties.get(ContentModel.PROP_TITLE));
|
||||
assertEquals(
|
||||
"Property " + ContentModel.PROP_DESCRIPTION + " not found for mimetype " + mimetype,
|
||||
QUICK_DESCRIPTION, properties.get(ContentModel.PROP_DESCRIPTION));
|
||||
}
|
||||
}
|
||||
|
@@ -1,169 +1,169 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.swing.text.ChangedCharSetException;
|
||||
import javax.swing.text.MutableAttributeSet;
|
||||
import javax.swing.text.html.HTML;
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
import javax.swing.text.html.parser.ParserDelegator;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
private static final Set<String> MIMETYPES = new HashSet<String>(5);
|
||||
static
|
||||
{
|
||||
MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
|
||||
MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
|
||||
}
|
||||
|
||||
public HtmlMetadataExtracter()
|
||||
{
|
||||
super(MIMETYPES, 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
|
||||
|
||||
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
|
||||
{
|
||||
StringBuffer title = null;
|
||||
boolean inHead = false;
|
||||
|
||||
public void handleText(char[] data, int pos)
|
||||
{
|
||||
if (title != null)
|
||||
{
|
||||
title.append(data);
|
||||
}
|
||||
}
|
||||
|
||||
public void handleComment(char[] data, int pos)
|
||||
{
|
||||
// Perhaps sniff for Office 9+ metadata in here?
|
||||
}
|
||||
|
||||
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
|
||||
{
|
||||
if (HTML.Tag.HEAD.equals(t))
|
||||
{
|
||||
inHead = true;
|
||||
}
|
||||
else if (HTML.Tag.TITLE.equals(t) && inHead)
|
||||
{
|
||||
title = new StringBuffer();
|
||||
}
|
||||
else
|
||||
handleSimpleTag(t, a, pos);
|
||||
}
|
||||
|
||||
public void handleEndTag(HTML.Tag t, int pos)
|
||||
{
|
||||
if (HTML.Tag.HEAD.equals(t))
|
||||
{
|
||||
inHead = false;
|
||||
}
|
||||
else if (HTML.Tag.TITLE.equals(t) && title != null)
|
||||
{
|
||||
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
|
||||
title = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
|
||||
{
|
||||
if (HTML.Tag.META.equals(t))
|
||||
{
|
||||
Object nameO = a.getAttribute(HTML.Attribute.NAME);
|
||||
Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
|
||||
if (nameO == null || valueO == null)
|
||||
return;
|
||||
|
||||
String name = nameO.toString();
|
||||
|
||||
if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
|
||||
|| name.equalsIgnoreCase("dc.creator"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
|
||||
}
|
||||
if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void handleError(String errorMsg, int pos)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
String charsetGuess = "UTF-8";
|
||||
int tries = 0;
|
||||
while (tries < 3)
|
||||
{
|
||||
tempDestination.clear();
|
||||
Reader r = null;
|
||||
InputStream cis = null;
|
||||
try
|
||||
{
|
||||
cis = reader.getContentInputStream();
|
||||
// TODO: for now, use default charset; we should attempt to map from html meta-data
|
||||
r = new InputStreamReader(cis);
|
||||
HTMLEditorKit.Parser parser = new ParserDelegator();
|
||||
parser.parse(r, callback, tries > 0);
|
||||
destination.putAll(tempDestination);
|
||||
break;
|
||||
}
|
||||
catch (ChangedCharSetException ccse)
|
||||
{
|
||||
tries++;
|
||||
charsetGuess = ccse.getCharSetSpec();
|
||||
int begin = charsetGuess.indexOf("charset=");
|
||||
if (begin > 0)
|
||||
charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
|
||||
reader = reader.getReader();
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (r != null)
|
||||
r.close();
|
||||
if (cis != null)
|
||||
cis.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.swing.text.ChangedCharSetException;
|
||||
import javax.swing.text.MutableAttributeSet;
|
||||
import javax.swing.text.html.HTML;
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
import javax.swing.text.html.parser.ParserDelegator;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class HtmlMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
private static final Set<String> MIMETYPES = new HashSet<String>(5);
|
||||
static
|
||||
{
|
||||
MIMETYPES.add(MimetypeMap.MIMETYPE_HTML);
|
||||
MIMETYPES.add(MimetypeMap.MIMETYPE_XHTML);
|
||||
}
|
||||
|
||||
public HtmlMetadataExtracter()
|
||||
{
|
||||
super(MIMETYPES, 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
final Map<QName, Serializable> tempDestination = new HashMap<QName, Serializable>();
|
||||
|
||||
HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback()
|
||||
{
|
||||
StringBuffer title = null;
|
||||
boolean inHead = false;
|
||||
|
||||
public void handleText(char[] data, int pos)
|
||||
{
|
||||
if (title != null)
|
||||
{
|
||||
title.append(data);
|
||||
}
|
||||
}
|
||||
|
||||
public void handleComment(char[] data, int pos)
|
||||
{
|
||||
// Perhaps sniff for Office 9+ metadata in here?
|
||||
}
|
||||
|
||||
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
|
||||
{
|
||||
if (HTML.Tag.HEAD.equals(t))
|
||||
{
|
||||
inHead = true;
|
||||
}
|
||||
else if (HTML.Tag.TITLE.equals(t) && inHead)
|
||||
{
|
||||
title = new StringBuffer();
|
||||
}
|
||||
else
|
||||
handleSimpleTag(t, a, pos);
|
||||
}
|
||||
|
||||
public void handleEndTag(HTML.Tag t, int pos)
|
||||
{
|
||||
if (HTML.Tag.HEAD.equals(t))
|
||||
{
|
||||
inHead = false;
|
||||
}
|
||||
else if (HTML.Tag.TITLE.equals(t) && title != null)
|
||||
{
|
||||
trimPut(ContentModel.PROP_TITLE, title.toString(), tempDestination);
|
||||
title = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
|
||||
{
|
||||
if (HTML.Tag.META.equals(t))
|
||||
{
|
||||
Object nameO = a.getAttribute(HTML.Attribute.NAME);
|
||||
Object valueO = a.getAttribute(HTML.Attribute.CONTENT);
|
||||
if (nameO == null || valueO == null)
|
||||
return;
|
||||
|
||||
String name = nameO.toString();
|
||||
|
||||
if (name.equalsIgnoreCase("creator") || name.equalsIgnoreCase("author")
|
||||
|| name.equalsIgnoreCase("dc.creator"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_AUTHOR, valueO, tempDestination);
|
||||
}
|
||||
if (name.equalsIgnoreCase("description") || name.equalsIgnoreCase("dc.description"))
|
||||
{
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, valueO, tempDestination);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void handleError(String errorMsg, int pos)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
String charsetGuess = "UTF-8";
|
||||
int tries = 0;
|
||||
while (tries < 3)
|
||||
{
|
||||
tempDestination.clear();
|
||||
Reader r = null;
|
||||
InputStream cis = null;
|
||||
try
|
||||
{
|
||||
cis = reader.getContentInputStream();
|
||||
// TODO: for now, use default charset; we should attempt to map from html meta-data
|
||||
r = new InputStreamReader(cis);
|
||||
HTMLEditorKit.Parser parser = new ParserDelegator();
|
||||
parser.parse(r, callback, tries > 0);
|
||||
destination.putAll(tempDestination);
|
||||
break;
|
||||
}
|
||||
catch (ChangedCharSetException ccse)
|
||||
{
|
||||
tries++;
|
||||
charsetGuess = ccse.getCharSetSpec();
|
||||
int begin = charsetGuess.indexOf("charset=");
|
||||
if (begin > 0)
|
||||
charsetGuess = charsetGuess.substring(begin + 8, charsetGuess.length());
|
||||
reader = reader.getReader();
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (r != null)
|
||||
r.close();
|
||||
if (cis != null)
|
||||
cis.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,57 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
|
||||
/**
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private MetadataExtracter extracter;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new HtmlMetadataExtracter();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testReliability() throws Exception
|
||||
{
|
||||
double reliability = 0.0;
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||
assertEquals("Mimetype text should not be supported", 0.0, reliability);
|
||||
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_HTML);
|
||||
assertEquals("HTML should be supported", 1.0, reliability);
|
||||
}
|
||||
|
||||
public void testHtmlExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
|
||||
/**
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class HtmlMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private MetadataExtracter extracter;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new HtmlMetadataExtracter();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testReliability() throws Exception
|
||||
{
|
||||
double reliability = 0.0;
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||
assertEquals("Mimetype text should not be supported", 0.0, reliability);
|
||||
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_HTML);
|
||||
assertEquals("HTML should be supported", 1.0, reliability);
|
||||
}
|
||||
|
||||
public void testHtmlExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_HTML);
|
||||
}
|
||||
}
|
||||
|
@@ -1,72 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public interface MetadataExtracter
|
||||
{
|
||||
/**
|
||||
* Provides the approximate accuracy with which this extracter can extract
|
||||
* metadata for the mimetype.
|
||||
* <p>
|
||||
*
|
||||
* @param sourceMimetype the source mimetype
|
||||
* @return Returns a score 0.0 to 1.0. 0.0 indicates that the extraction
|
||||
* cannot be performed at all. 1.0 indicates that the extraction can
|
||||
* be performed perfectly.
|
||||
*/
|
||||
public double getReliability(String sourceMimetype);
|
||||
|
||||
/**
|
||||
* Provides an estimate, usually a worst case guess, of how long an
|
||||
* extraction will take.
|
||||
* <p>
|
||||
* This method is used to determine, up front, which of a set of equally
|
||||
* reliant transformers will be used for a specific extraction.
|
||||
*
|
||||
* @return Returns the approximate number of milliseconds per transformation
|
||||
*/
|
||||
public long getExtractionTime();
|
||||
|
||||
/**
|
||||
* Extracts the metadata from the content provided by the reader and source
|
||||
* mimetype to the supplied map.
|
||||
* <p>
|
||||
* The extraction viability can be determined by an up front call to
|
||||
* {@link #getReliability(String)}.
|
||||
* <p>
|
||||
* The source mimetype <b>must</b> be available on the
|
||||
* {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
|
||||
* of the reader.
|
||||
*
|
||||
* @param reader the source of the content
|
||||
* @param destination the destination of the extraction
|
||||
* @throws ContentIOException if an IO exception occurs
|
||||
*/
|
||||
public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
|
||||
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public interface MetadataExtracter
|
||||
{
|
||||
/**
|
||||
* Provides the approximate accuracy with which this extracter can extract
|
||||
* metadata for the mimetype.
|
||||
* <p>
|
||||
*
|
||||
* @param sourceMimetype the source mimetype
|
||||
* @return Returns a score 0.0 to 1.0. 0.0 indicates that the extraction
|
||||
* cannot be performed at all. 1.0 indicates that the extraction can
|
||||
* be performed perfectly.
|
||||
*/
|
||||
public double getReliability(String sourceMimetype);
|
||||
|
||||
/**
|
||||
* Provides an estimate, usually a worst case guess, of how long an
|
||||
* extraction will take.
|
||||
* <p>
|
||||
* This method is used to determine, up front, which of a set of equally
|
||||
* reliant transformers will be used for a specific extraction.
|
||||
*
|
||||
* @return Returns the approximate number of milliseconds per transformation
|
||||
*/
|
||||
public long getExtractionTime();
|
||||
|
||||
/**
|
||||
* Extracts the metadata from the content provided by the reader and source
|
||||
* mimetype to the supplied map.
|
||||
* <p>
|
||||
* The extraction viability can be determined by an up front call to
|
||||
* {@link #getReliability(String)}.
|
||||
* <p>
|
||||
* The source mimetype <b>must</b> be available on the
|
||||
* {@link org.alfresco.service.cmr.repository.ContentAccessor#getMimetype()} method
|
||||
* of the reader.
|
||||
*
|
||||
* @param reader the source of the content
|
||||
* @param destination the destination of the extraction
|
||||
* @throws ContentIOException if an IO exception occurs
|
||||
*/
|
||||
public void extract(ContentReader reader, Map<QName, Serializable> destination) throws ContentIOException;
|
||||
|
||||
}
|
||||
|
@@ -1,191 +1,172 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* Holds and provides the most appropriate metadata extracter for a particular
|
||||
* mimetype.
|
||||
* <p>
|
||||
* The extracters themselves know how well they are able to extract metadata.
|
||||
*
|
||||
* @see org.alfresco.repo.content.metadata.MetadataExtracter
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class MetadataExtracterRegistry
|
||||
{
|
||||
private static final Log logger = LogFactory.getLog(MetadataExtracterRegistry.class);
|
||||
|
||||
private List<MetadataExtracter> extracters;
|
||||
private Map<String, MetadataExtracter> extracterCache;
|
||||
|
||||
private MimetypeMap mimetypeMap;
|
||||
/** Controls read access to the cache */
|
||||
private Lock extracterCacheReadLock;
|
||||
/** controls write access to the cache */
|
||||
private Lock extracterCacheWriteLock;
|
||||
|
||||
public MetadataExtracterRegistry()
|
||||
{
|
||||
// initialise lists
|
||||
extracters = new ArrayList<MetadataExtracter>(10);
|
||||
extracterCache = new HashMap<String, MetadataExtracter>(17);
|
||||
|
||||
// create lock objects for access to the cache
|
||||
ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
|
||||
extracterCacheReadLock = extractionCacheLock.readLock();
|
||||
extracterCacheWriteLock = extractionCacheLock.writeLock();
|
||||
}
|
||||
|
||||
/**
|
||||
* The mimetype map that will be used to check requests against
|
||||
*
|
||||
* @param mimetypeMap a map of mimetypes
|
||||
*/
|
||||
public void setMimetypeMap(MimetypeMap mimetypeMap)
|
||||
{
|
||||
this.mimetypeMap = mimetypeMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an instance of an extracter for use
|
||||
*
|
||||
* @param extracter an extracter
|
||||
*/
|
||||
public void register(MetadataExtracter extracter)
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Registering metadata extracter: " + extracter);
|
||||
}
|
||||
|
||||
extracterCacheWriteLock.lock();
|
||||
try
|
||||
{
|
||||
extracters.add(extracter);
|
||||
extracterCache.clear();
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheWriteLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best metadata extracter. This is a combination of the most
|
||||
* reliable and the most performant extracter.
|
||||
* <p>
|
||||
* The result is cached for quicker access next time.
|
||||
*
|
||||
* @param mimetype the source MIME of the extraction
|
||||
* @return Returns a metadata extracter that can extract metadata from the
|
||||
* chosen MIME type.
|
||||
*/
|
||||
public MetadataExtracter getExtracter(String sourceMimetype)
|
||||
{
|
||||
// check that the mimetypes are valid
|
||||
if (!mimetypeMap.getMimetypes().contains(sourceMimetype))
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Unknown extraction source mimetype: " + sourceMimetype);
|
||||
}
|
||||
|
||||
MetadataExtracter extracter = null;
|
||||
extracterCacheReadLock.lock();
|
||||
try
|
||||
{
|
||||
if (extracterCache.containsKey(sourceMimetype))
|
||||
{
|
||||
// the translation has been requested before
|
||||
// it might have been null
|
||||
return extracterCache.get(sourceMimetype);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheReadLock.unlock();
|
||||
}
|
||||
|
||||
// the translation has not been requested before
|
||||
// get a write lock on the cache
|
||||
// no double check done as it is not an expensive task
|
||||
extracterCacheWriteLock.lock();
|
||||
try
|
||||
{
|
||||
// find the most suitable transformer - may be empty list
|
||||
extracter = findBestExtracter(sourceMimetype);
|
||||
// store the result even if it is null
|
||||
extracterCache.put(sourceMimetype, extracter);
|
||||
return extracter;
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheWriteLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sourceMimetype The MIME type under examination
|
||||
* @return The fastest of the most reliable extracters in <code>extracters</code>
|
||||
* for the given MIME type, or null if none is available.
|
||||
*/
|
||||
private MetadataExtracter findBestExtracter(String sourceMimetype)
|
||||
{
|
||||
double bestReliability = -1;
|
||||
long bestTime = Long.MAX_VALUE;
|
||||
logger.debug("Finding best extracter for " + sourceMimetype);
|
||||
|
||||
MetadataExtracter bestExtracter = null;
|
||||
|
||||
for (MetadataExtracter ext : extracters)
|
||||
{
|
||||
double r = ext.getReliability(sourceMimetype);
|
||||
if (r <= 0.0)
|
||||
{
|
||||
// extraction not achievable
|
||||
continue;
|
||||
}
|
||||
else if (r == bestReliability)
|
||||
{
|
||||
long time = ext.getExtractionTime();
|
||||
if (time < bestTime)
|
||||
{
|
||||
bestExtracter = ext;
|
||||
bestTime = time;
|
||||
}
|
||||
}
|
||||
else if (r > bestReliability)
|
||||
{
|
||||
bestExtracter = ext;
|
||||
bestReliability = r;
|
||||
bestTime = ext.getExtractionTime();
|
||||
}
|
||||
}
|
||||
return bestExtracter;
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* Holds and provides the most appropriate metadate extracter for a particular
|
||||
* mimetype.
|
||||
* <p>
|
||||
* The extracters themselves know how well they are able to extract metadata.
|
||||
*
|
||||
* @see org.alfresco.repo.content.metadata.MetadataExtracter
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class MetadataExtracterRegistry
|
||||
{
|
||||
private static final Log logger = LogFactory.getLog(MetadataExtracterRegistry.class);
|
||||
|
||||
private List<MetadataExtracter> extracters;
|
||||
private Map<String, MetadataExtracter> extracterCache;
|
||||
|
||||
/** Controls read access to the cache */
|
||||
private Lock extracterCacheReadLock;
|
||||
/** controls write access to the cache */
|
||||
private Lock extracterCacheWriteLock;
|
||||
|
||||
public MetadataExtracterRegistry()
|
||||
{
|
||||
// initialise lists
|
||||
extracters = new ArrayList<MetadataExtracter>(10);
|
||||
extracterCache = new HashMap<String, MetadataExtracter>(17);
|
||||
|
||||
// create lock objects for access to the cache
|
||||
ReadWriteLock extractionCacheLock = new ReentrantReadWriteLock();
|
||||
extracterCacheReadLock = extractionCacheLock.readLock();
|
||||
extracterCacheWriteLock = extractionCacheLock.writeLock();
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an instance of an extracter for use
|
||||
*
|
||||
* @param extracter an extracter
|
||||
*/
|
||||
public void register(MetadataExtracter extracter)
|
||||
{
|
||||
if (logger.isDebugEnabled())
|
||||
{
|
||||
logger.debug("Registering metadata extracter: " + extracter);
|
||||
}
|
||||
|
||||
extracterCacheWriteLock.lock();
|
||||
try
|
||||
{
|
||||
extracters.add(extracter);
|
||||
extracterCache.clear();
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheWriteLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best metadata extracter. This is a combination of the most
|
||||
* reliable and the most performant extracter.
|
||||
* <p>
|
||||
* The result is cached for quicker access next time.
|
||||
*
|
||||
* @param mimetype the source MIME of the extraction
|
||||
* @return Returns a metadata extracter that can extract metadata from the
|
||||
* chosen MIME type.
|
||||
*/
|
||||
public MetadataExtracter getExtracter(String sourceMimetype)
|
||||
{
|
||||
MetadataExtracter extracter = null;
|
||||
extracterCacheReadLock.lock();
|
||||
try
|
||||
{
|
||||
if (extracterCache.containsKey(sourceMimetype))
|
||||
{
|
||||
// the translation has been requested before
|
||||
// it might have been null
|
||||
return extracterCache.get(sourceMimetype);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheReadLock.unlock();
|
||||
}
|
||||
|
||||
// the translation has not been requested before
|
||||
// get a write lock on the cache
|
||||
// no double check done as it is not an expensive task
|
||||
extracterCacheWriteLock.lock();
|
||||
try
|
||||
{
|
||||
// find the most suitable transformer - may be empty list
|
||||
extracter = findBestExtracter(sourceMimetype);
|
||||
// store the result even if it is null
|
||||
extracterCache.put(sourceMimetype, extracter);
|
||||
return extracter;
|
||||
}
|
||||
finally
|
||||
{
|
||||
extracterCacheWriteLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sourceMimetype The MIME type under examination
|
||||
* @return The fastest of the most reliable extracters in <code>extracters</code>
|
||||
* for the given MIME type, or null if none is available.
|
||||
*/
|
||||
private MetadataExtracter findBestExtracter(String sourceMimetype)
|
||||
{
|
||||
double bestReliability = -1;
|
||||
long bestTime = Long.MAX_VALUE;
|
||||
logger.debug("Finding best extracter for " + sourceMimetype);
|
||||
|
||||
MetadataExtracter bestExtracter = null;
|
||||
|
||||
for (MetadataExtracter ext : extracters)
|
||||
{
|
||||
double r = ext.getReliability(sourceMimetype);
|
||||
if (r <= 0.0)
|
||||
{
|
||||
// extraction not achievable
|
||||
continue;
|
||||
}
|
||||
else if (r == bestReliability)
|
||||
{
|
||||
long time = ext.getExtractionTime();
|
||||
if (time < bestTime)
|
||||
{
|
||||
bestExtracter = ext;
|
||||
bestTime = time;
|
||||
}
|
||||
}
|
||||
else if (r > bestReliability)
|
||||
{
|
||||
bestExtracter = ext;
|
||||
bestReliability = r;
|
||||
bestTime = ext.getExtractionTime();
|
||||
}
|
||||
}
|
||||
return bestExtracter;
|
||||
}
|
||||
}
|
@@ -1,101 +1,101 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.poi.hpsf.PropertySet;
|
||||
import org.apache.poi.hpsf.PropertySetFactory;
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
||||
|
||||
/**
|
||||
* Office file format Metadata Extracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||
MimetypeMap.MIMETYPE_WORD,
|
||||
MimetypeMap.MIMETYPE_EXCEL,
|
||||
MimetypeMap.MIMETYPE_PPT};
|
||||
|
||||
public OfficeMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
POIFSReaderListener readerListener = new POIFSReaderListener()
|
||||
{
|
||||
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
|
||||
{
|
||||
try
|
||||
{
|
||||
PropertySet ps = PropertySetFactory.create(event.getStream());
|
||||
if (ps instanceof SummaryInformation)
|
||||
{
|
||||
SummaryInformation si = (SummaryInformation) ps;
|
||||
|
||||
// Titled aspect
|
||||
trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
|
||||
|
||||
// Auditable aspect
|
||||
trimPut(ContentModel.PROP_CREATED, si.getCreateDateTime(), destination);
|
||||
trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
|
||||
trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
POIFSReader poiFSReader = new POIFSReader();
|
||||
poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
|
||||
poiFSReader.read(is);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentIOException;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.poi.hpsf.PropertySet;
|
||||
import org.apache.poi.hpsf.PropertySetFactory;
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
||||
|
||||
/**
|
||||
* Office file format Metadata Extracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class OfficeMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
public static String[] SUPPORTED_MIMETYPES = new String[] {
|
||||
MimetypeMap.MIMETYPE_WORD,
|
||||
MimetypeMap.MIMETYPE_EXCEL,
|
||||
MimetypeMap.MIMETYPE_PPT};
|
||||
|
||||
public OfficeMetadataExtracter()
|
||||
{
|
||||
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)), 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, final Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
POIFSReaderListener readerListener = new POIFSReaderListener()
|
||||
{
|
||||
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
|
||||
{
|
||||
try
|
||||
{
|
||||
PropertySet ps = PropertySetFactory.create(event.getStream());
|
||||
if (ps instanceof SummaryInformation)
|
||||
{
|
||||
SummaryInformation si = (SummaryInformation) ps;
|
||||
|
||||
// Titled aspect
|
||||
trimPut(ContentModel.PROP_TITLE, si.getTitle(), destination);
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, si.getSubject(), destination);
|
||||
|
||||
// Auditable aspect
|
||||
trimPut(ContentModel.PROP_CREATED, si.getCreateDateTime(), destination);
|
||||
trimPut(ContentModel.PROP_MODIFIED, si.getLastSaveDateTime(), destination);
|
||||
trimPut(ContentModel.PROP_AUTHOR, si.getAuthor(), destination);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
POIFSReader poiFSReader = new POIFSReader();
|
||||
poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
|
||||
poiFSReader.read(is);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,75 +1,75 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Calendar;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.pdfbox.pdmodel.PDDocument;
|
||||
import org.pdfbox.pdmodel.PDDocumentInformation;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
public PdfBoxMetadataExtracter()
|
||||
{
|
||||
super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
PDDocument pdf = null;
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
// stream the document in
|
||||
pdf = PDDocument.load(is);
|
||||
// Scoop out the metadata
|
||||
PDDocumentInformation docInfo = pdf.getDocumentInformation();
|
||||
|
||||
trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
|
||||
trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
|
||||
|
||||
Calendar created = docInfo.getCreationDate();
|
||||
if (created != null)
|
||||
destination.put(ContentModel.PROP_CREATED, created.getTime());
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
if (pdf != null)
|
||||
{
|
||||
try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2005 Jesper Steen Møller
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Calendar;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.alfresco.service.cmr.repository.ContentReader;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.pdfbox.pdmodel.PDDocument;
|
||||
import org.pdfbox.pdmodel.PDDocumentInformation;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
|
||||
{
|
||||
public PdfBoxMetadataExtracter()
|
||||
{
|
||||
super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
|
||||
}
|
||||
|
||||
public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
|
||||
{
|
||||
PDDocument pdf = null;
|
||||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = reader.getContentInputStream();
|
||||
// stream the document in
|
||||
pdf = PDDocument.load(is);
|
||||
// Scoop out the metadata
|
||||
PDDocumentInformation docInfo = pdf.getDocumentInformation();
|
||||
|
||||
trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
|
||||
trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
|
||||
trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);
|
||||
|
||||
Calendar created = docInfo.getCreationDate();
|
||||
if (created != null)
|
||||
destination.put(ContentModel.PROP_CREATED, created.getTime());
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (is != null)
|
||||
{
|
||||
try { is.close(); } catch (IOException e) {}
|
||||
}
|
||||
if (pdf != null)
|
||||
{
|
||||
try { pdf.close(); } catch (Throwable e) { e.printStackTrace(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,43 +1,43 @@
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private MetadataExtracter extracter;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new PdfBoxMetadataExtracter();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testReliability() throws Exception
|
||||
{
|
||||
double reliability = 0.0;
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||
assertEquals("Mimetype should not be supported", 0.0, reliability);
|
||||
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
|
||||
assertEquals("Mimetype should be supported", 1.0, reliability);
|
||||
}
|
||||
|
||||
public void testPdfExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
||||
}
|
||||
}
|
||||
package org.alfresco.repo.content.metadata;
|
||||
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
|
||||
/**
|
||||
* @see org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter
|
||||
*
|
||||
* @author Jesper Steen Møller
|
||||
*/
|
||||
public class PdfBoxMetadataExtracterTest extends AbstractMetadataExtracterTest
|
||||
{
|
||||
private MetadataExtracter extracter;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception
|
||||
{
|
||||
super.setUp();
|
||||
extracter = new PdfBoxMetadataExtracter();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the same transformer regardless - it is allowed
|
||||
*/
|
||||
protected MetadataExtracter getExtracter()
|
||||
{
|
||||
return extracter;
|
||||
}
|
||||
|
||||
public void testReliability() throws Exception
|
||||
{
|
||||
double reliability = 0.0;
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_TEXT_PLAIN);
|
||||
assertEquals("Mimetype should not be supported", 0.0, reliability);
|
||||
|
||||
reliability = extracter.getReliability(MimetypeMap.MIMETYPE_PDF);
|
||||
assertEquals("Mimetype should be supported", 1.0, reliability);
|
||||
}
|
||||
|
||||
public void testPdfExtraction() throws Exception
|
||||
{
|
||||
testExtractFromMimetype(MimetypeMap.MIMETYPE_PDF);
|
||||
}
|
||||
}
|
||||
|
@@ -252,13 +252,13 @@ public abstract class AbstractContentTransformer implements ContentTransformer
|
||||
finally
|
||||
{
|
||||
// check that the reader and writer are both closed
|
||||
if (!reader.isClosed())
|
||||
if (reader.isChannelOpen())
|
||||
{
|
||||
logger.error("Content reader not closed by transformer: \n" +
|
||||
" reader: " + reader + "\n" +
|
||||
" transformer: " + this);
|
||||
}
|
||||
if (!writer.isClosed())
|
||||
if (writer.isChannelOpen())
|
||||
{
|
||||
logger.error("Content writer not closed by transformer: \n" +
|
||||
" writer: " + writer + "\n" +
|
||||
|
@@ -25,11 +25,8 @@ import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.alfresco.error.AlfrescoRuntimeException;
|
||||
import org.alfresco.repo.content.MimetypeMap;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.util.Assert;
|
||||
|
||||
/**
|
||||
* Holds and provides the most appropriate content transformer for
|
||||
@@ -47,7 +44,6 @@ public class ContentTransformerRegistry
|
||||
private static final Log logger = LogFactory.getLog(ContentTransformerRegistry.class);
|
||||
|
||||
private List<ContentTransformer> transformers;
|
||||
private MimetypeMap mimetypeMap;
|
||||
/** Cache of previously used transformations */
|
||||
private Map<TransformationKey, List<ContentTransformer>> transformationCache;
|
||||
/** Controls read access to the transformation cache */
|
||||
@@ -58,11 +54,8 @@ public class ContentTransformerRegistry
|
||||
/**
|
||||
* @param mimetypeMap all the mimetypes available to the system
|
||||
*/
|
||||
public ContentTransformerRegistry(MimetypeMap mimetypeMap)
|
||||
public ContentTransformerRegistry()
|
||||
{
|
||||
Assert.notNull(mimetypeMap, "The MimetypeMap is mandatory");
|
||||
this.mimetypeMap = mimetypeMap;
|
||||
|
||||
this.transformers = new ArrayList<ContentTransformer>(10);
|
||||
transformationCache = new HashMap<TransformationKey, List<ContentTransformer>>(17);
|
||||
|
||||
@@ -143,16 +136,6 @@ public class ContentTransformerRegistry
|
||||
*/
|
||||
public ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
|
||||
{
|
||||
// check that the mimetypes are valid
|
||||
if (!mimetypeMap.getMimetypes().contains(sourceMimetype))
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Unknown source mimetype: " + sourceMimetype);
|
||||
}
|
||||
if (!mimetypeMap.getMimetypes().contains(targetMimetype))
|
||||
{
|
||||
throw new AlfrescoRuntimeException("Unknown target mimetype: " + targetMimetype);
|
||||
}
|
||||
|
||||
TransformationKey key = new TransformationKey(sourceMimetype, targetMimetype);
|
||||
List<ContentTransformer> transformers = null;
|
||||
transformationCacheReadLock.lock();
|
||||
|
@@ -69,7 +69,7 @@ public class ContentTransformerRegistryTest extends AbstractContentTransformerTe
|
||||
bytes[i] = (byte)i;
|
||||
}
|
||||
// create the dummyRegistry
|
||||
dummyRegistry = new ContentTransformerRegistry(mimetypeMap);
|
||||
dummyRegistry = new ContentTransformerRegistry();
|
||||
// create some dummy transformers for reliability tests
|
||||
new DummyTransformer(mimetypeMap, dummyRegistry, A, B, 0.3, 10L);
|
||||
new DummyTransformer(mimetypeMap, dummyRegistry, A, B, 0.6, 10L);
|
||||
|
Reference in New Issue
Block a user