Fix AR-499: Indexing of content pushed to the background for slow transformations (average transformation time above the threshold, which defaults to 20ms).

- We can make this configurable if required
 - This mostly means that text formats will be indexed atomically while slower transformations will be pushed back


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3061 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2006-06-08 13:12:31 +00:00
parent 14d4b785b9
commit 6fad540064
5 changed files with 206 additions and 127 deletions

View File

@@ -369,6 +369,7 @@
org.alfresco.service.cmr.repository.ContentService.getReader=ACL_NODE.0.sys:base.ReadContent org.alfresco.service.cmr.repository.ContentService.getReader=ACL_NODE.0.sys:base.ReadContent
org.alfresco.service.cmr.repository.ContentService.getWriter=ACL_NODE.0.sys:base.WriteContent org.alfresco.service.cmr.repository.ContentService.getWriter=ACL_NODE.0.sys:base.WriteContent
org.alfresco.service.cmr.repository.ContentService.isTransformable=ACL_ALLOW org.alfresco.service.cmr.repository.ContentService.isTransformable=ACL_ALLOW
org.alfresco.service.cmr.repository.ContentService.getTransformer=ACL_ALLOW
org.alfresco.service.cmr.repository.ContentService.transform=ACL_ALLOW org.alfresco.service.cmr.repository.ContentService.transform=ACL_ALLOW
org.alfresco.service.cmr.repository.ContentService.getTempWriter=ACL_ALLOW org.alfresco.service.cmr.repository.ContentService.getTempWriter=ACL_ALLOW
</value> </value>

View File

@@ -373,6 +373,18 @@ public class RoutingContentService implements ContentService
// done // done
} }
/**
 * Looks up a transformer for the given source and target mimetypes by delegating
 * to the transformer registry; whatever the registry answers is returned unchanged.
 *
 * @see org.alfresco.repo.content.transform.ContentTransformerRegistry
 * @see org.alfresco.repo.content.transform.ContentTransformer
 */
public ContentTransformer getTransformer(String sourceMimetype, String targetMimetype)
{
    // the registry performs the lookup; nothing is cached or post-processed here
    return transformerRegistry.getTransformer(sourceMimetype, targetMimetype);
}
/** /**
* @see org.alfresco.repo.content.transform.ContentTransformerRegistry * @see org.alfresco.repo.content.transform.ContentTransformerRegistry
* @see org.alfresco.repo.content.transform.ContentTransformer * @see org.alfresco.repo.content.transform.ContentTransformer

View File

@@ -25,6 +25,7 @@ import javax.transaction.UserTransaction;
import org.alfresco.model.ContentModel; import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.filestore.FileContentWriter; import org.alfresco.repo.content.filestore.FileContentWriter;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.repo.policy.JavaBehaviour; import org.alfresco.repo.policy.JavaBehaviour;
import org.alfresco.repo.policy.PolicyComponent; import org.alfresco.repo.policy.PolicyComponent;
import org.alfresco.repo.security.authentication.AuthenticationComponent; import org.alfresco.repo.security.authentication.AuthenticationComponent;
@@ -563,6 +564,8 @@ public class RoutingContentServiceTest extends BaseSpringTest
txn.setRollbackOnly(); txn.setRollbackOnly();
writer.setMimetype("text/plain"); writer.setMimetype("text/plain");
ContentTransformer transformer = contentService.getTransformer(reader.getMimetype(), writer.getMimetype());
assertNotNull("Expected a valid transformer", transformer);
contentService.transform(reader, writer); contentService.transform(reader, writer);
// get the content from the writer // get the content from the writer
reader = writer.getReader(); reader = writer.getReader();

View File

@@ -38,6 +38,7 @@ import javax.transaction.xa.XAResource;
import org.alfresco.model.ContentModel; import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap; import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.repo.search.IndexerException; import org.alfresco.repo.search.IndexerException;
import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware; import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware;
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
@@ -53,7 +54,6 @@ import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter; import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.InvalidNodeRefException; import org.alfresco.service.cmr.repository.InvalidNodeRefException;
import org.alfresco.service.cmr.repository.NoTransformerException;
import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService; import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path; import org.alfresco.service.cmr.repository.Path;
@@ -111,6 +111,9 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
*/ */
private ContentService contentService; private ContentService contentService;
/** the maximum transformation time to allow atomically, defaulting to 20ms */
private long maxAtomicTransformationTime = 20;
/** /**
* A list of all deletions we have made - at merge these deletions need to be made against the main index. * A list of all deletions we have made - at merge these deletions need to be made against the main index.
* *
@@ -196,9 +199,21 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
this.contentService = contentService; this.contentService = contentService;
} }
/******************************************************************************************************************************************************************************* /**
* * Indexer Implementation * ************************** * Set the maximum average transformation time allowed to a transformer in order to have
* the transformation performed in the current transaction. The default is 20ms.
*
* @param maxAtomicTransformationTime the maximum average time that a text transformation may
* take in order to be performed atomically.
*/ */
public void setMaxAtomicTransformationTime(long maxAtomicTransformationTime)
{
// stored as-is, with no validation; when only atomic properties are being indexed,
// a transformer whose getTransformationTime() exceeds this value is not run in the
// current pass and the property is reported as not atomically indexed
this.maxAtomicTransformationTime = maxAtomicTransformationTime;
}
/*===========================
* Indexer Implementation
============================*/
/** /**
* Utility method to check we are in the correct state to do work. Also keeps track of the dirty flag. * Utility method to check we are in the correct state to do work. Also keeps track of the dirty flag.
@@ -1177,19 +1192,20 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
for (QName propertyName : properties.keySet()) for (QName propertyName : properties.keySet())
{ {
Serializable value = properties.get(propertyName); Serializable value = properties.get(propertyName);
isAtomic = indexProperty(nodeRef, propertyName, value, xdoc, isAtomic, true);
if (indexAllProperties) if (indexAllProperties)
{ {
indexProperty(nodeRef, propertyName, value, xdoc, false, false); indexProperty(nodeRef, propertyName, value, xdoc, false);
}
else
{
isAtomic &= indexProperty(nodeRef, propertyName, value, xdoc, true);
} }
} }
boolean isRoot = nodeRef.equals(nodeService.getRootNode(nodeRef.getStoreRef())); boolean isRoot = nodeRef.equals(nodeService.getRootNode(nodeRef.getStoreRef()));
StringBuilder parentBuffer = new StringBuilder();
StringBuilder qNameBuffer = new StringBuilder(64); StringBuilder qNameBuffer = new StringBuilder(64);
int containerCount = 0;
for (Iterator<Pair<Path, QName>> it = paths.iterator(); it.hasNext(); /**/) for (Iterator<Pair<Path, QName>> it = paths.iterator(); it.hasNext(); /**/)
{ {
Pair<Path, QName> pair = it.next(); Pair<Path, QName> pair = it.next();
@@ -1361,11 +1377,19 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
} }
} }
private boolean indexProperty(NodeRef nodeRef, QName propertyName, Serializable value, Document doc, /**
boolean isAtomic, boolean indexAtomicProperties) * @param indexAtomicPropertiesOnly true to ignore all properties that must be indexed
* non-atomically
* @return Returns true if the property was indexed atomically, or false if it
* should be done asynchronously
*/
private boolean indexProperty(
NodeRef nodeRef, QName propertyName, Serializable value, Document doc,
boolean indexAtomicPropertiesOnly)
{ {
String attributeName = "@" String attributeName = "@" + QName.createQName(
+ QName.createQName(propertyName.getNamespaceURI(), ISO9075.encode(propertyName.getLocalName())); propertyName.getNamespaceURI(),
ISO9075.encode(propertyName.getLocalName()));
boolean store = true; boolean store = true;
boolean index = true; boolean index = true;
@@ -1382,59 +1406,69 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
atomic = propertyDef.isIndexedAtomically(); atomic = propertyDef.isIndexedAtomically();
isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT); isContent = propertyDef.getDataType().getName().equals(DataTypeDefinition.CONTENT);
} }
isAtomic &= atomic; if (value == null)
{
// the value is null
return true;
}
else if (indexAtomicPropertiesOnly && !atomic)
{
// we are only doing atomic properties and the property is definitely non-atomic
return false;
}
if (value != null) if (!indexAtomicPropertiesOnly)
{
if (indexAtomicProperties == atomic)
{
if (!indexAtomicProperties)
{ {
doc.removeFields(propertyName.toString()); doc.removeFields(propertyName.toString());
} }
boolean wereAllAtomic = true;
// convert value to String // convert value to String
for (String strValue : DefaultTypeConverter.INSTANCE.getCollection(String.class, value)) for (String strValue : DefaultTypeConverter.INSTANCE.getCollection(String.class, value))
{ {
if (strValue != null) if (strValue == null)
{ {
// nothing to index
continue;
}
// String strValue = ValueConverter.convert(String.class, value); // String strValue = ValueConverter.convert(String.class, value);
// TODO: Need to add with the correct language based analyser // TODO: Need to add with the correct language based analyser
if (isContent) if (isContent)
{ {
ContentData contentData = DefaultTypeConverter.INSTANCE.convert(ContentData.class, value); ContentData contentData = DefaultTypeConverter.INSTANCE.convert(ContentData.class, value);
if (contentData.getMimetype() != null && index) if (!index || contentData.getMimetype() == null)
{ {
// no mimetype or property not indexed
continue;
}
// store mimetype in index - even if content does not index it is useful // store mimetype in index - even if content does not index it is useful
doc.add(new Field(attributeName + ".mimetype", contentData.getMimetype(), false, true, doc.add(new Field(
false)); attributeName + ".mimetype",
contentData.getMimetype(),
false, true, false));
ContentReader reader = contentService.getReader(nodeRef, propertyName); ContentReader reader = contentService.getReader(nodeRef, propertyName);
if (reader != null && reader.exists()) if (reader != null && reader.exists())
{ {
boolean readerReady = true; boolean readerReady = true;
// transform if necessary (it is not a UTF-8 // transform if necessary (it is not a UTF-8 text document)
// text document) if (!EqualsHelper.nullSafeEquals(reader.getMimetype(), MimetypeMap.MIMETYPE_TEXT_PLAIN)
if (!EqualsHelper.nullSafeEquals(reader.getMimetype(),
MimetypeMap.MIMETYPE_TEXT_PLAIN)
|| !EqualsHelper.nullSafeEquals(reader.getEncoding(), "UTF-8")) || !EqualsHelper.nullSafeEquals(reader.getEncoding(), "UTF-8"))
{ {
ContentWriter writer = contentService.getTempWriter(); // get the transformer
writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN); ContentTransformer transformer = contentService.getTransformer(
// this is what the analyzers expect on the stream reader.getMimetype(),
writer.setEncoding("UTF-8"); MimetypeMap.MIMETYPE_TEXT_PLAIN);
try // is this transformer good enough?
{ if (transformer == null)
contentService.transform(reader, writer);
// point the reader to the new-written content
reader = writer.getReader();
}
catch (NoTransformerException e)
{ {
// log it // log it
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
{ {
s_logger.debug("Not indexed: No transformation", e); s_logger.debug(
"Not indexed: No transformation: \n" +
" source: " + reader + "\n" +
" target: " + MimetypeMap.MIMETYPE_TEXT_PLAIN);
} }
// don't index from the reader // don't index from the reader
readerReady = false; readerReady = false;
@@ -1442,6 +1476,26 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
doc.add(Field.Text("TEXT", NOT_INDEXED_NO_TRANSFORMATION)); doc.add(Field.Text("TEXT", NOT_INDEXED_NO_TRANSFORMATION));
doc.add(Field.Text(attributeName, NOT_INDEXED_NO_TRANSFORMATION)); doc.add(Field.Text(attributeName, NOT_INDEXED_NO_TRANSFORMATION));
} }
else if (indexAtomicPropertiesOnly && transformer.getTransformationTime() > maxAtomicTransformationTime)
{
// only indexing atomic properties
// indexing will take too long, so push it to the background
wereAllAtomic = false;
}
else
{
// We have a transformer that is fast enough
ContentWriter writer = contentService.getTempWriter();
writer.setMimetype(MimetypeMap.MIMETYPE_TEXT_PLAIN);
// this is what the analyzers expect on the stream
writer.setEncoding("UTF-8");
try
{
transformer.transform(reader, writer);
// point the reader to the new-written content
reader = writer.getReader();
}
catch (ContentIOException e) catch (ContentIOException e)
{ {
// log it // log it
@@ -1457,6 +1511,7 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
doc.add(Field.Text(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED)); doc.add(Field.Text(attributeName, NOT_INDEXED_TRANSFORMATION_FAILED));
} }
} }
}
// add the text field using the stream from the // add the text field using the stream from the
// reader, but only if the reader is valid // reader, but only if the reader is valid
if (readerReady) if (readerReady)
@@ -1483,12 +1538,11 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
isr = new InputStreamReader(ris); isr = new InputStreamReader(ris);
} }
doc.add(Field.Text("@" doc.add(Field.Text("@" + QName.createQName(
+ QName.createQName(propertyName.getNamespaceURI(), ISO9075 propertyName.getNamespaceURI(),
.encode(propertyName.getLocalName())), isr)); ISO9075.encode(propertyName.getLocalName())), isr));
} }
} }
else else
// URL not present (null reader) or no content at the URL (file missing) // URL not present (null reader) or no content at the URL (file missing)
{ {
@@ -1505,17 +1559,13 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
doc.add(Field.Text(attributeName, NOT_INDEXED_CONTENT_MISSING)); doc.add(Field.Text(attributeName, NOT_INDEXED_CONTENT_MISSING));
} }
} }
}
else else
{ {
doc.add(new Field(attributeName, strValue, store, index, tokenise)); doc.add(new Field(attributeName, strValue, store, index, tokenise));
} }
} }
}
}
}
return isAtomic; return wereAllAtomic;
} }
private Map<ChildAssociationRef, Counter> getNodeCounts(NodeRef nodeRef) private Map<ChildAssociationRef, Counter> getNodeCounts(NodeRef nodeRef)

View File

@@ -16,6 +16,7 @@
*/ */
package org.alfresco.service.cmr.repository; package org.alfresco.service.cmr.repository;
import org.alfresco.repo.content.transform.ContentTransformer;
import org.alfresco.service.cmr.dictionary.InvalidTypeException; import org.alfresco.service.cmr.dictionary.InvalidTypeException;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
@@ -107,6 +108,18 @@ public interface ContentService
public void transform(ContentReader reader, ContentWriter writer) public void transform(ContentReader reader, ContentWriter writer)
throws NoTransformerException, ContentIOException; throws NoTransformerException, ContentIOException;
/**
* Fetch the transformer that is capable of transforming the content in the
* given source mimetype to the given target mimetype.
*
* @param sourceMimetype the source mimetype
* @param targetMimetype the target mimetype
* @return Returns a transformer that can be used, or null if one was not available
*
* @see ContentAccessor#getMimetype()
*/
public ContentTransformer getTransformer(String sourceMimetype, String targetMimetype);
/** /**
* Returns whether a transformer exists that can read the content from * Returns whether a transformer exists that can read the content from
* the reader and write the content back out to the writer. * the reader and write the content back out to the writer.