From fc2ebe47d0525b72d4304202f90db639322f9249 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Fri, 27 Apr 2007 14:39:33 +0000 Subject: [PATCH] Restructure before WCM indexer impl. Fix and retest 2.1 test issues. git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5569 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../index/MissingContentReindexComponent.java | 4 +- .../MissingContentReindexComponentTest.java | 4 +- .../org/alfresco/repo/search/Indexer.java | 1 - .../org/alfresco/repo/search/IndexerSPI.java | 17 + .../repo/search/QueryParameterDefImpl.java | 2 +- .../lucene/AbstractLuceneIndexerImpl2.java | 786 +++++++++++++++ .../repo/search/impl/lucene/LuceneBase2.java | 12 + .../impl/lucene/LuceneIndexerImpl2.java | 908 ++---------------- .../repo/search/impl/lucene/LuceneTest2.java | 13 +- .../impl/lucene/index/TransactionStatus.java | 240 ++--- 10 files changed, 1034 insertions(+), 953 deletions(-) create mode 100644 source/java/org/alfresco/repo/search/impl/lucene/AbstractLuceneIndexerImpl2.java diff --git a/source/java/org/alfresco/repo/node/index/MissingContentReindexComponent.java b/source/java/org/alfresco/repo/node/index/MissingContentReindexComponent.java index 491a307fb2..c79bf84f10 100644 --- a/source/java/org/alfresco/repo/node/index/MissingContentReindexComponent.java +++ b/source/java/org/alfresco/repo/node/index/MissingContentReindexComponent.java @@ -26,7 +26,7 @@ package org.alfresco.repo.node.index; import java.util.List; -import org.alfresco.repo.search.impl.lucene.LuceneIndexerImpl2; +import org.alfresco.repo.search.impl.lucene.AbstractLuceneIndexerImpl2; import org.alfresco.repo.transaction.TransactionUtil; import org.alfresco.repo.transaction.TransactionUtil.TransactionWork; import org.alfresco.service.cmr.repository.NodeRef; @@ -100,7 +100,7 @@ public class MissingContentReindexComponent extends AbstractReindexComponent // search for it in the index, sorting with youngest first 
sp.setLanguage(SearchService.LANGUAGE_LUCENE); - sp.setQuery("TEXT:" + LuceneIndexerImpl2.NOT_INDEXED_CONTENT_MISSING); + sp.setQuery("TEXT:" + AbstractLuceneIndexerImpl2.NOT_INDEXED_CONTENT_MISSING); sp.addSort(SearchParameters.SORT_IN_DOCUMENT_ORDER_DESCENDING); ResultSet results = null; try diff --git a/source/java/org/alfresco/repo/node/index/MissingContentReindexComponentTest.java b/source/java/org/alfresco/repo/node/index/MissingContentReindexComponentTest.java index fb1cbbefde..2a681fd5e3 100644 --- a/source/java/org/alfresco/repo/node/index/MissingContentReindexComponentTest.java +++ b/source/java/org/alfresco/repo/node/index/MissingContentReindexComponentTest.java @@ -31,7 +31,7 @@ import org.alfresco.repo.content.AbstractContentStore; import org.alfresco.repo.content.ContentStore; import org.alfresco.repo.node.db.NodeDaoService; import org.alfresco.repo.search.Indexer; -import org.alfresco.repo.search.impl.lucene.LuceneIndexerImpl2; +import org.alfresco.repo.search.impl.lucene.AbstractLuceneIndexerImpl2; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; import org.alfresco.repo.security.authentication.AuthenticationComponent; import org.alfresco.repo.transaction.TransactionComponent; @@ -134,7 +134,7 @@ public class MissingContentReindexComponentTest extends TestCase SearchParameters sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage(SearchService.LANGUAGE_LUCENE); - sp.setQuery("TEXT:" + LuceneIndexerImpl2.NOT_INDEXED_CONTENT_MISSING); + sp.setQuery("TEXT:" + AbstractLuceneIndexerImpl2.NOT_INDEXED_CONTENT_MISSING); sp.addSort(SearchParameters.SORT_IN_DOCUMENT_ORDER_DESCENDING); ResultSet results = null; try diff --git a/source/java/org/alfresco/repo/search/Indexer.java b/source/java/org/alfresco/repo/search/Indexer.java index fd731b639e..414fb9b419 100644 --- a/source/java/org/alfresco/repo/search/Indexer.java +++ b/source/java/org/alfresco/repo/search/Indexer.java @@ -24,7 +24,6 @@ */ package 
org.alfresco.repo.search; -import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexerImpl; import org.alfresco.service.cmr.repository.ChildAssociationRef; import org.alfresco.service.cmr.repository.NodeRef; diff --git a/source/java/org/alfresco/repo/search/IndexerSPI.java b/source/java/org/alfresco/repo/search/IndexerSPI.java index 9d0223fd90..2f3ff65e63 100644 --- a/source/java/org/alfresco/repo/search/IndexerSPI.java +++ b/source/java/org/alfresco/repo/search/IndexerSPI.java @@ -26,10 +26,27 @@ package org.alfresco.repo.search; import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware; +/** + * Add support for FTS indexing + * + * @author andyh + * + */ public interface IndexerSPI extends Indexer { + /** + * Register call back handler when the indexing chunk is done + * + * @param callBack + */ public void registerCallBack(FTSIndexerAware callBack); + /** + * Peform a chunk of background FTS (and other non atomic property) indexing + * + * @param i + * @return - the number of docs updates + */ public int updateFullTextSearch(int i); } diff --git a/source/java/org/alfresco/repo/search/QueryParameterDefImpl.java b/source/java/org/alfresco/repo/search/QueryParameterDefImpl.java index acadbd9477..b4dd4b6d13 100644 --- a/source/java/org/alfresco/repo/search/QueryParameterDefImpl.java +++ b/source/java/org/alfresco/repo/search/QueryParameterDefImpl.java @@ -24,9 +24,9 @@ */ package org.alfresco.repo.search; +import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.alfresco.service.cmr.dictionary.PropertyDefinition; -import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.search.QueryParameterDefinition; import org.alfresco.service.namespace.NamespacePrefixResolver; import org.alfresco.service.namespace.NamespaceService; diff --git a/source/java/org/alfresco/repo/search/impl/lucene/AbstractLuceneIndexerImpl2.java 
b/source/java/org/alfresco/repo/search/impl/lucene/AbstractLuceneIndexerImpl2.java new file mode 100644 index 0000000000..c06c504fbd --- /dev/null +++ b/source/java/org/alfresco/repo/search/impl/lucene/AbstractLuceneIndexerImpl2.java @@ -0,0 +1,786 @@ +/* + * Copyright (C) 2005-2007 Alfresco Software Limited. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + * As a special exception to the terms and conditions of version 2.0 of + * the GPL, you may redistribute this Program in connection with Free/Libre + * and Open Source Software ("FLOSS") applications as described in Alfresco's + * FLOSS exception. 
You should have recieved a copy of the text describing + * the FLOSS exception, and it is also available here: + * http://www.alfresco.com/legal/licensing" + */ +package org.alfresco.repo.search.impl.lucene; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.ListIterator; +import java.util.Set; + +import javax.transaction.Status; +import javax.transaction.xa.XAResource; + +import org.alfresco.repo.search.IndexerException; +import org.alfresco.repo.search.impl.lucene.index.TransactionStatus; +import org.alfresco.service.cmr.repository.InvalidNodeRefException; +import org.alfresco.service.cmr.repository.NodeRef; +import org.apache.log4j.Logger; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; + +/** + * Common support for indexing across implementations + * + * @author andyh + */ +public abstract class AbstractLuceneIndexerImpl2 extends LuceneBase2 +{ + /** + * Enum for indexing actions against a node + */ + protected enum Action + { + INDEX, REINDEX, DELETE, CASCADEREINDEX + } + + protected enum IndexUpdateStatus + { + UNMODIFIED, SYNCRONOUS, ASYNCHRONOUS; + } + + + protected long docs; + + // Failure codes to index when problems occur indexing content + + protected static class Command + { + S ref; + + Action action; + + Command(S ref, Action action) + { + this.ref = ref; + this.action = action; + } + + public String toString() + { + StringBuffer buffer = new StringBuffer(); + if (action == Action.INDEX) + { + buffer.append("Index "); + } + else if (action == Action.DELETE) + { + buffer.append("Delete "); + } + else if (action == Action.REINDEX) + { + buffer.append("Reindex "); + } + else + { + buffer.append("Unknown ... 
"); + } + buffer.append(ref); + return buffer.toString(); + } + + } + + /** + * No transform available + */ + public static final String NOT_INDEXED_NO_TRANSFORMATION = "nint"; + + /** + * Tranfrom failed + */ + public static final String NOT_INDEXED_TRANSFORMATION_FAILED = "nitf"; + + /** + * No content + */ + public static final String NOT_INDEXED_CONTENT_MISSING = "nicm"; + + /** + * No type conversion + */ + public static final String NOT_INDEXED_NO_TYPE_CONVERSION = "nintc"; + + /** + * Logger + */ + private static Logger s_logger = Logger.getLogger(AbstractLuceneIndexerImpl2.class); + + protected static Set deletePrimary(Collection nodeRefs, IndexReader reader, boolean delete) + throws LuceneIndexException + { + + Set refs = new LinkedHashSet(); + + for (String nodeRef : nodeRefs) + { + + try + { + TermDocs td = reader.termDocs(new Term("PRIMARYPARENT", nodeRef)); + while (td.next()) + { + int doc = td.doc(); + Document document = reader.document(doc); + String id = document.get("ID"); + refs.add(id); + if (delete) + { + reader.deleteDocument(doc); + } + } + } + catch (IOException e) + { + throw new LuceneIndexException("Failed to delete node by primary parent for " + nodeRef, e); + } + } + + return refs; + + } + + protected static Set deleteReference(Collection nodeRefs, IndexReader reader, boolean delete) + throws LuceneIndexException + { + + Set refs = new LinkedHashSet(); + + for (String nodeRef : nodeRefs) + { + + try + { + TermDocs td = reader.termDocs(new Term("PARENT", nodeRef)); + while (td.next()) + { + int doc = td.doc(); + Document document = reader.document(doc); + String id = document.get("ID"); + refs.add(id); + if (delete) + { + reader.deleteDocument(doc); + } + } + } + catch (IOException e) + { + throw new LuceneIndexException("Failed to delete node by parent for " + nodeRef, e); + } + } + + return refs; + + } + + protected static Set deleteContainerAndBelow(String nodeRef, IndexReader reader, boolean delete, + boolean cascade) throws 
LuceneIndexException + { + Set refs = new LinkedHashSet(); + + try + { + if (delete) + { + reader.deleteDocuments(new Term("ID", nodeRef)); + } + refs.add(nodeRef); + if (cascade) + { + TermDocs td = reader.termDocs(new Term("ANCESTOR", nodeRef)); + while (td.next()) + { + int doc = td.doc(); + Document document = reader.document(doc); + String id = document.get("ID"); + refs.add(id); + if (delete) + { + reader.deleteDocument(doc); + } + } + } + } + catch (IOException e) + { + throw new LuceneIndexException("Failed to delete container and below for " + nodeRef, e); + } + return refs; + } + + /** the maximum transformation time to allow atomically, defaulting to 20ms */ + protected long maxAtomicTransformationTime = 20; + + /** + * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: + * Consider if this information needs to be persisted for recovery + */ + protected Set deletions = new LinkedHashSet(); + + /** + * List of pending indexing commands. + */ + protected List> commandList = new ArrayList>(10000); + + /** + * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just + * fixing up non atomically indexed things from one or more other updates. + */ + protected IndexUpdateStatus indexUpdateStatus = IndexUpdateStatus.UNMODIFIED; + + /** + * Set the max time allowed to transform content atomically + * + * @param maxAtomicTransformationTime + */ + public void setMaxAtomicTransformationTime(long maxAtomicTransformationTime) + { + this.maxAtomicTransformationTime = maxAtomicTransformationTime; + } + + /** + * Utility method to check we are in the correct state to do work Also keeps track of the dirty flag. 
+ * + * @throws IOException + */ + + protected void checkAbleToDoWork(IndexUpdateStatus indexUpdateStatus) + { + if (this.indexUpdateStatus == IndexUpdateStatus.UNMODIFIED) + { + this.indexUpdateStatus = indexUpdateStatus; + } + else if (this.indexUpdateStatus == indexUpdateStatus) + { + return; + } + else + { + throw new IndexerException("Can not mix FTS and transactional updates"); + } + + switch (getStatus()) + { + case UNKNOWN: + try + { + setStatus(TransactionStatus.ACTIVE); + } + catch (IOException e) + { + throw new LuceneIndexException("Failed to set TX active", e); + } + break; + case ACTIVE: + // OK + break; + default: + // All other states are a problem + throw new IndexerException(buildErrorString()); + } + } + + /** + * Utility method to report errors about invalid state. + * + * @return - an error based on status + */ + private String buildErrorString() + { + StringBuilder buffer = new StringBuilder(128); + buffer.append("The indexer is unable to accept more work: "); + switch (getStatus().getStatus()) + { + case Status.STATUS_COMMITTED: + buffer.append("The indexer has been committed"); + break; + case Status.STATUS_COMMITTING: + buffer.append("The indexer is committing"); + break; + case Status.STATUS_MARKED_ROLLBACK: + buffer.append("The indexer is marked for rollback"); + break; + case Status.STATUS_PREPARED: + buffer.append("The indexer is prepared to commit"); + break; + case Status.STATUS_PREPARING: + buffer.append("The indexer is preparing to commit"); + break; + case Status.STATUS_ROLLEDBACK: + buffer.append("The indexer has been rolled back"); + break; + case Status.STATUS_ROLLING_BACK: + buffer.append("The indexer is rolling back"); + break; + case Status.STATUS_UNKNOWN: + buffer.append("The indexer is in an unknown state"); + break; + default: + break; + } + return buffer.toString(); + } + + /** + * Commit this index + */ + public void commit() throws LuceneIndexException + { + switch (getStatus().getStatus()) + { + case 
Status.STATUS_COMMITTING: + throw new LuceneIndexException("Unable to commit: Transaction is committing"); + case Status.STATUS_COMMITTED: + throw new LuceneIndexException("Unable to commit: Transaction is commited "); + case Status.STATUS_ROLLING_BACK: + throw new LuceneIndexException("Unable to commit: Transaction is rolling back"); + case Status.STATUS_ROLLEDBACK: + throw new LuceneIndexException("Unable to commit: Transaction is aleady rolled back"); + case Status.STATUS_MARKED_ROLLBACK: + throw new LuceneIndexException("Unable to commit: Transaction is marked for roll back"); + case Status.STATUS_PREPARING: + throw new LuceneIndexException("Unable to commit: Transaction is preparing"); + case Status.STATUS_ACTIVE: + // special case - commit from active + prepare(); + // drop through to do the commit; + default: + if (getStatus().getStatus() != Status.STATUS_PREPARED) + { + throw new LuceneIndexException("Index must be prepared to commit"); + } + try + { + setStatus(TransactionStatus.COMMITTING); + if (isModified()) + { + doCommit(); + } + setStatus(TransactionStatus.COMMITTED); + } + catch (IOException e) + { + // If anything goes wrong we try and do a roll back + rollback(); + throw new LuceneIndexException("Commit failed", e); + } + catch (LuceneIndexException e) + { + // If anything goes wrong we try and do a roll back + rollback(); + throw new LuceneIndexException("Commit failed", e); + } + finally + { + // Make sure we tidy up + // deleteDelta(); + } + break; + } + } + + /** + * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper + * serialisation against the index as would a data base transaction. 
+ * + * @return + */ + public int prepare() throws LuceneIndexException + { + switch (getStatus().getStatus()) + { + case Status.STATUS_COMMITTING: + throw new IndexerException("Unable to prepare: Transaction is committing"); + case Status.STATUS_COMMITTED: + throw new IndexerException("Unable to prepare: Transaction is commited "); + case Status.STATUS_ROLLING_BACK: + throw new IndexerException("Unable to prepare: Transaction is rolling back"); + case Status.STATUS_ROLLEDBACK: + throw new IndexerException("Unable to prepare: Transaction is aleady rolled back"); + case Status.STATUS_MARKED_ROLLBACK: + throw new IndexerException("Unable to prepare: Transaction is marked for roll back"); + case Status.STATUS_PREPARING: + throw new IndexerException("Unable to prepare: Transaction is already preparing"); + case Status.STATUS_PREPARED: + throw new IndexerException("Unable to prepare: Transaction is already prepared"); + default: + try + { + setStatus(TransactionStatus.PREPARING); + if (isModified()) + { + doPrepare(); + } + setStatus(TransactionStatus.PREPARED); + return isModified() ? XAResource.XA_OK : XAResource.XA_RDONLY; + } + catch (IOException e) + { + // If anything goes wrong we try and do a roll back + rollback(); + throw new LuceneIndexException("Commit failed", e); + } + catch (LuceneIndexException e) + { + setRollbackOnly(); + throw new LuceneIndexException("Index failed to prepare", e); + } + } + } + + /** + * Has this index been modified? 
+ * + * @return + */ + public boolean isModified() + { + return indexUpdateStatus != IndexUpdateStatus.UNMODIFIED; + } + + /** + * Roll back the index changes (this just means they are never added) + */ + public void rollback() throws LuceneIndexException + { + switch (getStatus().getStatus()) + { + + case Status.STATUS_COMMITTED: + throw new IndexerException("Unable to roll back: Transaction is committed "); + case Status.STATUS_ROLLING_BACK: + throw new IndexerException("Unable to roll back: Transaction is rolling back"); + case Status.STATUS_ROLLEDBACK: + throw new IndexerException("Unable to roll back: Transaction is already rolled back"); + case Status.STATUS_COMMITTING: + // Can roll back during commit + default: + try + { + setStatus(TransactionStatus.ROLLINGBACK); + doRollBack(); + setStatus(TransactionStatus.ROLLEDBACK); + } + catch (IOException e) + { + throw new LuceneIndexException("rollback failed ", e); + } + break; + } + } + + /** + * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow + * roll back. 
+ */ + public void setRollbackOnly() + { + switch (getStatus().getStatus()) + { + case Status.STATUS_COMMITTING: + throw new IndexerException("Unable to mark for rollback: Transaction is committing"); + case Status.STATUS_COMMITTED: + throw new IndexerException("Unable to mark for rollback: Transaction is committed"); + default: + try + { + doSetRollbackOnly(); + setStatus(TransactionStatus.MARKED_ROLLBACK); + } + catch (IOException e) + { + throw new LuceneIndexException("Set rollback only failed ", e); + } + break; + } + } + + protected abstract void doPrepare() throws IOException; + + protected abstract void doCommit() throws IOException; + + protected abstract void doRollBack() throws IOException; + + protected abstract void doSetRollbackOnly() throws IOException; + + protected abstract List createDocuments(String stringNodeRef, boolean isNew, boolean indexAllProperties, + boolean includeDirectoryDocuments); + + protected Set deleteImpl(String nodeRef, boolean forReindex, boolean cascade, IndexReader mainReader) + throws LuceneIndexException, IOException + + { + // startTimer(); + getDeltaReader(); + // outputTime("Delete "+nodeRef+" size = "+getDeltaWriter().docCount()); + Set refs = new LinkedHashSet(); + Set temp = null; + + if (forReindex) + { + temp = deleteContainerAndBelow(nodeRef, getDeltaReader(), true, cascade); + refs.addAll(temp); + deletions.addAll(temp); + temp = deleteContainerAndBelow(nodeRef, mainReader, false, cascade); + refs.addAll(temp); + deletions.addAll(temp); + } + else + { + // Delete all and reindex as they could be secondary links we have deleted and they need to be updated. + // Most will skip any indexing as they will really have gone. 
+ temp = deleteContainerAndBelow(nodeRef, getDeltaReader(), true, cascade); + deletions.addAll(temp); + refs.addAll(temp); + temp = deleteContainerAndBelow(nodeRef, mainReader, false, cascade); + deletions.addAll(temp); + refs.addAll(temp); + + Set leafrefs = new LinkedHashSet(); + leafrefs.addAll(deletePrimary(deletions, getDeltaReader(), true)); + leafrefs.addAll(deletePrimary(deletions, mainReader, false)); + // May not have to delete references + leafrefs.addAll(deleteReference(deletions, getDeltaReader(), true)); + leafrefs.addAll(deleteReference(deletions, mainReader, false)); + refs.addAll(leafrefs); + deletions.addAll(leafrefs); + + } + + return refs; + + } + + protected void indexImpl(String nodeRef, boolean isNew) throws LuceneIndexException, IOException + { + IndexWriter writer = getDeltaWriter(); + + // avoid attempting to index nodes that don't exist + + try + { + List docs = createDocuments(nodeRef, isNew, false, true); + for (Document doc : docs) + { + try + { + writer.addDocument(doc); + } + catch (IOException e) + { + throw new LuceneIndexException("Failed to add document to index", e); + } + } + } + catch (InvalidNodeRefException e) + { + // The node does not exist + return; + } + + } + + void indexImpl(Set refs, boolean isNew) throws LuceneIndexException, IOException + { + for (String ref : refs) + { + indexImpl(ref, isNew); + } + } + + protected void index(T ref) throws LuceneIndexException + { + addCommand(new Command(ref, Action.INDEX)); + } + + protected void reindex(T ref, boolean cascadeReindexDirectories) throws LuceneIndexException + { + addCommand(new Command(ref, cascadeReindexDirectories ? 
Action.CASCADEREINDEX : Action.REINDEX)); + } + + protected void delete(T ref) throws LuceneIndexException + { + addCommand(new Command(ref, Action.DELETE)); + } + + private void addCommand(Command command) + { + if (commandList.size() > 0) + { + Command last = commandList.get(commandList.size() - 1); + if ((last.action == command.action) && (last.ref.equals(command.ref))) + { + return; + } + } + purgeCommandList(command); + commandList.add(command); + + if (commandList.size() > getLuceneConfig().getIndexerBatchSize()) + { + flushPending(); + } + } + + private void purgeCommandList(Command command) + { + if (command.action == Action.DELETE) + { + removeFromCommandList(command, false); + } + else if (command.action == Action.REINDEX) + { + removeFromCommandList(command, true); + } + else if (command.action == Action.INDEX) + { + removeFromCommandList(command, true); + } + else if (command.action == Action.CASCADEREINDEX) + { + removeFromCommandList(command, true); + } + } + + private void removeFromCommandList(Command command, boolean matchExact) + { + for (ListIterator> it = commandList.listIterator(commandList.size()); it.hasPrevious(); /**/) + { + Command current = it.previous(); + if (matchExact) + { + if ((current.action == command.action) && (current.ref.equals(command.ref))) + { + it.remove(); + return; + } + } + else + { + if (current.ref.equals(command.ref)) + { + it.remove(); + } + } + } + } + + public void flushPending() throws LuceneIndexException + { + IndexReader mainReader = null; + try + { + mainReader = getReader(); + Set forIndex = new LinkedHashSet(); + + for (Command command : commandList) + { + if (command.action == Action.INDEX) + { + // Indexing just requires the node to be added to the list + forIndex.add(command.ref.toString()); + } + else if (command.action == Action.REINDEX) + { + // Reindex is a delete and then and index + Set set = deleteImpl(command.ref.toString(), true, false, mainReader); + + // Deleting any pending index actions + // 
- make sure we only do at most one index + forIndex.removeAll(set); + // Add the nodes for index + forIndex.addAll(set); + } + else if (command.action == Action.CASCADEREINDEX) + { + // Reindex is a delete and then and index + Set set = deleteImpl(command.ref.toString(), true, true, mainReader); + + // Deleting any pending index actions + // - make sure we only do at most one index + forIndex.removeAll(set); + // Add the nodes for index + forIndex.addAll(set); + } + else if (command.action == Action.DELETE) + { + // Delete the nodes + Set set = deleteImpl(command.ref.toString(), false, true, mainReader); + // Remove any pending indexes + forIndex.removeAll(set); + // Add the leaf nodes for reindex + forIndex.addAll(set); + } + } + commandList.clear(); + indexImpl(forIndex, false); + docs = getDeltaWriter().docCount(); + } + catch (IOException e) + { + // If anything goes wrong we try and do a roll back + throw new LuceneIndexException("Failed to flush index", e); + } + finally + { + if (mainReader != null) + { + try + { + mainReader.close(); + } + catch (IOException e) + { + throw new LuceneIndexException("Filed to close main reader", e); + } + } + // Make sure deletes are sent + try + { + closeDeltaReader(); + } + catch (IOException e) + { + + } + // Make sure writes and updates are sent. 
+ try + { + closeDeltaWriter(); + } + catch (IOException e) + { + + } + } + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java index e54d74de7e..3f25e1964f 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneBase2.java @@ -84,6 +84,8 @@ public abstract class LuceneBase2 private LuceneConfig config; + private TransactionStatus status = TransactionStatus.UNKNOWN; + // "lucene-indexes"; /** @@ -254,8 +256,16 @@ public abstract class LuceneBase2 protected void setStatus(TransactionStatus status) throws IOException { indexInfo.setStatus(deltaId, status, null, null); + this.status = status; + } + + protected TransactionStatus getStatus() + { + return status; } + + private DictionaryService dictionaryService; protected IndexReader getReader() throws LuceneIndexException, IOException @@ -294,4 +304,6 @@ public abstract class LuceneBase2 return indexInfo.doWithWriteLock(lockWork); } + + } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java index 1d0b8da11b..ed17ab57db 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java @@ -35,26 +35,19 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; -import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Set; -import javax.transaction.Status; -import javax.transaction.xa.XAResource; - import org.alfresco.model.ContentModel; import org.alfresco.repo.content.MimetypeMap; import 
org.alfresco.repo.content.transform.ContentTransformer; import org.alfresco.repo.search.IndexerException; import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware; import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer; -import org.alfresco.repo.search.impl.lucene.index.TransactionStatus; import org.alfresco.service.cmr.dictionary.AspectDefinition; import org.alfresco.service.cmr.dictionary.DataTypeDefinition; import org.alfresco.service.cmr.dictionary.DictionaryService; @@ -66,7 +59,6 @@ import org.alfresco.service.cmr.repository.ContentIOException; import org.alfresco.service.cmr.repository.ContentReader; import org.alfresco.service.cmr.repository.ContentService; import org.alfresco.service.cmr.repository.ContentWriter; -import org.alfresco.service.cmr.repository.InvalidNodeRefException; import org.alfresco.service.cmr.repository.MLText; import org.alfresco.service.cmr.repository.NodeRef; import org.alfresco.service.cmr.repository.NodeService; @@ -74,8 +66,6 @@ import org.alfresco.service.cmr.repository.Path; import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.cmr.repository.datatype.TypeConversionException; -import org.alfresco.service.cmr.search.ResultSetRow; -import org.alfresco.service.cmr.search.SearchParameters; import org.alfresco.service.namespace.QName; import org.alfresco.util.EqualsHelper; import org.alfresco.util.ISO9075; @@ -98,26 +88,10 @@ import org.apache.lucene.search.BooleanClause.Occur; * * @author andyh */ -public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 +public class LuceneIndexerImpl2 extends AbstractLuceneIndexerImpl2 implements LuceneIndexer2 { - public static final String NOT_INDEXED_NO_TRANSFORMATION = "nint"; - - public static final String NOT_INDEXED_TRANSFORMATION_FAILED = "nitf"; - - public static final String NOT_INDEXED_CONTENT_MISSING = "nicm"; - - public static final String 
NOT_INDEXED_NO_TYPE_CONVERSION = "nintc"; - private static Logger s_logger = Logger.getLogger(LuceneIndexerImpl2.class); - /** - * Enum for indexing actions against a node - */ - private enum Action - { - INDEX, REINDEX, DELETE, CASCADEREINDEX - }; - /** * The node service we use to get information about nodes */ @@ -128,49 +102,15 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 */ private ContentService contentService; - /** the maximum transformation time to allow atomically, defaulting to 20ms */ - private long maxAtomicTransformationTime = 20; - - /** - * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: Consider if this information needs to be persisted for recovery - */ - private Set deletions = new LinkedHashSet(); - - private long docs; - - /** - * The status of this index - follows javax.transaction.Status - */ - - private int status = Status.STATUS_UNKNOWN; - - /** - * Has this index been modified? - */ - - private boolean isModified = false; - - /** - * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other - * updates. - */ - - private Boolean isFTSUpdate = null; - - /** - * List of pending indexing commands. 
- */ - private List commandList = new ArrayList(10000); - /** * Call back to make after doing non atomic indexing */ - private FTSIndexerAware callBack; + FTSIndexerAware callBack; /** * Count of remaining items to index non atomically */ - private int remainingCount = 0; + int remainingCount = 0; /** * A list of stuff that requires non atomic indexing @@ -215,89 +155,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 this.contentService = contentService; } - public void setMaxAtomicTransformationTime(long maxAtomicTransformationTime) - { - this.maxAtomicTransformationTime = maxAtomicTransformationTime; - } - - /* - * =========================== Indexer Implementation ============================ - */ - - /** - * Utility method to check we are in the correct state to do work Also keeps track of the dirty flag. - */ - - private void checkAbleToDoWork(boolean isFTS, boolean isModified) - { - if (isFTSUpdate == null) - { - isFTSUpdate = Boolean.valueOf(isFTS); - } - else - { - if (isFTS != isFTSUpdate.booleanValue()) - { - throw new IndexerException("Can not mix FTS and transactional updates"); - } - } - - switch (status) - { - case Status.STATUS_UNKNOWN: - status = Status.STATUS_ACTIVE; - break; - case Status.STATUS_ACTIVE: - // OK - break; - default: - // All other states are a problem - throw new IndexerException(buildErrorString()); - } - this.isModified = isModified; - } - - /** - * Utility method to report errors about invalid state. 
- * - * @return - */ - private String buildErrorString() - { - StringBuilder buffer = new StringBuilder(128); - buffer.append("The indexer is unable to accept more work: "); - switch (status) - { - case Status.STATUS_COMMITTED: - buffer.append("The indexer has been committed"); - break; - case Status.STATUS_COMMITTING: - buffer.append("The indexer is committing"); - break; - case Status.STATUS_MARKED_ROLLBACK: - buffer.append("The indexer is marked for rollback"); - break; - case Status.STATUS_PREPARED: - buffer.append("The indexer is prepared to commit"); - break; - case Status.STATUS_PREPARING: - buffer.append("The indexer is preparing to commit"); - break; - case Status.STATUS_ROLLEDBACK: - buffer.append("The indexer has been rolled back"); - break; - case Status.STATUS_ROLLING_BACK: - buffer.append("The indexer is rolling back"); - break; - case Status.STATUS_UNKNOWN: - buffer.append("The indexer is in an unknown state"); - break; - default: - break; - } - return buffer.toString(); - } - /* * Indexer Implementation */ @@ -308,7 +165,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Create node " + relationshipRef.getChildRef()); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { NodeRef childRef = relationshipRef.getChildRef(); @@ -343,7 +200,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 Document document = mainReader.document(doc); String id = document.get("ID"); NodeRef ref = new NodeRef(id); - deleteImpl(ref, false, true, mainReader); + deleteImpl(ref.toString(), false, true, mainReader); } } catch (IOException e) @@ -373,7 +230,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Update node " + nodeRef); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { reindex(nodeRef, false); @@ -391,7 +248,7 @@ public class LuceneIndexerImpl2 extends 
LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Delete node " + relationshipRef.getChildRef()); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { // The requires a reindex - a delete may remove too much from under this node - that also lives under @@ -412,7 +269,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Create child " + relationshipRef); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { // TODO: Optimise @@ -433,7 +290,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Update child " + relationshipBeforeRef + " to " + relationshipAfterRef); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { // TODO: Optimise @@ -456,7 +313,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { s_logger.debug("Delete child " + relationshipRef); } - checkAbleToDoWork(false, true); + checkAbleToDoWork(IndexUpdateStatus.SYNCRONOUS); try { // TODO: Optimise @@ -478,7 +335,9 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 * * @param storeRef * @param deltaId - * @return + * @param config + * @return - the indexer instance + * @throws LuceneIndexException */ public static LuceneIndexerImpl2 getUpdateIndexer(StoreRef storeRef, String deltaId, LuceneConfig config) throws LuceneIndexException @@ -497,89 +356,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 * Transactional support Used by the resource manager for indexers. 
*/ - /** - * Commit this index - */ - - public void commit() throws LuceneIndexException - { - switch (status) - { - case Status.STATUS_COMMITTING: - throw new LuceneIndexException("Unable to commit: Transaction is committing"); - case Status.STATUS_COMMITTED: - throw new LuceneIndexException("Unable to commit: Transaction is commited "); - case Status.STATUS_ROLLING_BACK: - throw new LuceneIndexException("Unable to commit: Transaction is rolling back"); - case Status.STATUS_ROLLEDBACK: - throw new LuceneIndexException("Unable to commit: Transaction is aleady rolled back"); - case Status.STATUS_MARKED_ROLLBACK: - throw new LuceneIndexException("Unable to commit: Transaction is marked for roll back"); - case Status.STATUS_PREPARING: - throw new LuceneIndexException("Unable to commit: Transaction is preparing"); - case Status.STATUS_ACTIVE: - // special case - commit from active - prepare(); - // drop through to do the commit; - default: - if (status != Status.STATUS_PREPARED) - { - throw new LuceneIndexException("Index must be prepared to commit"); - } - status = Status.STATUS_COMMITTING; - try - { - setStatus(TransactionStatus.COMMITTING); - if (isModified()) - { - if (isFTSUpdate.booleanValue()) - { - doFTSIndexCommit(); - // FTS does not trigger indexing request - } - else - { - // Build the deletion terms - // Set terms = new LinkedHashSet(); - // for (NodeRef nodeRef : deletions) - // { - // terms.add(new Term("ID", nodeRef.toString())); - // } - // Merge - // mergeDeltaIntoMain(terms); - setInfo(docs, getDeletions(), false); - luceneFullTextSearchIndexer.requiresIndex(store); - } - } - status = Status.STATUS_COMMITTED; - if (callBack != null) - { - callBack.indexCompleted(store, remainingCount, null); - } - setStatus(TransactionStatus.COMMITTED); - } - catch (IOException e) - { - // If anything goes wrong we try and do a roll back - rollback(); - throw new LuceneIndexException("Commit failed", e); - } - catch (LuceneIndexException e) - { - // If anything goes 
wrong we try and do a roll back - rollback(); - throw new LuceneIndexException("Commit failed", e); - } - finally - { - // Make sure we tidy up - // deleteDelta(); - } - break; - } - } - - private void doFTSIndexCommit() throws LuceneIndexException + void doFTSIndexCommit() throws LuceneIndexException { IndexReader mainReader = null; IndexReader deltaReader = null; @@ -597,36 +374,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 for (Helper helper : toFTSIndex) { - // BooleanQuery query = new BooleanQuery(); - // query.add(new TermQuery(new Term("ID", helper.nodeRef.toString())), true, false); - // query.add(new TermQuery(new Term("TX", helper.tx)), true, false); - // query.add(new TermQuery(new Term("ISNODE", "T")), false, false); - - deletions.add(helper.nodeRef); - - // try - // { - // Hits hits = mainSearcher.search(query); - // if (hits.length() > 0) - // { - // for (int i = 0; i < hits.length(); i++) - // { - // mainReader.delete(hits.id(i)); - // } - // } - // else - // { - // hits = deltaSearcher.search(query); - // for (int i = 0; i < hits.length(); i++) - // { - // deltaReader.delete(hits.id(i)); - // } - // } - // } - // catch (IOException e) - // { - // throw new LuceneIndexException("Failed to delete an FTS update from the original index", e); - // } + deletions.add(helper.ref); } } @@ -698,498 +446,9 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } - /** - * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper serialisation against the index as would a data base transaction. 
- * - * @return - */ - public int prepare() throws LuceneIndexException - { + - switch (status) - { - case Status.STATUS_COMMITTING: - throw new IndexerException("Unable to prepare: Transaction is committing"); - case Status.STATUS_COMMITTED: - throw new IndexerException("Unable to prepare: Transaction is commited "); - case Status.STATUS_ROLLING_BACK: - throw new IndexerException("Unable to prepare: Transaction is rolling back"); - case Status.STATUS_ROLLEDBACK: - throw new IndexerException("Unable to prepare: Transaction is aleady rolled back"); - case Status.STATUS_MARKED_ROLLBACK: - throw new IndexerException("Unable to prepare: Transaction is marked for roll back"); - case Status.STATUS_PREPARING: - throw new IndexerException("Unable to prepare: Transaction is already preparing"); - case Status.STATUS_PREPARED: - throw new IndexerException("Unable to prepare: Transaction is already prepared"); - default: - status = Status.STATUS_PREPARING; - try - { - setStatus(TransactionStatus.PREPARING); - if (isModified()) - { - saveDelta(); - flushPending(); - // prepareToMergeIntoMain(); - } - status = Status.STATUS_PREPARED; - setStatus(TransactionStatus.PREPARED); - return isModified ? XAResource.XA_OK : XAResource.XA_RDONLY; - } - catch (IOException e) - { - // If anything goes wrong we try and do a roll back - rollback(); - throw new LuceneIndexException("Commit failed", e); - } - catch (LuceneIndexException e) - { - setRollbackOnly(); - throw new LuceneIndexException("Index failed to prepare", e); - } - } - } - - /** - * Has this index been modified? 
- * - * @return - */ - public boolean isModified() - { - return isModified; - } - - /** - * Return the javax.transaction.Status integer status code - * - * @return - */ - public int getStatus() - { - return status; - } - - /** - * Roll back the index changes (this just means they are never added) - */ - - public void rollback() throws LuceneIndexException - { - switch (status) - { - - case Status.STATUS_COMMITTED: - throw new IndexerException("Unable to roll back: Transaction is committed "); - case Status.STATUS_ROLLING_BACK: - throw new IndexerException("Unable to roll back: Transaction is rolling back"); - case Status.STATUS_ROLLEDBACK: - throw new IndexerException("Unable to roll back: Transaction is already rolled back"); - case Status.STATUS_COMMITTING: - // Can roll back during commit - default: - status = Status.STATUS_ROLLING_BACK; - // if (isModified()) - // { - // deleteDelta(); - // } - try - { - setStatus(TransactionStatus.ROLLINGBACK); - setStatus(TransactionStatus.ROLLEDBACK); - } - catch (IOException e) - { - throw new LuceneIndexException("roolback failed ", e); - } - - if (callBack != null) - { - callBack.indexCompleted(store, 0, null); - } - break; - } - } - - /** - * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back. 
- */ - - public void setRollbackOnly() - { - switch (status) - { - case Status.STATUS_COMMITTING: - throw new IndexerException("Unable to mark for rollback: Transaction is committing"); - case Status.STATUS_COMMITTED: - throw new IndexerException("Unable to mark for rollback: Transaction is committed"); - default: - status = Status.STATUS_MARKED_ROLLBACK; - break; - } - } - - /* - * Implementation - */ - - private void index(NodeRef nodeRef) throws LuceneIndexException - { - addCommand(new Command(nodeRef, Action.INDEX)); - } - - private void reindex(NodeRef nodeRef, boolean cascadeReindexDirectories) throws LuceneIndexException - { - addCommand(new Command(nodeRef, cascadeReindexDirectories ? Action.CASCADEREINDEX : Action.REINDEX)); - } - - private void delete(NodeRef nodeRef) throws LuceneIndexException - { - addCommand(new Command(nodeRef, Action.DELETE)); - } - - private void addCommand(Command command) - { - if (commandList.size() > 0) - { - Command last = commandList.get(commandList.size() - 1); - if ((last.action == command.action) && (last.nodeRef.equals(command.nodeRef))) - { - return; - } - } - purgeCommandList(command); - commandList.add(command); - - if (commandList.size() > getLuceneConfig().getIndexerBatchSize()) - { - flushPending(); - } - } - - private void purgeCommandList(Command command) - { - if (command.action == Action.DELETE) - { - removeFromCommandList(command, false); - } - else if (command.action == Action.REINDEX) - { - removeFromCommandList(command, true); - } - else if (command.action == Action.INDEX) - { - removeFromCommandList(command, true); - } - else if (command.action == Action.CASCADEREINDEX) - { - removeFromCommandList(command, true); - } - } - - private void removeFromCommandList(Command command, boolean matchExact) - { - for (ListIterator it = commandList.listIterator(commandList.size()); it.hasPrevious(); /**/) - { - Command current = it.previous(); - if (matchExact) - { - if ((current.action == command.action) && 
(current.nodeRef.equals(command.nodeRef))) - { - it.remove(); - return; - } - } - else - { - if (current.nodeRef.equals(command.nodeRef)) - { - it.remove(); - } - } - } - } - - public void flushPending() throws LuceneIndexException - { - IndexReader mainReader = null; - try - { - mainReader = getReader(); - Set forIndex = new LinkedHashSet(); - - for (Command command : commandList) - { - if (command.action == Action.INDEX) - { - // Indexing just requires the node to be added to the list - forIndex.add(command.nodeRef); - } - else if (command.action == Action.REINDEX) - { - // Reindex is a delete and then and index - Set set = deleteImpl(command.nodeRef, true, false, mainReader); - - // Deleting any pending index actions - // - make sure we only do at most one index - forIndex.removeAll(set); - // Add the nodes for index - forIndex.addAll(set); - } - else if (command.action == Action.CASCADEREINDEX) - { - // Reindex is a delete and then and index - Set set = deleteImpl(command.nodeRef, true, true, mainReader); - - // Deleting any pending index actions - // - make sure we only do at most one index - forIndex.removeAll(set); - // Add the nodes for index - forIndex.addAll(set); - } - else if (command.action == Action.DELETE) - { - // Delete the nodes - Set set = deleteImpl(command.nodeRef, false, true, mainReader); - // Remove any pending indexes - forIndex.removeAll(set); - // Add the leaf nodes for reindex - forIndex.addAll(set); - } - } - commandList.clear(); - indexImpl(forIndex, false); - docs = getDeltaWriter().docCount(); - } - catch (IOException e) - { - // If anything goes wrong we try and do a roll back - throw new LuceneIndexException("Failed to flush index", e); - } - finally - { - if (mainReader != null) - { - try - { - mainReader.close(); - } - catch (IOException e) - { - throw new LuceneIndexException("Filed to close main reader", e); - } - } - // Make sure deletes are sent - try - { - closeDeltaReader(); - } - catch (IOException e) - { - - } - // Make 
sure writes and updates are sent. - try - { - closeDeltaWriter(); - } - catch (IOException e) - { - - } - } - } - - private Set deleteImpl(NodeRef nodeRef, boolean forReindex, boolean cascade, IndexReader mainReader) - throws LuceneIndexException, IOException - - { - // startTimer(); - getDeltaReader(); - // outputTime("Delete "+nodeRef+" size = "+getDeltaWriter().docCount()); - Set refs = new LinkedHashSet(); - Set temp = null; - - if (forReindex) - { - temp = deleteContainerAndBelow(nodeRef, getDeltaReader(), true, cascade); - refs.addAll(temp); - deletions.addAll(temp); - temp = deleteContainerAndBelow(nodeRef, mainReader, false, cascade); - refs.addAll(temp); - deletions.addAll(temp); - } - else - { - // Delete all and reindex as they could be secondary links we have deleted and they need to be updated. - // Most will skip any indexing as they will really have gone. - temp = deleteContainerAndBelow(nodeRef, getDeltaReader(), true, cascade); - deletions.addAll(temp); - refs.addAll(temp); - temp = deleteContainerAndBelow(nodeRef, mainReader, false, cascade); - deletions.addAll(temp); - refs.addAll(temp); - - Set leafrefs = new LinkedHashSet(); - leafrefs.addAll(deletePrimary(deletions, getDeltaReader(), true)); - leafrefs.addAll(deletePrimary(deletions, mainReader, false)); - // May not have to delete references - leafrefs.addAll(deleteReference(deletions, getDeltaReader(), true)); - leafrefs.addAll(deleteReference(deletions, mainReader, false)); - refs.addAll(leafrefs); - deletions.addAll(leafrefs); - - } - - return refs; - - } - - private Set deletePrimary(Collection nodeRefs, IndexReader reader, boolean delete) - throws LuceneIndexException - { - - Set refs = new LinkedHashSet(); - - for (NodeRef nodeRef : nodeRefs) - { - - try - { - TermDocs td = reader.termDocs(new Term("PRIMARYPARENT", nodeRef.toString())); - while (td.next()) - { - int doc = td.doc(); - Document document = reader.document(doc); - String id = document.get("ID"); - NodeRef ref = new 
NodeRef(id); - refs.add(ref); - if (delete) - { - reader.deleteDocument(doc); - } - } - } - catch (IOException e) - { - throw new LuceneIndexException("Failed to delete node by primary parent for " + nodeRef.toString(), e); - } - } - - return refs; - - } - - private Set deleteReference(Collection nodeRefs, IndexReader reader, boolean delete) - throws LuceneIndexException - { - - Set refs = new LinkedHashSet(); - - for (NodeRef nodeRef : nodeRefs) - { - - try - { - TermDocs td = reader.termDocs(new Term("PARENT", nodeRef.toString())); - while (td.next()) - { - int doc = td.doc(); - Document document = reader.document(doc); - String id = document.get("ID"); - NodeRef ref = new NodeRef(id); - refs.add(ref); - if (delete) - { - reader.deleteDocument(doc); - } - } - } - catch (IOException e) - { - throw new LuceneIndexException("Failed to delete node by parent for " + nodeRef.toString(), e); - } - } - - return refs; - - } - - private Set deleteContainerAndBelow(NodeRef nodeRef, IndexReader reader, boolean delete, boolean cascade) - throws LuceneIndexException - { - Set refs = new LinkedHashSet(); - - try - { - if (delete) - { - reader.deleteDocuments(new Term("ID", nodeRef.toString())); - } - refs.add(nodeRef); - if (cascade) - { - TermDocs td = reader.termDocs(new Term("ANCESTOR", nodeRef.toString())); - while (td.next()) - { - int doc = td.doc(); - Document document = reader.document(doc); - String id = document.get("ID"); - NodeRef ref = new NodeRef(id); - refs.add(ref); - if (delete) - { - reader.deleteDocument(doc); - } - } - } - } - catch (IOException e) - { - throw new LuceneIndexException("Failed to delete container and below for " + nodeRef.toString(), e); - } - return refs; - } - - private void indexImpl(Set nodeRefs, boolean isNew) throws LuceneIndexException, IOException - { - for (NodeRef ref : nodeRefs) - { - indexImpl(ref, isNew); - } - } - - private void indexImpl(NodeRef nodeRef, boolean isNew) throws LuceneIndexException, IOException - { - IndexWriter 
writer = getDeltaWriter(); - - // avoid attempting to index nodes that don't exist - - try - { - List docs = createDocuments(nodeRef, isNew, false, true); - for (Document doc : docs) - { - try - { - writer.addDocument(doc /* - * TODO: Select the language based analyser - */); - } - catch (IOException e) - { - throw new LuceneIndexException("Failed to add document to index", e); - } - } - } - catch (InvalidNodeRefException e) - { - // The node does not exist - return; - } - - } + static class Counter { @@ -1242,9 +501,12 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 } } - private List createDocuments(NodeRef nodeRef, boolean isNew, boolean indexAllProperties, + public List createDocuments(String stringNodeRef, boolean isNew, boolean indexAllProperties, boolean includeDirectoryDocuments) { + NodeRef nodeRef = new NodeRef(stringNodeRef); + + Map nodeCounts = getNodeCounts(nodeRef); List docs = new ArrayList(); ChildAssociationRef qNameRef = null; @@ -1439,7 +701,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 * Does the node type or any applied aspect allow this node to have child associations? 
* * @param nodeRef - * @return + * @return true if the node may have children */ private boolean mayHaveChildren(NodeRef nodeRef) { @@ -1750,7 +1012,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 { doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO)); } - + // TODO: Use the node locale in preferanced to the system locale Locale locale = null; @@ -1924,7 +1186,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 public int updateFullTextSearch(int size) throws LuceneIndexException { - checkAbleToDoWork(true, false); + checkAbleToDoWork(IndexUpdateStatus.ASYNCHRONOUS); // if (!mainIndexExists()) // { // remainingCount = size; @@ -1941,7 +1203,6 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 int count = 0; Searcher searcher = null; - LuceneResultSet results = null; try { searcher = getSearcher(null); @@ -1961,27 +1222,23 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 throw new LuceneIndexException( "Failed to execute query to find content which needs updating in the index", e); } - results = new LuceneResultSet(hits, searcher, nodeService, null, new SearchParameters()); - for (ResultSetRow row : results) + for(int i = 0; i < hits.length(); i++) { - LuceneResultSetRow lrow = (LuceneResultSetRow) row; - Helper helper = new Helper(lrow.getNodeRef(), lrow.getDocument().getField("TX").stringValue()); + Document doc = hits.doc(i); + Helper helper = new Helper(doc.getField("ID").stringValue(), doc.getField("TX").stringValue()); toFTSIndex.add(helper); if (++count >= size) { break; } } - count = results.length(); + + count = hits.length(); } finally { - if (results != null) - { - results.close(); // closes the searcher - } - else if (searcher != null) + if (searcher != null) { try { @@ -1996,7 +1253,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 if (toFTSIndex.size() > 0) { 
- checkAbleToDoWork(true, true); + checkAbleToDoWork(IndexUpdateStatus.ASYNCHRONOUS); IndexWriter writer = null; try @@ -2005,14 +1262,14 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 for (Helper helper : toFTSIndex) { // Document document = helper.document; - NodeRef ref = helper.nodeRef; - // bypass nodes that have disappeared + NodeRef ref = new NodeRef(helper.ref); +// bypass nodes that have disappeared if (!nodeService.exists(ref)) { continue; } - List docs = createDocuments(ref, false, true, false); + List docs = createDocuments(ref.toString(), false, true, false); for (Document doc : docs) { try @@ -2080,81 +1337,70 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2 private static class Helper { - NodeRef nodeRef; + String ref; String tx; - Helper(NodeRef nodeRef, String tx) + Helper(String ref, String tx) { - this.nodeRef = nodeRef; + this.ref = ref; this.tx = tx; } } - private static class Command - { - NodeRef nodeRef; - - Action action; - - Command(NodeRef nodeRef, Action action) - { - this.nodeRef = nodeRef; - this.action = action; - } - - public String toString() - { - StringBuffer buffer = new StringBuffer(); - if (action == Action.INDEX) - { - buffer.append("Index "); - } - else if (action == Action.DELETE) - { - buffer.append("Delete "); - } - else if (action == Action.REINDEX) - { - buffer.append("Reindex "); - } - else - { - buffer.append("Unknown ... 
"); - } - buffer.append(nodeRef); - return buffer.toString(); - } - - } - - private FullTextSearchIndexer luceneFullTextSearchIndexer; + FullTextSearchIndexer luceneFullTextSearchIndexer; public void setLuceneFullTextSearchIndexer(FullTextSearchIndexer luceneFullTextSearchIndexer) { this.luceneFullTextSearchIndexer = luceneFullTextSearchIndexer; } - public Set getDeletions() - { - HashSet deletedRefAsString = new HashSet(deletions.size()); - for(NodeRef ref : deletions) - { - deletedRefAsString.add(ref.toString()); - } - return deletedRefAsString; - } - public boolean getDeleteOnlyNodes() { - if (isFTSUpdate != null) + return indexUpdateStatus == IndexUpdateStatus.ASYNCHRONOUS; + } + + public Set getDeletions() + { + return Collections.unmodifiableSet(deletions); + } + + protected void doPrepare() throws IOException + { + saveDelta(); + flushPending(); + // prepareToMergeIntoMain(); + } + + protected void doCommit() throws IOException + { + if (indexUpdateStatus == IndexUpdateStatus.ASYNCHRONOUS) { - return isFTSUpdate.booleanValue(); + doFTSIndexCommit(); + // FTS does not trigger indexing request } else { - return false; + setInfo(docs, getDeletions(), false); + luceneFullTextSearchIndexer.requiresIndex(store); } + if (callBack != null) + { + callBack.indexCompleted(store, remainingCount, null); + } + } + + protected void doRollBack() throws IOException + { + if (callBack != null) + { + callBack.indexCompleted(store, 0, null); + } + } + + protected void doSetRollbackOnly() throws IOException + { + } } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java index 03e01da7d7..907db0061c 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java @@ -385,6 +385,7 @@ public class LuceneTest2 extends TestCase // this.getClass().getClassLoader().getResourceAsStream("test.doc"); // 
writer.putContent(is); writer.putContent("The quick brown fox jumped over the lazy dog \u00E0\u00EA\u00EE\u00F0\u00F1\u00F6\u00FB\u00FF"); + //System.out.println("Size is "+writer.getSize()); nodeService.addChild(rootNodeRef, n8, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}eight-0")); nodeService.addChild(n1, n8, ASSOC_TYPE_QNAME, QName.createQName("{namespace}eight-1")); @@ -490,7 +491,7 @@ public class LuceneTest2 extends TestCase } - public void xtestDeleteSecondaryAssocToContainer() throws Exception + public void testDeleteSecondaryAssocToContainer() throws Exception { luceneFTS.pause(); @@ -739,7 +740,7 @@ public class LuceneTest2 extends TestCase } - public void xtestDeleteSecondaryAssocToLeaf() throws Exception + public void testDeleteSecondaryAssocToLeaf() throws Exception { luceneFTS.pause(); @@ -978,7 +979,7 @@ public class LuceneTest2 extends TestCase luceneFTS.resume(); } - public void xtestDeleteIssue() throws Exception + public void testDeleteIssue() throws Exception { testTX.commit(); @@ -1143,7 +1144,7 @@ public class LuceneTest2 extends TestCase } - public void xtestDeltaIssue() throws Exception + public void testDeltaIssue() throws Exception { luceneFTS.pause(); final NodeService pns = (NodeService) ctx.getBean("NodeService"); @@ -1316,7 +1317,7 @@ public class LuceneTest2 extends TestCase } } - public void xtestSort() throws Exception + public void testSort() throws Exception { luceneFTS.pause(); buildBaseIndex(); @@ -2727,7 +2728,7 @@ public class LuceneTest2 extends TestCase results.close(); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "@" - + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"110\"", null, null); + + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ".size:\"62\"", null, null); assertEquals(1, results.length()); results.close(); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/TransactionStatus.java 
b/source/java/org/alfresco/repo/search/impl/lucene/index/TransactionStatus.java index 07413aebf8..ca4f59ad99 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/TransactionStatus.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/TransactionStatus.java @@ -1,5 +1,7 @@ package org.alfresco.repo.search.impl.lucene.index; +import javax.transaction.Status; + /** * Status of indexes that make up the whole index. This starts with the value from javax.transaction.Status. @@ -22,7 +24,9 @@ package org.alfresco.repo.search.impl.lucene.index; public enum TransactionStatus { - // Match the order in javax.transaction.Status so ordinal values are correct + /** + * Active TX + */ ACTIVE { public boolean isCommitted() @@ -44,8 +48,16 @@ public enum TransactionStatus { return previous == null; } + + public int getStatus() + { + return Status.STATUS_ACTIVE; + } }, + /** + * TX marked for rollback + */ MARKED_ROLLBACK { public boolean isCommitted() @@ -67,8 +79,16 @@ public enum TransactionStatus { return previous.allowsRollbackOrMark(previous); } + + public int getStatus() + { + return Status.STATUS_MARKED_ROLLBACK; + } }, + /** + * TX prepared + */ PREPARED { public boolean isCommitted() @@ -90,8 +110,16 @@ public enum TransactionStatus { return previous == TransactionStatus.PREPARING; } + + public int getStatus() + { + return Status.STATUS_PREPARED; + } }, + /** + * TX Committed + */ COMMITTED { public boolean isCommitted() @@ -113,8 +141,16 @@ public enum TransactionStatus { return previous == TransactionStatus.COMMITTING; } + + public int getStatus() + { + return Status.STATUS_COMMITTED; + } }, + /** + * TX rolled back + */ ROLLEDBACK { public boolean isCommitted() @@ -136,8 +172,16 @@ public enum TransactionStatus { return previous == TransactionStatus.ROLLINGBACK; } + + public int getStatus() + { + return Status.STATUS_ROLLEDBACK; + } }, + /** + * TX state is unknown + */ UNKNOWN { public boolean isCommitted() @@ -159,8 +203,16 @@ public enum 
TransactionStatus { return false; } + + public int getStatus() + { + return Status.STATUS_UNKNOWN; + } }, + /** + * No transaction + */ NO_TRANSACTION { public boolean isCommitted() @@ -182,8 +234,16 @@ public enum TransactionStatus { return false; } + + public int getStatus() + { + return Status.STATUS_NO_TRANSACTION; + } }, + /** + * TX is preparing + */ PREPARING { public boolean isCommitted() @@ -205,8 +265,16 @@ public enum TransactionStatus { return previous == TransactionStatus.ACTIVE; } + + public int getStatus() + { + return Status.STATUS_PREPARING; + } }, + /** + * TX is committing + */ COMMITTING { public boolean isCommitted() @@ -228,8 +296,16 @@ public enum TransactionStatus { return previous == TransactionStatus.PREPARED; } + + public int getStatus() + { + return Status.STATUS_COMMITTING; + } }, + /** + * TX rolling back + */ ROLLINGBACK { public boolean isCommitted() @@ -251,9 +327,14 @@ public enum TransactionStatus { return previous.allowsRollbackOrMark(previous); } + + public int getStatus() + { + return Status.STATUS_ROLLING_BACK; + } }, - /* + /** * This entry is the source for an active merge. The result will be in a new index. */ MERGE @@ -277,9 +358,14 @@ public enum TransactionStatus { return false; } + + public int getStatus() + { + return Status.STATUS_COMMITTED; + } }, - /* + /** * A new index element that is being made by a merge. 
*/ MERGE_TARGET @@ -303,115 +389,15 @@ public enum TransactionStatus { return false; } + + public int getStatus() + { + return Status.STATUS_ACTIVE; + } }, - /* - * These index overlays require reindexing - */ -// COMMITTED_REQUIRES_REINDEX -// { -// public boolean isCommitted() -// { -// return true; -// } -// -// public boolean isTransient() -// { -// return false; -// } -// -// public boolean canBeReordered() -// { -// return false; -// } -// -// public boolean follows(TransactionStatus previous) -// { -// return false; -// } -// }, - /* - * These index overlays are reindexing - */ -// COMMITTED_REINDEXING -// { -// public boolean isCommitted() -// { -// return true; -// } -// -// -// public boolean canBeReordered() -// { -// return false; -// } -// -// public boolean isTransient() -// { -// return false; -// } -// -// public boolean follows(TransactionStatus previous) -// { -// return false; -// } -// }, - - /* - * These index overlays have ben reindexed. - */ -// COMMITTED_REINDEXED -// { -// public boolean isCommitted() -// { -// return true; -// } -// -// public boolean isTransient() -// { -// return false; -// } -// -// public boolean canBeReordered() -// { -// return false; -// } -// -// public boolean follows(TransactionStatus previous) -// { -// return false; -// } -// }, - - /* - * Committed but the index still has deletions - */ - -// COMMITTED_WITH_DELETIONS -// { -// public boolean isCommitted() -// { -// return true; -// } -// -// public boolean isTransient() -// { -// return false; -// } -// -// public boolean canBeReordered() -// { -// return false; -// } -// -// public boolean follows(TransactionStatus previous) -// { -// return false; -// } -// }, - - /* + /** * Pending deleted are being committed to for the delta. 
*/ COMMITTED_DELETING @@ -435,9 +421,14 @@ public enum TransactionStatus { return false; } + + public int getStatus() + { + return Status.STATUS_COMMITTED; + } }, - /* + /** * An entry that may be deleted */ DELETABLE @@ -461,15 +452,44 @@ public enum TransactionStatus { return true; } + + public int getStatus() + { + return Status.STATUS_UNKNOWN; + } }; + /** + * Is this a committed index entry? + * @return - true if committed + */ public abstract boolean isCommitted(); + /** + * Is this transient + * @return - true if no information needs to be persisted + */ public abstract boolean isTransient(); + /** + * Can this be reordered with respect to other TXs + * @return - true if this can be reordered (fixed after prepare) + */ public abstract boolean canBeReordered(); + /** + * Can this state follow the one given? + * @param previous state + * @return - true if transition to this state is allowed + */ public abstract boolean follows(TransactionStatus previous); + + /** + * Get the javax.transaction.Status best matching this state + * + * @return - the int TX state + */ + public abstract int getStatus(); private boolean allowsRollbackOrMark(TransactionStatus previous) {