From ae5ef978d7058efec5308ce4075af58db9ebd5a6 Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Tue, 21 Apr 2009 09:43:01 +0000 Subject: [PATCH] Performance improvement - mainly for PATHs ending * - cache isNode lookup - ENH-378 git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@14025 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../lucene/FilterIndexReaderByStringId.java | 28 +- .../impl/lucene/index/CachingIndexReader.java | 4 + .../search/impl/lucene/index/IndexInfo.java | 334 +++++++++++++++--- ...nceCountingReadOnlyIndexReaderFactory.java | 100 +++++- .../search/impl/lucene/query/LeafScorer.java | 23 +- .../search/impl/lucene/query/PathScorer.java | 17 +- 6 files changed, 434 insertions(+), 72 deletions(-) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java b/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java index 3f6e20a029..6adf8e2675 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java @@ -42,6 +42,7 @@ import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.OpenBitSet; /** @@ -54,7 +55,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader { private static Log s_logger = LogFactory.getLog(FilterIndexReaderByStringId.class); - BitSet deletedDocuments; + OpenBitSet deletedDocuments; private String id; @@ -71,7 +72,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader super(reader); this.id = id; - deletedDocuments = new BitSet(reader.maxDoc()); + deletedDocuments = new OpenBitSet(reader.maxDoc()); if (s_logger.isDebugEnabled()) { @@ -87,7 +88,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader TermDocs td = reader.termDocs(new Term("ID", stringRef)); while (td.next()) { - deletedDocuments.set(td.doc(), true); + deletedDocuments.set(td.doc()); } } } @@ -106,7 +107,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader Document doc = hits.doc(i); if (doc.getField("ISCONTAINER") == null) { - deletedDocuments.set(hits.id(i), true); + deletedDocuments.set(hits.id(i)); // There should only be one thing to delete // break; } @@ -131,7 +132,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader */ public static class FilterTermDocs implements TermDocs { - BitSet deletedDocuments; + OpenBitSet deletedDocuments; protected TermDocs in; @@ -142,7 +143,7 @@ public class FilterIndexReaderByStringId extends FilterIndexReader * @param in * @param deletedDocuments */ - public FilterTermDocs(String id, TermDocs in, BitSet deletedDocuments) + public FilterTermDocs(String id, TermDocs in, OpenBitSet deletedDocuments) { this.in = in; this.deletedDocuments = deletedDocuments; @@ -268,41 +269,44 @@ public class FilterIndexReaderByStringId extends FilterIndexReader public static class FilterTermPositions extends FilterTermDocs implements TermPositions { + TermPositions tp; + /** * @param id * @param in * @param deletedDocuements */ - public FilterTermPositions(String id, TermPositions in, BitSet deletedDocuements) + public FilterTermPositions(String id, TermPositions in, OpenBitSet deletedDocuements) { super(id, in, deletedDocuements); + tp = in; } public int nextPosition() throws IOException { - return ((TermPositions) this.in).nextPosition(); + return tp.nextPosition(); } public byte[] getPayload(byte[] data, int offset) throws IOException { - return ((TermPositions) this.in).getPayload(data, offset); + return tp.getPayload(data, offset); } public int getPayloadLength() { - return ((TermPositions) this.in).getPayloadLength(); + return tp.getPayloadLength(); } public boolean isPayloadAvailable() { - return ((TermPositions) this.in).isPayloadAvailable(); + return tp.isPayloadAvailable(); } } @Override public int numDocs() { - return super.numDocs() - deletedDocuments.cardinality(); + return super.numDocs() - (int)deletedDocuments.cardinality(); } @Override diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/CachingIndexReader.java b/source/java/org/alfresco/repo/search/impl/lucene/index/CachingIndexReader.java index 70194d8c84..d77addb994 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/CachingIndexReader.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/CachingIndexReader.java @@ -26,6 +26,8 @@ package org.alfresco.repo.search.impl.lucene.index; import java.io.IOException; +import org.apache.lucene.index.TermDocs; + /** * * @author andyh @@ -49,4 +51,6 @@ public interface CachingIndexReader public String getType(int n) throws IOException; + public TermDocs getNodeDocs() throws IOException; + } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java b/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java index 89e55befc2..fffb83ed11 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java @@ -64,7 +64,12 @@ import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.repo.search.IndexerException; import org.alfresco.repo.search.impl.lucene.FilterIndexReaderByStringId; import org.alfresco.repo.search.impl.lucene.LuceneConfig; +import org.alfresco.repo.search.impl.lucene.LuceneXPathHandler; import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser; +import org.alfresco.repo.search.impl.lucene.query.PathQuery; +import org.alfresco.service.cmr.dictionary.DictionaryService; +import org.alfresco.service.namespace.NamespaceService; +import org.alfresco.util.ApplicationContextHelper; import org.alfresco.util.GUID; import org.alfresco.util.TraceableThreadFactory; import org.apache.commons.logging.Log; @@ -78,22 +83,28 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.RAMDirectory; import org.safehaus.uuid.UUID; +import org.saxpath.SAXPathException; import org.springframework.context.ApplicationEvent; import org.springframework.context.ApplicationListener; import org.springframework.context.ConfigurableApplicationContext; import org.springframework.context.event.ContextRefreshedEvent; +import com.werken.saxpath.XPathReader; + /** * The information that makes up an index. IndexInfoVersion Repeated information of the form *
    @@ -177,7 +188,7 @@ public class IndexInfo implements IndexMonitor * The directory relative to the root path */ private String relativePath; - + /** * The file holding the index information */ @@ -326,9 +337,9 @@ public class IndexInfo implements IndexMonitor private ThreadPoolExecutor threadPoolExecutor; private LuceneConfig config; - + private List applicationListeners = new LinkedList(); - + static { // We do not require any of the lucene in-built locking. @@ -406,7 +417,7 @@ public class IndexInfo implements IndexMonitor catch (IOException e) { throw new AlfrescoRuntimeException("Failed to determine index relative path", e); - } + } } else { @@ -438,7 +449,7 @@ public class IndexInfo implements IndexMonitor { throw new AlfrescoRuntimeException("Failed to determine index relative path", e); } - + } // Create an empty in memory index @@ -476,7 +487,7 @@ public class IndexInfo implements IndexMonitor { throw new AlfrescoRuntimeException("The index must be held in a directory"); } - + // Create the info files. File indexInfoFile = new File(this.indexDirectory, INDEX_INFO); File indexInfoBackupFile = new File(this.indexDirectory, INDEX_INFO_BACKUP); @@ -681,7 +692,7 @@ public class IndexInfo implements IndexMonitor cleaner.schedule(); } }, 0, 20000); - + publishDiscoveryEvent(); } @@ -691,6 +702,7 @@ public class IndexInfo implements IndexMonitor { return true; } + public Object doWork() throws Exception { setStatusFromFile(); @@ -1926,15 +1938,15 @@ public class IndexInfo implements IndexMonitor File location = new File(indexDirectory, id).getCanonicalFile(); if (IndexReader.indexExists(location)) { - if ((config != null) && (size > config.getMaxDocsForInMemoryMerge())) - { - reader = IndexReader.open(location); - } - else + if ((config != null) && (size < config.getMaxDocsForInMemoryMerge())) { RAMDirectory rd = new RAMDirectory(location); reader = IndexReader.open(rd); } + else + { + reader = IndexReader.open(location); + } } else { @@ -2436,9 +2448,25 @@ public class IndexInfo implements IndexMonitor readIndexInfo(indexLocation); } } - + + static Query getPathQuery(String path) throws SAXPathException + { + ConfigurableApplicationContext ac = ApplicationContextHelper.getApplicationContext(); + XPathReader reader = new XPathReader(); + LuceneXPathHandler handler = new LuceneXPathHandler(); + handler.setNamespacePrefixResolver((NamespaceService)ac.getBean("namespaceService")); + handler.setDictionaryService((DictionaryService)ac.getBean("dictionaryService")); + reader.setXPathHandler(handler); + reader.parse(path); + PathQuery pathQuery = handler.getQuery(); + pathQuery.setRepeats(false); + return pathQuery; + } + private static void readIndexInfo(File indexLocation) throws Throwable { + long start; + long end; IndexInfo ii = new IndexInfo(indexLocation, null); ii.readWriteLock.writeLock().lock(); @@ -2457,16 +2485,228 @@ public class IndexInfo implements IndexMonitor ii.releaseWriteLock(); } IndexReader reader = ii.getMainIndexReferenceCountingReadOnlyIndexReader(); - TermEnum terms = reader.terms(new Term("@{http://www.alfresco.org/model/user/1.0}members", "")); - while (terms.next() && terms.term().field().equals("@{http://www.alfresco.org/model/user/1.0}members")) - { - System.out.println("F = " + terms.term().field() + " V = " + terms.term().text() + " F = " + terms.docFreq()); - if (terms.term().text().equals("xirmsi")) - { - System.out.println("Matched"); - } - } - terms.close(); + System.out.println(reader.getFieldNames(FieldOption.ALL)); + IndexSearcher searcher = new IndexSearcher(reader); + Query query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}DestinationName", "bambino")); + start = System.nanoTime(); + Hits hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}DestinationName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}DestinationName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}DestinationName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new WildcardQuery(new Term("@{http://www.travelmuse.com/wcm}DestinationPhoto", "*")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}DestinationPhoto:* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new WildcardQuery(new Term("@{http://www.travelmuse.com/wcm}DestinationPhoto", "*")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}DestinationPhoto:* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}ThemeName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}ThemeName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}ThemeName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}ThemeName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}ActivityName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}ActivityName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}ActivityName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}ActivityName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}EditorialItemTitle", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}EditorialItemTitle:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}EditorialItemTitle", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}EditorialItemTitle:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}PoiName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}PoiName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}PoiName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}PoiName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}PropertyName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}PropertyName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.travelmuse.com/wcm}PropertyName", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.travelmuse.com/wcm}PropertyName:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.alfresco.org/model/content/1.0}content", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.alfresco.org/model/content/1.0}content:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = new TermQuery(new Term("@{http://www.alfresco.org/model/content/1.0}content", "bambino")); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("@{http://www.alfresco.org/model/content/1.0}content:bambino = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/editorial//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/editorial//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/editorial//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/editorial//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/tag//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/tag//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/tag//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/tag//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/poi//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/poi//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/poi//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/poi//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/property//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/property//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/property//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/property//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/web-reviews//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/web-reviews//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + searcher = new IndexSearcher(reader); + query = getPathQuery("/www/avm_webapps/ROOT/web-reviews//*"); + start = System.nanoTime(); + hits = searcher.search(query); + end = System.nanoTime(); + System.out.println("/www/avm_webapps/ROOT/web-reviews//* = " + hits.length() + " in " + ((end - start) / 1e9)); + searcher.close(); + + + // TermEnum terms = reader.terms(new Term("@{http://www.alfresco.org/model/user/1.0}members", "")); + // while (terms.next() && terms.term().field().equals("@{http://www.alfresco.org/model/user/1.0}members")) + // { + // System.out.println("F = " + terms.term().field() + " V = " + terms.term().text() + " F = " + + // terms.docFreq()); + // if (terms.term().text().equals("xirmsi")) + // { + // System.out.println("Matched"); + // } + // } + // terms.close(); } @@ -3290,7 +3530,7 @@ public class IndexInfo implements IndexMonitor } dumpInfo(); - + notifyListeners("MergedDeletions", toDelete.size()); return null; @@ -3302,7 +3542,7 @@ public class IndexInfo implements IndexMonitor } }); - + } finally { @@ -3551,7 +3791,7 @@ public class IndexInfo implements IndexMonitor registerReferenceCountingIndexReader(finalMergeTargetId, finalNewReader); notifyListeners("MergedIndexes", toMerge.size()); - + dumpInfo(); writeStatus(); @@ -3710,9 +3950,10 @@ public class IndexInfo implements IndexMonitor } return false; } - + /* * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getRelativePath() */ public String getRelativePath() @@ -3720,7 +3961,9 @@ public class IndexInfo implements IndexMonitor return this.relativePath; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getStatusSnapshot() */ public Map getStatusSnapshot() @@ -3740,10 +3983,12 @@ public class IndexInfo implements IndexMonitor finally { readWriteLock.writeLock().unlock(); - } + } } - - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getActualSize() */ public long getActualSize() throws IOException @@ -3772,7 +4017,9 @@ public class IndexInfo implements IndexMonitor } } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getUsedSize() */ public long getUsedSize() throws IOException @@ -3787,8 +4034,10 @@ public class IndexInfo implements IndexMonitor releaseReadLock(); } } - - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getNumberOfDocuments() */ public int getNumberOfDocuments() throws IOException @@ -3804,12 +4053,14 @@ public class IndexInfo implements IndexMonitor } } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getNumberOfFields() */ public int getNumberOfFields() throws IOException { - + IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader(); try { @@ -3821,7 +4072,9 @@ public class IndexInfo implements IndexMonitor } } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#getNumberOfIndexedFields() */ public int getNumberOfIndexedFields() throws IOException @@ -3839,8 +4092,9 @@ public class IndexInfo implements IndexMonitor /* * (non-Javadoc) + * * @see org.alfresco.repo.search.impl.lucene.index.IndexMonitor#addApplicationListener(org.springframework.context. - * ApplicationListener) + * ApplicationListener) */ public void addApplicationListener(ApplicationListener listener) { @@ -3864,7 +4118,7 @@ public class IndexInfo implements IndexMonitor } return size; } - + private void publishDiscoveryEvent() { if (this.config == null) @@ -3883,7 +4137,7 @@ public class IndexInfo implements IndexMonitor // that will fire when it has applicationContext.addApplicationListener(new ApplicationListener() { - + public void onApplicationEvent(ApplicationEvent event) { if (event instanceof ContextRefreshedEvent) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java b/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java index ac912f42ff..e5c7fe5d88 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java @@ -57,6 +57,10 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.OpenBitSet; public class ReferenceCountingReadOnlyIndexReaderFactory { @@ -103,7 +107,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory private static final long serialVersionUID = 7693185658022810428L; private static java.lang.reflect.Field s_field; - + String id; int refCount = 0; @@ -146,15 +150,16 @@ public class ReferenceCountingReadOnlyIndexReaderFactory } catch (NoSuchFieldException e) { - throw new AlfrescoRuntimeException("Reference counting index reader needs access to org.apache.lucene.index.IndexReader.closed to work correctly (incompatible version of lucene)", e); + throw new AlfrescoRuntimeException( + "Reference counting index reader needs access to org.apache.lucene.index.IndexReader.closed to work correctly (incompatible version of lucene)", e); } } - + ReferenceCountingReadOnlyIndexReader(String id, IndexReader indexReader, boolean enableCaching, LuceneConfig config) { super(indexReader); this.id = id; - if(enableCaching && (config != null)) + if (enableCaching && (config != null)) { this.enableCaching = config.isCacheEnabled(); } @@ -172,7 +177,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory { s_logger.debug(Thread.currentThread().getName() + ": Reader " + id + " - increment - ref count is " + refCount + " ... " + super.toString()); } - if(!wrapper_closed) + if (!wrapper_closed) { try { @@ -180,7 +185,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory } catch (IllegalArgumentException e) { - throw new AlfrescoRuntimeException("Failed to mark index as open ..", e); + throw new AlfrescoRuntimeException("Failed to mark index as open ..", e); } catch (IllegalAccessException e) { @@ -269,7 +274,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory throw new IllegalStateException(Thread.currentThread().getName() + "Indexer is closed " + id); } decrementReferenceCount(); - if(!wrapper_closed) + if (!wrapper_closed) { incRef(); } @@ -351,6 +356,8 @@ public class ReferenceCountingReadOnlyIndexReaderFactory private final MultipleValueFieldAccessor MV_LINKASPECT_FIELD_ACCESSOR = new MultipleValueFieldAccessor("LINKASPECT"); + private OpenBitSet nodes = null; + private T manageCache(ConcurrentHashMap> cache, Accessor accessor, int n, FieldSelector fieldSelector, int limit) throws IOException { Integer key = Integer.valueOf(n); @@ -575,6 +582,21 @@ public class ReferenceCountingReadOnlyIndexReaderFactory return answer.toArray(new String[answer.size()]); } + + public synchronized TermDocs getNodeDocs() throws IOException + { + if (nodes == null) + { + TermDocs nodeDocs = termDocs(new Term("ISNODE", "T")); + nodes = new OpenBitSet(); + while (nodeDocs.next()) + { + nodes.set(nodeDocs.doc()); + } + nodeDocs.close(); + } + return new TermDocSet(nodes); + } } static class WithUseCount implements Comparable> @@ -622,4 +644,68 @@ public class ReferenceCountingReadOnlyIndexReaderFactory } } + + static class TermDocSet implements TermDocs + { + OpenBitSet set; + + int position = -1; + + TermDocSet(OpenBitSet set) + { + this.set = set; + } + + public void close() throws IOException + { + // Noop + } + + public int doc() + { + return position; + } + + public int freq() + { + return 1; + } + + public boolean next() throws IOException + { + position++; + position = set.nextSetBit(position); + return (position != -1); + } + + public int read(int[] docs, int[] freqs) throws IOException + { + throw new UnsupportedOperationException(); + } + + public void seek(Term term) throws IOException + { + throw new UnsupportedOperationException(); + } + + public void seek(TermEnum termEnum) throws IOException + { + throw new UnsupportedOperationException(); + } + + public boolean skipTo(int target) throws IOException + { + do + { + if (!next()) + { + return false; + } + } + while (target > doc()); + return true; + + } + + } } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/query/LeafScorer.java b/source/java/org/alfresco/repo/search/impl/lucene/query/LeafScorer.java index 8458b57fbc..c97e144e37 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/query/LeafScorer.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/query/LeafScorer.java @@ -49,6 +49,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermPositions; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Scorer; @@ -103,7 +104,7 @@ public class LeafScorer extends Scorer IndexReader reader; - private TermPositions allNodes; + private TermDocs allNodes; TermPositions level0; @@ -125,7 +126,7 @@ public class LeafScorer extends Scorer private int[] cats; - // private TermPositions tp; + private boolean matchAllLeaves; /** * Constructor - should use an arg object ... @@ -144,7 +145,7 @@ public class LeafScorer extends Scorer * @param repeat * @param tp */ - public LeafScorer(Weight weight, TermPositions root, TermPositions level0, ContainerScorer containerScorer, StructuredFieldPosition[] sfps, TermPositions allNodes, + public LeafScorer(Weight weight, TermPositions root, TermPositions level0, ContainerScorer containerScorer, StructuredFieldPosition[] sfps, TermDocs allNodes, HashMap selfIds, IndexReader reader, Similarity similarity, byte[] norms, DictionaryService dictionaryService, boolean repeat, TermPositions tp) { super(similarity); @@ -167,6 +168,8 @@ public class LeafScorer extends Scorer this.level0 = level0; this.dictionaryService = dictionaryService; this.repeat = repeat; + + matchAllLeaves = allNodes(); try { initialise(); @@ -388,7 +391,7 @@ public class LeafScorer extends Scorer } } - if (allNodes()) + if (matchAllLeaves) { int position = 0; parents = new int[10000]; @@ -513,11 +516,11 @@ public class LeafScorer extends Scorer counter = 0; } - if (allNodes()) + if (matchAllLeaves) { while (more) { - if (allNodes.next() && root.next()) + if (allNodes.next()) { if (check()) { @@ -736,7 +739,7 @@ public class LeafScorer extends Scorer private boolean check() throws IOException { - if (allNodes()) + if (matchAllLeaves) { this.counter = 0; int position; @@ -1027,7 +1030,7 @@ public class LeafScorer extends Scorer public int doc() { - if (allNodes()) + if (matchAllLeaves) { return allNodes.doc(); } @@ -1045,7 +1048,7 @@ public class LeafScorer extends Scorer countInCounter = 1; counter = 0; - if (allNodes()) + if (matchAllLeaves) { allNodes.skipTo(target); root.skipTo(allNodes.doc()); // must match @@ -1055,7 +1058,7 @@ public class LeafScorer extends Scorer } while (more) { - if (allNodes.next() && root.next()) + if (allNodes.next()) { if (check()) { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/query/PathScorer.java b/source/java/org/alfresco/repo/search/impl/lucene/query/PathScorer.java index a085ea9b5d..bd82b897a7 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/query/PathScorer.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/query/PathScorer.java @@ -28,12 +28,14 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import org.alfresco.repo.search.impl.lucene.index.CachingIndexReader; import org.alfresco.repo.search.impl.lucene.query.LeafScorer.Counter; import org.alfresco.service.cmr.dictionary.DictionaryService; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermPositions; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Scorer; @@ -141,7 +143,16 @@ public class PathScorer extends Scorer TermPositions level0 = null; - TermPositions nodePositions = reader.termPositions(new Term("ISNODE", "T")); + TermDocs nodeDocs; + if (reader instanceof CachingIndexReader) + { + CachingIndexReader cachingIndexReader = (CachingIndexReader) reader; + nodeDocs = cachingIndexReader.getNodeDocs(); + } + else + { + nodeDocs = reader.termDocs(new Term("ISNODE", "T")); + } // StructuredFieldPosition[] test = // (StructuredFieldPosition[])structuredFieldPositions.toArray(new @@ -160,11 +171,11 @@ public class PathScorer extends Scorer if((cs == null) && (pathQuery.getQNameStructuredFieldPositions().get(pathQuery.getQNameStructuredFieldPositions().size()-1)).linkSelf()) { - nodePositions = reader.termPositions(new Term("ISROOT", "T")); + nodeDocs = reader.termDocs(new Term("ISROOT", "T")); } - LeafScorer ls = new LeafScorer(weight, rootLeafPositions, level0, cs, (StructuredFieldPosition[]) pathQuery.getQNameStructuredFieldPositions().toArray(new StructuredFieldPosition[] {}), nodePositions, + LeafScorer ls = new LeafScorer(weight, rootLeafPositions, level0, cs, (StructuredFieldPosition[]) pathQuery.getQNameStructuredFieldPositions().toArray(new StructuredFieldPosition[] {}), nodeDocs, selfIds, reader, similarity, reader.norms(pathQuery.getQnameField()), dictionarySertvice, repeat, tp); return new PathScorer(similarity, ls);