Performance improvements for the new indexing:

disable Lucene locks, faster FTS deletes and overlays, in-memory merging

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@3311 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
Author: Andrew Hind
Date:   2006-07-12 16:50:31 +00:00
parent 1ec0296c22
commit 9025ad537a
2 changed files with 66 additions and 18 deletions

File: FilterIndexReaderByNodeRefs2.java

@@ -22,16 +22,15 @@ import java.util.Set;
 import org.alfresco.error.AlfrescoRuntimeException;
 import org.alfresco.service.cmr.repository.NodeRef;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.FilterIndexReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.TermPositions;
-import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiSearcher;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.TermQuery;
@@ -63,21 +62,25 @@ public class FilterIndexReaderByNodeRefs2 extends FilterIndexReader
         Searcher searcher = new IndexSearcher(reader);
         for (NodeRef nodeRef : deletions)
         {
-            BooleanQuery query = new BooleanQuery();
-            query.add(new TermQuery(new Term("ID", nodeRef.toString())), true, false);
-            query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
+            TermQuery query = new TermQuery(new Term("ID", nodeRef.toString()));
             Hits hits = searcher.search(query);
             if (hits.length() > 0)
             {
                 for (int i = 0; i < hits.length(); i++)
                 {
+                    Document doc = hits.doc(i);
+                    if (doc.getField("ISCONTAINER") == null)
+                    {
                         deletedDocuments.set(hits.id(i), true);
+                        // There should only be one thing to delete
+                        // break;
+                    }
                 }
             }
         }
     }
 }
 catch (IOException e)
 {
     throw new AlfrescoRuntimeException("Failed to construct filtering index reader", e);
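
Note: the old delete path built a two-clause BooleanQuery per node (a required ID term plus an optional ISNODE term); the new path issues a single TermQuery on ID and decides node-versus-container per hit by checking whether the stored ISCONTAINER field is present. A minimal, self-contained sketch of the new path, assuming the Lucene 1.4-era API used throughout this commit; markDeleted, the nodeId string, and the BitSet parameter are illustrative names, not part of the change:

import java.io.IOException;
import java.util.BitSet;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

class DeletePathSketch
{
    // Mark every non-container document carrying the given node ID as deleted.
    static void markDeleted(IndexReader reader, String nodeId, BitSet deletedDocuments)
            throws IOException
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        // One required term does all the narrowing; no BooleanQuery machinery.
        Hits hits = searcher.search(new TermQuery(new Term("ID", nodeId)));
        for (int i = 0; i < hits.length(); i++)
        {
            Document doc = hits.doc(i);
            if (doc.getField("ISCONTAINER") == null) // container documents survive
            {
                deletedDocuments.set(hits.id(i));
            }
        }
        searcher.close();
    }
}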

File: IndexInfo.java

@@ -47,7 +47,6 @@ import java.util.zip.CRC32;
 import org.alfresco.error.AlfrescoRuntimeException;
 import org.alfresco.repo.search.IndexerException;
-import org.alfresco.repo.search.impl.lucene.FilterIndexReaderByNodeRefs;
 import org.alfresco.repo.search.impl.lucene.FilterIndexReaderByNodeRefs2;
 import org.alfresco.service.cmr.repository.NodeRef;
 import org.alfresco.service.cmr.repository.StoreRef;
@@ -61,12 +60,14 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
 import org.apache.lucene.store.RAMDirectory;

 /**
@@ -205,6 +206,11 @@ public class IndexInfo
     private static HashMap<File, IndexInfo> indexInfos = new HashMap<File, IndexInfo>();

+    static
+    {
+        System.setProperty("disableLuceneLocks", "true");
+    }
+
     public static synchronized IndexInfo getIndexInfo(File file)
     {
         IndexInfo indexInfo = indexInfos.get(file);
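
Note: in the Lucene releases of this era, FSDirectory reads the "disableLuceneLocks" system property when its locking code first loads, so the property must be set before any index directory is opened; hence the static initialiser on IndexInfo, which is guaranteed to run before getIndexInfo hands out an instance. Skipping the file-based locks is safe here only because IndexInfo serialises all index writers itself. A sketch of the same ordering constraint in isolation (LuceneLockConfig is an illustrative name):

public final class LuceneLockConfig
{
    static
    {
        // Must run before the first FSDirectory is created; the property
        // is only consulted when Lucene's lock code first loads.
        System.setProperty("disableLuceneLocks", "true");
    }

    private LuceneLockConfig()
    {
    }
}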
@@ -249,7 +255,6 @@ public class IndexInfo
             // a spanking new index
             version = 0;
         }
-
         // Open the files and channels
@@ -364,7 +369,7 @@ public class IndexInfo
         }
         for (String id : deletable)
         {
-            IndexEntry entry = indexEntries.remove(id);
+            indexEntries.remove(id);
             deleteQueue.add(id);
         }
         synchronized (cleaner)
@@ -2008,7 +2013,6 @@ public class IndexInfo
             }
         }
         // Check it is not deleting
-        boolean foundDelta;
         for (IndexEntry entry : indexEntries.values())
         {
             if (entry.getType() == IndexType.DELTA)
@@ -2095,16 +2099,20 @@ public class IndexInfo
     {
         Searcher searcher = new IndexSearcher(reader);
-        BooleanQuery query = new BooleanQuery();
-        query.add(new TermQuery(new Term("ID", nodeRef.toString())), true, false);
-        query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
+        TermQuery query = new TermQuery(new Term("ID", nodeRef.toString()));
         Hits hits = searcher.search(query);
         if (hits.length() > 0)
         {
             for (int i = 0; i < hits.length(); i++)
             {
+                Document doc = hits.doc(i);
+                if (doc.getField("ISCONTAINER") == null)
+                {
                     reader.delete(hits.id(i));
                     invalidIndexes.add(key);
+                    // There should only be one thing to delete
+                    // break;
+                }
             }
         }
         searcher.close();
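
Note: this is the same query rewrite as in FilterIndexReaderByNodeRefs2 above. For reference, the three-argument add in the 1.4-era BooleanQuery API is add(query, required, prohibited), so the old code paired one required clause with one purely optional clause that cost scoring work without narrowing the result. A sketch contrasting the two query shapes (nodeId is an illustrative stand-in for nodeRef.toString() in the diff):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class QueryShapeSketch
{
    // Old shape: BooleanQuery.add(Query, boolean required, boolean prohibited)
    //   (true, false)  -> clause MUST match
    //   (false, false) -> clause is optional and only influences scoring
    static Query oldShape(String nodeId)
    {
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("ID", nodeId)), true, false);
        query.add(new TermQuery(new Term("ISNODE", "T")), false, false);
        return query;
    }

    // New shape: the single required ID term already selects the candidates;
    // node-versus-container is decided per hit from the stored ISCONTAINER
    // field rather than by a query clause.
    static Query newShape(String nodeId)
    {
        return new TermQuery(new Term("ID", nodeId));
    }
}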
@@ -2332,7 +2340,10 @@ public class IndexInfo
         {
             int count = 0;
             IndexReader[] readers = new IndexReader[toMerge.size() - 1];
+            RAMDirectory ramDirectory = null;
             IndexWriter writer = null;
+            long docCount = 0;
+            File outputLocation = null;
             for (IndexEntry entry : toMerge.values())
             {
                 File location = new File(indexDirectory, entry.getName());
@@ -2348,10 +2359,20 @@ public class IndexInfo
                     reader = IndexReader.open(emptyIndex);
                 }
                 readers[count++] = reader;
+                docCount += entry.getDocumentCount();
             }
             else if (entry.getStatus() == TransactionStatus.MERGE_TARGET)
             {
+                outputLocation = location;
+                if (docCount < 10000)
+                {
+                    ramDirectory = new RAMDirectory();
+                    writer = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
+                }
+                else
+                {
                     writer = new IndexWriter(location, new StandardAnalyzer(), true);
+                }
                 writer.setUseCompoundFile(true);
                 writer.minMergeDocs = 1000;
                 writer.mergeFactor = 5;
@@ -2360,6 +2381,30 @@ public class IndexInfo
             }
             writer.addIndexes(readers);
             writer.close();
+            if (ramDirectory != null)
+            {
+                String[] files = ramDirectory.list();
+                Directory directory = FSDirectory.getDirectory(outputLocation, true);
+                for (int i = 0; i < files.length; i++)
+                {
+                    // create the target file on disk
+                    OutputStream os = directory.createFile(files[i]);
+                    // open the source file in the RAM directory
+                    InputStream is = ramDirectory.openFile(files[i]);
+                    // and copy the contents across in one read/write
+                    int len = (int) is.length();
+                    byte[] buf = new byte[len];
+                    is.readBytes(buf, 0, len);
+                    os.writeBytes(buf, len);
+                    // graceful cleanup
+                    is.close();
+                    os.close();
+                }
+                ramDirectory.close();
+                directory.close();
+            }
             for (IndexReader reader : readers)
             {
                 reader.close();
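
Note: merges whose combined size is under 10,000 documents are now built in a RAMDirectory and copied to disk in a single pass once the merge completes, rather than being written through the file system as they grow. A self-contained sketch of that final copy, assuming the Lucene 1.4-era store API (InputStream and OutputStream here are the org.apache.lucene.store classes, later renamed IndexInput/IndexOutput); flushToDisk is an illustrative name. Buffering each file whole is acceptable because the RAM path is only taken for small merges:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.RAMDirectory;

class RamMergeSketch
{
    // Copy every file of an in-memory index to its on-disk home.
    static void flushToDisk(RAMDirectory ramDirectory, File outputLocation)
            throws IOException
    {
        Directory directory = FSDirectory.getDirectory(outputLocation, true);
        String[] files = ramDirectory.list();
        for (int i = 0; i < files.length; i++)
        {
            OutputStream os = directory.createFile(files[i]); // target on disk
            InputStream is = ramDirectory.openFile(files[i]); // source in RAM
            int len = (int) is.length();
            byte[] buf = new byte[len];
            is.readBytes(buf, 0, len);
            os.writeBytes(buf, len);
            is.close();
            os.close();
        }
        ramDirectory.close();
        directory.close();
    }
}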