From 308e60373d1176a5f673df72cfbf7a2474f0439f Mon Sep 17 00:00:00 2001 From: Andrew Hind Date: Fri, 13 Mar 2009 14:56:27 +0000 Subject: [PATCH] MOB-585: Upgrade to lucene 2.4.1 with all collateral damage EXCEPT for the query parser and new options - passes index tests - bootstraps - index reader management looks OK - should behave as before (but be faster in places ...we will see what the build box says) git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@13624 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261 --- .../lucene/FilterIndexReaderByStringId.java | 15 + .../analysis/AlfrescoStandardFilter.java | 13 +- .../search/impl/lucene/index/IndexInfo.java | 10 + ...nceCountingReadOnlyIndexReaderFactory.java | 54 ++- .../lucene/query/CachingTermPositions.java | 15 + .../org/apache/lucene/store/FSDirectory.java | 313 +++++++++++------- 6 files changed, 287 insertions(+), 133 deletions(-) diff --git a/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java b/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java index 8424611a7d..3f6e20a029 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/FilterIndexReaderByStringId.java @@ -282,6 +282,21 @@ public class FilterIndexReaderByStringId extends FilterIndexReader { return ((TermPositions) this.in).nextPosition(); } + + public byte[] getPayload(byte[] data, int offset) throws IOException + { + return ((TermPositions) this.in).getPayload(data, offset); + } + + public int getPayloadLength() + { + return ((TermPositions) this.in).getPayloadLength(); + } + + public boolean isPayloadAvailable() + { + return ((TermPositions) this.in).isPayloadAvailable(); + } } @Override diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java index 2f03324d63..3891c9ab83 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/AlfrescoStandardFilter.java @@ -30,9 +30,10 @@ import java.util.StringTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardTokenizerConstants; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardTokenizer; -public class AlfrescoStandardFilter extends TokenFilter implements StandardTokenizerConstants +public class AlfrescoStandardFilter extends TokenFilter { /** Construct filtering in. */ @@ -41,13 +42,13 @@ public class AlfrescoStandardFilter extends TokenFilter implements StandardToken super(in); } - private static final String APOSTROPHE_TYPE = tokenImage[APOSTROPHE]; + private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE]; - private static final String ACRONYM_TYPE = tokenImage[ACRONYM]; + private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM]; - private static final String HOST_TYPE = tokenImage[HOST]; + private static final String HOST_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]; - private static final String ALPHANUM_TYPE = tokenImage[ALPHANUM]; + private static final String ALPHANUM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM]; private Queue hostTokens = null; diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java b/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java index 9a75570361..ddd0e3d09e 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/IndexInfo.java @@ -73,7 +73,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.search.Hits; @@ -450,6 +452,8 @@ public class IndexInfo implements IndexMonitor writer.setWriteLockTimeout(writeLockTimeout); writer.setMaxFieldLength(maxFieldLength); writer.setTermIndexInterval(termIndexInterval); + writer.setMergeScheduler(new SerialMergeScheduler()); + writer.setMergePolicy(new LogDocMergePolicy()); writer.close(); } catch (IOException e) @@ -515,6 +519,8 @@ public class IndexInfo implements IndexMonitor writer.setWriteLockTimeout(writeLockTimeout); writer.setMaxFieldLength(maxFieldLength); writer.setTermIndexInterval(termIndexInterval); + writer.setMergeScheduler(new SerialMergeScheduler()); + writer.setMergePolicy(new LogDocMergePolicy()); writer.optimize(); long docs = writer.docCount(); writer.close(); @@ -839,6 +845,8 @@ public class IndexInfo implements IndexMonitor writer.setWriteLockTimeout(writeLockTimeout); writer.setMaxFieldLength(maxFieldLength); writer.setTermIndexInterval(termIndexInterval); + writer.setMergeScheduler(new SerialMergeScheduler()); + writer.setMergePolicy(new LogDocMergePolicy()); return writer; } @@ -3455,6 +3463,8 @@ public class IndexInfo implements IndexMonitor writer.setMergeFactor(mergerMergeFactor); writer.setMaxMergeDocs(mergerMaxMergeDocs); writer.setWriteLockTimeout(writeLockTimeout); + writer.setMergeScheduler(new SerialMergeScheduler()); + writer.setMergePolicy(new LogDocMergePolicy()); } } writer.addIndexes(readers); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java b/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java index ed414bcf83..ac912f42ff 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/index/ReferenceCountingReadOnlyIndexReaderFactory.java @@ -45,6 +45,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import net.sf.ehcache.CacheManager; +import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.repo.search.impl.lucene.LuceneConfig; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -101,6 +102,8 @@ public class ReferenceCountingReadOnlyIndexReaderFactory private static final long serialVersionUID = 7693185658022810428L; + private static java.lang.reflect.Field s_field; + String id; int refCount = 0; @@ -109,7 +112,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory boolean allowsDeletions; - boolean closed = false; + boolean wrapper_closed = false; ConcurrentHashMap isCategory = new ConcurrentHashMap(); @@ -125,10 +128,28 @@ public class ReferenceCountingReadOnlyIndexReaderFactory ConcurrentHashMap>> linkAspectCache = new ConcurrentHashMap>>(); - boolean enableCaching; + private boolean enableCaching; private LuceneConfig config; + static + { + Class c = IndexReader.class; + try + { + s_field = c.getDeclaredField("closed"); + s_field.setAccessible(true); + } + catch (SecurityException e) + { + throw new AlfrescoRuntimeException("Reference counting index reader needs access to org.apache.lucene.index.IndexReader.closed to work correctly", e); + } + catch (NoSuchFieldException e) + { + throw new AlfrescoRuntimeException("Reference counting index reader needs access to org.apache.lucene.index.IndexReader.closed to work correctly (incompatible version of lucene)", e); + } + } + ReferenceCountingReadOnlyIndexReader(String id, IndexReader indexReader, boolean enableCaching, LuceneConfig config) { super(indexReader); @@ -142,7 +163,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory public synchronized void incrementReferenceCount() { - if (closed) + if (wrapper_closed) { throw new IllegalStateException(Thread.currentThread().getName() + "Indexer is closed " + id); } @@ -151,6 +172,21 @@ public class ReferenceCountingReadOnlyIndexReaderFactory { s_logger.debug(Thread.currentThread().getName() + ": Reader " + id + " - increment - ref count is " + refCount + " ... " + super.toString()); } + if(!wrapper_closed) + { + try + { + s_field.set(this, false); + } + catch (IllegalArgumentException e) + { + throw new AlfrescoRuntimeException("Failed to mark index as open ..", e); + } + catch (IllegalAccessException e) + { + throw new AlfrescoRuntimeException("Failed to mark index as open ..", e); + } + } } public synchronized void decrementReferenceCount() throws IOException @@ -180,7 +216,7 @@ public class ReferenceCountingReadOnlyIndexReaderFactory // No tidy up } in.close(); - closed = true; + wrapper_closed = true; } else { @@ -204,12 +240,12 @@ public class ReferenceCountingReadOnlyIndexReaderFactory public synchronized boolean getClosed() { - return closed; + return wrapper_closed; } public synchronized void setInvalidForReuse() throws IOException { - if (closed) + if (wrapper_closed) { throw new IllegalStateException(Thread.currentThread().getName() + "Indexer is closed " + id); } @@ -228,11 +264,15 @@ public class ReferenceCountingReadOnlyIndexReaderFactory { s_logger.debug(Thread.currentThread().getName() + ": Reader " + id + " closing" + " ... " + super.toString()); } - if (closed) + if (wrapper_closed) { throw new IllegalStateException(Thread.currentThread().getName() + "Indexer is closed " + id); } decrementReferenceCount(); + if(!wrapper_closed) + { + incRef(); + } } @Override diff --git a/source/java/org/alfresco/repo/search/impl/lucene/query/CachingTermPositions.java b/source/java/org/alfresco/repo/search/impl/lucene/query/CachingTermPositions.java index 9e09f8c277..a6e3d0b406 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/query/CachingTermPositions.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/query/CachingTermPositions.java @@ -184,5 +184,20 @@ public class CachingTermPositions implements TermPositions delegate.close(); clear(); } + + public byte[] getPayload(byte[] data, int offset) throws IOException + { + return delegate.getPayload(data, offset); + } + + public int getPayloadLength() + { + return delegate.getPayloadLength(); + } + + public boolean isPayloadAvailable() + { + return delegate.isPayloadAvailable(); + } } \ No newline at end of file diff --git a/source/java/org/apache/lucene/store/FSDirectory.java b/source/java/org/apache/lucene/store/FSDirectory.java index c3a1c7c068..8cc3128cc7 100644 --- a/source/java/org/apache/lucene/store/FSDirectory.java +++ b/source/java/org/apache/lucene/store/FSDirectory.java @@ -24,7 +24,8 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.Hashtable; +import java.util.HashMap; +import java.util.Map; import org.apache.lucene.index.IndexFileNameFilter; @@ -46,7 +47,6 @@ import org.apache.lucene.index.IndexWriter; * synchronization on directories.

* * @see Directory - * @author Doug Cutting */ public class FSDirectory extends Directory { @@ -58,7 +58,7 @@ public class FSDirectory extends Directory { * instance from the cache. See LUCENE-776 * for some relevant discussion. */ - private static final Hashtable DIRECTORIES = new Hashtable(); + private static final Map DIRECTORIES = new HashMap(); private static boolean disableLocks = false; @@ -239,7 +239,7 @@ public class FSDirectory extends Directory { if (directory.exists()) { String[] files = directory.list(IndexFileNameFilter.getFilter()); // clear old files if (files == null) - throw new IOException("Cannot read directory " + directory.getAbsolutePath()); + throw new IOException("cannot read directory " + directory.getAbsolutePath() + ": list() returned null"); for (int i = 0; i < files.length; i++) { File file = new File(directory, files[i]); if (!file.delete()) @@ -291,6 +291,12 @@ public class FSDirectory extends Directory { } catch (ClassCastException e) { throw new IOException("unable to cast LockClass " + lockClassName + " instance to a LockFactory"); } + + if (lockFactory instanceof NativeFSLockFactory) { + ((NativeFSLockFactory) lockFactory).setLockDir(path); + } else if (lockFactory instanceof SimpleFSLockFactory) { + ((SimpleFSLockFactory) lockFactory).setLockDir(path); + } } else { // Our default lock is SimpleFSLockFactory; // default lockDir is our index directory: @@ -311,17 +317,20 @@ public class FSDirectory extends Directory { /** Returns an array of strings, one for each Lucene index file in the directory. */ public String[] list() { + ensureOpen(); return directory.list(IndexFileNameFilter.getFilter()); } /** Returns true iff a file with the given name exists. */ public boolean fileExists(String name) { + ensureOpen(); File file = new File(directory, name); return file.exists(); } /** Returns the time the named file was last modified. */ public long fileModified(String name) { + ensureOpen(); File file = new File(directory, name); return file.lastModified(); } @@ -334,18 +343,21 @@ public class FSDirectory extends Directory { /** Set the modified time of an existing file to now. */ public void touchFile(String name) { + ensureOpen(); File file = new File(directory, name); file.setLastModified(System.currentTimeMillis()); } /** Returns the length in bytes of a file in the directory. */ public long fileLength(String name) { + ensureOpen(); File file = new File(directory, name); return file.length(); } /** Removes an existing file in the directory. */ public void deleteFile(String name) throws IOException { + ensureOpen(); File file = new File(directory, name); if (!file.delete()) throw new IOException("Cannot delete " + file); @@ -357,6 +369,7 @@ public class FSDirectory extends Directory { */ public synchronized void renameFile(String from, String to) throws IOException { + ensureOpen(); File old = new File(directory, from); File nu = new File(directory, to); @@ -421,7 +434,7 @@ public class FSDirectory extends Directory { /** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */ public IndexOutput createOutput(String name) throws IOException { - + ensureOpen(); File file = new File(directory, name); if (file.exists() && !file.delete()) // delete existing, if any throw new IOException("Cannot overwrite: " + file); @@ -429,9 +442,50 @@ public class FSDirectory extends Directory { return new FSIndexOutput(file); } - /** Returns a stream reading an existing file. */ + public void sync(String name) throws IOException { + ensureOpen(); + File fullFile = new File(directory, name); + boolean success = false; + int retryCount = 0; + IOException exc = null; + while(!success && retryCount < 5) { + retryCount++; + RandomAccessFile file = null; + try { + try { + file = new RandomAccessFile(fullFile, "rw"); + file.getFD().sync(); + success = true; + } finally { + if (file != null) + file.close(); + } + } catch (IOException ioe) { + if (exc == null) + exc = ioe; + try { + // Pause 5 msec + Thread.sleep(5); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + } + } + } + if (!success) + // Throw original exception + throw exc; + } + + // Inherit javadoc public IndexInput openInput(String name) throws IOException { - return new FSIndexInput(new File(directory, name)); + ensureOpen(); + return openInput(name, BufferedIndexInput.BUFFER_SIZE); + } + + // Inherit javadoc + public IndexInput openInput(String name, int bufferSize) throws IOException { + ensureOpen(); + return new FSIndexInput(new File(directory, name), bufferSize); } /** @@ -442,6 +496,7 @@ public class FSDirectory extends Directory { public String getLockID() { + ensureOpen(); String dirName; // name to be hashed try { dirName = directory.getCanonicalPath(); @@ -466,7 +521,8 @@ public class FSDirectory extends Directory { /** Closes the store to future operations. */ public synchronized void close() { - if (--refCount <= 0) { + if (isOpen && --refCount <= 0) { + isOpen = false; synchronized (DIRECTORIES) { DIRECTORIES.remove(directory); } @@ -474,6 +530,7 @@ public class FSDirectory extends Directory { } public File getFile() { + ensureOpen(); return directory; } @@ -481,128 +538,144 @@ public class FSDirectory extends Directory { public String toString() { return this.getClass().getName() + "@" + directory; } -} + protected static class FSIndexInput extends BufferedIndexInput { + + protected static class Descriptor extends RandomAccessFile { + // remember if the file is open, so that we don't try to close it + // more than once + protected volatile boolean isOpen; + long position; + final long length; + + public Descriptor(File file, String mode) throws IOException { + super(file, mode); + isOpen=true; + length=length(); + } + + public void close() throws IOException { + if (isOpen) { + isOpen=false; + super.close(); + } + } + + protected void finalize() throws Throwable { + try { + close(); + } finally { + super.finalize(); + } + } + } + + protected final Descriptor file; + boolean isClone; + + public FSIndexInput(File path) throws IOException { + this(path, BufferedIndexInput.BUFFER_SIZE); + } + + public FSIndexInput(File path, int bufferSize) throws IOException { + super(bufferSize); + file = new Descriptor(path, "r"); + } + + /** IndexInput methods */ + protected void readInternal(byte[] b, int offset, int len) + throws IOException { + synchronized (file) { + long position = getFilePointer(); + if (position != file.position) { + file.seek(position); + file.position = position; + } + int total = 0; + do { + int i = file.read(b, offset+total, len-total); + if (i == -1) + throw new IOException("read past EOF"); + file.position += i; + total += i; + } while (total < len); + } + } + + public void close() throws IOException { + // only close the file if this is not a clone + if (!isClone) file.close(); + } + + protected void seekInternal(long position) { + } + + public long length() { + return file.length; + } + + public Object clone() { + FSIndexInput clone = (FSIndexInput)super.clone(); + clone.isClone = true; + return clone; + } + + /** Method used for testing. Returns true if the underlying + * file descriptor is valid. + */ + boolean isFDValid() throws IOException { + return file.getFD().valid(); + } + } -class FSIndexInput extends BufferedIndexInput { - - private static class Descriptor extends RandomAccessFile { + protected static class FSIndexOutput extends BufferedIndexOutput { + RandomAccessFile file = null; + // remember if the file is open, so that we don't try to close it // more than once - private boolean isOpen; - long position; - final long length; - - public Descriptor(File file, String mode) throws IOException { - super(file, mode); - isOpen=true; - length=length(); - getChannel(); - } + private volatile boolean isOpen; + public FSIndexOutput(File path) throws IOException { + file = new RandomAccessFile(path, "rw"); + file.getChannel(); + isOpen = true; + } + + /** output methods: */ + public void flushBuffer(byte[] b, int offset, int size) throws IOException { + file.write(b, offset, size); + } public void close() throws IOException { + // only close the file if it has not been closed yet if (isOpen) { - isOpen=false; - super.close(); + boolean success = false; + try { + super.close(); + success = true; + } finally { + isOpen = false; + if (!success) { + try { + file.close(); + } catch (Throwable t) { + // Suppress so we don't mask original exception + } + } else + file.close(); + } } } - - protected void finalize() throws Throwable { - try { - close(); - } finally { - super.finalize(); - } + + /** Random-access methods */ + public void seek(long pos) throws IOException { + super.seek(pos); + file.seek(pos); } - } - - private final Descriptor file; - boolean isClone; - - public FSIndexInput(File path) throws IOException { - file = new Descriptor(path, "r"); - } - - /** IndexInput methods */ - protected void readInternal(byte[] b, int offset, int len) - throws IOException { - synchronized (file) { - long position = getFilePointer(); - if (position != file.position) { - file.seek(position); - file.position = position; - } - int total = 0; - do { - int i = file.read(b, offset+total, len-total); - if (i == -1) - throw new IOException("read past EOF"); - file.position += i; - total += i; - } while (total < len); + public long length() throws IOException { + return file.length(); + } + public void setLength(long length) throws IOException { + file.setLength(length); } - } - - public void close() throws IOException { - // only close the file if this is not a clone - if (!isClone) file.close(); - } - - protected void seekInternal(long position) { - } - - public long length() { - return file.length; - } - - public Object clone() { - FSIndexInput clone = (FSIndexInput)super.clone(); - clone.isClone = true; - return clone; - } - - /** Method used for testing. Returns true if the underlying - * file descriptor is valid. - */ - boolean isFDValid() throws IOException { - return file.getFD().valid(); } } - - -class FSIndexOutput extends BufferedIndexOutput { - RandomAccessFile file = null; - - // remember if the file is open, so that we don't try to close it - // more than once - private boolean isOpen; - - public FSIndexOutput(File path) throws IOException { - file = new RandomAccessFile(path, "rw"); - file.getChannel(); - isOpen = true; - } - - /** output methods: */ - public void flushBuffer(byte[] b, int size) throws IOException { - file.write(b, 0, size); - } - public void close() throws IOException { - // only close the file if it has not been closed yet - if (isOpen) { - super.close(); - file.close(); - isOpen = false; - } - } - - /** Random-access methods */ - public void seek(long pos) throws IOException { - super.seek(pos); - file.seek(pos); - } - public long length() throws IOException { - return file.length(); - } - -}