mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Heinous merge from HEAD. Seems to basically work. Be on guard however.
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/BRANCHES/WCM-DEV2/root@4137 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -42,6 +42,11 @@ public class ClosingIndexSearcher extends IndexSearcher
|
||||
this.reader = r;
|
||||
}
|
||||
|
||||
/*package*/ IndexReader getReader()
|
||||
{
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
|
@@ -29,13 +29,13 @@ import org.alfresco.service.namespace.QName;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
|
||||
|
||||
/**
|
||||
* Analyse properties according to the property definition.
|
||||
*
|
||||
* The default is to use the standard tokeniser. The tokeniser should not have
|
||||
* been called when indexeing properties that require no tokenisation. (tokenise
|
||||
* been called when indexing properties that require no tokenisation. (tokenise
|
||||
* should be set to false when adding the field to the document)
|
||||
*
|
||||
* @author andyh
|
||||
@@ -60,7 +60,7 @@ public class LuceneAnalyser extends Analyzer
|
||||
*/
|
||||
public LuceneAnalyser(DictionaryService dictionaryService)
|
||||
{
|
||||
this(new StandardAnalyzer());
|
||||
this(new AlfrescoStandardAnalyser());
|
||||
this.dictionaryService = dictionaryService;
|
||||
}
|
||||
|
||||
|
@@ -145,7 +145,7 @@ public abstract class LuceneBase2
|
||||
}
|
||||
}
|
||||
|
||||
protected Searcher getSearcher(LuceneIndexer2 luceneIndexer) throws LuceneIndexException
|
||||
protected ClosingIndexSearcher getSearcher(LuceneIndexer2 luceneIndexer) throws LuceneIndexException
|
||||
{
|
||||
// If we know the delta id we should do better
|
||||
|
||||
|
@@ -1696,13 +1696,13 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
|
||||
return false;
|
||||
}
|
||||
|
||||
public void updateFullTextSearch(int size) throws LuceneIndexException
|
||||
public int updateFullTextSearch(int size) throws LuceneIndexException
|
||||
{
|
||||
checkAbleToDoWork(true, false);
|
||||
if (!mainIndexExists())
|
||||
{
|
||||
remainingCount = size;
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
try
|
||||
{
|
||||
@@ -1723,7 +1723,7 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
|
||||
if(searcher == null)
|
||||
{
|
||||
remainingCount = size;
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
Hits hits;
|
||||
try
|
||||
@@ -1817,7 +1817,9 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
|
||||
}
|
||||
}
|
||||
|
||||
remainingCount = count - writer.docCount();
|
||||
int done = writer.docCount();
|
||||
remainingCount = count - done;
|
||||
return done;
|
||||
}
|
||||
catch (LuceneIndexException e)
|
||||
{
|
||||
@@ -1825,8 +1827,14 @@ public class LuceneIndexerImpl extends LuceneBase implements LuceneIndexer
|
||||
{
|
||||
closeDeltaWriter();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
catch (LuceneIndexException e)
|
||||
{
|
||||
|
@@ -43,7 +43,6 @@ import org.alfresco.repo.search.IndexerException;
|
||||
import org.alfresco.repo.search.impl.lucene.fts.FTSIndexerAware;
|
||||
import org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer;
|
||||
import org.alfresco.repo.search.impl.lucene.index.TransactionStatus;
|
||||
import org.alfresco.repo.search.impl.lucene.index.IndexInfo.LockWork;
|
||||
import org.alfresco.service.cmr.dictionary.AspectDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DictionaryService;
|
||||
@@ -1316,7 +1315,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
|
||||
if (includeDirectoryDocuments)
|
||||
{
|
||||
if (nodeTypeDef.getChildAssociations().size() > 0)
|
||||
if (nodeTypeDef != null && nodeTypeDef.getChildAssociations().size() > 0)
|
||||
{
|
||||
if (directPaths.contains(pair.getFirst()))
|
||||
{
|
||||
@@ -1748,7 +1747,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
return false;
|
||||
}
|
||||
|
||||
public void updateFullTextSearch(int size) throws LuceneIndexException
|
||||
public int updateFullTextSearch(int size) throws LuceneIndexException
|
||||
{
|
||||
checkAbleToDoWork(true, false);
|
||||
// if (!mainIndexExists())
|
||||
@@ -1775,7 +1774,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
if (searcher == null)
|
||||
{
|
||||
remainingCount = size;
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
Hits hits;
|
||||
try
|
||||
@@ -1869,7 +1868,9 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
}
|
||||
}
|
||||
|
||||
remainingCount = count - writer.docCount();
|
||||
int done = writer.docCount();
|
||||
remainingCount = count - done;
|
||||
return done;
|
||||
}
|
||||
catch (LuceneIndexException e)
|
||||
{
|
||||
@@ -1877,8 +1878,13 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
{
|
||||
closeDeltaWriter();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
|
@@ -156,7 +156,25 @@ public class LuceneQueryParser extends QueryParser
|
||||
}
|
||||
else if (field.equals("TYPE"))
|
||||
{
|
||||
TypeDefinition target = dictionaryService.getType(QName.createQName(queryText));
|
||||
TypeDefinition target;
|
||||
if(queryText.startsWith("{"))
|
||||
{
|
||||
target = dictionaryService.getType(QName.createQName(queryText));
|
||||
}
|
||||
else
|
||||
{
|
||||
int colonPosition = queryText.indexOf(':');
|
||||
if (colonPosition == -1)
|
||||
{
|
||||
// use the default namespace
|
||||
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver.getNamespaceURI(""), queryText));
|
||||
}
|
||||
else
|
||||
{
|
||||
// find the prefix
|
||||
target = dictionaryService.getType(QName.createQName(namespacePrefixResolver.getNamespaceURI(queryText.substring(0, colonPosition)), queryText.substring(colonPosition + 1)));
|
||||
}
|
||||
}
|
||||
if (target == null)
|
||||
{
|
||||
throw new SearcherException("Invalid type: " + queryText);
|
||||
@@ -186,7 +204,26 @@ public class LuceneQueryParser extends QueryParser
|
||||
}
|
||||
else if (field.equals("ASPECT"))
|
||||
{
|
||||
AspectDefinition target = dictionaryService.getAspect(QName.createQName(queryText));
|
||||
AspectDefinition target;
|
||||
if(queryText.startsWith("{"))
|
||||
{
|
||||
target = dictionaryService.getAspect(QName.createQName(queryText));
|
||||
}
|
||||
else
|
||||
{
|
||||
int colonPosition = queryText.indexOf(':');
|
||||
if (colonPosition == -1)
|
||||
{
|
||||
// use the default namespace
|
||||
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver.getNamespaceURI(""), queryText));
|
||||
}
|
||||
else
|
||||
{
|
||||
// find the prefix
|
||||
target = dictionaryService.getAspect(QName.createQName(namespacePrefixResolver.getNamespaceURI(queryText.substring(0, colonPosition)), queryText.substring(colonPosition + 1)));
|
||||
}
|
||||
}
|
||||
|
||||
QName targetQName = target.getName();
|
||||
HashSet<QName> subclasses = new HashSet<QName>();
|
||||
for (QName classRef : dictionaryService.getAllAspects())
|
||||
|
@@ -59,17 +59,13 @@ import org.saxpath.SAXPathException;
|
||||
import com.werken.saxpath.XPathReader;
|
||||
|
||||
/**
|
||||
* The Lucene implementation of Searcher At the moment we support only lucene
|
||||
* based queries.
|
||||
*
|
||||
* TODO: Support for other query languages
|
||||
* The Lucene implementation of Searcher At the moment we support only lucene based queries. TODO: Support for other query languages
|
||||
*
|
||||
* @author andyh
|
||||
*
|
||||
*/
|
||||
public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Default field name
|
||||
*/
|
||||
@@ -90,9 +86,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
*/
|
||||
|
||||
/**
|
||||
* Get an initialised searcher for the store and transaction Normally we do
|
||||
* not search against a a store and delta. Currently only gets the searcher
|
||||
* against the main index.
|
||||
* Get an initialised searcher for the store and transaction. Normally we do not search against a store and delta. Currently only gets the searcher against the main index.
|
||||
*
|
||||
* @param storeRef
|
||||
* @param deltaId
|
||||
@@ -115,9 +109,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an intialised searcher for the store. No transactional ammendsmends
|
||||
* are searched.
|
||||
*
|
||||
* Get an initialised searcher for the store. No transactional amendments are searched.
|
||||
*
|
||||
* @param storeRef
|
||||
* @return
|
||||
@@ -134,7 +126,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
|
||||
public boolean indexExists()
|
||||
{
|
||||
//return mainIndexExists();
|
||||
// return mainIndexExists();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -220,7 +212,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
|
||||
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
|
||||
dictionaryService), namespacePrefixResolver, dictionaryService, defaultOperator);
|
||||
Searcher searcher = getSearcher(indexer);
|
||||
ClosingIndexSearcher searcher = getSearcher(indexer);
|
||||
if (searcher == null)
|
||||
{
|
||||
// no index return an empty result set
|
||||
@@ -238,7 +230,14 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
switch (sd.getSortType())
|
||||
{
|
||||
case FIELD:
|
||||
fields[index++] = new SortField(sd.getField(), !sd.isAscending());
|
||||
if (searcher.getReader().getFieldNames().contains(sd.getField()))
|
||||
{
|
||||
fields[index++] = new SortField(sd.getField(), !sd.isAscending());
|
||||
}
|
||||
else
|
||||
{
|
||||
fields[index++] = new SortField(null, SortField.DOC, !sd.isAscending());
|
||||
}
|
||||
break;
|
||||
case DOCUMENT:
|
||||
fields[index++] = new SortField(null, SortField.DOC, !sd.isAscending());
|
||||
@@ -348,8 +347,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
}
|
||||
|
||||
/**
|
||||
* The definitions must provide a default value, or of not there must be a
|
||||
* parameter to provide the value
|
||||
* The definitions must provide a default value, or if not there must be a parameter to provide the value
|
||||
*
|
||||
* @param definition
|
||||
* @param queryParameters
|
||||
@@ -396,12 +394,9 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
}
|
||||
|
||||
/*
|
||||
* Parameterise the query string - not sure if it is required to escape
|
||||
* lucence spacials chars The parameters could be used to build the query -
|
||||
* the contents of parameters should alread have been escaped if required.
|
||||
* ... mush better to provide the parameters and work out what to do TODO:
|
||||
* conditional query escapement - may be we should have a parameter type
|
||||
* that is not escaped
|
||||
* Parameterise the query string - not sure if it is required to escape Lucene special chars. The parameters could be used to build the query - the contents of parameters
|
||||
* should already have been escaped if required. ... much better to provide the parameters and work out what to do TODO: conditional query escapement - maybe we should have a
|
||||
* parameter type that is not escaped
|
||||
*/
|
||||
private String parameterise(String unparameterised, Map<QName, QueryParameterDefinition> map,
|
||||
QueryParameter[] queryParameters, NamespacePrefixResolver nspr) throws QueryParameterisationException
|
||||
@@ -567,7 +562,6 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
|
||||
/**
|
||||
* @return Returns true if the pattern is present, otherwise false.
|
||||
*
|
||||
* @see #setIndexer(Indexer)
|
||||
* @see #setSearcher(SearchService)
|
||||
*/
|
||||
|
@@ -37,7 +37,9 @@ import junit.framework.TestCase;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.dictionary.DictionaryDAO;
|
||||
import org.alfresco.repo.dictionary.DictionaryNamespaceComponent;
|
||||
import org.alfresco.repo.dictionary.M2Model;
|
||||
import org.alfresco.repo.dictionary.NamespaceDAOImpl;
|
||||
import org.alfresco.repo.node.BaseNodeServiceTest;
|
||||
import org.alfresco.repo.search.QueryParameterDefImpl;
|
||||
import org.alfresco.repo.search.QueryRegisterComponent;
|
||||
@@ -159,7 +161,7 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
private QueryRegisterComponent queryRegisterComponent;
|
||||
|
||||
private NamespacePrefixResolver namespacePrefixResolver;
|
||||
private DictionaryNamespaceComponent namespacePrefixResolver;
|
||||
|
||||
private LuceneIndexerAndSearcher indexerAndSearcher;
|
||||
|
||||
@@ -171,6 +173,8 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
private NodeRef[] documentOrder;
|
||||
|
||||
private NamespaceDAOImpl namespaceDao;
|
||||
|
||||
public LuceneTest2()
|
||||
{
|
||||
super();
|
||||
@@ -185,10 +189,13 @@ public class LuceneTest2 extends TestCase
|
||||
luceneFTS = (FullTextSearchIndexer) ctx.getBean("LuceneFullTextSearchIndexer");
|
||||
contentService = (ContentService) ctx.getBean("contentService");
|
||||
queryRegisterComponent = (QueryRegisterComponent) ctx.getBean("queryRegisterComponent");
|
||||
namespacePrefixResolver = (NamespacePrefixResolver) ctx.getBean("namespaceService");
|
||||
namespacePrefixResolver = (DictionaryNamespaceComponent) ctx.getBean("namespaceService");
|
||||
indexerAndSearcher = (LuceneIndexerAndSearcher) ctx.getBean("luceneIndexerAndSearcherFactory");
|
||||
transactionService = (TransactionService) ctx.getBean("transactionComponent");
|
||||
serviceRegistry = (ServiceRegistry) ctx.getBean(ServiceRegistry.SERVICE_REGISTRY);
|
||||
|
||||
namespaceDao = (NamespaceDAOImpl) ctx.getBean("namespaceDAO");
|
||||
|
||||
|
||||
this.authenticationComponent = (AuthenticationComponent) ctx.getBean("authenticationComponent");
|
||||
|
||||
@@ -208,7 +215,9 @@ public class LuceneTest2 extends TestCase
|
||||
assertNotNull(modelStream);
|
||||
M2Model model = M2Model.createModel(modelStream);
|
||||
dictionaryDAO.putModel(model);
|
||||
|
||||
|
||||
namespaceDao.addPrefix("test", TEST_NAMESPACE);
|
||||
|
||||
StoreRef storeRef = nodeService.createStore(StoreRef.PROTOCOL_WORKSPACE, "Test_" + System.currentTimeMillis());
|
||||
rootNodeRef = nodeService.getRootNode(storeRef);
|
||||
|
||||
@@ -372,6 +381,10 @@ public class LuceneTest2 extends TestCase
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
public void firstTest() throws Exception
|
||||
{
|
||||
testSort();
|
||||
}
|
||||
|
||||
public void test0() throws Exception
|
||||
{
|
||||
@@ -1054,6 +1067,17 @@ public class LuceneTest2 extends TestCase
|
||||
results.close();
|
||||
|
||||
luceneFTS.resume();
|
||||
|
||||
|
||||
SearchParameters sp17 = new SearchParameters();
|
||||
sp17.addStore(rootNodeRef.getStoreRef());
|
||||
sp17.setLanguage(SearchService.LANGUAGE_LUCENE);
|
||||
sp17.setQuery("PATH:\"//.\"");
|
||||
sp17.addSort("cabbage", false);
|
||||
results = searcher.query(sp17);
|
||||
results.close();
|
||||
|
||||
luceneFTS.resume();
|
||||
}
|
||||
|
||||
public void test1() throws Exception
|
||||
@@ -1861,19 +1885,39 @@ public class LuceneTest2 extends TestCase
|
||||
null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testType.toPrefixString(namespacePrefixResolver) + "\"", null,
|
||||
null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toString() + "\"",
|
||||
null, null);
|
||||
assertEquals(13, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TYPE:\"" + testSuperType.toPrefixString(namespacePrefixResolver) + "\"",
|
||||
null, null);
|
||||
assertEquals(13, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
|
||||
+ ISO9075.getXPathName(testAspect) + "\"", null, null);
|
||||
+ testAspect.toString() + "\"", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
|
||||
+ testAspect.toPrefixString(namespacePrefixResolver) + "\"", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
|
||||
+ ISO9075.getXPathName(testSuperAspect) + "\"", null, null);
|
||||
+ testAspect.toString() + "\"", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "ASPECT:\""
|
||||
+ testAspect.toPrefixString(namespacePrefixResolver) + "\"", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
|
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Alfresco, Inc.
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
|
||||
public class AlfrescoStandardAnalyser extends Analyzer
|
||||
{
|
||||
private Set stopSet;
|
||||
|
||||
/**
|
||||
* An array containing some common English words that are usually not useful for searching.
|
||||
*/
|
||||
public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
|
||||
|
||||
/** Builds an analyzer. */
|
||||
public AlfrescoStandardAnalyser()
|
||||
{
|
||||
this(STOP_WORDS);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the given stop words. */
|
||||
public AlfrescoStandardAnalyser(String[] stopWords)
|
||||
{
|
||||
stopSet = StopFilter.makeStopSet(stopWords);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
|
||||
*/
|
||||
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||
{
|
||||
TokenStream result = new StandardTokenizer(reader);
|
||||
result = new AlfrescoStandardFilter(result);
|
||||
result = new LowerCaseFilter(result);
|
||||
result = new StopFilter(result, stopSet);
|
||||
return result;
|
||||
}
|
||||
}
|
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Alfresco, Inc.
|
||||
*
|
||||
* Licensed under the Mozilla Public License version 1.1
|
||||
* with a permitted attribution clause. You may obtain a
|
||||
* copy of the License at
|
||||
*
|
||||
* http://www.alfresco.org/legal/license.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific
|
||||
* language governing permissions and limitations under the
|
||||
* License.
|
||||
*/
|
||||
package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.Stack;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizerConstants;
|
||||
|
||||
public class AlfrescoStandardFilter extends TokenFilter implements StandardTokenizerConstants
|
||||
{
|
||||
|
||||
/** Construct filtering <i>in</i>. */
|
||||
public AlfrescoStandardFilter(TokenStream in)
|
||||
{
|
||||
super(in);
|
||||
}
|
||||
|
||||
private static final String APOSTROPHE_TYPE = tokenImage[APOSTROPHE];
|
||||
|
||||
private static final String ACRONYM_TYPE = tokenImage[ACRONYM];
|
||||
|
||||
private static final String HOST_TYPE = tokenImage[HOST];
|
||||
|
||||
private static final String ALPHANUM_TYPE = tokenImage[ALPHANUM];
|
||||
|
||||
private Queue<org.apache.lucene.analysis.Token> hostTokens = null;
|
||||
|
||||
/**
|
||||
* Returns the next token in the stream, or null at EOS.
|
||||
* <p>
|
||||
* Removes <tt>'s</tt> from the end of words.
|
||||
* <p>
|
||||
* Removes dots from acronyms.
|
||||
* <p>
|
||||
* Splits host names ...
|
||||
*/
|
||||
public final org.apache.lucene.analysis.Token next() throws java.io.IOException
|
||||
{
|
||||
if (hostTokens == null)
|
||||
{
|
||||
org.apache.lucene.analysis.Token t = input.next();
|
||||
|
||||
if (t == null)
|
||||
return null;
|
||||
|
||||
String text = t.termText();
|
||||
String type = t.type();
|
||||
|
||||
if (type == APOSTROPHE_TYPE && // remove 's
|
||||
(text.endsWith("'s") || text.endsWith("'S")))
|
||||
{
|
||||
return new org.apache.lucene.analysis.Token(text.substring(0, text.length() - 2), t.startOffset(), t
|
||||
.endOffset(), type);
|
||||
|
||||
}
|
||||
else if (type == ACRONYM_TYPE)
|
||||
{ // remove dots
|
||||
StringBuffer trimmed = new StringBuffer();
|
||||
for (int i = 0; i < text.length(); i++)
|
||||
{
|
||||
char c = text.charAt(i);
|
||||
if (c != '.')
|
||||
trimmed.append(c);
|
||||
}
|
||||
return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(), type);
|
||||
|
||||
}
|
||||
else if (type == HOST_TYPE)
|
||||
{
|
||||
// <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
|
||||
// There must be at least two tokens ....
|
||||
hostTokens = new LinkedList<org.apache.lucene.analysis.Token>();
|
||||
StringTokenizer tokeniser = new StringTokenizer(text, ".");
|
||||
int start = t.startOffset();
|
||||
int end;
|
||||
while (tokeniser.hasMoreTokens())
|
||||
{
|
||||
String token = tokeniser.nextToken();
|
||||
end = start + token.length();
|
||||
hostTokens.offer(new org.apache.lucene.analysis.Token(token, start, end, ALPHANUM_TYPE));
|
||||
start = end + 1;
|
||||
}
|
||||
// check if we have an acronym ..... yes a.b.c ends up here ...
|
||||
|
||||
if (text.length() == hostTokens.size() * 2 - 1)
|
||||
{
|
||||
hostTokens = null;
|
||||
// acronym
|
||||
StringBuffer trimmed = new StringBuffer();
|
||||
for (int i = 0; i < text.length(); i++)
|
||||
{
|
||||
char c = text.charAt(i);
|
||||
if (c != '.')
|
||||
trimmed.append(c);
|
||||
}
|
||||
return new org.apache.lucene.analysis.Token(trimmed.toString(), t.startOffset(), t.endOffset(),
|
||||
ALPHANUM_TYPE);
|
||||
}
|
||||
else
|
||||
{
|
||||
return hostTokens.remove();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return t;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
org.apache.lucene.analysis.Token token = hostTokens.remove();
|
||||
if (hostTokens.isEmpty())
|
||||
{
|
||||
hostTokens = null;
|
||||
}
|
||||
return token;
|
||||
}
|
||||
}
|
||||
}
|
@@ -31,7 +31,8 @@ import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
|
||||
public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearchIndexer
|
||||
{
|
||||
private enum State {
|
||||
private enum State
|
||||
{
|
||||
ACTIVE, PAUSING, PAUSED
|
||||
};
|
||||
|
||||
@@ -48,7 +49,7 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
public FullTextSearchIndexerImpl()
|
||||
{
|
||||
super();
|
||||
//System.out.println("Created id is "+this);
|
||||
// System.out.println("Created id is "+this);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -64,8 +65,7 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer#indexCompleted(org.alfresco.repo.ref.StoreRef,
|
||||
* int, java.lang.Exception)
|
||||
* @see org.alfresco.repo.search.impl.lucene.fts.FullTextSearchIndexer#indexCompleted(org.alfresco.repo.ref.StoreRef, int, java.lang.Exception)
|
||||
*/
|
||||
public synchronized void indexCompleted(StoreRef storeRef, int remaining, Exception e)
|
||||
{
|
||||
@@ -83,7 +83,7 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
}
|
||||
finally
|
||||
{
|
||||
//System.out.println("..Index Complete: id is "+this);
|
||||
// System.out.println("..Index Complete: id is "+this);
|
||||
this.notifyAll();
|
||||
}
|
||||
}
|
||||
@@ -96,19 +96,19 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
public synchronized void pause() throws InterruptedException
|
||||
{
|
||||
pauseCount++;
|
||||
//System.out.println("..Waiting "+pauseCount+" id is "+this);
|
||||
// System.out.println("..Waiting "+pauseCount+" id is "+this);
|
||||
while ((indexing.size() > 0))
|
||||
{
|
||||
//System.out.println("Pause: Waiting with count of "+indexing.size()+" id is "+this);
|
||||
// System.out.println("Pause: Waiting with count of "+indexing.size()+" id is "+this);
|
||||
this.wait();
|
||||
}
|
||||
pauseCount--;
|
||||
if(pauseCount == 0)
|
||||
if (pauseCount == 0)
|
||||
{
|
||||
paused = true;
|
||||
this.notifyAll(); // only resumers
|
||||
}
|
||||
//System.out.println("..Remaining "+pauseCount +" paused = "+paused+" id is "+this);
|
||||
// System.out.println("..Remaining "+pauseCount +" paused = "+paused+" id is "+this);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -118,16 +118,16 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
*/
|
||||
public synchronized void resume() throws InterruptedException
|
||||
{
|
||||
if(pauseCount == 0)
|
||||
if (pauseCount == 0)
|
||||
{
|
||||
//System.out.println("Direct resume"+" id is "+this);
|
||||
// System.out.println("Direct resume"+" id is "+this);
|
||||
paused = false;
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
while(pauseCount > 0)
|
||||
while (pauseCount > 0)
|
||||
{
|
||||
//System.out.println("Reusme waiting on "+pauseCount+" id is "+this);
|
||||
// System.out.println("Reusme waiting on "+pauseCount+" id is "+this);
|
||||
this.wait();
|
||||
}
|
||||
paused = false;
|
||||
@@ -136,13 +136,13 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
|
||||
private synchronized boolean isPaused() throws InterruptedException
|
||||
{
|
||||
if(pauseCount == 0)
|
||||
if (pauseCount == 0)
|
||||
{
|
||||
return paused;
|
||||
return paused;
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
while(pauseCount > 0)
|
||||
while (pauseCount > 0)
|
||||
{
|
||||
this.wait();
|
||||
}
|
||||
@@ -160,17 +160,22 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
// Use the calling thread to index
|
||||
// Parallel indexing via multiple Quartz thread initiating indexing
|
||||
|
||||
StoreRef toIndex = getNextRef();
|
||||
if (toIndex != null)
|
||||
int done = 0;
|
||||
while (done == 0)
|
||||
{
|
||||
//System.out.println("Indexing "+toIndex+" at "+(new java.util.Date()));
|
||||
IndexerSPI indexer = luceneIndexerAndSearcherFactory.getIndexer(toIndex);
|
||||
indexer.registerCallBack(this);
|
||||
indexer.updateFullTextSearch(1000);
|
||||
}
|
||||
else
|
||||
{
|
||||
//System.out.println("Nothing to Indexing at "+(new java.util.Date()));
|
||||
StoreRef toIndex = getNextRef();
|
||||
if (toIndex != null)
|
||||
{
|
||||
// System.out.println("Indexing "+toIndex+" at "+(new java.util.Date()));
|
||||
IndexerSPI indexer = luceneIndexerAndSearcherFactory.getIndexer(toIndex);
|
||||
indexer.registerCallBack(this);
|
||||
done += indexer.updateFullTextSearch(1000);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
// System.out.println("Nothing to Indexing at "+(new java.util.Date()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,7 +183,7 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
{
|
||||
if (paused || (pauseCount > 0))
|
||||
{
|
||||
//System.out.println("Indexing suspended"+" id is "+this);
|
||||
// System.out.println("Indexing suspended"+" id is "+this);
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -189,6 +194,8 @@ public class FullTextSearchIndexerImpl implements FTSIndexerAware, FullTextSearc
|
||||
if (!indexing.contains(ref))
|
||||
{
|
||||
nextStoreRef = ref;
|
||||
// FIFO
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -52,7 +52,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.util.GUID;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
@@ -306,7 +306,7 @@ public class IndexInfo
|
||||
IndexWriter writer;
|
||||
try
|
||||
{
|
||||
writer = new IndexWriter(oldIndex, new StandardAnalyzer(), false);
|
||||
writer = new IndexWriter(oldIndex, new AlfrescoStandardAnalyser(), false);
|
||||
writer.setUseCompoundFile(writerUseCompoundFile);
|
||||
writer.minMergeDocs = writerMinMergeDocs;
|
||||
writer.mergeFactor = writerMergeFactor;
|
||||
@@ -442,7 +442,7 @@ public class IndexInfo
|
||||
IndexWriter writer;
|
||||
try
|
||||
{
|
||||
writer = new IndexWriter(emptyIndex, new StandardAnalyzer(), true);
|
||||
writer = new IndexWriter(emptyIndex, new AlfrescoStandardAnalyser(), true);
|
||||
writer.setUseCompoundFile(writerUseCompoundFile);
|
||||
writer.minMergeDocs = writerMinMergeDocs;
|
||||
writer.mergeFactor = writerMergeFactor;
|
||||
@@ -2424,11 +2424,11 @@ public class IndexInfo
|
||||
if (docCount < maxDocsForInMemoryMerge)
|
||||
{
|
||||
ramDirectory = new RAMDirectory();
|
||||
writer = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
|
||||
writer = new IndexWriter(ramDirectory, new AlfrescoStandardAnalyser(), true);
|
||||
}
|
||||
else
|
||||
{
|
||||
writer = new IndexWriter(location, new StandardAnalyzer(), true);
|
||||
writer = new IndexWriter(location, new AlfrescoStandardAnalyser(), true);
|
||||
|
||||
}
|
||||
writer.setUseCompoundFile(mergerUseCompoundFile);
|
||||
|
@@ -25,7 +25,7 @@ import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.util.GUID;
|
||||
import org.alfresco.util.TempFileProvider;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
@@ -95,7 +95,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
@@ -193,7 +193,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
@@ -383,7 +383,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
@@ -469,7 +469,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
@@ -649,7 +649,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
@@ -740,7 +740,7 @@ public static final String[] UPDATE_LIST_2 = { "alpha2", "bravo2", "charlie2", "
|
||||
|
||||
String guid = GUID.generate();
|
||||
ii.setStatus(guid, TransactionStatus.ACTIVE, null, null);
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new StandardAnalyzer());
|
||||
IndexWriter writer = ii.getDeltaIndexWriter(guid, new AlfrescoStandardAnalyser());
|
||||
|
||||
Document doc = new Document();
|
||||
for (int k = 0; k < 15; k++)
|
||||
|
Reference in New Issue
Block a user