mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Updates for locale-based searching and indexing
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4737 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
|
||||
|
||||
private String lockDirectory;
|
||||
|
||||
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
|
||||
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
|
||||
|
||||
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL;
|
||||
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
|
||||
|
||||
/**
|
||||
* Private constructor for the singleton TODO: Fit in with IOC
|
||||
@@ -1170,9 +1170,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
|
||||
return defaultMLIndexAnalysisMode;
|
||||
}
|
||||
|
||||
public void setDefaultMLIndexAnalysisMode(String mode)
|
||||
public void setDefaultMLIndexAnalysisMode(MLAnalysisMode mode)
|
||||
{
|
||||
defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
|
||||
//defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
|
||||
defaultMLIndexAnalysisMode = mode;
|
||||
}
|
||||
|
||||
public MLAnalysisMode getDefaultMLSearchAnalysisMode()
|
||||
@@ -1180,9 +1181,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
|
||||
return defaultMLSearchAnalysisMode;
|
||||
}
|
||||
|
||||
public void setDefaultMLSearchAnalysisMode(String mode)
|
||||
public void setDefaultMLSearchAnalysisMode(MLAnalysisMode mode)
|
||||
{
|
||||
defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
|
||||
//defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
|
||||
defaultMLSearchAnalysisMode = mode;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -18,6 +18,7 @@ package org.alfresco.repo.search.impl.lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
@@ -26,6 +27,7 @@ import java.util.Locale;
|
||||
import java.util.Set;
|
||||
|
||||
import org.alfresco.i18n.I18NUtil;
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.alfresco.repo.search.SearcherException;
|
||||
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
|
||||
import org.alfresco.service.cmr.dictionary.AspectDefinition;
|
||||
@@ -59,6 +61,8 @@ public class LuceneQueryParser extends QueryParser
|
||||
|
||||
private SearchParameters searchParameters;
|
||||
|
||||
private LuceneConfig config;
|
||||
|
||||
/**
|
||||
* Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
*
|
||||
@@ -68,12 +72,13 @@ public class LuceneQueryParser extends QueryParser
|
||||
* the default field for query terms.
|
||||
* @param analyzer
|
||||
* used to find terms in the query text.
|
||||
* @param config
|
||||
* @throws ParseException
|
||||
* if the parsing fails
|
||||
*/
|
||||
static public Query parse(String query, String field, Analyzer analyzer,
|
||||
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
|
||||
Operator defaultOperator, SearchParameters searchParameters) throws ParseException
|
||||
Operator defaultOperator, SearchParameters searchParameters, LuceneConfig config) throws ParseException
|
||||
{
|
||||
if (s_logger.isDebugEnabled())
|
||||
{
|
||||
@@ -84,6 +89,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
parser.setNamespacePrefixResolver(namespacePrefixResolver);
|
||||
parser.setDictionaryService(dictionaryService);
|
||||
parser.setSearchParameters(searchParameters);
|
||||
parser.setLuceneConfig(config);
|
||||
// TODO: Apply locale constraints at the top level if required for the non ML doc types.
|
||||
Query result = parser.parse(query);
|
||||
if (s_logger.isDebugEnabled())
|
||||
@@ -93,6 +99,11 @@ public class LuceneQueryParser extends QueryParser
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Stores the Lucene configuration on this parser instance.
 *
 * @param config
 *            the configuration to keep in the {@code config} field
 */
private void setLuceneConfig(LuceneConfig config)
|
||||
{
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
private void setSearchParameters(SearchParameters searchParameters)
|
||||
{
|
||||
this.searchParameters = searchParameters;
|
||||
@@ -158,7 +169,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// The super implementation will create phrase queries etc if required
|
||||
Query part = getFieldQuery("@" + qname.toString(), queryText);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -168,7 +182,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
for (String fieldName : text)
|
||||
{
|
||||
Query part = getFieldQuery(fieldName, queryText);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -251,7 +268,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
for (QName qname : subclasses)
|
||||
{
|
||||
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
|
||||
booleanQuery.add(termQuery, Occur.SHOULD);
|
||||
if (termQuery != null)
|
||||
{
|
||||
booleanQuery.add(termQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return booleanQuery;
|
||||
}
|
||||
@@ -333,7 +353,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
for (QName qname : subclasses)
|
||||
{
|
||||
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
|
||||
booleanQuery.add(termQuery, Occur.SHOULD);
|
||||
if (termQuery != null)
|
||||
{
|
||||
booleanQuery.add(termQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return booleanQuery;
|
||||
}
|
||||
@@ -369,7 +392,8 @@ public class LuceneQueryParser extends QueryParser
|
||||
}
|
||||
else if (field.startsWith("@"))
|
||||
{
|
||||
return attributeQueryBuilder(field, queryText, new FieldQuery());
|
||||
Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
|
||||
return query;
|
||||
}
|
||||
else if (field.equals("ALL"))
|
||||
{
|
||||
@@ -414,9 +438,12 @@ public class LuceneQueryParser extends QueryParser
|
||||
QName container = pd.getContainerClass().getName();
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
Query typeQuery = getFieldQuery("TYPE", container.toString());
|
||||
query.add(typeQuery, Occur.MUST);
|
||||
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
|
||||
query.add(presenceQuery, Occur.MUST_NOT);
|
||||
if ((typeQuery != null) && (presenceQuery != null))
|
||||
{
|
||||
query.add(typeQuery, Occur.MUST);
|
||||
query.add(presenceQuery, Occur.MUST_NOT);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
else
|
||||
@@ -435,9 +462,12 @@ public class LuceneQueryParser extends QueryParser
|
||||
QName container = pd.getContainerClass().getName();
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
Query typeQuery = getFieldQuery("TYPE", container.toString());
|
||||
query.add(typeQuery, Occur.MUST);
|
||||
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
|
||||
query.add(presenceQuery, Occur.MUST);
|
||||
if ((typeQuery != null) && (presenceQuery != null))
|
||||
{
|
||||
query.add(typeQuery, Occur.MUST);
|
||||
query.add(presenceQuery, Occur.MUST);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
else
|
||||
@@ -455,7 +485,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// The super implementation will create phrase queries etc if required
|
||||
Query part = getFieldQuery("@" + qname.toString(), queryText);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -585,7 +618,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// The super implementation will create phrase queries etc if required
|
||||
Query part = getPrefixQuery("@" + qname.toString(), termStr);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -611,7 +647,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// The super implementation will create phrase queries etc if required
|
||||
Query part = getWildcardQuery("@" + qname.toString(), termStr);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -637,7 +676,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// The super implementation will create phrase queries etc if required
|
||||
Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
|
||||
query.add(part, Occur.SHOULD);
|
||||
if (part != null)
|
||||
{
|
||||
query.add(part, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
@@ -772,7 +814,10 @@ public class LuceneQueryParser extends QueryParser
|
||||
StringBuilder builder = new StringBuilder(queryText.length() + 10);
|
||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
|
||||
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
if (subQuery != null)
|
||||
{
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return booleanQuery;
|
||||
}
|
||||
@@ -781,21 +826,62 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
// Build a sub query for each locale and or the results together -
|
||||
// - add an explicit condition for the locale
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
|
||||
MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
|
||||
.getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
|
||||
|
||||
if (analysisMode.includesAll())
|
||||
{
|
||||
return subQueryBuilder.getQuery(expandedFieldName, queryText);
|
||||
}
|
||||
|
||||
List<Locale> locales = searchParameters.getLocales();
|
||||
List<Locale> expandedLocales = new ArrayList<Locale>();
|
||||
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
|
||||
.getLocale()) : locales))
|
||||
{
|
||||
BooleanQuery subQuery = new BooleanQuery();
|
||||
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
|
||||
subQuery.add(contentQuery, Occur.MUST);
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(expandedFieldName).append(".locale");
|
||||
Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
|
||||
subQuery.add(localeQuery, Occur.MUST);
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
expandedLocales.addAll(MLAnalysisMode.getLocales(analysisMode, locale, true));
|
||||
}
|
||||
return booleanQuery;
|
||||
|
||||
if (expandedLocales.size() > 0)
|
||||
{
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
|
||||
if (contentQuery != null)
|
||||
{
|
||||
booleanQuery.add(contentQuery, Occur.MUST);
|
||||
BooleanQuery subQuery = new BooleanQuery();
|
||||
for (Locale locale : (expandedLocales))
|
||||
{
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(expandedFieldName).append(".locale");
|
||||
String localeString = locale.toString();
|
||||
if (localeString.indexOf("*") == -1)
|
||||
{
|
||||
Query localeQuery = getFieldQuery(builder.toString(), localeString);
|
||||
if (localeQuery != null)
|
||||
{
|
||||
subQuery.add(localeQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Query localeQuery = getWildcardQuery(builder.toString(), localeString);
|
||||
if (localeQuery != null)
|
||||
{
|
||||
subQuery.add(localeQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
}
|
||||
booleanQuery.add(subQuery, Occur.MUST);
|
||||
}
|
||||
return booleanQuery;
|
||||
}
|
||||
else
|
||||
{
|
||||
return subQueryBuilder.getQuery(expandedFieldName, queryText);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
|
||||
}
|
||||
|
||||
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
|
||||
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters);
|
||||
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters, getLuceneConfig());
|
||||
ClosingIndexSearcher searcher = getSearcher(indexer);
|
||||
if (searcher == null)
|
||||
{
|
||||
|
@@ -2543,6 +2543,14 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
// Test stop words are equivalent
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"the\"", null, null);
|
||||
assertEquals(0, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"and\"", null, null);
|
||||
assertEquals(0, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
@@ -2685,11 +2693,41 @@ public class LuceneTest2 extends TestCase
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// locale search in en_US for en_UK
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("d\\:content:\"fox\"");
|
||||
sp.addLocale(Locale.US);
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// Direct ML tests
|
||||
|
||||
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":and");
|
||||
results = searcher.query(sp);
|
||||
assertEquals(0, results.length());
|
||||
results.close();
|
||||
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":\"and\"");
|
||||
results = searcher.query(sp);
|
||||
assertEquals(0, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
|
@@ -3,6 +3,7 @@ package org.alfresco.repo.search.impl.lucene.analysis;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
@@ -38,91 +39,20 @@ public class MLTokenDuplicator extends Tokenizer
|
||||
this.source = source;
|
||||
this.locale = locale;
|
||||
|
||||
boolean l = locale.getLanguage().length() != 0;
|
||||
boolean c = locale.getCountry().length() != 0;
|
||||
boolean v = locale.getVariant().length() != 0;
|
||||
|
||||
prefixes = new HashSet<String>(4);
|
||||
if (mlAnalaysisMode.includesAll())
|
||||
Collection<Locale> locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false);
|
||||
prefixes = new HashSet<String>(locales.size());
|
||||
for(Locale toAdd : locales)
|
||||
{
|
||||
prefixes.add("");
|
||||
}
|
||||
|
||||
if (mlAnalaysisMode.includesExact())
|
||||
{
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(locale.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
}
|
||||
|
||||
if (mlAnalaysisMode.includesContaining())
|
||||
{
|
||||
if (v)
|
||||
String localeString = toAdd.toString();
|
||||
if(localeString.length() == 0)
|
||||
{
|
||||
Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), "");
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(noVarient.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
|
||||
Locale noCountry = new Locale(locale.getLanguage(), "", "");
|
||||
result = new StringBuffer();
|
||||
result.append("{").append(noCountry.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
prefixes.add("");
|
||||
}
|
||||
if (c)
|
||||
else
|
||||
{
|
||||
Locale noCountry = new Locale(locale.getLanguage(), "", "");
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(noCountry.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
}
|
||||
}
|
||||
|
||||
if (mlAnalaysisMode.includesContained())
|
||||
{
|
||||
// variants have no contained locales
|
||||
if (!v)
|
||||
{
|
||||
if (!c)
|
||||
{
|
||||
if (!l)
|
||||
{
|
||||
// All
|
||||
for (Locale toAdd : Locale.getAvailableLocales())
|
||||
{
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(toAdd.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// All that match language
|
||||
for (Locale toAdd : Locale.getAvailableLocales())
|
||||
{
|
||||
if (locale.getLanguage().equals(toAdd.getLanguage()))
|
||||
{
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(toAdd.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// All that match language and country
|
||||
for (Locale toAdd : Locale.getAvailableLocales())
|
||||
{
|
||||
if ((locale.getLanguage().equals(toAdd.getLanguage()))
|
||||
&& (locale.getCountry().equals(toAdd.getCountry())))
|
||||
{
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append("{").append(toAdd.toString()).append("}");
|
||||
prefixes.add(result.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
StringBuilder builder = new StringBuilder(16);
|
||||
builder.append("{").append(localeString).append("}");
|
||||
prefixes.add(builder.toString());
|
||||
}
|
||||
}
|
||||
if(s_logger.isDebugEnabled())
|
||||
|
Reference in New Issue
Block a user