mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Fix fuzzy, prefix and wildcard queries for MLText typed attributes
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5723 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -219,8 +219,6 @@ public class ADMLuceneTest extends TestCase
|
||||
|
||||
testTX = transactionService.getUserTransaction();
|
||||
testTX.begin();
|
||||
|
||||
|
||||
this.authenticationComponent.setSystemUserAsCurrentUser();
|
||||
|
||||
// load in the test model
|
||||
@@ -316,7 +314,7 @@ public class ADMLuceneTest extends TestCase
|
||||
// - and it has to go in type d:any as d:content is not allowed to be multivalued
|
||||
|
||||
ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
|
||||
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16", Locale.CHINESE ));
|
||||
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16", Locale.UK ));
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
|
||||
|
||||
|
||||
@@ -381,7 +379,7 @@ public class ADMLuceneTest extends TestCase
|
||||
getOrderProperties()).getChildRef();
|
||||
|
||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||
properties.put(ContentModel.PROP_CONTENT, new ContentData(null, "text/plain", 0L, "UTF-8", Locale.CHINESE ));
|
||||
properties.put(ContentModel.PROP_CONTENT, new ContentData(null, "text/plain", 0L, "UTF-8", Locale.UK ));
|
||||
n14 = nodeService.createNode(n13, ASSOC_TYPE_QNAME, QName.createQName("{namespace}fourteen"),
|
||||
ContentModel.TYPE_CONTENT, properties).getChildRef();
|
||||
// nodeService.addAspect(n14, DictionaryBootstrap.ASPECT_QNAME_CONTENT,
|
||||
@@ -2612,6 +2610,61 @@ public class ADMLuceneTest extends TestCase
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabba*");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":ca*ge");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":*bage");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabage~");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":*b?ag?");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cho*");
|
||||
sp.setMlAnalaysisMode(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES);
|
||||
sp.addLocale(new Locale("fr"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// multivalued content in type d:any
|
||||
// This should not be indexed as we cannot know what to do with content here.
|
||||
|
||||
@@ -2756,6 +2809,31 @@ public class ADMLuceneTest extends TestCase
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// Test wildcards in text
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:laz*", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:laz~", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:la?y", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:?a?y", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*azy", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*az*", null, null);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// Accents
|
||||
|
||||
|
@@ -37,6 +37,8 @@ import java.util.Set;
|
||||
import org.alfresco.i18n.I18NUtil;
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.alfresco.repo.search.SearcherException;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.MLTokenDuplicator;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
|
||||
import org.alfresco.service.cmr.dictionary.AspectDefinition;
|
||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||
@@ -48,6 +50,7 @@ import org.alfresco.service.namespace.NamespacePrefixResolver;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
@@ -408,7 +411,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
}
|
||||
else if (field.startsWith("@"))
|
||||
{
|
||||
Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
|
||||
Query query = attributeQueryBuilder(field, queryText, new FieldQuery(), true);
|
||||
return query;
|
||||
}
|
||||
else if (field.equals("ALL"))
|
||||
@@ -636,7 +639,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
if (field.startsWith("@"))
|
||||
{
|
||||
return attributeQueryBuilder(field, termStr, new PrefixQuery());
|
||||
return attributeQueryBuilder(field, termStr, new PrefixQuery(), false);
|
||||
}
|
||||
else if (field.equals("TEXT"))
|
||||
{
|
||||
@@ -668,7 +671,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
if (field.startsWith("@"))
|
||||
{
|
||||
return attributeQueryBuilder(field, termStr, new WildcardQuery());
|
||||
return attributeQueryBuilder(field, termStr, new WildcardQuery(), false);
|
||||
}
|
||||
|
||||
else if (field.equals("TEXT"))
|
||||
@@ -701,7 +704,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
{
|
||||
if (field.startsWith("@"))
|
||||
{
|
||||
return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity));
|
||||
return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity), false);
|
||||
}
|
||||
|
||||
else if (field.equals("TEXT"))
|
||||
@@ -798,7 +801,7 @@ public class LuceneQueryParser extends QueryParser
|
||||
}
|
||||
}
|
||||
|
||||
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder) throws ParseException
|
||||
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder, boolean isAnalysed) throws ParseException
|
||||
{
|
||||
// Expand prefixes
|
||||
|
||||
@@ -851,17 +854,49 @@ public class LuceneQueryParser extends QueryParser
|
||||
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
|
||||
.getLocale()) : locales))
|
||||
{
|
||||
StringBuilder builder = new StringBuilder(queryText.length() + 10);
|
||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
|
||||
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
|
||||
if (subQuery != null)
|
||||
|
||||
if(isAnalysed)
|
||||
{
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
StringBuilder builder = new StringBuilder(queryText.length() + 10);
|
||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
|
||||
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
|
||||
if (subQuery != null)
|
||||
{
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
}
|
||||
else
|
||||
{
|
||||
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
||||
// analyse ml text
|
||||
MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
|
||||
.getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
|
||||
// Do the analysis here
|
||||
VerbatimAnalyser vba = new VerbatimAnalyser(false);
|
||||
MLTokenDuplicator duplicator = new MLTokenDuplicator(vba.tokenStream(field, new StringReader(queryText)), locale, null, analysisMode);
|
||||
Token t;
|
||||
try
|
||||
{
|
||||
while( (t = duplicator.next()) != null)
|
||||
{
|
||||
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, t.termText());
|
||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
|
||||
}
|
||||
if(booleanQuery.getClauses().length == 0)
|
||||
{
|
||||
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
return booleanQuery;
|
||||
}
|
||||
|
Reference in New Issue
Block a user