mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Fix fuzzy, prefix and wildcard queries for MLText typed attributes
git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5723 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -219,8 +219,6 @@ public class ADMLuceneTest extends TestCase
|
|||||||
|
|
||||||
testTX = transactionService.getUserTransaction();
|
testTX = transactionService.getUserTransaction();
|
||||||
testTX.begin();
|
testTX.begin();
|
||||||
|
|
||||||
|
|
||||||
this.authenticationComponent.setSystemUserAsCurrentUser();
|
this.authenticationComponent.setSystemUserAsCurrentUser();
|
||||||
|
|
||||||
// load in the test model
|
// load in the test model
|
||||||
@@ -316,7 +314,7 @@ public class ADMLuceneTest extends TestCase
|
|||||||
// - and it has to go in type d:any as d:content is not allowed to be multivalued
|
// - and it has to go in type d:any as d:content is not allowed to be multivalued
|
||||||
|
|
||||||
ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
|
ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
|
||||||
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16", Locale.CHINESE ));
|
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16", Locale.UK ));
|
||||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
|
testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
|
||||||
|
|
||||||
|
|
||||||
@@ -381,7 +379,7 @@ public class ADMLuceneTest extends TestCase
|
|||||||
getOrderProperties()).getChildRef();
|
getOrderProperties()).getChildRef();
|
||||||
|
|
||||||
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
|
||||||
properties.put(ContentModel.PROP_CONTENT, new ContentData(null, "text/plain", 0L, "UTF-8", Locale.CHINESE ));
|
properties.put(ContentModel.PROP_CONTENT, new ContentData(null, "text/plain", 0L, "UTF-8", Locale.UK ));
|
||||||
n14 = nodeService.createNode(n13, ASSOC_TYPE_QNAME, QName.createQName("{namespace}fourteen"),
|
n14 = nodeService.createNode(n13, ASSOC_TYPE_QNAME, QName.createQName("{namespace}fourteen"),
|
||||||
ContentModel.TYPE_CONTENT, properties).getChildRef();
|
ContentModel.TYPE_CONTENT, properties).getChildRef();
|
||||||
// nodeService.addAspect(n14, DictionaryBootstrap.ASPECT_QNAME_CONTENT,
|
// nodeService.addAspect(n14, DictionaryBootstrap.ASPECT_QNAME_CONTENT,
|
||||||
@@ -2612,6 +2610,61 @@ public class ADMLuceneTest extends TestCase
|
|||||||
assertEquals(1, results.length());
|
assertEquals(1, results.length());
|
||||||
results.close();
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabba*");
|
||||||
|
sp.addLocale(new Locale("en"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":ca*ge");
|
||||||
|
sp.addLocale(new Locale("en"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":*bage");
|
||||||
|
sp.addLocale(new Locale("en"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabage~");
|
||||||
|
sp.addLocale(new Locale("en"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":*b?ag?");
|
||||||
|
sp.addLocale(new Locale("en"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
sp = new SearchParameters();
|
||||||
|
sp.addStore(rootNodeRef.getStoreRef());
|
||||||
|
sp.setLanguage("lucene");
|
||||||
|
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cho*");
|
||||||
|
sp.setMlAnalaysisMode(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES);
|
||||||
|
sp.addLocale(new Locale("fr"));
|
||||||
|
results = searcher.query(sp);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
// multivalued content in type d:any
|
// multivalued content in type d:any
|
||||||
// This should not be indexed as we can not know what to do with content here.
|
// This should not be indexed as we can not know what to do with content here.
|
||||||
|
|
||||||
@@ -2755,7 +2808,32 @@ public class ADMLuceneTest extends TestCase
|
|||||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over a lazy\"", null, null);
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over a lazy\"", null, null);
|
||||||
assertEquals(1, results.length());
|
assertEquals(1, results.length());
|
||||||
results.close();
|
results.close();
|
||||||
|
|
||||||
|
// Test wildcards in text
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:laz*", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:laz~", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:la?y", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:?a?y", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*azy", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
|
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:*az*", null, null);
|
||||||
|
assertEquals(1, results.length());
|
||||||
|
results.close();
|
||||||
|
|
||||||
// Accents
|
// Accents
|
||||||
|
|
||||||
|
@@ -37,6 +37,8 @@ import java.util.Set;
|
|||||||
import org.alfresco.i18n.I18NUtil;
|
import org.alfresco.i18n.I18NUtil;
|
||||||
import org.alfresco.repo.search.MLAnalysisMode;
|
import org.alfresco.repo.search.MLAnalysisMode;
|
||||||
import org.alfresco.repo.search.SearcherException;
|
import org.alfresco.repo.search.SearcherException;
|
||||||
|
import org.alfresco.repo.search.impl.lucene.analysis.MLTokenDuplicator;
|
||||||
|
import org.alfresco.repo.search.impl.lucene.analysis.VerbatimAnalyser;
|
||||||
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
|
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
|
||||||
import org.alfresco.service.cmr.dictionary.AspectDefinition;
|
import org.alfresco.service.cmr.dictionary.AspectDefinition;
|
||||||
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
|
||||||
@@ -48,6 +50,7 @@ import org.alfresco.service.namespace.NamespacePrefixResolver;
|
|||||||
import org.alfresco.service.namespace.QName;
|
import org.alfresco.service.namespace.QName;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
@@ -408,7 +411,7 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
}
|
}
|
||||||
else if (field.startsWith("@"))
|
else if (field.startsWith("@"))
|
||||||
{
|
{
|
||||||
Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
|
Query query = attributeQueryBuilder(field, queryText, new FieldQuery(), true);
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
else if (field.equals("ALL"))
|
else if (field.equals("ALL"))
|
||||||
@@ -636,7 +639,7 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
{
|
{
|
||||||
if (field.startsWith("@"))
|
if (field.startsWith("@"))
|
||||||
{
|
{
|
||||||
return attributeQueryBuilder(field, termStr, new PrefixQuery());
|
return attributeQueryBuilder(field, termStr, new PrefixQuery(), false);
|
||||||
}
|
}
|
||||||
else if (field.equals("TEXT"))
|
else if (field.equals("TEXT"))
|
||||||
{
|
{
|
||||||
@@ -668,7 +671,7 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
{
|
{
|
||||||
if (field.startsWith("@"))
|
if (field.startsWith("@"))
|
||||||
{
|
{
|
||||||
return attributeQueryBuilder(field, termStr, new WildcardQuery());
|
return attributeQueryBuilder(field, termStr, new WildcardQuery(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (field.equals("TEXT"))
|
else if (field.equals("TEXT"))
|
||||||
@@ -701,7 +704,7 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
{
|
{
|
||||||
if (field.startsWith("@"))
|
if (field.startsWith("@"))
|
||||||
{
|
{
|
||||||
return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity));
|
return attributeQueryBuilder(field, termStr, new FuzzyQuery(minSimilarity), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (field.equals("TEXT"))
|
else if (field.equals("TEXT"))
|
||||||
@@ -798,7 +801,7 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder) throws ParseException
|
private Query attributeQueryBuilder(String field, String queryText, SubQuery subQueryBuilder, boolean isAnalysed) throws ParseException
|
||||||
{
|
{
|
||||||
// Expand prefixes
|
// Expand prefixes
|
||||||
|
|
||||||
@@ -851,17 +854,49 @@ public class LuceneQueryParser extends QueryParser
|
|||||||
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
|
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
|
||||||
.getLocale()) : locales))
|
.getLocale()) : locales))
|
||||||
{
|
{
|
||||||
StringBuilder builder = new StringBuilder(queryText.length() + 10);
|
|
||||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
|
if(isAnalysed)
|
||||||
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
|
|
||||||
if (subQuery != null)
|
|
||||||
{
|
{
|
||||||
booleanQuery.add(subQuery, Occur.SHOULD);
|
StringBuilder builder = new StringBuilder(queryText.length() + 10);
|
||||||
|
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
|
||||||
|
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
|
||||||
|
if (subQuery != null)
|
||||||
|
{
|
||||||
|
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
// analyse ml text
|
||||||
|
MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
|
||||||
|
.getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
|
||||||
|
// Do the analysis here
|
||||||
|
VerbatimAnalyser vba = new VerbatimAnalyser(false);
|
||||||
|
MLTokenDuplicator duplicator = new MLTokenDuplicator(vba.tokenStream(field, new StringReader(queryText)), locale, null, analysisMode);
|
||||||
|
Token t;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
while( (t = duplicator.next()) != null)
|
||||||
|
{
|
||||||
|
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, t.termText());
|
||||||
|
booleanQuery.add(subQuery, Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (IOException e)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
if(booleanQuery.getClauses().length == 0)
|
||||||
|
{
|
||||||
|
booleanQuery.add(new TermQuery(new Term("NO_TOKENS", "__")), Occur.SHOULD);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return booleanQuery;
|
return booleanQuery;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user