Updates for locale-based searching and indexing

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4737 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2007-01-05 13:07:30 +00:00
parent f2c6f03164
commit 736a7f1ed6
9 changed files with 1412 additions and 139 deletions

View File

@@ -384,6 +384,12 @@
</property> </property>
<property name="lockPollInterval"> <property name="lockPollInterval">
<value>${lucene.lock.poll.interval}</value> <value>${lucene.lock.poll.interval}</value>
</property>
<property name="defaultMLIndexAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property>
<property name="defaultMLSearchAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property> </property>
</bean> </bean>

View File

@@ -0,0 +1,71 @@
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
<!-- This config will index tokens for all language searches and base language searches -->
<!-- By default the search will only find ML docs and content in the correct language -->
<!-- ONLY USE THIS FOR CLEAN INSTALLATIONS UNTIL THERE IS A FIX TO SET LOCALE FOR ALL CONTENT -->
<!-- - or old content will be indexed with no locale and you will not find it ... -->
<!-- See MLAnalysisMode for all the available options -->
<!-- Defines the luceneIndexerAndSearcherFactory bean: indexing uses EXACT_LANGUAGE_AND_ALL, searching uses EXACT_LANGUAGE -->
<bean id="luceneIndexerAndSearcherFactory"
class="org.alfresco.repo.search.impl.lucene.LuceneIndexerAndSearcherFactory2">
<!-- Collaborating beans, referenced by bean id -->
<property name="nodeService">
<ref bean="mlAwareNodeService" />
</property>
<property name="dictionaryService">
<ref bean="dictionaryService" />
</property>
<property name="nameSpaceService">
<ref bean="namespaceService" />
</property>
<property name="luceneFullTextSearchIndexer">
<ref bean="LuceneFullTextSearchIndexer" />
</property>
<!-- Values written as ${...} are placeholders; presumably resolved from the repository properties (confirm against the property configurer in use) -->
<property name="indexRootLocation">
<value>${dir.indexes}</value>
</property>
<property name="contentService">
<ref bean="contentService" />
</property>
<property name="queryRegister">
<ref bean="queryRegisterComponent" />
</property>
<property name="maxAtomicTransformationTime">
<value>${lucene.maxAtomicTransformationTime}</value> <!-- milliseconds allowed for inline text transformation -->
</property>
<property name="queryMaxClauses">
<value>${lucene.query.maxClauses}</value>
</property>
<property name="indexerBatchSize">
<value>${lucene.indexer.batchSize}</value>
</property>
<!-- Lock location and lock timing settings -->
<property name="lockDirectory">
<value>${dir.indexes.lock}</value>
</property>
<property name="indexerMaxFieldLength">
<value>${lucene.indexer.maxFieldLength}</value>
</property>
<property name="writeLockTimeout">
<value>${lucene.write.lock.timeout}</value>
</property>
<property name="commitLockTimeout">
<value>${lucene.commit.lock.timeout}</value>
</property>
<property name="lockPollInterval">
<value>${lucene.lock.poll.interval}</value>
</property>
<!-- Index mode: MLAnalysisMode.getLocales expands a locale such as en_GB to the language only locale (en) plus the all locales entry -->
<property name="defaultMLIndexAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property>
<!-- Search mode: MLAnalysisMode.getLocales expands a locale such as en_GB to the language only locale (en); the all locales entry is not added -->
<!-- NOTE(review): how the factory applies these two modes is not visible here; confirm in LuceneIndexerAndSearcherFactory2 -->
<property name="defaultMLSearchAnalysisMode">
<value>EXACT_LANGUAGE</value>
</property>
</bean>
</beans>

View File

@@ -1,17 +1,21 @@
package org.alfresco.repo.search; package org.alfresco.repo.search;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.error.AlfrescoRuntimeException;
/** /**
* Enum to specify how multi-lingual properties should be treate for indexing and search. * Enum to specify how multi-lingual properties should be treate for indexing and search. Note that locale new Locale
* ("", "", "") is used to indicate all locales.
* *
* @author andyh * @author andyh
*
*/ */
public enum MLAnalysisMode public enum MLAnalysisMode
{ {
/** /**
* Only exact locale is used. * Only the exact locale is used.
*/ */
LOCALE_ONLY LOCALE_ONLY
{ {
@@ -19,23 +23,46 @@ public enum MLAnalysisMode
{ {
return false; return false;
} }
public boolean includesContained() public boolean includesContained()
{ {
return false; return false;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return false; return false;
} }
public boolean includesExact() public boolean includesExact()
{ {
return true; return true;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
}, },
/** /**
* Only the exact locale and no local === all lnaguages * Only the exact locale and no locale === locale + all languages
*/ */
LOCALE_AND_ALL LOCALE_AND_ALL
{ {
@@ -43,23 +70,46 @@ public enum MLAnalysisMode
{ {
return true; return true;
} }
public boolean includesContained() public boolean includesContained()
{ {
return false; return false;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return false; return false;
} }
public boolean includesExact() public boolean includesExact()
{ {
return true; return true;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
}, },
/** /**
* Expand the locale to include all the locales that contain it. * Expand the locale to include all the locales that contain it. So "en_GB" would be "en_GB", "en", but not all
* en_GB would be en_GB, en, but not all languages * languages "".
*/ */
LOCALE_AND_ALL_CONTAINING_LOCALES LOCALE_AND_ALL_CONTAINING_LOCALES
{ {
@@ -67,23 +117,45 @@ public enum MLAnalysisMode
{ {
return false; return false;
} }
public boolean includesContained() public boolean includesContained()
{ {
return false; return false;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return true; return true;
} }
public boolean includesExact() public boolean includesExact()
{ {
return true; return true;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
}, },
/** /**
* Expand the locale to include all the locales that contain it. * Expand the locale to include all the locales that contain it. "en_GB" would be "en_GB", "en", and all "".
* en_GB would be en_GB, en, and all.
*/ */
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL
{ {
@@ -91,23 +163,45 @@ public enum MLAnalysisMode
{ {
return true; return true;
} }
public boolean includesContained() public boolean includesContained()
{ {
return false; return false;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return true; return true;
} }
public boolean includesExact() public boolean includesExact()
{ {
return true; return true;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
}, },
/** /**
* Expand to all the locales that are contained by this. * Expand to all the locales that are contained by this. "en" would expand to "en", "en_GB", "en_US", ....
* en would expand to en, en_GB, en_US, ....
*/ */
LOCALE_AND_ALL_CONTAINED_LOCALES LOCALE_AND_ALL_CONTAINED_LOCALES
{ {
@@ -115,22 +209,45 @@ public enum MLAnalysisMode
{ {
return false; return false;
} }
public boolean includesContained() public boolean includesContained()
{ {
return true; return true;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return false; return false;
} }
public boolean includesExact() public boolean includesExact()
{ {
return true; return true;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
}, },
/** /**
* No prefix only * Just the all locale, "", === new Locale("", "", "")
*/ */
ALL_ONLY ALL_ONLY
{ {
@@ -138,38 +255,652 @@ public enum MLAnalysisMode
{ {
return true; return true;
} }
public boolean includesContained() public boolean includesContained()
{ {
return false; return false;
} }
public boolean includesContaining() public boolean includesContaining()
{ {
return false; return false;
} }
public boolean includesExact() public boolean includesExact()
{ {
return false; return false;
} }
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
 * Language-level expansion only: the country and variant of the supplied locale are ignored and
 * {@link #getLocales(MLAnalysisMode, Locale, boolean)} yields the locales that share its language.
 * The all-locales entry is not added.
 */
ALL_LANGUAGES
{
    // The single flag that is switched on for this mode.
    @Override
    public boolean includesAllLanguageMatches()
    {
        return true;
    }

    @Override
    public boolean includesAll()
    {
        return false;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return false;
    }
},
/**
 * Language-level expansion (every locale sharing the language of the supplied locale) together with
 * the all-locales entry, <code>new Locale("", "", "")</code>.
 */
ALL_LANGUAGES_AND_ALL
{
    // The two flags that are switched on for this mode.
    @Override
    public boolean includesAll()
    {
        return true;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return true;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return false;
    }
},
/**
 * Exact language match: the supplied locale is reduced to its language-only form (for example
 * "en_GB" becomes "en"). Country and variant specific locales of that language are not included,
 * and neither is the all-locales entry.
 */
EXACT_LANGUAGE
{
    // The single flag that is switched on for this mode.
    @Override
    public boolean includesExactLanguageMatch()
    {
        return true;
    }

    @Override
    public boolean includesAll()
    {
        return false;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return false;
    }
},
/**
 * Exact language match (the language-only form of the supplied locale, without the country or
 * variant specific locales of that language) together with the all-locales entry,
 * <code>new Locale("", "", "")</code>.
 */
EXACT_LANGUAGE_AND_ALL
{
    // The two flags that are switched on for this mode.
    @Override
    public boolean includesAll()
    {
        return true;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return true;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return false;
    }
},
/**
 * Exact country match: the supplied locale is reduced to its language and country (variants of the
 * country are not included). When the supplied locale has no country,
 * {@link #getLocales(MLAnalysisMode, Locale, boolean)} falls back to the locales matching its language.
 * <p>
 * Note: the constant name is misspelt ("COUNRTY"). It is kept as is because modes are resolved by
 * name in {@link #getMLAnalysisMode(String)}, so renaming it would break existing references.
 */
EXACT_COUNRTY
{
    // The single flag that is switched on for this mode.
    @Override
    public boolean includesExactCountryMatch()
    {
        return true;
    }

    @Override
    public boolean includesAll()
    {
        return false;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }
},
/**
 * Exact country match (language and country of the supplied locale, without the variants of that
 * country) together with the all-locales entry, <code>new Locale("", "", "")</code>.
 */
EXACT_COUNTRY_AND_ALL
{
    // The two flags that are switched on for this mode.
    @Override
    public boolean includesAll()
    {
        return true;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return true;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return false;
    }

    @Override
    public boolean includesAllCountryMatches()
    {
        return false;
    }
},
/**
 * All country matches: every locale that shares the language and, when one is given, the country of
 * the supplied locale. The all-locales entry is not added.
 */
ALL_COUNTRIES
{
    // The single flag that is switched on for this mode.
    @Override
    public boolean includesAllCountryMatches()
    {
        return true;
    }

    @Override
    public boolean includesAll()
    {
        return false;
    }

    @Override
    public boolean includesExact()
    {
        return false;
    }

    @Override
    public boolean includesContaining()
    {
        return false;
    }

    @Override
    public boolean includesContained()
    {
        return false;
    }

    @Override
    public boolean includesAllLanguageMatches()
    {
        return false;
    }

    @Override
    public boolean includesExactLanguageMatch()
    {
        return false;
    }

    @Override
    public boolean includesExactCountryMatch()
    {
        return false;
    }
},
/**
* All countries and ALL
*/
ALL_COUNTRIES_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return true;
}
public boolean includesExactCountryMatch()
{
return false;
}
}; };
public static MLAnalysisMode getMLAnalysisMode(String mode) public static MLAnalysisMode getMLAnalysisMode(String mode)
{ {
for(MLAnalysisMode test : MLAnalysisMode.values()) for (MLAnalysisMode test : MLAnalysisMode.values())
{ {
if(test.toString().equalsIgnoreCase(mode)) if (test.toString().equalsIgnoreCase(mode))
{ {
return test; return test;
} }
} }
throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode); throw new AlfrescoRuntimeException("Unknown ML Analysis mode " + mode);
} }
public abstract boolean includesAll(); public abstract boolean includesAll();
public abstract boolean includesContained(); public abstract boolean includesContained();
public abstract boolean includesContaining(); public abstract boolean includesContaining();
public abstract boolean includesExact(); public abstract boolean includesExact();
/**
 * @return true if the expansion should cover every locale sharing the language of the supplied
 *         locale (with wildcards: the language-only locale plus a "*" country entry)
 */
public abstract boolean includesAllLanguageMatches();
/**
 * @return true if the expansion should contain the language-only form of the supplied locale
 */
public abstract boolean includesExactLanguageMatch();
/**
 * @return true if the expansion should cover every locale sharing the language and, when present,
 *         the country of the supplied locale
 */
public abstract boolean includesAllCountryMatches();
/**
 * @return true if the expansion should contain the language + country form of the supplied locale;
 *         when the supplied locale has no country the language matches are used instead
 */
public abstract boolean includesExactCountryMatch();
/**
 * Expands a locale into the set of locales selected by an analysis mode.
 * <p>
 * Each <code>includes...</code> flag of the mode contributes independently to the result:
 * <ul>
 * <li>all: the all-locales entry <code>new Locale("", "", "")</code>, plus "*" when using wildcards</li>
 * <li>exact: the supplied locale itself</li>
 * <li>containing: the supplied locale with its variant, and then its country, stripped</li>
 * <li>contained: locales at the next level of detail below the supplied locale (nothing for a
 * locale that already has a variant)</li>
 * <li>all language matches: locales sharing the language</li>
 * <li>exact language match: the language-only locale</li>
 * <li>all country matches: locales sharing the language and (if set) the country</li>
 * <li>exact country match: the language + country locale, or the language matches when the
 * supplied locale has no country</li>
 * </ul>
 *
 * @param mlAnalaysisMode the mode that decides which expansions apply
 * @param locale the locale to expand
 * @param withWildcards if true, open-ended expansions are expressed with a "*" placeholder in the
 *            next locale component; if false they are enumerated from
 *            {@link Locale#getAvailableLocales()}
 * @return the expanded locales (duplicates are collapsed)
 */
public static Collection<Locale> getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards)
{
    final String language = locale.getLanguage();
    final String country = locale.getCountry();
    final boolean languageSet = language.length() != 0;
    final boolean countrySet = country.length() != 0;
    final boolean variantSet = locale.getVariant().length() != 0;

    final HashSet<Locale> expansion = new HashSet<Locale>();

    if (mlAnalaysisMode.includesAll())
    {
        expansion.add(new Locale("", "", ""));
        if (withWildcards)
        {
            expansion.add(new Locale("*", "", ""));
        }
    }

    if (mlAnalaysisMode.includesExact())
    {
        expansion.add(locale);
    }

    if (mlAnalaysisMode.includesContaining())
    {
        if (variantSet)
        {
            // strip the variant
            expansion.add(new Locale(language, country, ""));
        }
        if (variantSet || countrySet)
        {
            // strip the country as well
            expansion.add(new Locale(language, "", ""));
        }
    }

    // A locale with a variant contains nothing further
    if (mlAnalaysisMode.includesContained() && !variantSet)
    {
        if (countrySet || languageSet)
        {
            // Everything below language (+ country when present)
            addMatchingLocales(expansion, language, country, withWildcards);
        }
        else if (withWildcards)
        {
            // The empty locale contains everything
            expansion.add(new Locale("", "", ""));
            expansion.add(new Locale("*", "", ""));
        }
        else
        {
            // Empty language and country match every available locale
            addMatchingLocales(expansion, "", "", false);
        }
    }

    if (mlAnalaysisMode.includesAllLanguageMatches())
    {
        // The country is deliberately ignored here
        addMatchingLocales(expansion, language, "", withWildcards);
    }

    if (mlAnalaysisMode.includesExactLanguageMatch())
    {
        // Same result with or without wildcards
        expansion.add(new Locale(language, "", ""));
    }

    if (mlAnalaysisMode.includesAllCountryMatches())
    {
        addMatchingLocales(expansion, language, country, withWildcards);
    }

    if (mlAnalaysisMode.includesExactCountryMatch())
    {
        if (countrySet)
        {
            expansion.add(new Locale(language, country, ""));
        }
        else
        {
            // No country to match exactly: fall back to the language matches
            addMatchingLocales(expansion, language, "", withWildcards);
        }
    }

    return expansion;
}

/**
 * Adds the locales that match a language and, when non-empty, a country.
 * <p>
 * With wildcards the match is expressed as two entries: the language/country locale itself and the
 * same locale with "*" in the first unspecified position (the country if none was given, otherwise
 * the variant). Without wildcards the locales available to the JVM are filtered; an empty language
 * or country matches anything.
 */
private static void addMatchingLocales(Collection<Locale> target, String language, String country,
        boolean withWildcards)
{
    final boolean anyLanguage = language.length() == 0;
    final boolean anyCountry = country.length() == 0;

    if (withWildcards)
    {
        target.add(new Locale(language, country, ""));
        if (anyCountry)
        {
            target.add(new Locale(language, "*", ""));
        }
        else
        {
            target.add(new Locale(language, country, "*"));
        }
        return;
    }

    for (Locale candidate : Locale.getAvailableLocales())
    {
        boolean languageMatches = anyLanguage || language.equals(candidate.getLanguage());
        boolean countryMatches = anyCountry || country.equals(candidate.getCountry());
        if (languageMatches && countryMatches)
        {
            target.add(candidate);
        }
    }
}
} }

View File

@@ -0,0 +1,409 @@
package org.alfresco.repo.search;
import java.util.HashSet;
import java.util.Locale;
import junit.framework.TestCase;
public class MLAnaysisModeExpansionTest extends TestCase
{
public MLAnaysisModeExpansionTest()
{
super();
}
/**
 * Creates the test case, passing the argument straight through to the superclass constructor.
 *
 * @param arg0 forwarded unchanged to the superclass
 */
public MLAnaysisModeExpansionTest(String arg0)
{
super(arg0);
}
/**
 * LOCALE_ONLY without wildcards: the result is just the locale that was passed in.
 */
public void testIdentity()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, uk, false));

    assertEquals(1, expanded.size());
    assertTrue(expanded.contains(uk));
}
/**
 * LOCALE_AND_ALL without wildcards: the locale itself plus the all-locales entry.
 */
public void testIdentityAndAll()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, uk, false));

    assertEquals(2, expanded.size());
    assertTrue(expanded.contains(uk));
    assertTrue(expanded.contains(new Locale("", "", "")));
}
/**
 * ALL_ONLY without wildcards: only the all-locales entry, whatever locale is supplied.
 */
public void testAll()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, uk, false));

    assertEquals(1, expanded.size());
    assertTrue(expanded.contains(new Locale("", "", "")));
}
/**
 * LOCALE_AND_ALL_CONTAINING_LOCALES without wildcards: "en_GB" expands to "en_GB" and "en".
 */
public void testContaining()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, uk, false));

    assertEquals(2, expanded.size());
    assertTrue(expanded.contains(new Locale("en", "", "")));
    assertTrue(expanded.contains(new Locale("en", "GB", "")));
}
/**
 * LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL without wildcards: "en_GB" expands to "en_GB", "en"
 * and the all-locales entry.
 */
public void testContainingAndAll()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, uk, false));

    assertEquals(3, expanded.size());
    assertTrue(expanded.contains(new Locale("", "", "")));
    assertTrue(expanded.contains(new Locale("en", "", "")));
    assertTrue(expanded.contains(new Locale("en", "GB", "")));
}
/**
 * Collects the locales the running JVM reports for a language and, when <code>country</code> is
 * non-empty, a country.
 * <p>
 * The non-wildcard expansions enumerate {@link Locale#getAvailableLocales()}, whose content differs
 * between Java runtimes, so the expected values in these tests are derived from the same source
 * instead of being hard-coded for one particular JRE.
 */
private static HashSet<Locale> availableLocalesMatching(String language, String country)
{
    HashSet<Locale> matches = new HashSet<Locale>();
    for (Locale candidate : Locale.getAvailableLocales())
    {
        boolean countryMatches = country.length() == 0 || country.equals(candidate.getCountry());
        if (language.equals(candidate.getLanguage()) && countryMatches)
        {
            matches.add(candidate);
        }
    }
    return matches;
}

/**
 * Runs the expansion under test without wildcards and copies the result into a set.
 */
private static HashSet<Locale> expandWithoutWildcards(MLAnalysisMode mode, Locale locale)
{
    return new HashSet<Locale>(MLAnalysisMode.getLocales(mode, locale, false));
}

/**
 * Builds a set from the given locales.
 */
private static HashSet<Locale> localeSetOf(Locale... members)
{
    HashSet<Locale> set = new HashSet<Locale>();
    for (Locale member : members)
    {
        set.add(member);
    }
    return set;
}

/**
 * Shared baseline: LOCALE_AND_ALL_CONTAINED_LOCALES for "en_GB" gives "en_GB" itself plus whatever
 * "en_GB" variants the JVM knows about.
 */
private void checkContainedLocalesOfBritishEnglish()
{
    Locale uk = new Locale("en", "GB", "");
    HashSet<Locale> expected = availableLocalesMatching("en", "GB");
    expected.add(uk);

    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, uk);
    assertEquals(expected, locales);
    assertTrue(locales.contains(uk));
}

/**
 * LOCALE_AND_ALL_CONTAINED_LOCALES for "en": "en" itself plus every English locale available to the JVM.
 */
public void testContained()
{
    Locale english = new Locale("en", "", "");
    HashSet<Locale> expected = availableLocalesMatching("en", "");
    expected.add(english);

    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, english);
    assertEquals(expected, locales);
    assertTrue(locales.contains(english));
    // Locale.US is the one country locale every JRE is documented to provide
    assertTrue(locales.contains(Locale.US));
}

/**
 * ALL_LANGUAGES and ALL_LANGUAGES_AND_ALL ignore the country of the supplied locale.
 */
public void testLang()
{
    Locale uk = new Locale("en", "GB", "");
    checkContainedLocalesOfBritishEnglish();

    HashSet<Locale> allEnglish = availableLocalesMatching("en", "");
    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.ALL_LANGUAGES, uk);
    assertEquals(allEnglish, locales);
    assertTrue(locales.contains(Locale.US));
    assertFalse(locales.contains(new Locale("", "", "")));

    HashSet<Locale> allEnglishAndAll = new HashSet<Locale>(allEnglish);
    allEnglishAndAll.add(new Locale("", "", ""));
    locales = expandWithoutWildcards(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, uk);
    assertEquals(allEnglishAndAll, locales);
    assertTrue(locales.contains(Locale.US));
    assertTrue(locales.contains(new Locale("", "", "")));
}

/**
 * EXACT_LANGUAGE and EXACT_LANGUAGE_AND_ALL reduce "en_GB" to "en" (plus the all-locales entry).
 */
public void testExactLang()
{
    Locale uk = new Locale("en", "GB", "");
    checkContainedLocalesOfBritishEnglish();

    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.EXACT_LANGUAGE, uk);
    assertEquals(localeSetOf(new Locale("en", "", "")), locales);

    locales = expandWithoutWildcards(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, uk);
    assertEquals(localeSetOf(new Locale("", "", ""), new Locale("en", "", "")), locales);
}

/**
 * ALL_COUNTRIES and ALL_COUNTRIES_AND_ALL match on language and, when given, country.
 */
public void testCountry()
{
    Locale english = new Locale("en", "", "");
    Locale uk = new Locale("en", "GB", "");
    Locale all = new Locale("", "", "");
    checkContainedLocalesOfBritishEnglish();

    HashSet<Locale> allEnglish = availableLocalesMatching("en", "");
    HashSet<Locale> allBritish = availableLocalesMatching("en", "GB");

    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.ALL_COUNTRIES, english);
    assertEquals(allEnglish, locales);
    assertTrue(locales.contains(Locale.US));

    locales = expandWithoutWildcards(MLAnalysisMode.ALL_COUNTRIES, uk);
    assertEquals(allBritish, locales);
    assertFalse(locales.contains(Locale.US));

    HashSet<Locale> allEnglishAndAll = new HashSet<Locale>(allEnglish);
    allEnglishAndAll.add(all);
    locales = expandWithoutWildcards(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, english);
    assertEquals(allEnglishAndAll, locales);
    assertTrue(locales.contains(all));

    HashSet<Locale> allBritishAndAll = new HashSet<Locale>(allBritish);
    allBritishAndAll.add(all);
    locales = expandWithoutWildcards(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, uk);
    assertEquals(allBritishAndAll, locales);
    assertTrue(locales.contains(all));
}

/**
 * EXACT_COUNRTY and EXACT_COUNTRY_AND_ALL: a locale with a country maps to exactly that
 * language/country pair; a language-only locale falls back to every locale of the language.
 */
public void testExactCountry()
{
    Locale english = new Locale("en", "", "");
    Locale uk = new Locale("en", "GB", "");
    Locale all = new Locale("", "", "");
    checkContainedLocalesOfBritishEnglish();

    HashSet<Locale> allEnglish = availableLocalesMatching("en", "");

    HashSet<Locale> locales = expandWithoutWildcards(MLAnalysisMode.EXACT_COUNRTY, uk);
    assertEquals(localeSetOf(uk), locales);

    locales = expandWithoutWildcards(MLAnalysisMode.EXACT_COUNRTY, english);
    assertEquals(allEnglish, locales);
    assertTrue(locales.contains(Locale.US));

    locales = expandWithoutWildcards(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, uk);
    assertEquals(localeSetOf(all, uk), locales);

    HashSet<Locale> allEnglishAndAll = new HashSet<Locale>(allEnglish);
    allEnglishAndAll.add(all);
    locales = expandWithoutWildcards(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, english);
    assertEquals(allEnglishAndAll, locales);
    assertTrue(locales.contains(all));
}
/**
 * LOCALE_ONLY with wildcards: still just the locale that was passed in.
 */
public void testIdentityWC()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, uk, true));

    assertEquals(1, expanded.size());
    assertTrue(expanded.contains(uk));
}
/**
 * LOCALE_AND_ALL with wildcards: the locale itself, the all-locales entry and the "*" entry.
 */
public void testIdentityAndAllWC()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, uk, true));

    assertEquals(3, expanded.size());
    assertTrue(expanded.contains(uk));
    assertTrue(expanded.contains(new Locale("", "", "")));
    assertTrue(expanded.contains(new Locale("*", "", "")));
}
/**
 * ALL_ONLY with wildcards: the all-locales entry and the "*" entry.
 */
public void testAllWC()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, uk, true));

    assertEquals(2, expanded.size());
    assertTrue(expanded.contains(new Locale("", "", "")));
    assertTrue(expanded.contains(new Locale("*", "", "")));
}
/**
 * LOCALE_AND_ALL_CONTAINING_LOCALES with wildcards: "en_GB" expands to "en_GB" and "en"; no
 * wildcard entries are produced for this mode.
 */
public void testContainingWC()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, uk, true));

    assertEquals(2, expanded.size());
    assertTrue(expanded.contains(new Locale("en", "", "")));
    assertTrue(expanded.contains(new Locale("en", "GB", "")));
}
/**
 * LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL with wildcards: "en_GB", "en", the all-locales entry
 * and the "*" entry.
 */
public void testContainingAndAllWC()
{
    final Locale uk = Locale.UK;
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, uk, true));

    assertEquals(4, expanded.size());
    assertTrue(expanded.contains(new Locale("", "", "")));
    assertTrue(expanded.contains(new Locale("*", "", "")));
    assertTrue(expanded.contains(new Locale("en", "", "")));
    assertTrue(expanded.contains(new Locale("en", "GB", "")));
}
/**
 * LOCALE_AND_ALL_CONTAINED_LOCALES with wildcards: "en" expands to "en" and "en_*".
 */
public void testContainedWC()
{
    final Locale english = new Locale("en", "", "");
    final HashSet<Locale> expanded = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, english, true));

    assertEquals(2, expanded.size());
    assertTrue(expanded.contains(new Locale("en", "*", "")));
    assertTrue(expanded.contains(new Locale("en", "", "")));
}
/**
 * Wildcard expansion for the language-level modes, preceded by the contained-locales baseline
 * for "en_GB".
 */
public void testLangWC()
{
    final Locale uk = new Locale("en", "GB", "");

    // Baseline: contained locales of en_GB
    final HashSet<Locale> contained = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, uk, true));
    assertEquals(2, contained.size());
    assertTrue(contained.contains(new Locale("en", "GB", "")));
    assertTrue(contained.contains(new Locale("en", "GB", "*")));

    // ALL_LANGUAGES drops the country
    final HashSet<Locale> languages = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES, uk, true));
    assertEquals(2, languages.size());
    assertTrue(languages.contains(new Locale("en", "", "")));
    assertTrue(languages.contains(new Locale("en", "*", "")));

    // ALL_LANGUAGES_AND_ALL adds the all-locales entries
    final HashSet<Locale> languagesAndAll = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, uk, true));
    assertEquals(4, languagesAndAll.size());
    assertTrue(languagesAndAll.contains(new Locale("", "", "")));
    assertTrue(languagesAndAll.contains(new Locale("*", "", "")));
    assertTrue(languagesAndAll.contains(new Locale("en", "", "")));
    assertTrue(languagesAndAll.contains(new Locale("en", "*", "")));
}
/**
 * Wildcard expansion for the exact-language modes, preceded by the contained-locales baseline
 * for "en_GB".
 */
public void testExactLangWC()
{
    final Locale uk = new Locale("en", "GB", "");

    // Baseline: contained locales of en_GB
    final HashSet<Locale> contained = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, uk, true));
    assertEquals(2, contained.size());
    assertTrue(contained.contains(new Locale("en", "GB", "")));
    assertTrue(contained.contains(new Locale("en", "GB", "*")));

    // EXACT_LANGUAGE produces no wildcard entry
    final HashSet<Locale> exactLanguage = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE, uk, true));
    assertEquals(1, exactLanguage.size());
    assertTrue(exactLanguage.contains(new Locale("en", "", "")));

    // EXACT_LANGUAGE_AND_ALL adds the all-locales entries
    final HashSet<Locale> exactLanguageAndAll = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, uk, true));
    assertEquals(3, exactLanguageAndAll.size());
    assertTrue(exactLanguageAndAll.contains(new Locale("", "", "")));
    assertTrue(exactLanguageAndAll.contains(new Locale("*", "", "")));
    assertTrue(exactLanguageAndAll.contains(new Locale("en", "", "")));
}
/**
 * Verifies the locale sets returned by {@code MLAnalysisMode.getLocales} under the
 * country-oriented analysis modes, for both a language-only locale (en) and a
 * language+country locale (en_GB), with the third argument set to {@code true}
 * (presumably "expand with wildcard locales" - confirm against MLAnalysisMode).
 */
public void testCountryWC()
{
    final Locale en = new Locale("en", "", "");
    final Locale enGB = new Locale("en", "GB", "");

    // Contained-locales mode: en_GB itself plus the same locale with a "*" variant.
    HashSet<Locale> contained = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, enGB, true));
    assertEquals(2, contained.size());
    assertTrue(contained.contains(new Locale("en", "GB", "")));
    assertTrue(contained.contains(new Locale("en", "GB", "*")));

    // ALL_COUNTRIES, language only: the language plus the language with a "*" country.
    HashSet<Locale> countriesForEn = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, en, true));
    assertEquals(2, countriesForEn.size());
    assertTrue(countriesForEn.contains(new Locale("en", "", "")));
    assertTrue(countriesForEn.contains(new Locale("en", "*", "")));

    // ALL_COUNTRIES, language + country: the locale plus the same locale with a "*" variant.
    HashSet<Locale> countriesForEnGB = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, enGB, true));
    assertEquals(2, countriesForEnGB.size());
    assertTrue(countriesForEnGB.contains(new Locale("en", "GB", "")));
    assertTrue(countriesForEnGB.contains(new Locale("en", "GB", "*")));

    // ALL_COUNTRIES_AND_ALL, language only: as for ALL_COUNTRIES plus the two "all" entries.
    HashSet<Locale> countriesAndAllForEn = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, en, true));
    assertEquals(4, countriesAndAllForEn.size());
    assertTrue(countriesAndAllForEn.contains(new Locale("", "", "")));
    assertTrue(countriesAndAllForEn.contains(new Locale("*", "", "")));
    assertTrue(countriesAndAllForEn.contains(new Locale("en", "", "")));
    assertTrue(countriesAndAllForEn.contains(new Locale("en", "*", "")));

    // ALL_COUNTRIES_AND_ALL, language + country: as for ALL_COUNTRIES plus the two "all" entries.
    HashSet<Locale> countriesAndAllForEnGB = new HashSet<Locale>(
            MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, enGB, true));
    assertEquals(4, countriesAndAllForEnGB.size());
    assertTrue(countriesAndAllForEnGB.contains(new Locale("", "", "")));
    assertTrue(countriesAndAllForEnGB.contains(new Locale("*", "", "")));
    assertTrue(countriesAndAllForEnGB.contains(new Locale("en", "GB", "")));
    assertTrue(countriesAndAllForEnGB.contains(new Locale("en", "GB", "*")));
}
/**
 * Verifies the locale sets returned by {@code MLAnalysisMode.getLocales} under the
 * exact-country analysis modes, for both en_GB and the language-only locale en,
 * with the third argument set to {@code true} (presumably "expand with wildcard
 * locales" - confirm against MLAnalysisMode).
 */
public void testExactCountryWC()
{
    // Contained-locales mode: en_GB itself plus the same locale with a "*" variant.
    HashSet<Locale> locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true));
    assertEquals(2, locales.size());
    assertTrue(locales.contains(new Locale("en", "GB", "")));
    assertTrue(locales.contains(new Locale("en", "GB", "*")));

    // Exact country with a country present: only the language + country locale.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "GB", ""), true));
    assertEquals(1, locales.size());
    assertTrue(locales.contains(new Locale("en", "GB", "")));

    // Exact country with no country given: the language plus the language with a "*" country.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "", ""), true));
    assertEquals(2, locales.size());
    assertTrue(locales.contains(new Locale("en", "", "")));
    assertTrue(locales.contains(new Locale("en", "*", "")));

    // Exact country + all, country present: the locale plus the empty locale and the "*" language.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "GB", ""), true));
    assertEquals(3, locales.size());
    assertTrue(locales.contains(new Locale("", "", "")));
    assertTrue(locales.contains(new Locale("*", "", "")));
    assertTrue(locales.contains(new Locale("en", "GB", "")));

    // Exact country + all, no country given. The size check was missing here, so
    // unexpected extra locales would have gone unnoticed; 4 follows from the
    // two-entry EXACT_COUNRTY result for "en" above plus the two "all" entries.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "", ""), true));
    assertEquals(4, locales.size());
    assertTrue(locales.contains(new Locale("", "", "")));
    assertTrue(locales.contains(new Locale("*", "", "")));
    assertTrue(locales.contains(new Locale("en", "", "")));
    assertTrue(locales.contains(new Locale("en", "*", "")));
}
}

View File

@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private String lockDirectory; private String lockDirectory;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL; private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL; private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
/** /**
* Private constructor for the singleton TODO: FIt in with IOC * Private constructor for the singleton TODO: FIt in with IOC
@@ -1170,9 +1170,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLIndexAnalysisMode; return defaultMLIndexAnalysisMode;
} }
public void setDefaultMLIndexAnalysisMode(String mode) public void setDefaultMLIndexAnalysisMode(MLAnalysisMode mode)
{ {
defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); //defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
defaultMLIndexAnalysisMode = mode;
} }
public MLAnalysisMode getDefaultMLSearchAnalysisMode() public MLAnalysisMode getDefaultMLSearchAnalysisMode()
@@ -1180,9 +1181,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLSearchAnalysisMode; return defaultMLSearchAnalysisMode;
} }
public void setDefaultMLSearchAnalysisMode(String mode) public void setDefaultMLSearchAnalysisMode(MLAnalysisMode mode)
{ {
defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); //defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
defaultMLSearchAnalysisMode = mode;
} }

View File

@@ -18,6 +18,7 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
@@ -26,6 +27,7 @@ import java.util.Locale;
import java.util.Set; import java.util.Set;
import org.alfresco.i18n.I18NUtil; import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.lucene.query.PathQuery; import org.alfresco.repo.search.impl.lucene.query.PathQuery;
import org.alfresco.service.cmr.dictionary.AspectDefinition; import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -59,6 +61,8 @@ public class LuceneQueryParser extends QueryParser
private SearchParameters searchParameters; private SearchParameters searchParameters;
private LuceneConfig config;
/** /**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}. * Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* *
@@ -68,12 +72,13 @@ public class LuceneQueryParser extends QueryParser
* the default field for query terms. * the default field for query terms.
* @param analyzer * @param analyzer
* used to find terms in the query text. * used to find terms in the query text.
* @param config
* @throws ParseException * @throws ParseException
* if the parsing fails * if the parsing fails
*/ */
static public Query parse(String query, String field, Analyzer analyzer, static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
Operator defaultOperator, SearchParameters searchParameters) throws ParseException Operator defaultOperator, SearchParameters searchParameters, LuceneConfig config) throws ParseException
{ {
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
{ {
@@ -84,6 +89,7 @@ public class LuceneQueryParser extends QueryParser
parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService); parser.setDictionaryService(dictionaryService);
parser.setSearchParameters(searchParameters); parser.setSearchParameters(searchParameters);
parser.setLuceneConfig(config);
// TODO: Apply locale contstraints at the top level if required for the non ML doc types. // TODO: Apply locale contstraints at the top level if required for the non ML doc types.
Query result = parser.parse(query); Query result = parser.parse(query);
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
@@ -93,6 +99,11 @@ public class LuceneQueryParser extends QueryParser
return result; return result;
} }
/**
 * Stores the Lucene configuration on this parser instance. The static
 * {@code parse} factory method calls this on the parser it creates, before
 * parsing the query.
 *
 * @param config
 *            the configuration to keep in the {@code config} field
 */
private void setLuceneConfig(LuceneConfig config)
{
this.config = config;
}
private void setSearchParameters(SearchParameters searchParameters) private void setSearchParameters(SearchParameters searchParameters)
{ {
this.searchParameters = searchParameters; this.searchParameters = searchParameters;
@@ -158,7 +169,10 @@ public class LuceneQueryParser extends QueryParser
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText); Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -168,7 +182,10 @@ public class LuceneQueryParser extends QueryParser
for (String fieldName : text) for (String fieldName : text)
{ {
Query part = getFieldQuery(fieldName, queryText); Query part = getFieldQuery(fieldName, queryText);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -251,7 +268,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses) for (QName qname : subclasses)
{ {
TermQuery termQuery = new TermQuery(new Term(field, qname.toString())); TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
booleanQuery.add(termQuery, Occur.SHOULD); if (termQuery != null)
{
booleanQuery.add(termQuery, Occur.SHOULD);
}
} }
return booleanQuery; return booleanQuery;
} }
@@ -333,7 +353,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses) for (QName qname : subclasses)
{ {
TermQuery termQuery = new TermQuery(new Term(field, qname.toString())); TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
booleanQuery.add(termQuery, Occur.SHOULD); if (termQuery != null)
{
booleanQuery.add(termQuery, Occur.SHOULD);
}
} }
return booleanQuery; return booleanQuery;
} }
@@ -369,7 +392,8 @@ public class LuceneQueryParser extends QueryParser
} }
else if (field.startsWith("@")) else if (field.startsWith("@"))
{ {
return attributeQueryBuilder(field, queryText, new FieldQuery()); Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
return query;
} }
else if (field.equals("ALL")) else if (field.equals("ALL"))
{ {
@@ -414,9 +438,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName(); QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString()); Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST_NOT); if ((typeQuery != null) && (presenceQuery != null))
{
query.add(typeQuery, Occur.MUST);
query.add(presenceQuery, Occur.MUST_NOT);
}
return query; return query;
} }
else else
@@ -435,9 +462,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName(); QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString()); Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
query.add(presenceQuery, Occur.MUST); if ((typeQuery != null) && (presenceQuery != null))
{
query.add(typeQuery, Occur.MUST);
query.add(presenceQuery, Occur.MUST);
}
return query; return query;
} }
else else
@@ -455,7 +485,10 @@ public class LuceneQueryParser extends QueryParser
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText); Query part = getFieldQuery("@" + qname.toString(), queryText);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -585,7 +618,10 @@ public class LuceneQueryParser extends QueryParser
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = getPrefixQuery("@" + qname.toString(), termStr); Query part = getPrefixQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -611,7 +647,10 @@ public class LuceneQueryParser extends QueryParser
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = getWildcardQuery("@" + qname.toString(), termStr); Query part = getWildcardQuery("@" + qname.toString(), termStr);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -637,7 +676,10 @@ public class LuceneQueryParser extends QueryParser
{ {
// The super implementation will create phrase queries etc if required // The super implementation will create phrase queries etc if required
Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity); Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
query.add(part, Occur.SHOULD); if (part != null)
{
query.add(part, Occur.SHOULD);
}
} }
return query; return query;
} }
@@ -772,7 +814,10 @@ public class LuceneQueryParser extends QueryParser
StringBuilder builder = new StringBuilder(queryText.length() + 10); StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText); builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString()); Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
booleanQuery.add(subQuery, Occur.SHOULD); if (subQuery != null)
{
booleanQuery.add(subQuery, Occur.SHOULD);
}
} }
return booleanQuery; return booleanQuery;
} }
@@ -781,21 +826,62 @@ public class LuceneQueryParser extends QueryParser
{ {
// Build a sub query for each locale and or the results together - // Build a sub query for each locale and or the results together -
// - add an explicit condition for the locale // - add an explicit condition for the locale
BooleanQuery booleanQuery = new BooleanQuery();
MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
.getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
if (analysisMode.includesAll())
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
List<Locale> locales = searchParameters.getLocales(); List<Locale> locales = searchParameters.getLocales();
List<Locale> expandedLocales = new ArrayList<Locale>();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales)) .getLocale()) : locales))
{ {
BooleanQuery subQuery = new BooleanQuery(); expandedLocales.addAll(MLAnalysisMode.getLocales(analysisMode, locale, true));
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
subQuery.add(contentQuery, Occur.MUST);
StringBuilder builder = new StringBuilder();
builder.append(expandedFieldName).append(".locale");
Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
subQuery.add(localeQuery, Occur.MUST);
booleanQuery.add(subQuery, Occur.SHOULD);
} }
return booleanQuery;
if (expandedLocales.size() > 0)
{
BooleanQuery booleanQuery = new BooleanQuery();
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
if (contentQuery != null)
{
booleanQuery.add(contentQuery, Occur.MUST);
BooleanQuery subQuery = new BooleanQuery();
for (Locale locale : (expandedLocales))
{
StringBuilder builder = new StringBuilder();
builder.append(expandedFieldName).append(".locale");
String localeString = locale.toString();
if (localeString.indexOf("*") == -1)
{
Query localeQuery = getFieldQuery(builder.toString(), localeString);
if (localeQuery != null)
{
subQuery.add(localeQuery, Occur.SHOULD);
}
}
else
{
Query localeQuery = getWildcardQuery(builder.toString(), localeString);
if (localeQuery != null)
{
subQuery.add(localeQuery, Occur.SHOULD);
}
}
}
booleanQuery.add(subQuery, Occur.MUST);
}
return booleanQuery;
}
else
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
} }
else else
{ {

View File

@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
} }
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters); dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters, getLuceneConfig());
ClosingIndexSearcher searcher = getSearcher(indexer); ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null) if (searcher == null)
{ {

View File

@@ -2543,6 +2543,14 @@ public class LuceneTest2 extends TestCase
// Test stop words are equivalent // Test stop words are equivalent
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"the\"", null, null);
assertEquals(0, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"and\"", null, null);
assertEquals(0, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null); results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
@@ -2685,11 +2693,41 @@ public class LuceneTest2 extends TestCase
results = searcher.query(sp); results = searcher.query(sp);
assertEquals(1, results.length()); assertEquals(1, results.length());
results.close(); results.close();
// locale serach in en_US for en_UK
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:content:\"fox\"");
sp.addLocale(Locale.US);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Direct ML tests // Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml"); QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":and");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":\"and\"");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters(); sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef()); sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene"); sp.setLanguage("lucene");

View File

@@ -3,6 +3,7 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
@@ -38,91 +39,20 @@ public class MLTokenDuplicator extends Tokenizer
this.source = source; this.source = source;
this.locale = locale; this.locale = locale;
boolean l = locale.getLanguage().length() != 0; Collection<Locale> locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false);
boolean c = locale.getCountry().length() != 0; prefixes = new HashSet<String>(locales.size());
boolean v = locale.getVariant().length() != 0; for(Locale toAdd : locales)
prefixes = new HashSet<String>(4);
if (mlAnalaysisMode.includesAll())
{ {
prefixes.add(""); String localeString = toAdd.toString();
} if(localeString.length() == 0)
if (mlAnalaysisMode.includesExact())
{
StringBuffer result = new StringBuffer();
result.append("{").append(locale.toString()).append("}");
prefixes.add(result.toString());
}
if (mlAnalaysisMode.includesContaining())
{
if (v)
{ {
Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), ""); prefixes.add("");
StringBuffer result = new StringBuffer();
result.append("{").append(noVarient.toString()).append("}");
prefixes.add(result.toString());
Locale noCountry = new Locale(locale.getLanguage(), "", "");
result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
} }
if (c) else
{ {
Locale noCountry = new Locale(locale.getLanguage(), "", ""); StringBuilder builder = new StringBuilder(16);
StringBuffer result = new StringBuffer(); builder.append("{").append(localeString).append("}");
result.append("{").append(noCountry.toString()).append("}"); prefixes.add(builder.toString());
prefixes.add(result.toString());
}
}
if (mlAnalaysisMode.includesContained())
{
// varients have not contained
if (!v)
{
if (!c)
{
if (!l)
{
// All
for (Locale toAdd : Locale.getAvailableLocales())
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
else
{
// All that match language
for (Locale toAdd : Locale.getAvailableLocales())
{
if (locale.getLanguage().equals(toAdd.getLanguage()))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
else
{
// All that match language and country
for (Locale toAdd : Locale.getAvailableLocales())
{
if ((locale.getLanguage().equals(toAdd.getLanguage()))
&& (locale.getCountry().equals(toAdd.getCountry())))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
} }
} }
if(s_logger.isDebugEnabled()) if(s_logger.isDebugEnabled())