Updates for locale based searching and indexing

git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@4737 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Andrew Hind
2007-01-05 13:07:30 +00:00
parent f2c6f03164
commit 736a7f1ed6
9 changed files with 1412 additions and 139 deletions

View File

@@ -384,6 +384,12 @@
</property>
<property name="lockPollInterval">
<value>${lucene.lock.poll.interval}</value>
</property>
<property name="defaultMLIndexAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property>
<property name="defaultMLSearchAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property>
</bean>

View File

@@ -0,0 +1,71 @@
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
<!-- This config will index tokens for all language searches and base language searches -->
<!-- By default the search will only find ML docs and content in the correct language -->
<!-- ONLY USE THIS FOR CLEAN INSTALLATIONS UNTIL THERE IS A FIX TO SET LOCALE FOR ALL CONTENT -->
<!-- - or old content will be indexed with no locale and you will not find it ... -->
<!-- See MLAnalysisMode for all the available options -->
<!-- Lucene indexer/searcher factory override with locale-aware (multi-lingual) analysis modes configured below. -->
<bean id="luceneIndexerAndSearcherFactory"
class="org.alfresco.repo.search.impl.lucene.LuceneIndexerAndSearcherFactory2">
<property name="nodeService">
<ref bean="mlAwareNodeService" />
</property>
<property name="dictionaryService">
<ref bean="dictionaryService" />
</property>
<property name="nameSpaceService">
<ref bean="namespaceService" />
</property>
<property name="luceneFullTextSearchIndexer">
<ref bean="LuceneFullTextSearchIndexer" />
</property>
<property name="indexRootLocation">
<value>${dir.indexes}</value>
</property>
<property name="contentService">
<ref bean="contentService" />
</property>
<property name="queryRegister">
<ref bean="queryRegisterComponent" />
</property>
<property name="maxAtomicTransformationTime">
<value>${lucene.maxAtomicTransformationTime}</value> <!-- milliseconds allowed for inline text transformation -->
</property>
<property name="queryMaxClauses">
<value>${lucene.query.maxClauses}</value>
</property>
<property name="indexerBatchSize">
<value>${lucene.indexer.batchSize}</value>
</property>
<property name="lockDirectory">
<value>${dir.indexes.lock}</value>
</property>
<property name="indexerMaxFieldLength">
<value>${lucene.indexer.maxFieldLength}</value>
</property>
<property name="writeLockTimeout">
<value>${lucene.write.lock.timeout}</value>
</property>
<property name="commitLockTimeout">
<value>${lucene.commit.lock.timeout}</value>
</property>
<property name="lockPollInterval">
<value>${lucene.lock.poll.interval}</value>
</property>
<!-- Indexing: MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, i.e. the exact language of the locale plus the "all" locale -->
<property name="defaultMLIndexAnalysisMode">
<value>EXACT_LANGUAGE_AND_ALL</value>
</property>
<!-- Searching: MLAnalysisMode.EXACT_LANGUAGE, i.e. only the exact language of the locale (no "all" locale) -->
<property name="defaultMLSearchAnalysisMode">
<value>EXACT_LANGUAGE</value>
</property>
</bean>
</beans>

View File

@@ -1,17 +1,21 @@
package org.alfresco.repo.search;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import org.alfresco.error.AlfrescoRuntimeException;
/**
* Enum to specify how multi-lingual properties should be treate for indexing and search.
* Enum to specify how multi-lingual properties should be treated for indexing and search. Note that locale new Locale
* ("", "", "") is used to indicate all locales.
*
* @author andyh
*
*/
public enum MLAnalysisMode
{
/**
* Only exact locale is used.
* Only the exact locale is used.
*/
LOCALE_ONLY
{
@@ -19,23 +23,46 @@ public enum MLAnalysisMode
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Only the exact locale and no local === all lnaguages
* Only the exact locale and no locale === locale + all languages
*/
LOCALE_AND_ALL
{
@@ -43,23 +70,46 @@ public enum MLAnalysisMode
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, but not all languages
* Expand the locale to include all the locales that contain it. So "en_GB" would be "en_GB", "en", but not all
* languages "".
*/
LOCALE_AND_ALL_CONTAINING_LOCALES
{
@@ -67,23 +117,45 @@ public enum MLAnalysisMode
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Expand the locale to include all the locales that contain it.
* en_GB would be en_GB, en, and all.
* Expand the locale to include all the locales that contain it. "en_GB" would be "en_GB", "en", and all "".
*/
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL
{
@@ -91,23 +163,45 @@ public enum MLAnalysisMode
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return true;
}
public boolean includesExact()
{
return true;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Expand to all the locales that are contained by this.
* en would expand to en, en_GB, en_US, ....
* Expand to all the locales that are contained by this. "en" would expand to "en", "en_GB", "en_US", ....
*/
LOCALE_AND_ALL_CONTAINED_LOCALES
{
@@ -115,22 +209,45 @@ public enum MLAnalysisMode
{
return false;
}
public boolean includesContained()
{
return true;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return true;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* No prefix only
* Just the all locale, "", === new Locale("", "", "")
*/
ALL_ONLY
{
@@ -138,18 +255,417 @@ public enum MLAnalysisMode
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* All language matches. Only worry about language level matches for locale.
*/
ALL_LANGUAGES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return true;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* All language matches and ALL
*/
ALL_LANGUAGES_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return true;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Exact language matches (do not include all sub variants of the language)
*/
EXACT_LANGUAGE
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return true;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Exact language matches (do not include all sub variants of the language) and ALL
*/
EXACT_LANGUAGE_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return true;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* Exact country matches (do not include all sub variants of the country)
*/
EXACT_COUNRTY
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return true;
}
},
/**
* Exact country matches (do not include all sub variants of the country) and ALL
*/
EXACT_COUNTRY_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return false;
}
public boolean includesExactCountryMatch()
{
return true;
}
},
/**
* All country matches
*/
ALL_COUNTRIES
{
public boolean includesAll()
{
return false;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return true;
}
public boolean includesExactCountryMatch()
{
return false;
}
},
/**
* All countries and ALL
*/
ALL_COUNTRIES_AND_ALL
{
public boolean includesAll()
{
return true;
}
public boolean includesContained()
{
return false;
}
public boolean includesContaining()
{
return false;
}
public boolean includesExact()
{
return false;
}
public boolean includesAllLanguageMatches()
{
return false;
}
public boolean includesExactLanguageMatch()
{
return false;
}
public boolean includesAllCountryMatches()
{
return true;
}
public boolean includesExactCountryMatch()
{
return false;
}
};
public static MLAnalysisMode getMLAnalysisMode(String mode)
@@ -172,4 +688,219 @@ public enum MLAnalysisMode
public abstract boolean includesExact();
public abstract boolean includesAllLanguageMatches();
public abstract boolean includesExactLanguageMatch();
public abstract boolean includesAllCountryMatches();
public abstract boolean includesExactCountryMatch();
/**
 * Expands a locale into the set of locales implied by the given analysis mode.
 * <p>
 * Every <code>includesXxx()</code> flag that the mode reports as true contributes locales to the result, and the
 * contributions are unioned. The locale <code>new Locale("", "", "")</code> stands for "all locales".
 * <p>
 * When <code>withWildcards</code> is true, groups of locales are represented by a locale carrying "*" in the
 * language, country or variant position. When it is false, groups are enumerated from
 * {@link Locale#getAvailableLocales()}, so the result depends on the locales installed in the running JVM.
 *
 * @param mlAnalaysisMode
 *            the mode whose flags drive the expansion; dereferenced without a null check
 * @param locale
 *            the locale to expand; dereferenced without a null check
 * @param withWildcards
 *            true to describe groups of locales with "*" wildcard locales, false to enumerate them
 * @return a newly created collection, without duplicates, holding the expanded locales
 */
public static Collection<Locale> getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards)
{
    HashSet<Locale> locales = new HashSet<Locale>();

    String language = locale.getLanguage();
    String country = locale.getCountry();
    boolean hasLanguage = language.length() != 0;
    boolean hasCountry = country.length() != 0;
    boolean hasVariant = locale.getVariant().length() != 0;

    if (mlAnalaysisMode.includesAll())
    {
        locales.add(new Locale("", "", ""));
        if (withWildcards)
        {
            locales.add(new Locale("*", "", ""));
        }
    }

    if (mlAnalaysisMode.includesExact())
    {
        locales.add(locale);
    }

    if (mlAnalaysisMode.includesContaining())
    {
        // Walk "up" from the locale: drop the variant, then drop the country.
        if (hasVariant)
        {
            locales.add(new Locale(language, country, ""));
        }
        if (hasVariant || hasCountry)
        {
            locales.add(new Locale(language, "", ""));
        }
    }

    // A locale with a variant contains no other locales, so there is nothing to expand for it.
    if (mlAnalaysisMode.includesContained() && !hasVariant)
    {
        if (!withWildcards)
        {
            // Empty language/country act as "match anything", which covers all three levels.
            addAvailableLocalesMatching(locales, language, country);
        }
        else if (!hasLanguage && !hasCountry)
        {
            // The "all" locale: the wildcard goes in the language position.
            locales.add(new Locale("", "", ""));
            locales.add(new Locale("*", "", ""));
        }
        else
        {
            addCountryLevelWildcards(locales, language, country);
        }
    }

    if (mlAnalaysisMode.includesAllLanguageMatches())
    {
        // Only the language of the locale is considered; its country is ignored.
        if (withWildcards)
        {
            addCountryLevelWildcards(locales, language, "");
        }
        else
        {
            addAvailableLocalesMatching(locales, language, "");
        }
    }

    if (mlAnalaysisMode.includesExactLanguageMatch())
    {
        // A single language-only locale regardless of the wildcard setting.
        locales.add(new Locale(language, "", ""));
    }

    if (mlAnalaysisMode.includesAllCountryMatches())
    {
        if (withWildcards)
        {
            addCountryLevelWildcards(locales, language, country);
        }
        else
        {
            addAvailableLocalesMatching(locales, language, country);
        }
    }

    if (mlAnalaysisMode.includesExactCountryMatch())
    {
        if (hasCountry)
        {
            locales.add(new Locale(language, country, ""));
        }
        else if (withWildcards)
        {
            // No country given: fall back to everything for the language.
            addCountryLevelWildcards(locales, language, "");
        }
        else
        {
            addAvailableLocalesMatching(locales, language, "");
        }
    }

    return locales;
}

/**
 * Adds the language/country locale itself plus one wildcard locale one level below it: "language_*" when no
 * country is given, otherwise "language_country_*".
 */
private static void addCountryLevelWildcards(Collection<Locale> target, String language, String country)
{
    target.add(new Locale(language, country, ""));
    if (country.length() == 0)
    {
        target.add(new Locale(language, "*", ""));
    }
    else
    {
        target.add(new Locale(language, country, "*"));
    }
}

/**
 * Adds every locale from {@link Locale#getAvailableLocales()} whose language and country match the given values;
 * an empty language or country matches any value.
 */
private static void addAvailableLocalesMatching(Collection<Locale> target, String language, String country)
{
    boolean anyLanguage = language.length() == 0;
    boolean anyCountry = country.length() == 0;
    for (Locale candidate : Locale.getAvailableLocales())
    {
        boolean languageMatches = anyLanguage || language.equals(candidate.getLanguage());
        boolean countryMatches = anyCountry || country.equals(candidate.getCountry());
        if (languageMatches && countryMatches)
        {
            target.add(candidate);
        }
    }
}
}

View File

@@ -0,0 +1,409 @@
package org.alfresco.repo.search;
import java.util.HashSet;
import java.util.Locale;
import junit.framework.TestCase;
public class MLAnaysisModeExpansionTest extends TestCase
{
public MLAnaysisModeExpansionTest()
{
super();
}
public MLAnaysisModeExpansionTest(String arg0)
{
super(arg0);
}
public void testIdentity()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, locale, false));
assertEquals(1, locales.size());
assertTrue(locales.contains(locale));
}
public void testIdentityAndAll()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, locale, false));
assertEquals(2, locales.size());
assertTrue(locales.contains(locale));
assertTrue(locales.contains(new Locale("", "", "")));
}
public void testAll()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, locale, false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
}
public void testContaining()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, locale, false));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
}
public void testContainingAndAll()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, locale, false));
assertEquals(3, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
}
public void testContained()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "", ""), false));
assertEquals(9, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
}
public void testLang()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES, new Locale("en", "GB", ""), false));
assertEquals(9, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, new Locale("en", "GB", ""), false));
assertEquals(10, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
}
public void testExactLang()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, new Locale("en", "GB", ""), false));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
}
public void testCountry()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "", ""), false));
assertEquals(9, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "", ""), false));
assertEquals(10, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "GB", ""), false));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
}
public void testExactCountry()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "GB", ""), false));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "", ""), false));
assertEquals(9, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "GB", ""), false));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "", ""), false));
assertEquals(10, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "AU", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "US", "")));
assertTrue(locales.contains(new Locale("en", "ZA", "")));
assertTrue(locales.contains(new Locale("en", "CA", "")));
assertTrue(locales.contains(new Locale("en", "IE", "")));
assertTrue(locales.contains(new Locale("en", "NZ", "")));
assertTrue(locales.contains(new Locale("en", "IN", "")));
}
public void testIdentityWC()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, locale, true));
assertEquals(1, locales.size());
assertTrue(locales.contains(locale));
}
public void testIdentityAndAllWC()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, locale, true));
assertEquals(3, locales.size());
assertTrue(locales.contains(locale));
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
}
public void testAllWC()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, locale, true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
}
public void testContainingWC()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, locale, true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
}
public void testContainingAndAllWC()
{
Locale locale = Locale.UK;
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, locale, true));
assertEquals(4, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
}
public void testContainedWC()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "*", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
}
public void testLangWC()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "GB", "*")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES, new Locale("en", "GB", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "*", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, new Locale("en", "GB", ""), true));
assertEquals(4, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "*", "")));
}
public void testExactLangWC()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "GB", "*")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE, new Locale("en", "GB", ""), true));
assertEquals(1, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, new Locale("en", "GB", ""), true));
assertEquals(3, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
}
public void testCountryWC()
{
HashSet<Locale> locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "GB", "*")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "*", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "GB", ""), true));
assertEquals(2, locales.size());
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "GB", "*")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "", ""), true));
assertEquals(4, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
assertTrue(locales.contains(new Locale("en", "", "")));
assertTrue(locales.contains(new Locale("en", "*", "")));
locales = new HashSet<Locale>();
locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "GB", ""), true));
assertEquals(4, locales.size());
assertTrue(locales.contains(new Locale("", "", "")));
assertTrue(locales.contains(new Locale("*", "", "")));
assertTrue(locales.contains(new Locale("en", "GB", "")));
assertTrue(locales.contains(new Locale("en", "GB", "*")));
}
/**
 * Checks the locale sets produced by {@link MLAnalysisMode#getLocales} for the
 * LOCALE_AND_ALL_CONTAINED_LOCALES, EXACT_COUNRTY and EXACT_COUNTRY_AND_ALL modes.
 * <p>
 * Every call passes <code>true</code> as the third argument (presumably the
 * "with wildcards" flag - confirm against MLAnalysisMode). Each case asserts
 * the size of the returned set as well as its members, so that unexpected
 * extra locales are reported and not silently accepted.
 */
public void testExactCountryWC()
{
    HashSet<Locale> locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true));
    assertEquals(2, locales.size());
    assertTrue(locales.contains(new Locale("en", "GB", "")));
    assertTrue(locales.contains(new Locale("en", "GB", "*")));

    // Exact country for a locale that names a country: only that locale.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "GB", ""), true));
    assertEquals(1, locales.size());
    assertTrue(locales.contains(new Locale("en", "GB", "")));

    // Exact country for a language-only locale: the language plus its country wildcard.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "", ""), true));
    assertEquals(2, locales.size());
    assertTrue(locales.contains(new Locale("en", "", "")));
    assertTrue(locales.contains(new Locale("en", "*", "")));

    // The _AND_ALL variant additionally yields the empty locale and the "*" locale.
    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "GB", ""), true));
    assertEquals(3, locales.size());
    assertTrue(locales.contains(new Locale("", "", "")));
    assertTrue(locales.contains(new Locale("*", "", "")));
    assertTrue(locales.contains(new Locale("en", "GB", "")));

    locales = new HashSet<Locale>();
    locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "", ""), true));
    // This case previously had no size check. The four distinct members asserted
    // below, and the sizes pinned by the sibling cases (2 for EXACT_COUNRTY plus
    // the 2 "all" locales), give an expected size of exactly 4.
    assertEquals(4, locales.size());
    assertTrue(locales.contains(new Locale("", "", "")));
    assertTrue(locales.contains(new Locale("*", "", "")));
    assertTrue(locales.contains(new Locale("en", "", "")));
    assertTrue(locales.contains(new Locale("en", "*", "")));
}
}

View File

@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private String lockDirectory;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL;
private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
/**
* Private constructor for the singleton. TODO: Fit in with IoC
@@ -1170,9 +1170,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLIndexAnalysisMode;
}
public void setDefaultMLIndexAnalysisMode(String mode)
public void setDefaultMLIndexAnalysisMode(MLAnalysisMode mode)
{
defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
//defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
defaultMLIndexAnalysisMode = mode;
}
public MLAnalysisMode getDefaultMLSearchAnalysisMode()
@@ -1180,9 +1181,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLSearchAnalysisMode;
}
public void setDefaultMLSearchAnalysisMode(String mode)
public void setDefaultMLSearchAnalysisMode(MLAnalysisMode mode)
{
defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
//defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
defaultMLSearchAnalysisMode = mode;
}

View File

@@ -18,6 +18,7 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@@ -26,6 +27,7 @@ import java.util.Locale;
import java.util.Set;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -59,6 +61,8 @@ public class LuceneQueryParser extends QueryParser
private SearchParameters searchParameters;
private LuceneConfig config;
/**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}.
*
@@ -68,12 +72,13 @@ public class LuceneQueryParser extends QueryParser
* the default field for query terms.
* @param analyzer
* used to find terms in the query text.
* @param config
* @throws ParseException
* if the parsing fails
*/
static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
Operator defaultOperator, SearchParameters searchParameters) throws ParseException
Operator defaultOperator, SearchParameters searchParameters, LuceneConfig config) throws ParseException
{
if (s_logger.isDebugEnabled())
{
@@ -84,6 +89,7 @@ public class LuceneQueryParser extends QueryParser
parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService);
parser.setSearchParameters(searchParameters);
parser.setLuceneConfig(config);
// TODO: Apply locale constraints at the top level if required for the non-ML doc types.
Query result = parser.parse(query);
if (s_logger.isDebugEnabled())
@@ -93,6 +99,11 @@ public class LuceneQueryParser extends QueryParser
return result;
}
/**
 * Stores the Lucene configuration on this parser instance.
 *
 * @param config
 *            the configuration to keep in the <code>config</code> field
 */
private void setLuceneConfig(LuceneConfig config)
{
this.config = config;
}
private void setSearchParameters(SearchParameters searchParameters)
{
this.searchParameters = searchParameters;
@@ -158,8 +169,11 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
else
@@ -168,8 +182,11 @@ public class LuceneQueryParser extends QueryParser
for (String fieldName : text)
{
Query part = getFieldQuery(fieldName, queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
@@ -251,8 +268,11 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses)
{
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
if (termQuery != null)
{
booleanQuery.add(termQuery, Occur.SHOULD);
}
}
return booleanQuery;
}
else if (field.equals("EXACTTYPE"))
@@ -333,8 +353,11 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses)
{
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
if (termQuery != null)
{
booleanQuery.add(termQuery, Occur.SHOULD);
}
}
return booleanQuery;
}
else if (field.equals("EXACTASPECT"))
@@ -369,7 +392,8 @@ public class LuceneQueryParser extends QueryParser
}
else if (field.startsWith("@"))
{
return attributeQueryBuilder(field, queryText, new FieldQuery());
Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
return query;
}
else if (field.equals("ALL"))
{
@@ -414,9 +438,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
if ((typeQuery != null) && (presenceQuery != null))
{
query.add(typeQuery, Occur.MUST);
query.add(presenceQuery, Occur.MUST_NOT);
}
return query;
}
else
@@ -435,9 +462,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
if ((typeQuery != null) && (presenceQuery != null))
{
query.add(typeQuery, Occur.MUST);
query.add(presenceQuery, Occur.MUST);
}
return query;
}
else
@@ -455,8 +485,11 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
else
@@ -585,8 +618,11 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getPrefixQuery("@" + qname.toString(), termStr);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
else
@@ -611,8 +647,11 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getWildcardQuery("@" + qname.toString(), termStr);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
else
@@ -637,8 +676,11 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
if (part != null)
{
query.add(part, Occur.SHOULD);
}
}
return query;
}
else
@@ -772,8 +814,11 @@ public class LuceneQueryParser extends QueryParser
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
if (subQuery != null)
{
booleanQuery.add(subQuery, Occur.SHOULD);
}
}
return booleanQuery;
}
// Content
@@ -781,19 +826,54 @@ public class LuceneQueryParser extends QueryParser
{
// Build a sub query for each locale and or the results together -
// - add an explicit condition for the locale
BooleanQuery booleanQuery = new BooleanQuery();
MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
.getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
if (analysisMode.includesAll())
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
List<Locale> locales = searchParameters.getLocales();
List<Locale> expandedLocales = new ArrayList<Locale>();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
BooleanQuery subQuery = new BooleanQuery();
expandedLocales.addAll(MLAnalysisMode.getLocales(analysisMode, locale, true));
}
if (expandedLocales.size() > 0)
{
BooleanQuery booleanQuery = new BooleanQuery();
Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
subQuery.add(contentQuery, Occur.MUST);
if (contentQuery != null)
{
booleanQuery.add(contentQuery, Occur.MUST);
BooleanQuery subQuery = new BooleanQuery();
for (Locale locale : (expandedLocales))
{
StringBuilder builder = new StringBuilder();
builder.append(expandedFieldName).append(".locale");
Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
subQuery.add(localeQuery, Occur.MUST);
booleanQuery.add(subQuery, Occur.SHOULD);
String localeString = locale.toString();
if (localeString.indexOf("*") == -1)
{
Query localeQuery = getFieldQuery(builder.toString(), localeString);
if (localeQuery != null)
{
subQuery.add(localeQuery, Occur.SHOULD);
}
}
else
{
Query localeQuery = getWildcardQuery(builder.toString(), localeString);
if (localeQuery != null)
{
subQuery.add(localeQuery, Occur.SHOULD);
}
}
}
booleanQuery.add(subQuery, Occur.MUST);
}
return booleanQuery;
}
@@ -801,6 +881,12 @@ public class LuceneQueryParser extends QueryParser
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
else
{
return subQueryBuilder.getQuery(expandedFieldName, queryText);
}
}
}

View File

@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
}
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters);
dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters, getLuceneConfig());
ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null)
{

View File

@@ -2543,6 +2543,14 @@ public class LuceneTest2 extends TestCase
// Test stop words are equivalent
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"the\"", null, null);
assertEquals(0, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"and\"", null, null);
assertEquals(0, results.length());
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
assertEquals(1, results.length());
results.close();
@@ -2686,10 +2694,40 @@ public class LuceneTest2 extends TestCase
assertEquals(1, results.length());
results.close();
// locale search in en_US for en_UK
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("d\\:content:\"fox\"");
sp.addLocale(Locale.US);
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":and");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":\"and\"");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");

View File

@@ -3,6 +3,7 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
@@ -38,91 +39,20 @@ public class MLTokenDuplicator extends Tokenizer
this.source = source;
this.locale = locale;
boolean l = locale.getLanguage().length() != 0;
boolean c = locale.getCountry().length() != 0;
boolean v = locale.getVariant().length() != 0;
prefixes = new HashSet<String>(4);
if (mlAnalaysisMode.includesAll())
Collection<Locale> locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false);
prefixes = new HashSet<String>(locales.size());
for(Locale toAdd : locales)
{
String localeString = toAdd.toString();
if(localeString.length() == 0)
{
prefixes.add("");
}
if (mlAnalaysisMode.includesExact())
{
StringBuffer result = new StringBuffer();
result.append("{").append(locale.toString()).append("}");
prefixes.add(result.toString());
}
if (mlAnalaysisMode.includesContaining())
{
if (v)
{
Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), "");
StringBuffer result = new StringBuffer();
result.append("{").append(noVarient.toString()).append("}");
prefixes.add(result.toString());
Locale noCountry = new Locale(locale.getLanguage(), "", "");
result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
}
if (c)
{
Locale noCountry = new Locale(locale.getLanguage(), "", "");
StringBuffer result = new StringBuffer();
result.append("{").append(noCountry.toString()).append("}");
prefixes.add(result.toString());
}
}
if (mlAnalaysisMode.includesContained())
{
// variants have no contained locales
if (!v)
{
if (!c)
{
if (!l)
{
// All
for (Locale toAdd : Locale.getAvailableLocales())
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
else
{
// All that match language
for (Locale toAdd : Locale.getAvailableLocales())
{
if (locale.getLanguage().equals(toAdd.getLanguage()))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
}
else
{
// All that match language and country
for (Locale toAdd : Locale.getAvailableLocales())
{
if ((locale.getLanguage().equals(toAdd.getLanguage()))
&& (locale.getCountry().equals(toAdd.getCountry())))
{
StringBuffer result = new StringBuffer();
result.append("{").append(toAdd.toString()).append("}");
prefixes.add(result.toString());
}
}
}
StringBuilder builder = new StringBuilder(16);
builder.append("{").append(localeString).append("}");
prefixes.add(builder.toString());
}
}
if(s_logger.isDebugEnabled())