diff --git a/config/alfresco/core-services-context.xml b/config/alfresco/core-services-context.xml index 04c614f23a..f381a269c1 100644 --- a/config/alfresco/core-services-context.xml +++ b/config/alfresco/core-services-context.xml @@ -384,6 +384,12 @@ ${lucene.lock.poll.interval} + + + EXACT_LANGUAGE_AND_ALL + + + EXACT_LANGUAGE_AND_ALL diff --git a/config/alfresco/extension/language-specific-index-and-search-context.xml.sample b/config/alfresco/extension/language-specific-index-and-search-context.xml.sample new file mode 100644 index 0000000000..7082320a76 --- /dev/null +++ b/config/alfresco/extension/language-specific-index-and-search-context.xml.sample @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + ${dir.indexes} + + + + + + + + + ${lucene.maxAtomicTransformationTime} + + + ${lucene.query.maxClauses} + + + ${lucene.indexer.batchSize} + + + ${dir.indexes.lock} + + + ${lucene.indexer.maxFieldLength} + + + ${lucene.write.lock.timeout} + + + ${lucene.commit.lock.timeout} + + + ${lucene.lock.poll.interval} + + + EXACT_LANGUAGE_AND_ALL + + + EXACT_LANGUAGE + + + + + + + + \ No newline at end of file diff --git a/source/java/org/alfresco/repo/search/MLAnalysisMode.java b/source/java/org/alfresco/repo/search/MLAnalysisMode.java index cd7c32dd24..779debfa92 100644 --- a/source/java/org/alfresco/repo/search/MLAnalysisMode.java +++ b/source/java/org/alfresco/repo/search/MLAnalysisMode.java @@ -1,17 +1,21 @@ package org.alfresco.repo.search; +import java.util.Collection; +import java.util.HashSet; +import java.util.Locale; + import org.alfresco.error.AlfrescoRuntimeException; /** - * Enum to specify how multi-lingual properties should be treate for indexing and search. + * Enum to specify how multi-lingual properties should be treate for indexing and search. Note that locale new Locale + * ("", "", "") is used to indicate all locales. * * @author andyh - * */ public enum MLAnalysisMode { /** - * Only exact locale is used. + * Only the exact locale is used. */ LOCALE_ONLY { @@ -19,23 +23,46 @@ public enum MLAnalysisMode { return false; } + public boolean includesContained() { return false; } + public boolean includesContaining() { return false; } + public boolean includesExact() { return true; } - + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, - + /** - * Only the exact locale and no local === all lnaguages + * Only the exact locale and no locale === locale + all languages */ LOCALE_AND_ALL { @@ -43,23 +70,46 @@ public enum MLAnalysisMode { return true; } + public boolean includesContained() { return false; } + public boolean includesContaining() { return false; } + public boolean includesExact() { return true; } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } }, - + /** - * Expand the locale to include all the locales that contain it. - * en_GB would be en_GB, en, but not all languages + * Expand the locale to include all the locales that contain it. So "en_GB" would be "en_GB", "en", but not all + * languages "". */ LOCALE_AND_ALL_CONTAINING_LOCALES { @@ -67,23 +117,45 @@ public enum MLAnalysisMode { return false; } + public boolean includesContained() { return false; } + public boolean includesContaining() { return true; } + public boolean includesExact() { return true; } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } }, - + /** - * Expand the locale to include all the locales that contain it. - * en_GB would be en_GB, en, and all. + * Expand the locale to include all the locales that contain it. "en_GB" would be "en_GB", "en", and all "". */ LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL { @@ -91,23 +163,45 @@ public enum MLAnalysisMode { return true; } + public boolean includesContained() { return false; } + public boolean includesContaining() { return true; } + public boolean includesExact() { return true; } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } }, - + /** - * Expand to all the locales that are contained by this. - * en would expand to en, en_GB, en_US, .... + * Expand to all the locales that are contained by this. "en" would expand to "en", "en_GB", "en_US", .... */ LOCALE_AND_ALL_CONTAINED_LOCALES { @@ -115,22 +209,45 @@ public enum MLAnalysisMode { return false; } + public boolean includesContained() { return true; } + public boolean includesContaining() { return false; } + public boolean includesExact() { return true; } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } }, - + /** - * No prefix only + * Just the all locale, "", === new Locale("", "", "") */ ALL_ONLY { @@ -138,38 +255,652 @@ public enum MLAnalysisMode { return true; } + public boolean includesContained() { return false; } + public boolean includesContaining() { return false; } + public boolean includesExact() { return false; } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * All language matches. Only worry about language level matches for locale. + */ + + ALL_LANGUAGES + { + public boolean includesAll() + { + return false; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return true; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * All language matches and ALL + */ + + ALL_LANGUAGES_AND_ALL + { + public boolean includesAll() + { + return true; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return true; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * Exact language matches (do not include all sub varients of the language) + */ + + EXACT_LANGUAGE + { + public boolean includesAll() + { + return false; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return true; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * Exact language matches (do not include all sub varients of the language) and ALL + */ + + EXACT_LANGUAGE_AND_ALL + { + public boolean includesAll() + { + return true; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return true; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * Exact country matches (do not include all sub varients of the country) + */ + + EXACT_COUNRTY + { + public boolean includesAll() + { + return false; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return true; + } + }, + + /** + * Exact country matches (do not include all sub varients of the country) and ALL + */ + + EXACT_COUNTRY_AND_ALL + { + public boolean includesAll() + { + return true; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return false; + } + + public boolean includesExactCountryMatch() + { + return true; + } + }, + + /** + * All country matches + */ + + ALL_COUNTRIES + { + public boolean includesAll() + { + return false; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return true; + } + + public boolean includesExactCountryMatch() + { + return false; + } + }, + + /** + * All countries and ALL + */ + + ALL_COUNTRIES_AND_ALL + { + public boolean includesAll() + { + return true; + } + + public boolean includesContained() + { + return false; + } + + public boolean includesContaining() + { + return false; + } + + public boolean includesExact() + { + return false; + } + + public boolean includesAllLanguageMatches() + { + return false; + } + + public boolean includesExactLanguageMatch() + { + return false; + } + + public boolean includesAllCountryMatches() + { + return true; + } + + public boolean includesExactCountryMatch() + { + return false; + } }; - + public static MLAnalysisMode getMLAnalysisMode(String mode) { - for(MLAnalysisMode test : MLAnalysisMode.values()) + for (MLAnalysisMode test : MLAnalysisMode.values()) { - if(test.toString().equalsIgnoreCase(mode)) + if (test.toString().equalsIgnoreCase(mode)) { return test; } } - throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode); + throw new AlfrescoRuntimeException("Unknown ML Analysis mode " + mode); } - + public abstract boolean includesAll(); - + public abstract boolean includesContained(); - + public abstract boolean includesContaining(); - + public abstract boolean includesExact(); - + + public abstract boolean includesAllLanguageMatches(); + + public abstract boolean includesExactLanguageMatch(); + + public abstract boolean includesAllCountryMatches(); + + public abstract boolean includesExactCountryMatch(); + + public static Collection getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards) + { + HashSet locales = new HashSet(); + + boolean l = locale.getLanguage().length() != 0; + boolean c = locale.getCountry().length() != 0; + boolean v = locale.getVariant().length() != 0; + + if (mlAnalaysisMode.includesAll()) + { + if (withWildcards) + { + locales.add(new Locale("", "", "")); + locales.add(new Locale("*", "", "")); + } + else + { + locales.add(new Locale("", "", "")); + } + + } + + if (mlAnalaysisMode.includesExact()) + { + locales.add(locale); + } + + if (mlAnalaysisMode.includesContaining()) + { + if (v) + { + Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), ""); + locales.add(noVarient); + + Locale noCountry = new Locale(locale.getLanguage(), "", ""); + locales.add(noCountry); + } + if (c) + { + Locale noCountry = new Locale(locale.getLanguage(), "", ""); + locales.add(noCountry); + } + } + + if (mlAnalaysisMode.includesContained()) + { + // varients have not contained + if (!v) + { + if (!c) + { + if (!l) + { + // All + if (withWildcards) + { + locales.add(new Locale("", "", "")); + locales.add(new Locale("*", "", "")); + } + else + { + for (Locale toAdd : Locale.getAvailableLocales()) + { + locales.add(toAdd); + } + } + } + else + { + // All that match language + if (withWildcards) + { + locales.add(new Locale(locale.getLanguage(), "", "")); + locales.add(new Locale(locale.getLanguage(), "*", "")); + } + else + { + for (Locale toAdd : Locale.getAvailableLocales()) + { + if (locale.getLanguage().equals("") || locale.getLanguage().equals(toAdd.getLanguage())) + { + locales.add(toAdd); + } + } + } + } + } + else + { + if (withWildcards) + { + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "")); + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "*")); + } + else + { + // All that match language and country + for (Locale toAdd : Locale.getAvailableLocales()) + { + if ((locale.getLanguage().equals("") || locale.getLanguage().equals(toAdd.getLanguage())) + && (locale.getCountry().equals("") || locale.getCountry() + .equals(toAdd.getCountry()))) + { + locales.add(toAdd); + } + } + } + } + } + } + + if (mlAnalaysisMode.includesAllLanguageMatches()) + { + if (withWildcards) + { + locales.add(new Locale(locale.getLanguage(), "", "")); + locales.add(new Locale(locale.getLanguage(), "*", "")); + } + else + { + // All that match language + for (Locale toAdd : Locale.getAvailableLocales()) + { + if (locale.getLanguage().equals("") || locale.getLanguage().equals(toAdd.getLanguage())) + { + locales.add(toAdd); + } + } + } + } + + if (mlAnalaysisMode.includesExactLanguageMatch()) + { + if (withWildcards) + { + locales.add(new Locale(locale.getLanguage(), "", "")); + } + else + { + locales.add(new Locale(locale.getLanguage(), "", "")); + } + } + + if (mlAnalaysisMode.includesAllCountryMatches()) + { + if (withWildcards) + { + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "")); + if(locale.getCountry().equals("")) + { + locales.add(new Locale(locale.getLanguage(), "*", "")); + } + else + { + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "*")); + } + } + else + { + // All that match language + for (Locale toAdd : Locale.getAvailableLocales()) + { + if ((locale.getLanguage().equals("") || locale.getLanguage().equals(toAdd.getLanguage())) + && (locale.getCountry().equals("") || locale.getCountry().equals(toAdd.getCountry()))) + { + locales.add(toAdd); + } + } + } + } + + if (mlAnalaysisMode.includesExactCountryMatch()) + { + if (withWildcards) + { + if(locale.getCountry().equals("")) + { + locales.add(new Locale(locale.getLanguage(), "", "")); + locales.add(new Locale(locale.getLanguage(), "*", "")); + } + else + { + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "")); + } + + } + else + { + if (locale.getCountry().equals("")) + { + for (Locale toAdd : Locale.getAvailableLocales()) + { + if (locale.getLanguage().equals("") || locale.getLanguage().equals(toAdd.getLanguage())) + { + locales.add(toAdd); + } + } + } + else + { + locales.add(new Locale(locale.getLanguage(), locale.getCountry(), "")); + } + } + } + + return locales; + } } diff --git a/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java b/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java new file mode 100644 index 0000000000..9094f580cd --- /dev/null +++ b/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java @@ -0,0 +1,409 @@ +package org.alfresco.repo.search; + +import java.util.HashSet; +import java.util.Locale; + +import junit.framework.TestCase; + +public class MLAnaysisModeExpansionTest extends TestCase +{ + + public MLAnaysisModeExpansionTest() + { + super(); + } + + public MLAnaysisModeExpansionTest(String arg0) + { + super(arg0); + } + + public void testIdentity() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, locale, false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(locale)); + } + + public void testIdentityAndAll() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, locale, false)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(locale)); + assertTrue(locales.contains(new Locale("", "", ""))); + } + + public void testAll() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, locale, false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + } + + public void testContaining() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, locale, false)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + } + + public void testContainingAndAll() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, locale, false)); + assertEquals(3, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + } + + public void testContained() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "", ""), false)); + assertEquals(9, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + } + + public void testLang() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES, new Locale("en", "GB", ""), false)); + assertEquals(9, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, new Locale("en", "GB", ""), false)); + assertEquals(10, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + } + + public void testExactLang() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, new Locale("en", "GB", ""), false)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + } + + public void testCountry() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "", ""), false)); + assertEquals(9, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "", ""), false)); + assertEquals(10, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "GB", ""), false)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + } + + public void testExactCountry() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "GB", ""), false)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "", ""), false)); + assertEquals(9, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "GB", ""), false)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "", ""), false)); + assertEquals(10, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "AU", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "US", ""))); + assertTrue(locales.contains(new Locale("en", "ZA", ""))); + assertTrue(locales.contains(new Locale("en", "CA", ""))); + assertTrue(locales.contains(new Locale("en", "IE", ""))); + assertTrue(locales.contains(new Locale("en", "NZ", ""))); + assertTrue(locales.contains(new Locale("en", "IN", ""))); + } + + + public void testIdentityWC() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_ONLY, locale, true)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(locale)); + } + + public void testIdentityAndAllWC() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL, locale, true)); + assertEquals(3, locales.size()); + assertTrue(locales.contains(locale)); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + } + + public void testAllWC() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_ONLY, locale, true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + } + + public void testContainingWC() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, locale, true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + } + + public void testContainingAndAllWC() + { + Locale locale = Locale.UK; + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, locale, true)); + assertEquals(4, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + } + + public void testContainedWC() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "*", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + } + + public void testLangWC() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, new Locale("en", "GB", ""), true)); + assertEquals(4, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + } + + public void testExactLangWC() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE, new Locale("en", "GB", ""), true)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, new Locale("en", "GB", ""), true)); + assertEquals(3, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + } + + public void testCountryWC() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "", ""), true)); + assertEquals(4, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, new Locale("en", "GB", ""), true)); + assertEquals(4, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + } + + public void testExactCountryWC() + { + HashSet locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, new Locale("en", "GB", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + assertTrue(locales.contains(new Locale("en", "GB", "*"))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "GB", ""), true)); + assertEquals(1, locales.size()); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNRTY, new Locale("en", "", ""), true)); + assertEquals(2, locales.size()); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "GB", ""), true)); + assertEquals(3, locales.size()); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "GB", ""))); + + locales = new HashSet(); + locales.addAll(MLAnalysisMode.getLocales(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, new Locale("en", "", ""), true)); + assertTrue(locales.contains(new Locale("", "", ""))); + assertTrue(locales.contains(new Locale("*", "", ""))); + assertTrue(locales.contains(new Locale("en", "", ""))); + assertTrue(locales.contains(new Locale("en", "*", ""))); + } + +} diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java index 5a37e53c6c..96164cc050 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java @@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche private String lockDirectory; - private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL; + private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL; - private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL; + private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL; /** * Private constructor for the singleton TODO: FIt in with IOC @@ -1170,9 +1170,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche return defaultMLIndexAnalysisMode; } - public void setDefaultMLIndexAnalysisMode(String mode) + public void setDefaultMLIndexAnalysisMode(MLAnalysisMode mode) { - defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); + //defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); + defaultMLIndexAnalysisMode = mode; } public MLAnalysisMode getDefaultMLSearchAnalysisMode() @@ -1180,9 +1181,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche return defaultMLSearchAnalysisMode; } - public void setDefaultMLSearchAnalysisMode(String mode) + public void setDefaultMLSearchAnalysisMode(MLAnalysisMode mode) { - defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); + //defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode); + defaultMLSearchAnalysisMode = mode; } diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java index 5f22803c09..e7cda33784 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java @@ -18,6 +18,7 @@ package org.alfresco.repo.search.impl.lucene; import java.io.IOException; import java.io.StringReader; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -26,6 +27,7 @@ import java.util.Locale; import java.util.Set; import org.alfresco.i18n.I18NUtil; +import org.alfresco.repo.search.MLAnalysisMode; import org.alfresco.repo.search.SearcherException; import org.alfresco.repo.search.impl.lucene.query.PathQuery; import org.alfresco.service.cmr.dictionary.AspectDefinition; @@ -59,6 +61,8 @@ public class LuceneQueryParser extends QueryParser private SearchParameters searchParameters; + private LuceneConfig config; + /** * Parses a query string, returning a {@link org.apache.lucene.search.Query}. * @@ -68,12 +72,13 @@ public class LuceneQueryParser extends QueryParser * the default field for query terms. * @param analyzer * used to find terms in the query text. + * @param config * @throws ParseException * if the parsing fails */ static public Query parse(String query, String field, Analyzer analyzer, NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService, - Operator defaultOperator, SearchParameters searchParameters) throws ParseException + Operator defaultOperator, SearchParameters searchParameters, LuceneConfig config) throws ParseException { if (s_logger.isDebugEnabled()) { @@ -84,6 +89,7 @@ public class LuceneQueryParser extends QueryParser parser.setNamespacePrefixResolver(namespacePrefixResolver); parser.setDictionaryService(dictionaryService); parser.setSearchParameters(searchParameters); + parser.setLuceneConfig(config); // TODO: Apply locale contstraints at the top level if required for the non ML doc types. Query result = parser.parse(query); if (s_logger.isDebugEnabled()) @@ -93,6 +99,11 @@ public class LuceneQueryParser extends QueryParser return result; } + private void setLuceneConfig(LuceneConfig config) + { + this.config = config; + } + private void setSearchParameters(SearchParameters searchParameters) { this.searchParameters = searchParameters; @@ -158,7 +169,10 @@ public class LuceneQueryParser extends QueryParser { // The super implementation will create phrase queries etc if required Query part = getFieldQuery("@" + qname.toString(), queryText); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -168,7 +182,10 @@ public class LuceneQueryParser extends QueryParser for (String fieldName : text) { Query part = getFieldQuery(fieldName, queryText); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -251,7 +268,10 @@ public class LuceneQueryParser extends QueryParser for (QName qname : subclasses) { TermQuery termQuery = new TermQuery(new Term(field, qname.toString())); - booleanQuery.add(termQuery, Occur.SHOULD); + if (termQuery != null) + { + booleanQuery.add(termQuery, Occur.SHOULD); + } } return booleanQuery; } @@ -333,7 +353,10 @@ public class LuceneQueryParser extends QueryParser for (QName qname : subclasses) { TermQuery termQuery = new TermQuery(new Term(field, qname.toString())); - booleanQuery.add(termQuery, Occur.SHOULD); + if (termQuery != null) + { + booleanQuery.add(termQuery, Occur.SHOULD); + } } return booleanQuery; } @@ -369,7 +392,8 @@ public class LuceneQueryParser extends QueryParser } else if (field.startsWith("@")) { - return attributeQueryBuilder(field, queryText, new FieldQuery()); + Query query = attributeQueryBuilder(field, queryText, new FieldQuery()); + return query; } else if (field.equals("ALL")) { @@ -414,9 +438,12 @@ public class LuceneQueryParser extends QueryParser QName container = pd.getContainerClass().getName(); BooleanQuery query = new BooleanQuery(); Query typeQuery = getFieldQuery("TYPE", container.toString()); - query.add(typeQuery, Occur.MUST); Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); - query.add(presenceQuery, Occur.MUST_NOT); + if ((typeQuery != null) && (presenceQuery != null)) + { + query.add(typeQuery, Occur.MUST); + query.add(presenceQuery, Occur.MUST_NOT); + } return query; } else @@ -435,9 +462,12 @@ public class LuceneQueryParser extends QueryParser QName container = pd.getContainerClass().getName(); BooleanQuery query = new BooleanQuery(); Query typeQuery = getFieldQuery("TYPE", container.toString()); - query.add(typeQuery, Occur.MUST); Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*"); - query.add(presenceQuery, Occur.MUST); + if ((typeQuery != null) && (presenceQuery != null)) + { + query.add(typeQuery, Occur.MUST); + query.add(presenceQuery, Occur.MUST); + } return query; } else @@ -455,7 +485,10 @@ public class LuceneQueryParser extends QueryParser { // The super implementation will create phrase queries etc if required Query part = getFieldQuery("@" + qname.toString(), queryText); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -585,7 +618,10 @@ public class LuceneQueryParser extends QueryParser { // The super implementation will create phrase queries etc if required Query part = getPrefixQuery("@" + qname.toString(), termStr); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -611,7 +647,10 @@ public class LuceneQueryParser extends QueryParser { // The super implementation will create phrase queries etc if required Query part = getWildcardQuery("@" + qname.toString(), termStr); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -637,7 +676,10 @@ public class LuceneQueryParser extends QueryParser { // The super implementation will create phrase queries etc if required Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity); - query.add(part, Occur.SHOULD); + if (part != null) + { + query.add(part, Occur.SHOULD); + } } return query; } @@ -772,7 +814,10 @@ public class LuceneQueryParser extends QueryParser StringBuilder builder = new StringBuilder(queryText.length() + 10); builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText); Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString()); - booleanQuery.add(subQuery, Occur.SHOULD); + if (subQuery != null) + { + booleanQuery.add(subQuery, Occur.SHOULD); + } } return booleanQuery; } @@ -781,21 +826,62 @@ public class LuceneQueryParser extends QueryParser { // Build a sub query for each locale and or the results together - // - add an explicit condition for the locale - BooleanQuery booleanQuery = new BooleanQuery(); + + MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config + .getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode(); + + if (analysisMode.includesAll()) + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + List locales = searchParameters.getLocales(); + List expandedLocales = new ArrayList(); for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil .getLocale()) : locales)) { - BooleanQuery subQuery = new BooleanQuery(); - Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText); - subQuery.add(contentQuery, Occur.MUST); - StringBuilder builder = new StringBuilder(); - builder.append(expandedFieldName).append(".locale"); - Query localeQuery = getFieldQuery(builder.toString(), locale.toString()); - subQuery.add(localeQuery, Occur.MUST); - booleanQuery.add(subQuery, Occur.SHOULD); + expandedLocales.addAll(MLAnalysisMode.getLocales(analysisMode, locale, true)); } - return booleanQuery; + + if (expandedLocales.size() > 0) + { + BooleanQuery booleanQuery = new BooleanQuery(); + Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText); + if (contentQuery != null) + { + booleanQuery.add(contentQuery, Occur.MUST); + BooleanQuery subQuery = new BooleanQuery(); + for (Locale locale : (expandedLocales)) + { + StringBuilder builder = new StringBuilder(); + builder.append(expandedFieldName).append(".locale"); + String localeString = locale.toString(); + if (localeString.indexOf("*") == -1) + { + Query localeQuery = getFieldQuery(builder.toString(), localeString); + if (localeQuery != null) + { + subQuery.add(localeQuery, Occur.SHOULD); + } + } + else + { + Query localeQuery = getWildcardQuery(builder.toString(), localeString); + if (localeQuery != null) + { + subQuery.add(localeQuery, Occur.SHOULD); + } + } + } + booleanQuery.add(subQuery, Occur.MUST); + } + return booleanQuery; + } + else + { + return subQueryBuilder.getQuery(expandedFieldName, queryText); + } + } else { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java index a8bcf23e62..9ed193153d 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java @@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2 } Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser( - dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters); + dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters, getLuceneConfig()); ClosingIndexSearcher searcher = getSearcher(indexer); if (searcher == null) { diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java index d04dd0ca3b..fa46e827fc 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java @@ -2543,6 +2543,14 @@ public class LuceneTest2 extends TestCase // Test stop words are equivalent + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"the\"", null, null); + assertEquals(0, results.length()); + results.close(); + + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"and\"", null, null); + assertEquals(0, results.length()); + results.close(); + results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null); assertEquals(1, results.length()); results.close(); @@ -2685,11 +2693,41 @@ public class LuceneTest2 extends TestCase results = searcher.query(sp); assertEquals(1, results.length()); results.close(); + + // locale serach in en_US for en_UK + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setQuery("d\\:content:\"fox\""); + sp.addLocale(Locale.US); + results = searcher.query(sp); + assertEquals(1, results.length()); + results.close(); // Direct ML tests QName mlQName = QName.createQName(TEST_NAMESPACE, "ml"); + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":and"); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + + + sp = new SearchParameters(); + sp.addStore(rootNodeRef.getStoreRef()); + sp.setLanguage("lucene"); + sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY); + sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":\"and\""); + results = searcher.query(sp); + assertEquals(0, results.length()); + results.close(); + sp = new SearchParameters(); sp.addStore(rootNodeRef.getStoreRef()); sp.setLanguage("lucene"); diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java index 2c7f4038ee..996ce7c1fd 100644 --- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java +++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java @@ -3,6 +3,7 @@ package org.alfresco.repo.search.impl.lucene.analysis; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.Locale; @@ -38,91 +39,20 @@ public class MLTokenDuplicator extends Tokenizer this.source = source; this.locale = locale; - boolean l = locale.getLanguage().length() != 0; - boolean c = locale.getCountry().length() != 0; - boolean v = locale.getVariant().length() != 0; - - prefixes = new HashSet(4); - if (mlAnalaysisMode.includesAll()) + Collection locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false); + prefixes = new HashSet(locales.size()); + for(Locale toAdd : locales) { - prefixes.add(""); - } - - if (mlAnalaysisMode.includesExact()) - { - StringBuffer result = new StringBuffer(); - result.append("{").append(locale.toString()).append("}"); - prefixes.add(result.toString()); - } - - if (mlAnalaysisMode.includesContaining()) - { - if (v) + String localeString = toAdd.toString(); + if(localeString.length() == 0) { - Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), ""); - StringBuffer result = new StringBuffer(); - result.append("{").append(noVarient.toString()).append("}"); - prefixes.add(result.toString()); - - Locale noCountry = new Locale(locale.getLanguage(), "", ""); - result = new StringBuffer(); - result.append("{").append(noCountry.toString()).append("}"); - prefixes.add(result.toString()); + prefixes.add(""); } - if (c) + else { - Locale noCountry = new Locale(locale.getLanguage(), "", ""); - StringBuffer result = new StringBuffer(); - result.append("{").append(noCountry.toString()).append("}"); - prefixes.add(result.toString()); - } - } - - if (mlAnalaysisMode.includesContained()) - { - // varients have not contained - if (!v) - { - if (!c) - { - if (!l) - { - // All - for (Locale toAdd : Locale.getAvailableLocales()) - { - StringBuffer result = new StringBuffer(); - result.append("{").append(toAdd.toString()).append("}"); - prefixes.add(result.toString()); - } - } - else - { - // All that match language - for (Locale toAdd : Locale.getAvailableLocales()) - { - if (locale.getLanguage().equals(toAdd.getLanguage())) - { - StringBuffer result = new StringBuffer(); - result.append("{").append(toAdd.toString()).append("}"); - prefixes.add(result.toString()); - } - } - } - } - else - { - // All that match language and country - for (Locale toAdd : Locale.getAvailableLocales()) - { - if ((locale.getLanguage().equals(toAdd.getLanguage())) - && (locale.getCountry().equals(toAdd.getCountry()))) - { - StringBuffer result = new StringBuffer(); - result.append("{").append(toAdd.toString()).append("}"); - prefixes.add(result.toString()); - } - } - } + StringBuilder builder = new StringBuilder(16); + builder.append("{").append(localeString).append("}"); + prefixes.add(builder.toString()); } } if(s_logger.isDebugEnabled())