diff --git a/config/alfresco/core-services-context.xml b/config/alfresco/core-services-context.xml
index 04c614f23a..f381a269c1 100644
--- a/config/alfresco/core-services-context.xml
+++ b/config/alfresco/core-services-context.xml
@@ -384,6 +384,12 @@
${lucene.lock.poll.interval}
+
+
+ EXACT_LANGUAGE_AND_ALL
+
+
+ EXACT_LANGUAGE_AND_ALL
diff --git a/config/alfresco/extension/language-specific-index-and-search-context.xml.sample b/config/alfresco/extension/language-specific-index-and-search-context.xml.sample
new file mode 100644
index 0000000000..7082320a76
--- /dev/null
+++ b/config/alfresco/extension/language-specific-index-and-search-context.xml.sample
@@ -0,0 +1,71 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${dir.indexes}
+
+
+
+
+
+
+
+
+ ${lucene.maxAtomicTransformationTime}
+
+
+ ${lucene.query.maxClauses}
+
+
+ ${lucene.indexer.batchSize}
+
+
+ ${dir.indexes.lock}
+
+
+ ${lucene.indexer.maxFieldLength}
+
+
+ ${lucene.write.lock.timeout}
+
+
+ ${lucene.commit.lock.timeout}
+
+
+ ${lucene.lock.poll.interval}
+
+
+ EXACT_LANGUAGE_AND_ALL
+
+
+ EXACT_LANGUAGE
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/source/java/org/alfresco/repo/search/MLAnalysisMode.java b/source/java/org/alfresco/repo/search/MLAnalysisMode.java
index cd7c32dd24..779debfa92 100644
--- a/source/java/org/alfresco/repo/search/MLAnalysisMode.java
+++ b/source/java/org/alfresco/repo/search/MLAnalysisMode.java
@@ -1,17 +1,21 @@
package org.alfresco.repo.search;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Locale;
+
import org.alfresco.error.AlfrescoRuntimeException;
/**
- * Enum to specify how multi-lingual properties should be treate for indexing and search.
+ * Enum to specify how multi-lingual properties should be treated for indexing and search. Note that the locale
+ * new Locale("", "", "") is used to indicate all locales.
*
* @author andyh
- *
*/
public enum MLAnalysisMode
{
/**
- * Only exact locale is used.
+ * Only the exact locale is used.
*/
LOCALE_ONLY
{
@@ -19,23 +23,46 @@ public enum MLAnalysisMode
{
return false;
}
+
public boolean includesContained()
{
return false;
}
+
public boolean includesContaining()
{
return false;
}
+
public boolean includesExact()
{
return true;
}
-
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
+
},
-
+
/**
- * Only the exact locale and no local === all lnaguages
+ * The exact locale plus the "all" locale, new Locale("", "", ""), which stands for all languages.
*/
LOCALE_AND_ALL
{
@@ -43,23 +70,46 @@ public enum MLAnalysisMode
{
return true;
}
+
public boolean includesContained()
{
return false;
}
+
public boolean includesContaining()
{
return false;
}
+
public boolean includesExact()
{
return true;
}
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
},
-
+
/**
- * Expand the locale to include all the locales that contain it.
- * en_GB would be en_GB, en, but not all languages
+ * Expand the locale to include all the locales that contain it. So "en_GB" would be "en_GB", "en", but not all
+ * languages "".
*/
LOCALE_AND_ALL_CONTAINING_LOCALES
{
@@ -67,23 +117,45 @@ public enum MLAnalysisMode
{
return false;
}
+
public boolean includesContained()
{
return false;
}
+
public boolean includesContaining()
{
return true;
}
+
public boolean includesExact()
{
return true;
}
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
},
-
+
/**
- * Expand the locale to include all the locales that contain it.
- * en_GB would be en_GB, en, and all.
+ * Expand the locale to include all the locales that contain it. "en_GB" would be "en_GB", "en", and all "".
*/
LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL
{
@@ -91,23 +163,45 @@ public enum MLAnalysisMode
{
return true;
}
+
public boolean includesContained()
{
return false;
}
+
public boolean includesContaining()
{
return true;
}
+
public boolean includesExact()
{
return true;
}
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
},
-
+
/**
- * Expand to all the locales that are contained by this.
- * en would expand to en, en_GB, en_US, ....
+ * Expand to all the locales that are contained by this. "en" would expand to "en", "en_GB", "en_US", ....
*/
LOCALE_AND_ALL_CONTAINED_LOCALES
{
@@ -115,22 +209,45 @@ public enum MLAnalysisMode
{
return false;
}
+
public boolean includesContained()
{
return true;
}
+
public boolean includesContaining()
{
return false;
}
+
public boolean includesExact()
{
return true;
}
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
},
-
+
/**
- * No prefix only
+ * Just the all locale, "", === new Locale("", "", "")
*/
ALL_ONLY
{
@@ -138,38 +255,652 @@ public enum MLAnalysisMode
{
return true;
}
+
public boolean includesContained()
{
return false;
}
+
public boolean includesContaining()
{
return false;
}
+
public boolean includesExact()
{
return false;
}
+
+ public boolean includesAllLanguageMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactLanguageMatch()
+ {
+ return false;
+ }
+
+ public boolean includesAllCountryMatches()
+ {
+ return false;
+ }
+
+ public boolean includesExactCountryMatch()
+ {
+ return false;
+ }
+ },
+
+ /**
+  * Matches on language alone: a locale expands to every locale that shares its language, whatever the country or
+  * variant. "en_GB" would expand to "en", "en_GB", "en_US", ....
+  */
+ ALL_LANGUAGES
+ {
+     // The single expansion this mode switches on.
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesAll()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Every locale that shares the language of the given locale, plus the "all" locale new Locale("", "", "").
+  */
+ ALL_LANGUAGES_AND_ALL
+ {
+     // The two expansions this mode switches on.
+     @Override
+     public boolean includesAll()
+     {
+         return true;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Reduces a locale to its language only: "en_GB" becomes "en". The country-specific variants of the language are
+  * not included.
+  */
+ EXACT_LANGUAGE
+ {
+     // The single expansion this mode switches on.
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesAll()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * The language of the locale only ("en_GB" becomes "en", without the country-specific variants of the language),
+  * plus the "all" locale new Locale("", "", "").
+  */
+ EXACT_LANGUAGE_AND_ALL
+ {
+     // The two expansions this mode switches on.
+     @Override
+     public boolean includesAll()
+     {
+         return true;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Matches on language and country only, without the variants of that country: "en_GB" stays "en_GB".
+  * <p>
+  * NOTE: the constant name is misspelled (COUNRTY). It is left as is because existing code refers to it by this
+  * name.
+  */
+ EXACT_COUNRTY
+ {
+     // The single expansion this mode switches on.
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesAll()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Language and country only (without the variants of that country), plus the "all" locale
+  * new Locale("", "", "").
+  */
+ EXACT_COUNTRY_AND_ALL
+ {
+     // The two expansions this mode switches on.
+     @Override
+     public boolean includesAll()
+     {
+         return true;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Every locale that matches the language and, when the locale has one, the country of the given locale.
+  */
+ ALL_COUNTRIES
+ {
+     // The single expansion this mode switches on.
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesAll()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ },
+
+ /**
+  * Every locale that matches the language and, when the locale has one, the country of the given locale, plus the
+  * "all" locale new Locale("", "", "").
+  */
+ ALL_COUNTRIES_AND_ALL
+ {
+     // The two expansions this mode switches on.
+     @Override
+     public boolean includesAll()
+     {
+         return true;
+     }
+
+     @Override
+     public boolean includesAllCountryMatches()
+     {
+         return true;
+     }
+
+     // Every other expansion stays off.
+     @Override
+     public boolean includesExact()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContaining()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesContained()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesAllLanguageMatches()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactLanguageMatch()
+     {
+         return false;
+     }
+
+     @Override
+     public boolean includesExactCountryMatch()
+     {
+         return false;
+     }
+ };
-
+
public static MLAnalysisMode getMLAnalysisMode(String mode)
{
- for(MLAnalysisMode test : MLAnalysisMode.values())
+ for (MLAnalysisMode test : MLAnalysisMode.values())
{
- if(test.toString().equalsIgnoreCase(mode))
+ if (test.toString().equalsIgnoreCase(mode))
{
return test;
}
}
- throw new AlfrescoRuntimeException("Unknown ML Analysis mode "+mode);
+ throw new AlfrescoRuntimeException("Unknown ML Analysis mode " + mode);
}
-
+
public abstract boolean includesAll();
-
+
public abstract boolean includesContained();
-
+
public abstract boolean includesContaining();
-
+
public abstract boolean includesExact();
-
+
+ /**
+  * @return true if the mode expands a locale to all locales with the same language, whatever their country or
+  *         variant
+  */
+ public abstract boolean includesAllLanguageMatches();
+
+ /**
+  * @return true if the mode reduces a locale to its language only, e.g. "en_GB" to "en"
+  */
+ public abstract boolean includesExactLanguageMatch();
+
+ /**
+  * @return true if the mode expands a locale to all locales with the same language and, when the locale has a
+  *         country, the same country
+  */
+ public abstract boolean includesAllCountryMatches();
+
+ /**
+  * @return true if the mode reduces a locale to its language and country; when the locale has no country,
+  *         getLocales expands it to all locales of the language instead
+  */
+ public abstract boolean includesExactCountryMatch();
+
+ /**
+  * Expands a locale into the set of locales selected by an analysis mode.
+  * <p>
+  * Each <code>includes...</code> flag of the mode contributes its own locales; the contributions are merged into
+  * one duplicate-free collection.
+  *
+  * @param mlAnalaysisMode the mode whose flags select the expansions to apply
+  * @param locale the locale to expand
+  * @param withWildcards if true, open-ended expansions are expressed as pattern locales that carry "*" in the
+  *            language, country or variant position; if false, they are enumerated from
+  *            {@link Locale#getAvailableLocales()}
+  * @return the expanded locales
+  * @throws NullPointerException if <code>mlAnalaysisMode</code> or <code>locale</code> is null
+  */
+ public static Collection<Locale> getLocales(MLAnalysisMode mlAnalaysisMode, Locale locale, boolean withWildcards)
+ {
+     Collection<Locale> locales = new HashSet<Locale>();
+
+     String language = locale.getLanguage();
+     String country = locale.getCountry();
+     boolean hasLanguage = language.length() != 0;
+     boolean hasCountry = country.length() != 0;
+     boolean hasVariant = locale.getVariant().length() != 0;
+
+     if (mlAnalaysisMode.includesAll())
+     {
+         addAllLocale(locales, withWildcards);
+     }
+
+     if (mlAnalaysisMode.includesExact())
+     {
+         locales.add(locale);
+     }
+
+     if (mlAnalaysisMode.includesContaining())
+     {
+         // Walk outwards: drop the variant, then the country.
+         if (hasVariant)
+         {
+             locales.add(new Locale(language, country, ""));
+         }
+         if (hasVariant || hasCountry)
+         {
+             locales.add(new Locale(language, "", ""));
+         }
+     }
+
+     // A locale that already has a variant contains nothing more specific.
+     if (mlAnalaysisMode.includesContained() && !hasVariant)
+     {
+         if (!withWildcards)
+         {
+             // Empty language/country match anything, so this covers "all", "by language" and "by country".
+             addAvailableLocales(locales, language, country);
+         }
+         else if (hasLanguage || hasCountry)
+         {
+             addWithNextLevelWildcard(locales, language, country);
+         }
+         else
+         {
+             addAllLocale(locales, true);
+         }
+     }
+
+     if (mlAnalaysisMode.includesAllLanguageMatches())
+     {
+         if (withWildcards)
+         {
+             addWithNextLevelWildcard(locales, language, "");
+         }
+         else
+         {
+             addAvailableLocales(locales, language, "");
+         }
+     }
+
+     if (mlAnalaysisMode.includesExactLanguageMatch())
+     {
+         // Same result with or without wildcards: an exact language match is never widened.
+         locales.add(new Locale(language, "", ""));
+     }
+
+     if (mlAnalaysisMode.includesAllCountryMatches())
+     {
+         if (withWildcards)
+         {
+             addWithNextLevelWildcard(locales, language, country);
+         }
+         else
+         {
+             addAvailableLocales(locales, language, country);
+         }
+     }
+
+     if (mlAnalaysisMode.includesExactCountryMatch())
+     {
+         if (hasCountry)
+         {
+             locales.add(new Locale(language, country, ""));
+         }
+         else if (withWildcards)
+         {
+             // No country to be exact about: fall back to every country of the language.
+             addWithNextLevelWildcard(locales, language, "");
+         }
+         else
+         {
+             addAvailableLocales(locales, language, "");
+         }
+     }
+
+     return locales;
+ }
+
+ /**
+  * Adds the "all" locale and, for wildcard expansion, the "*" language pattern as well.
+  */
+ private static void addAllLocale(Collection<Locale> locales, boolean withWildcards)
+ {
+     locales.add(new Locale("", "", ""));
+     if (withWildcards)
+     {
+         locales.add(new Locale("*", "", ""));
+     }
+ }
+
+ /**
+  * Adds the language/country pair itself plus a pattern with "*" in the country position, or in the variant
+  * position when a country is given.
+  */
+ private static void addWithNextLevelWildcard(Collection<Locale> locales, String language, String country)
+ {
+     locales.add(new Locale(language, country, ""));
+     if (country.length() == 0)
+     {
+         locales.add(new Locale(language, "*", ""));
+     }
+     else
+     {
+         locales.add(new Locale(language, country, "*"));
+     }
+ }
+
+ /**
+  * Adds every locale reported by {@link Locale#getAvailableLocales()} that matches the language and country; an
+  * empty language or country matches anything.
+  */
+ private static void addAvailableLocales(Collection<Locale> locales, String language, String country)
+ {
+     for (Locale candidate : Locale.getAvailableLocales())
+     {
+         boolean languageMatches = language.length() == 0 || language.equals(candidate.getLanguage());
+         boolean countryMatches = country.length() == 0 || country.equals(candidate.getCountry());
+         if (languageMatches && countryMatches)
+         {
+             locales.add(candidate);
+         }
+     }
+ }
}
diff --git a/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java b/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java
new file mode 100644
index 0000000000..9094f580cd
--- /dev/null
+++ b/source/java/org/alfresco/repo/search/MLAnaysisModeExpansionTest.java
@@ -0,0 +1,409 @@
+package org.alfresco.repo.search;
+
+import java.util.HashSet;
+import java.util.Locale;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks how MLAnalysisMode.getLocales expands a locale for each analysis mode, both when open-ended expansions are
+ * enumerated from the installed locales and when they are expressed as "*" wildcard locales.
+ * <p>
+ * Expectations that depend on the locales installed in the running JRE are derived from
+ * {@link Locale#getAvailableLocales()} instead of being hard-coded, because that set differs between JRE releases.
+ */
+public class MLAnaysisModeExpansionTest extends TestCase
+{
+    private static final Locale ALL = new Locale("", "", "");
+
+    private static final Locale ANY_LANGUAGE = new Locale("*", "", "");
+
+    private static final Locale EN = new Locale("en", "", "");
+
+    private static final Locale EN_ANY_COUNTRY = new Locale("en", "*", "");
+
+    private static final Locale EN_GB = new Locale("en", "GB", "");
+
+    private static final Locale EN_GB_ANY_VARIANT = new Locale("en", "GB", "*");
+
+    /** English locales the enumerating expansions are expected to contain. */
+    private static final Locale[] WELL_KNOWN_ENGLISH = { EN, new Locale("en", "AU", ""), EN_GB,
+            new Locale("en", "US", ""), new Locale("en", "ZA", ""), new Locale("en", "CA", ""),
+            new Locale("en", "IE", ""), new Locale("en", "NZ", ""), new Locale("en", "IN", "") };
+
+    public MLAnaysisModeExpansionTest()
+    {
+        super();
+    }
+
+    public MLAnaysisModeExpansionTest(String arg0)
+    {
+        super(arg0);
+    }
+
+    /** Runs the expansion under test and copies the result into a set. */
+    private static HashSet<Locale> expand(MLAnalysisMode mode, Locale locale, boolean withWildcards)
+    {
+        HashSet<Locale> locales = new HashSet<Locale>();
+        locales.addAll(MLAnalysisMode.getLocales(mode, locale, withWildcards));
+        return locales;
+    }
+
+    /** Asserts that the expansion holds the expected locales and nothing else. */
+    private static void assertExactly(HashSet<Locale> actual, Locale... expected)
+    {
+        assertEquals(expected.length, actual.size());
+        for (Locale locale : expected)
+        {
+            assertTrue("Missing " + locale, actual.contains(locale));
+        }
+    }
+
+    /**
+     * Asserts that the expansion equals the installed locales matching the language and (if not empty) the country,
+     * plus any extras.
+     */
+    private static void assertInstalled(HashSet<Locale> actual, String language, String country, Locale... extras)
+    {
+        HashSet<Locale> expected = new HashSet<Locale>();
+        for (Locale installed : Locale.getAvailableLocales())
+        {
+            if (language.equals(installed.getLanguage())
+                    && (country.length() == 0 || country.equals(installed.getCountry())))
+            {
+                expected.add(installed);
+            }
+        }
+        for (Locale extra : extras)
+        {
+            expected.add(extra);
+        }
+        assertEquals(expected, actual);
+    }
+
+    /** Asserts that the expansion is every installed English locale plus any extras. */
+    private static void assertAllEnglish(HashSet<Locale> actual, Locale... extras)
+    {
+        assertInstalled(actual, "en", "", extras);
+        for (Locale wellKnown : WELL_KNOWN_ENGLISH)
+        {
+            assertTrue("Missing " + wellKnown, actual.contains(wellKnown));
+        }
+    }
+
+    /** Baseline shared by several tests: what "en_GB" contains. */
+    private static void assertContainedExpansionOfEnGb(boolean withWildcards)
+    {
+        HashSet<Locale> locales = expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, EN_GB, withWildcards);
+        if (withWildcards)
+        {
+            assertExactly(locales, EN_GB, EN_GB_ANY_VARIANT);
+        }
+        else
+        {
+            assertInstalled(locales, "en", "GB", EN_GB);
+        }
+    }
+
+    public void testIdentity()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_ONLY, Locale.UK, false), Locale.UK);
+    }
+
+    public void testIdentityAndAll()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL, Locale.UK, false), Locale.UK, ALL);
+    }
+
+    public void testAll()
+    {
+        assertExactly(expand(MLAnalysisMode.ALL_ONLY, Locale.UK, false), ALL);
+    }
+
+    public void testContaining()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, Locale.UK, false), EN, EN_GB);
+    }
+
+    public void testContainingAndAll()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, Locale.UK, false), ALL, EN,
+                EN_GB);
+    }
+
+    public void testContained()
+    {
+        assertAllEnglish(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, EN, false), EN);
+    }
+
+    public void testLang()
+    {
+        assertContainedExpansionOfEnGb(false);
+
+        assertAllEnglish(expand(MLAnalysisMode.ALL_LANGUAGES, EN_GB, false));
+
+        assertAllEnglish(expand(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, EN_GB, false), ALL);
+    }
+
+    public void testExactLang()
+    {
+        assertContainedExpansionOfEnGb(false);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_LANGUAGE, EN_GB, false), EN);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, EN_GB, false), ALL, EN);
+    }
+
+    public void testCountry()
+    {
+        assertContainedExpansionOfEnGb(false);
+
+        assertAllEnglish(expand(MLAnalysisMode.ALL_COUNTRIES, EN, false));
+
+        HashSet<Locale> locales = expand(MLAnalysisMode.ALL_COUNTRIES, EN_GB, false);
+        assertInstalled(locales, "en", "GB");
+        assertTrue(locales.contains(EN_GB));
+
+        assertAllEnglish(expand(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, EN, false), ALL);
+
+        locales = expand(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, EN_GB, false);
+        assertInstalled(locales, "en", "GB", ALL);
+        assertTrue(locales.contains(EN_GB));
+    }
+
+    public void testExactCountry()
+    {
+        assertContainedExpansionOfEnGb(false);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNRTY, EN_GB, false), EN_GB);
+
+        assertAllEnglish(expand(MLAnalysisMode.EXACT_COUNRTY, EN, false));
+
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, EN_GB, false), ALL, EN_GB);
+
+        assertAllEnglish(expand(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, EN, false), ALL);
+    }
+
+    public void testIdentityWC()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_ONLY, Locale.UK, true), Locale.UK);
+    }
+
+    public void testIdentityAndAllWC()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL, Locale.UK, true), Locale.UK, ALL, ANY_LANGUAGE);
+    }
+
+    public void testAllWC()
+    {
+        assertExactly(expand(MLAnalysisMode.ALL_ONLY, Locale.UK, true), ALL, ANY_LANGUAGE);
+    }
+
+    public void testContainingWC()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES, Locale.UK, true), EN, EN_GB);
+    }
+
+    public void testContainingAndAllWC()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL, Locale.UK, true), ALL,
+                ANY_LANGUAGE, EN, EN_GB);
+    }
+
+    public void testContainedWC()
+    {
+        assertExactly(expand(MLAnalysisMode.LOCALE_AND_ALL_CONTAINED_LOCALES, EN, true), EN_ANY_COUNTRY, EN);
+    }
+
+    public void testLangWC()
+    {
+        assertContainedExpansionOfEnGb(true);
+
+        assertExactly(expand(MLAnalysisMode.ALL_LANGUAGES, EN_GB, true), EN, EN_ANY_COUNTRY);
+
+        assertExactly(expand(MLAnalysisMode.ALL_LANGUAGES_AND_ALL, EN_GB, true), ALL, ANY_LANGUAGE, EN,
+                EN_ANY_COUNTRY);
+    }
+
+    public void testExactLangWC()
+    {
+        assertContainedExpansionOfEnGb(true);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_LANGUAGE, EN_GB, true), EN);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_LANGUAGE_AND_ALL, EN_GB, true), ALL, ANY_LANGUAGE, EN);
+    }
+
+    public void testCountryWC()
+    {
+        assertContainedExpansionOfEnGb(true);
+
+        assertExactly(expand(MLAnalysisMode.ALL_COUNTRIES, EN, true), EN, EN_ANY_COUNTRY);
+
+        assertExactly(expand(MLAnalysisMode.ALL_COUNTRIES, EN_GB, true), EN_GB, EN_GB_ANY_VARIANT);
+
+        assertExactly(expand(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, EN, true), ALL, ANY_LANGUAGE, EN,
+                EN_ANY_COUNTRY);
+
+        assertExactly(expand(MLAnalysisMode.ALL_COUNTRIES_AND_ALL, EN_GB, true), ALL, ANY_LANGUAGE, EN_GB,
+                EN_GB_ANY_VARIANT);
+    }
+
+    public void testExactCountryWC()
+    {
+        assertContainedExpansionOfEnGb(true);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNRTY, EN_GB, true), EN_GB);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNRTY, EN, true), EN, EN_ANY_COUNTRY);
+
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, EN_GB, true), ALL, ANY_LANGUAGE, EN_GB);
+
+        // This case previously had no size check; exactly these four locales are expected.
+        assertExactly(expand(MLAnalysisMode.EXACT_COUNTRY_AND_ALL, EN, true), ALL, ANY_LANGUAGE, EN,
+                EN_ANY_COUNTRY);
+    }
+
+}
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java
index 5a37e53c6c..96164cc050 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerAndSearcherFactory2.java
@@ -131,9 +131,9 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
private String lockDirectory;
- private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL;
+ private MLAnalysisMode defaultMLIndexAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
- private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.LOCALE_AND_ALL_CONTAINING_LOCALES_AND_ALL;
+ private MLAnalysisMode defaultMLSearchAnalysisMode = MLAnalysisMode.EXACT_LANGUAGE_AND_ALL;
/**
* Private constructor for the singleton TODO: FIt in with IOC
@@ -1170,9 +1170,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLIndexAnalysisMode;
}
- public void setDefaultMLIndexAnalysisMode(String mode)
+ public void setDefaultMLIndexAnalysisMode(MLAnalysisMode mode)
{
- defaultMLIndexAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
+ // The mode now arrives as an MLAnalysisMode, so the former MLAnalysisMode.getMLAnalysisMode(String) lookup is not needed here.
+ defaultMLIndexAnalysisMode = mode;
}
public MLAnalysisMode getDefaultMLSearchAnalysisMode()
@@ -1180,9 +1181,10 @@ public class LuceneIndexerAndSearcherFactory2 implements LuceneIndexerAndSearche
return defaultMLSearchAnalysisMode;
}
- public void setDefaultMLSearchAnalysisMode(String mode)
+ public void setDefaultMLSearchAnalysisMode(MLAnalysisMode mode)
{
- defaultMLSearchAnalysisMode = MLAnalysisMode.getMLAnalysisMode(mode);
+ // The mode now arrives as an MLAnalysisMode, so the former MLAnalysisMode.getMLAnalysisMode(String) lookup is not needed here.
+ defaultMLSearchAnalysisMode = mode;
}
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java
index 5f22803c09..e7cda33784 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneQueryParser.java
@@ -18,6 +18,7 @@ package org.alfresco.repo.search.impl.lucene;
import java.io.IOException;
import java.io.StringReader;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@@ -26,6 +27,7 @@ import java.util.Locale;
import java.util.Set;
import org.alfresco.i18n.I18NUtil;
+import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.repo.search.impl.lucene.query.PathQuery;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
@@ -59,6 +61,8 @@ public class LuceneQueryParser extends QueryParser
private SearchParameters searchParameters;
+ private LuceneConfig config;
+
/**
* Parses a query string, returning a {@link org.apache.lucene.search.Query}.
*
@@ -68,12 +72,13 @@ public class LuceneQueryParser extends QueryParser
* the default field for query terms.
* @param analyzer
* used to find terms in the query text.
+ * @param config
* @throws ParseException
* if the parsing fails
*/
static public Query parse(String query, String field, Analyzer analyzer,
NamespacePrefixResolver namespacePrefixResolver, DictionaryService dictionaryService,
- Operator defaultOperator, SearchParameters searchParameters) throws ParseException
+ Operator defaultOperator, SearchParameters searchParameters, LuceneConfig config) throws ParseException
{
if (s_logger.isDebugEnabled())
{
@@ -84,6 +89,7 @@ public class LuceneQueryParser extends QueryParser
parser.setNamespacePrefixResolver(namespacePrefixResolver);
parser.setDictionaryService(dictionaryService);
parser.setSearchParameters(searchParameters);
+ parser.setLuceneConfig(config);
// TODO: Apply locale contstraints at the top level if required for the non ML doc types.
Query result = parser.parse(query);
if (s_logger.isDebugEnabled())
@@ -93,6 +99,11 @@ public class LuceneQueryParser extends QueryParser
return result;
}
+ private void setLuceneConfig(LuceneConfig config)
+ {
+ this.config = config; // read later for the default ML search analysis mode when the search parameters specify none
+ }
+
private void setSearchParameters(SearchParameters searchParameters)
{
this.searchParameters = searchParameters;
@@ -158,7 +169,10 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -168,7 +182,10 @@ public class LuceneQueryParser extends QueryParser
for (String fieldName : text)
{
Query part = getFieldQuery(fieldName, queryText);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -251,7 +268,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses)
{
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
- booleanQuery.add(termQuery, Occur.SHOULD);
+ if (termQuery != null)
+ {
+ booleanQuery.add(termQuery, Occur.SHOULD);
+ }
}
return booleanQuery;
}
@@ -333,7 +353,10 @@ public class LuceneQueryParser extends QueryParser
for (QName qname : subclasses)
{
TermQuery termQuery = new TermQuery(new Term(field, qname.toString()));
- booleanQuery.add(termQuery, Occur.SHOULD);
+ if (termQuery != null)
+ {
+ booleanQuery.add(termQuery, Occur.SHOULD);
+ }
}
return booleanQuery;
}
@@ -369,7 +392,8 @@ public class LuceneQueryParser extends QueryParser
}
else if (field.startsWith("@"))
{
- return attributeQueryBuilder(field, queryText, new FieldQuery());
+ Query query = attributeQueryBuilder(field, queryText, new FieldQuery());
+ return query;
}
else if (field.equals("ALL"))
{
@@ -414,9 +438,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
- query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
- query.add(presenceQuery, Occur.MUST_NOT);
+ if ((typeQuery != null) && (presenceQuery != null))
+ {
+ query.add(typeQuery, Occur.MUST);
+ query.add(presenceQuery, Occur.MUST_NOT);
+ }
return query;
}
else
@@ -435,9 +462,12 @@ public class LuceneQueryParser extends QueryParser
QName container = pd.getContainerClass().getName();
BooleanQuery query = new BooleanQuery();
Query typeQuery = getFieldQuery("TYPE", container.toString());
- query.add(typeQuery, Occur.MUST);
Query presenceQuery = getWildcardQuery("@" + qname.toString(), "*");
- query.add(presenceQuery, Occur.MUST);
+ if ((typeQuery != null) && (presenceQuery != null))
+ {
+ query.add(typeQuery, Occur.MUST);
+ query.add(presenceQuery, Occur.MUST);
+ }
return query;
}
else
@@ -455,7 +485,10 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFieldQuery("@" + qname.toString(), queryText);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -585,7 +618,10 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getPrefixQuery("@" + qname.toString(), termStr);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -611,7 +647,10 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getWildcardQuery("@" + qname.toString(), termStr);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -637,7 +676,10 @@ public class LuceneQueryParser extends QueryParser
{
// The super implementation will create phrase queries etc if required
Query part = getFuzzyQuery("@" + qname.toString(), termStr, minSimilarity);
- query.add(part, Occur.SHOULD);
+ if (part != null)
+ {
+ query.add(part, Occur.SHOULD);
+ }
}
return query;
}
@@ -772,7 +814,10 @@ public class LuceneQueryParser extends QueryParser
StringBuilder builder = new StringBuilder(queryText.length() + 10);
builder.append("\u0000").append(locale.toString()).append("\u0000").append(queryText);
Query subQuery = subQueryBuilder.getQuery(expandedFieldName, builder.toString());
- booleanQuery.add(subQuery, Occur.SHOULD);
+ if (subQuery != null)
+ {
+ booleanQuery.add(subQuery, Occur.SHOULD);
+ }
}
return booleanQuery;
}
@@ -781,21 +826,62 @@ public class LuceneQueryParser extends QueryParser
{
// Build a sub query for each locale and or the results together -
// - add an explicit condition for the locale
- BooleanQuery booleanQuery = new BooleanQuery();
+
+ MLAnalysisMode analysisMode = searchParameters.getMlAnalaysisMode() == null ? config
+ .getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode();
+
+ if (analysisMode.includesAll())
+ {
+ return subQueryBuilder.getQuery(expandedFieldName, queryText);
+ }
+
List locales = searchParameters.getLocales();
+ List expandedLocales = new ArrayList();
for (Locale locale : (((locales == null) || (locales.size() == 0)) ? Collections.singletonList(I18NUtil
.getLocale()) : locales))
{
- BooleanQuery subQuery = new BooleanQuery();
- Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
- subQuery.add(contentQuery, Occur.MUST);
- StringBuilder builder = new StringBuilder();
- builder.append(expandedFieldName).append(".locale");
- Query localeQuery = getFieldQuery(builder.toString(), locale.toString());
- subQuery.add(localeQuery, Occur.MUST);
- booleanQuery.add(subQuery, Occur.SHOULD);
+ expandedLocales.addAll(MLAnalysisMode.getLocales(analysisMode, locale, true));
}
- return booleanQuery;
+
+ if (expandedLocales.size() > 0)
+ {
+ BooleanQuery booleanQuery = new BooleanQuery();
+ Query contentQuery = subQueryBuilder.getQuery(expandedFieldName, queryText);
+ if (contentQuery != null)
+ {
+ booleanQuery.add(contentQuery, Occur.MUST);
+ BooleanQuery subQuery = new BooleanQuery();
+ for (Locale locale : (expandedLocales))
+ {
+ StringBuilder builder = new StringBuilder();
+ builder.append(expandedFieldName).append(".locale");
+ String localeString = locale.toString();
+ if (localeString.indexOf("*") == -1)
+ {
+ Query localeQuery = getFieldQuery(builder.toString(), localeString);
+ if (localeQuery != null)
+ {
+ subQuery.add(localeQuery, Occur.SHOULD);
+ }
+ }
+ else
+ {
+ Query localeQuery = getWildcardQuery(builder.toString(), localeString);
+ if (localeQuery != null)
+ {
+ subQuery.add(localeQuery, Occur.SHOULD);
+ }
+ }
+ }
+ booleanQuery.add(subQuery, Occur.MUST);
+ }
+ return booleanQuery;
+ }
+ else
+ {
+ return subQueryBuilder.getQuery(expandedFieldName, queryText);
+ }
+
}
else
{
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java
index a8bcf23e62..9ed193153d 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneSearcherImpl2.java
@@ -215,7 +215,7 @@ public class LuceneSearcherImpl2 extends LuceneBase2 implements LuceneSearcher2
}
Query query = LuceneQueryParser.parse(parameterisedQueryString, DEFAULT_FIELD, new LuceneAnalyser(
- dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters);
+ dictionaryService, searchParameters.getMlAnalaysisMode() == null ? getLuceneConfig().getDefaultMLSearchAnalysisMode() : searchParameters.getMlAnalaysisMode()), namespacePrefixResolver, dictionaryService, defaultOperator, searchParameters, getLuceneConfig());
ClosingIndexSearcher searcher = getSearcher(indexer);
if (searcher == null)
{
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java
index d04dd0ca3b..fa46e827fc 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java
@@ -2543,6 +2543,14 @@ public class LuceneTest2 extends TestCase
// Test stop words are equivalent
+ results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"the\"", null, null);
+ assertEquals(0, results.length());
+ results.close();
+
+ results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"and\"", null, null);
+ assertEquals(0, results.length());
+ results.close();
+
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "TEXT:\"over the lazy\"", null, null);
assertEquals(1, results.length());
results.close();
@@ -2685,11 +2693,41 @@ public class LuceneTest2 extends TestCase
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
+
+ // locale search in en_US for en_UK
+
+ sp = new SearchParameters();
+ sp.addStore(rootNodeRef.getStoreRef());
+ sp.setLanguage("lucene");
+ sp.setQuery("d\\:content:\"fox\"");
+ sp.addLocale(Locale.US);
+ results = searcher.query(sp);
+ assertEquals(1, results.length());
+ results.close();
// Direct ML tests
QName mlQName = QName.createQName(TEST_NAMESPACE, "ml");
+ sp = new SearchParameters();
+ sp.addStore(rootNodeRef.getStoreRef());
+ sp.setLanguage("lucene");
+ sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
+ sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":and");
+ results = searcher.query(sp);
+ assertEquals(0, results.length());
+ results.close();
+
+
+ sp = new SearchParameters();
+ sp.addStore(rootNodeRef.getStoreRef());
+ sp.setLanguage("lucene");
+ sp.setMlAnalaysisMode(MLAnalysisMode.ALL_ONLY);
+ sp.setQuery("@" + LuceneQueryParser.escape(mlQName.toString()) + ":\"and\"");
+ results = searcher.query(sp);
+ assertEquals(0, results.length());
+ results.close();
+
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
diff --git a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java
index 2c7f4038ee..996ce7c1fd 100644
--- a/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/analysis/MLTokenDuplicator.java
@@ -3,6 +3,7 @@ package org.alfresco.repo.search.impl.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
@@ -38,91 +39,20 @@ public class MLTokenDuplicator extends Tokenizer
this.source = source;
this.locale = locale;
- boolean l = locale.getLanguage().length() != 0;
- boolean c = locale.getCountry().length() != 0;
- boolean v = locale.getVariant().length() != 0;
-
- prefixes = new HashSet(4);
- if (mlAnalaysisMode.includesAll())
+ Collection locales = MLAnalysisMode.getLocales(mlAnalaysisMode, locale, false);
+ prefixes = new HashSet(locales.size());
+ for(Locale toAdd : locales)
{
- prefixes.add("");
- }
-
- if (mlAnalaysisMode.includesExact())
- {
- StringBuffer result = new StringBuffer();
- result.append("{").append(locale.toString()).append("}");
- prefixes.add(result.toString());
- }
-
- if (mlAnalaysisMode.includesContaining())
- {
- if (v)
+ String localeString = toAdd.toString();
+ if(localeString.length() == 0)
{
- Locale noVarient = new Locale(locale.getLanguage(), locale.getCountry(), "");
- StringBuffer result = new StringBuffer();
- result.append("{").append(noVarient.toString()).append("}");
- prefixes.add(result.toString());
-
- Locale noCountry = new Locale(locale.getLanguage(), "", "");
- result = new StringBuffer();
- result.append("{").append(noCountry.toString()).append("}");
- prefixes.add(result.toString());
+ prefixes.add("");
}
- if (c)
+ else
{
- Locale noCountry = new Locale(locale.getLanguage(), "", "");
- StringBuffer result = new StringBuffer();
- result.append("{").append(noCountry.toString()).append("}");
- prefixes.add(result.toString());
- }
- }
-
- if (mlAnalaysisMode.includesContained())
- {
- // varients have not contained
- if (!v)
- {
- if (!c)
- {
- if (!l)
- {
- // All
- for (Locale toAdd : Locale.getAvailableLocales())
- {
- StringBuffer result = new StringBuffer();
- result.append("{").append(toAdd.toString()).append("}");
- prefixes.add(result.toString());
- }
- }
- else
- {
- // All that match language
- for (Locale toAdd : Locale.getAvailableLocales())
- {
- if (locale.getLanguage().equals(toAdd.getLanguage()))
- {
- StringBuffer result = new StringBuffer();
- result.append("{").append(toAdd.toString()).append("}");
- prefixes.add(result.toString());
- }
- }
- }
- }
- else
- {
- // All that match language and country
- for (Locale toAdd : Locale.getAvailableLocales())
- {
- if ((locale.getLanguage().equals(toAdd.getLanguage()))
- && (locale.getCountry().equals(toAdd.getCountry())))
- {
- StringBuffer result = new StringBuffer();
- result.append("{").append(toAdd.toString()).append("}");
- prefixes.add(result.toString());
- }
- }
- }
+ StringBuilder builder = new StringBuilder(16);
+ builder.append("{").append(localeString).append("}");
+ prefixes.add(builder.toString());
}
}
if(s_logger.isDebugEnabled())