Merged V2.0 to HEAD

svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5114 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5115 . - AR-942 svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5131 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5132 . - AR-1244 svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5127 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5128 . - AWC-1138 git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5165 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
2025-07-24 17:32:48 +00:00 · 2007-02-16 05:30:09 +00:00
parent ade659112d
commit 22f523d12b
8 changed files with 350 additions and 40 deletions
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneAnalyser.java
@@ -21,6 +21,7 @@ import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;

+import org.alfresco.model.ContentModel;
 import org.alfresco.repo.search.MLAnalysisMode;
 import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
 import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
@@ -37,9 +38,8 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;

 /**
- * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
- * should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
- * when adding the field to the document)
+ * Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that
+ * require no tokenisation. (tokenise should be set to false when adding the field to the document)
 * 
 * @author andyh
 */
@@ -47,7 +47,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
 public class LuceneAnalyser extends Analyzer
 {
    private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class);
-    
+
    // Dictinary service to look up analyser classes by data type and locale.
    private DictionaryService dictionaryService;

@@ -156,35 +156,45 @@ public class LuceneAnalyser extends Analyzer
            else
            {
                QName propertyQName = QName.createQName(fieldName.substring(1));
-                PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
-                if (propertyDef != null)
+                // Temporary fix for person and user uids
+
+                if (propertyQName.equals(ContentModel.PROP_USER_USERNAME)
+                        || propertyQName.equals(ContentModel.PROP_USERNAME))
                {
-                    if (propertyDef.isTokenisedInIndex())
+                    analyser = new VerbatimAnalyser(true);
+                }
+                else
+                {
+                    PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
+                    if (propertyDef != null)
                    {
-                        DataTypeDefinition dataType = propertyDef.getDataType();
-                        if (dataType.getName().equals(DataTypeDefinition.CONTENT))
+                        if (propertyDef.isTokenisedInIndex())
                        {
-                            analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
-                        }
-                        else if (dataType.getName().equals(DataTypeDefinition.TEXT))
-                        {
-                            analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
+                            DataTypeDefinition dataType = propertyDef.getDataType();
+                            if (dataType.getName().equals(DataTypeDefinition.CONTENT))
+                            {
+                                analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
+                            }
+                            else if (dataType.getName().equals(DataTypeDefinition.TEXT))
+                            {
+                                analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
+                            }
+                            else
+                            {
+                                analyser = loadAnalyzer(dataType);
+                            }
                        }
                        else
                        {
-                            analyser = loadAnalyzer(dataType);
+                            analyser = new VerbatimAnalyser();
                        }
                    }
                    else
                    {
-                        analyser = new VerbatimAnalyser();
+                        DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
+                        analyser = loadAnalyzer(dataType);
                    }
                }
-                else
-                {
-                    DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
-                    analyser = loadAnalyzer(dataType);
-                }
            }
        }
        else
@@ -208,9 +218,9 @@ public class LuceneAnalyser extends Analyzer
        {
            Class<?> clazz = Class.forName(analyserClassName);
            Analyzer analyser = (Analyzer) clazz.newInstance();
-            if(s_logger.isDebugEnabled())
+            if (s_logger.isDebugEnabled())
            {
-                s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName());
+                s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName());
            }
            return analyser;
        }
@@ -232,8 +242,7 @@ public class LuceneAnalyser extends Analyzer
    }

    /**
-     * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
-     * languages etc.
+     * For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc.
     */
    @Override
    public int getPositionIncrementGap(String fieldName)
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneIndexerImpl2.java
@@ -65,6 +65,7 @@ import org.alfresco.service.cmr.repository.NodeService;
 import org.alfresco.service.cmr.repository.Path;
 import org.alfresco.service.cmr.repository.StoreRef;
 import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
+import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
 import org.alfresco.service.cmr.search.ResultSetRow;
 import org.alfresco.service.cmr.search.SearchParameters;
 import org.alfresco.service.namespace.QName;
@@ -97,6 +98,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2

    public static final String NOT_INDEXED_CONTENT_MISSING = "nicm";

+    public static final String NOT_INDEXED_NO_TYPE_CONVERSION = "nintc";
+
    private static Logger s_logger = Logger.getLogger(LuceneIndexerImpl2.class);

    /**
@@ -121,8 +124,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
    private long maxAtomicTransformationTime = 20;

    /**
-     * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO:
-     * Consider if this information needs to be persisted for recovery
+     * A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: Consider if this information needs to be persisted for recovery
     */
    private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>();

@@ -141,8 +143,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
    private boolean isModified = false;

    /**
-     * Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just
-     * fixing up non atomically indexed things from one or more other updates.
+     * Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non-atomically indexed things from one or more other
+     * updates.
     */

    private Boolean isFTSUpdate = null;
@@ -689,8 +691,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
    }

    /**
-     * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper
-     * serialisation against the index as would a data base transaction.
+     * Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper serialisation against the index as would a data base transaction.
     * 
     * @return
     */
@@ -804,8 +805,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
    }

    /**
-     * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow
-     * roll back.
+     * Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back.
     */

    public void setRollbackOnly()
@@ -1534,7 +1534,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
        // convert value to String
        for (Serializable serializableValue : DefaultTypeConverter.INSTANCE.getCollection(Serializable.class, value))
        {
-            String strValue =  DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
+            String strValue = null;
+            try
+            {
+                strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
+            }
+            catch (TypeConversionException e)
+            {
+                doc.add(new Field(attributeName, NOT_INDEXED_NO_TYPE_CONVERSION, Field.Store.NO,
+                        Field.Index.UN_TOKENIZED, Field.TermVector.NO));
+                continue;
+            }
            if (strValue == null)
            {
                // nothing to index
@@ -1727,6 +1737,12 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
                    }
                    else if (isText)
                    {
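+                        // Temporary special case for uids: index the raw value. NOTE(review): there is no else/continue here, so the locale handling below appears to add a second field for the same attribute - confirm this is intended.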
+                        if(propertyName.equals(ContentModel.PROP_USER_USERNAME) || propertyName.equals(ContentModel.PROP_USERNAME))
+                        {
+                            doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
+                        }
+                        
                        // TODO: Use the node locale in preferanced to the system locale
                        Locale locale = null;

@@ -1740,10 +1756,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
                        {
                            locale = Locale.getDefault();
                        }
-                        StringBuilder builder = new StringBuilder();
-                        builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
-                        doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
-                                Field.TermVector.NO));
+                        if (tokenise)
+                        {
+                            StringBuilder builder = new StringBuilder();
+                            builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
+                            doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
+                                    Field.TermVector.NO));
+                        }
+                        else
+                        {
+                            doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
+                        }
                    }
                    else
                    {
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest2.java
@@ -290,6 +290,40 @@ public class LuceneTest2 extends TestCase
        mlText.addValue(new Locale("ru"), "банан");
        mlText.addValue(new Locale("es"), "plátano");
        testProperties.put(QName.createQName(TEST_NAMESPACE, "ml"), mlText);
+        // Any multivalued
+        ArrayList<Serializable> anyValues = new ArrayList<Serializable>();
+        anyValues.add(Integer.valueOf(100));
+        anyValues.add("anyValueAsString");
+        anyValues.add(new UnknownDataType());
+        testProperties.put(QName.createQName(TEST_NAMESPACE, "any-many-ista"), anyValues);
+        // Content multivalued
+        // - note that only the first value is used from the collection
+        // - and it has to go in type d:any as d:content is not allowed to be multivalued
+        
+        ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
+        contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16"));
+        testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
+        
+      
+        
+        // MLText multivalued
+        
+        MLText mlText1 = new MLText();
+        mlText1.addValue(Locale.ENGLISH, "cabbage");
+        mlText1.addValue(Locale.FRENCH, "chou");
+        
+        MLText mlText2 = new MLText();
+        mlText2.addValue(Locale.ENGLISH, "lemur");
+        mlText2.addValue(new Locale("ru"), "лемур");
+        
+        ArrayList<Serializable> mlValues = new ArrayList<Serializable>();
+        mlValues.add(mlText1);
+        mlValues.add(mlText2);
+        
+        testProperties.put(QName.createQName(TEST_NAMESPACE, "mltext-many-ista"), mlValues);
+        
+        // null in multi valued
+        
        ArrayList<Object> testList = new ArrayList<Object>();
        testList.add(null);
        testProperties.put(QName.createQName(TEST_NAMESPACE, "nullList"), testList);
@@ -299,7 +333,14 @@ public class LuceneTest2 extends TestCase

        n4 = nodeService.createNode(rootNodeRef, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}four"),
                testType, testProperties).getChildRef();
+        

+        ContentWriter multiWriter = contentService.getWriter(n4, QName.createQName(TEST_NAMESPACE, "content-many-ista"), true);
+        multiWriter.setEncoding( "UTF-16");
+        multiWriter.setMimetype("text/plain");
+        multiWriter.putContent("multicontent");
+
+        
        nodeService.getProperties(n1);
        nodeService.getProperties(n2);
        nodeService.getProperties(n3);
@@ -2427,6 +2468,80 @@ public class LuceneTest2 extends TestCase
        assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
        results.close();

+        // d:any
+        
+        results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+                + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"100\"",
+                null, null);
+        assertEquals(1, results.length());
+        assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
+        results.close();
+        
+        results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+                + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"anyValueAsString\"",
+                null, null);
+        assertEquals(1, results.length());
+        assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
+        results.close();
+        
+        results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+                + escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"nintc\"",
+                null, null);
+        assertEquals(1, results.length());
+        assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
+        results.close();
+        
+        // multi ml text
+        
+        QName multimlQName = QName.createQName(TEST_NAMESPACE, "mltext-many-ista");
+        
+        SearchParameters sp = new SearchParameters();
+        sp.addStore(rootNodeRef.getStoreRef());
+        sp.setLanguage("lucene");
+        sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":лемур");
+        sp.addLocale(new Locale("ru"));
+        results = searcher.query(sp);
+        assertEquals(1, results.length());
+        results.close();
+        
+        sp = new SearchParameters();
+        sp.addStore(rootNodeRef.getStoreRef());
+        sp.setLanguage("lucene");
+        sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":lemur");
+        sp.addLocale(new Locale("en"));
+        results = searcher.query(sp);
+        assertEquals(1, results.length());
+        results.close();
+        
+        sp = new SearchParameters();
+        sp.addStore(rootNodeRef.getStoreRef());
+        sp.setLanguage("lucene");
+        sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":chou");
+        sp.addLocale(new Locale("fr"));
+        results = searcher.query(sp);
+        assertEquals(1, results.length());
+        results.close();
+        
+        sp = new SearchParameters();
+        sp.addStore(rootNodeRef.getStoreRef());
+        sp.setLanguage("lucene");
+        sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabbage");
+        sp.addLocale(new Locale("en"));
+        results = searcher.query(sp);
+        assertEquals(1, results.length());
+        results.close();
+        
+        // multivalued content in type d:any
+        // This should not be indexed as we can not know what to do with content here. 
+        
+        sp = new SearchParameters();
+        sp.addStore(rootNodeRef.getStoreRef());
+        sp.setLanguage("lucene");
+        sp.setQuery("@" + LuceneQueryParser.escape(QName.createQName(TEST_NAMESPACE, "content-many-ista").toString()) + ":multicontent");
+        results = searcher.query(sp);
+        assertEquals(0, results.length());
+        results.close();
+        
        // locale
        
        results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
@@ -2615,7 +2730,7 @@ public class LuceneTest2 extends TestCase

        // Configuration of TEXT

-        SearchParameters sp = new SearchParameters();
+        sp = new SearchParameters();
        sp.addStore(rootNodeRef.getStoreRef());
        sp.setLanguage("lucene");
        sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\"");
@@ -4592,4 +4707,14 @@ public class LuceneTest2 extends TestCase

        // test.dictionaryService.getType(test.nodeService.getType(test.rootNodeRef)).getDefaultAspects();
    }
+    
+    public static class UnknownDataType implements Serializable
+    {
+
+        /**
+         * Serialization version id; the class carries no other state. The test data adds an instance to the multi-valued d:any property, presumably as a value with no String conversion registered (the test then searches for the "nintc" marker) - verify.
+         */
+        private static final long serialVersionUID = -6729690518573349055L;
+        
+    }
 }
--- a/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml
+++ b/source/java/org/alfresco/repo/search/impl/lucene/LuceneTest_model.xml
@@ -268,6 +268,38 @@
                        <stored>true</stored>
                        <tokenised>true</tokenised>
                    </index>
+                </property>
+				<!-- Any -->
+				<property name="test:any-many-ista">
+                    <type>d:any</type>
+                    <mandatory>false</mandatory>
+                    <multiple>true</multiple>
+                    <index enabled="true">
+                        <atomic>true</atomic>
+                        <stored>true</stored>
+                        <tokenised>true</tokenised>
+                    </index>
+                </property>
+				<!-- Complex multiples -->
+				<property name="test:content-many-ista">
+                    <type>d:any</type>
+                    <mandatory>false</mandatory>
+                    <multiple>true</multiple>
+                    <index enabled="true">
+                        <atomic>true</atomic>
+                        <stored>true</stored>
+                        <tokenised>true</tokenised>
+                    </index>
+                </property>
+				<property name="test:mltext-many-ista">
+                    <type>d:mltext</type>
+                    <mandatory>false</mandatory>
+                    <multiple>true</multiple>
+                    <index enabled="true">
+                        <atomic>true</atomic>
+                        <stored>true</stored>
+                        <tokenised>true</tokenised>
+                    </index>
                </property>
            </properties>
            <mandatory-aspects>