Merged V2.0 to HEAD

svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5114 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5115 .
      - AR-942
   svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5131 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5132 .
      - AR-1244
   svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5127 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5128 .
      - AWC-1138


git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5165 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
Derek Hulley
2007-02-16 05:30:09 +00:00
parent ade659112d
commit 22f523d12b
8 changed files with 350 additions and 40 deletions

View File

@@ -0,0 +1,100 @@
/*
* Copyright (C) 2005 Alfresco, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.alfresco.repo.admin.patch.impl;
import org.alfresco.i18n.I18NUtil;
import org.alfresco.repo.admin.patch.AbstractPatch;
import org.alfresco.repo.importer.ImporterBootstrap;
import org.alfresco.repo.search.Indexer;
import org.alfresco.repo.search.IndexerAndSearcher;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.cmr.search.ResultSet;
import org.alfresco.service.cmr.search.ResultSetRow;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.cmr.search.SearchService;
/**
 * Patch usr:user and cm:person objects so that the user name properties are in the
 * index in untokenized form. If not, authentication may fail in mixed language use.
 *
 * @author andyh
 *
 */
public class UserAndPersonTokenisationPatch extends AbstractPatch
{
    /** I18N key for the success message returned by {@link #applyInternal()}. */
    private static final String MSG_SUCCESS = "patch.userAndPersonUserNamesAsIdentifiers.result";

    // Bootstrap for the spaces store (holds cm:person nodes).
    private ImporterBootstrap spacesImporterBootstrap;
    // Bootstrap for the user store (holds usr:user nodes).
    private ImporterBootstrap userImporterBootstrap;
    // Provides the indexer used to rebuild index entries per store.
    private IndexerAndSearcher indexerAndSearcher;

    public UserAndPersonTokenisationPatch()
    {
    }

    public void setSpacesImporterBootstrap(ImporterBootstrap spacesImporterBootstrap)
    {
        this.spacesImporterBootstrap = spacesImporterBootstrap;
    }

    public void setUserImporterBootstrap(ImporterBootstrap userImporterBootstrap)
    {
        this.userImporterBootstrap = userImporterBootstrap;
    }

    public void setIndexerAndSearcher(IndexerAndSearcher indexerAndSearcher)
    {
        this.indexerAndSearcher = indexerAndSearcher;
    }

    @Override
    protected String applyInternal() throws Exception
    {
        // Re-index user objects in the user store and person objects in the spaces store
        // so their user name properties are written in untokenized form.
        reindex("TYPE:\"usr:user\"", userImporterBootstrap.getStoreRef());
        reindex("TYPE:\"cm:person\"", spacesImporterBootstrap.getStoreRef());
        return I18NUtil.getMessage(MSG_SUCCESS);
    }

    /**
     * Runs the given Lucene query against the given store and re-indexes every
     * node found, refreshing its index entry.
     */
    private void reindex(String query, StoreRef store)
    {
        SearchParameters params = new SearchParameters();
        params.setLanguage(SearchService.LANGUAGE_LUCENE);
        params.setQuery(query);
        params.addStore(store);

        ResultSet resultSet = null;
        try
        {
            resultSet = searchService.query(params);
            for (ResultSetRow resultRow : resultSet)
            {
                // Update the node via the indexer for the node's own store.
                Indexer nodeIndexer = indexerAndSearcher.getIndexer(resultRow.getNodeRef().getStoreRef());
                nodeIndexer.updateNode(resultRow.getNodeRef());
            }
        }
        finally
        {
            // Always release the result set to avoid leaking index resources.
            if (resultSet != null)
            {
                resultSet.close();
            }
        }
    }
}

View File

@@ -21,6 +21,7 @@ import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
@@ -37,9 +38,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
/**
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
* should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
* when adding the field to the document)
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that
* require no tokenisation. (tokenise should be set to false when adding the field to the document)
*
* @author andyh
*/
@@ -47,7 +47,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
public class LuceneAnalyser extends Analyzer
{
private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class);
// Dictionary service to look up analyser classes by data type and locale.
private DictionaryService dictionaryService;
@@ -156,35 +156,45 @@ public class LuceneAnalyser extends Analyzer
else
{
QName propertyQName = QName.createQName(fieldName.substring(1));
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
// Temporary fix for person and user uids
if (propertyQName.equals(ContentModel.PROP_USER_USERNAME)
|| propertyQName.equals(ContentModel.PROP_USERNAME))
{
if (propertyDef.isTokenisedInIndex())
analyser = new VerbatimAnalyser(true);
}
else
{
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
if (propertyDef != null)
{
DataTypeDefinition dataType = propertyDef.getDataType();
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
if (propertyDef.isTokenisedInIndex())
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
DataTypeDefinition dataType = propertyDef.getDataType();
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
{
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
}
else
{
analyser = loadAnalyzer(dataType);
}
}
else
{
analyser = loadAnalyzer(dataType);
analyser = new VerbatimAnalyser();
}
}
else
{
analyser = new VerbatimAnalyser();
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
}
}
else
{
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
analyser = loadAnalyzer(dataType);
}
}
}
else
@@ -208,9 +218,9 @@ public class LuceneAnalyser extends Analyzer
{
Class<?> clazz = Class.forName(analyserClassName);
Analyzer analyser = (Analyzer) clazz.newInstance();
if(s_logger.isDebugEnabled())
if (s_logger.isDebugEnabled())
{
s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName());
s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName());
}
return analyser;
}
@@ -232,8 +242,7 @@ public class LuceneAnalyser extends Analyzer
}
/**
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
* languages etc.
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc.
*/
@Override
public int getPositionIncrementGap(String fieldName)

View File

@@ -65,6 +65,7 @@ import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.Path;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.cmr.search.ResultSetRow;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.namespace.QName;
@@ -97,6 +98,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
public static final String NOT_INDEXED_CONTENT_MISSING = "nicm";
public static final String NOT_INDEXED_NO_TYPE_CONVERSION = "nintc";
private static Logger s_logger = Logger.getLogger(LuceneIndexerImpl2.class);
/**
@@ -121,8 +124,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private long maxAtomicTransformationTime = 20;
/**
* A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO:
* Consider if this information needs to be persisted for recovery
* A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: Consider if this information needs to be persisted for recovery
*/
private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>();
@@ -141,8 +143,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
private boolean isModified = false;
/**
* Flag to indicte if we are doing an in transactional delta or a batch update to the index. If true, we are just
* fixing up non atomically indexed things from one or more other updates.
* Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other
* updates.
*/
private Boolean isFTSUpdate = null;
@@ -689,8 +691,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
/**
* Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper
* serialisation against the index as would a data base transaction.
* Prepare to commit At the moment this makes sure we have all the locks TODO: This is not doing proper serialisation against the index as would a data base transaction.
*
* @return
*/
@@ -804,8 +805,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
/**
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow
* roll back.
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back.
*/
public void setRollbackOnly()
@@ -1534,7 +1534,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
// convert value to String
for (Serializable serializableValue : DefaultTypeConverter.INSTANCE.getCollection(Serializable.class, value))
{
String strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
String strValue = null;
try
{
strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
}
catch (TypeConversionException e)
{
doc.add(new Field(attributeName, NOT_INDEXED_NO_TYPE_CONVERSION, Field.Store.NO,
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
continue;
}
if (strValue == null)
{
// nothing to index
@@ -1727,6 +1737,12 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
}
else if (isText)
{
// Temporary special case for uids
if(propertyName.equals(ContentModel.PROP_USER_USERNAME) || propertyName.equals(ContentModel.PROP_USERNAME))
{
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
}
// TODO: Use the node locale in preferanced to the system locale
Locale locale = null;
@@ -1740,10 +1756,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
{
locale = Locale.getDefault();
}
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
if (tokenise)
{
StringBuilder builder = new StringBuilder();
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
Field.TermVector.NO));
}
else
{
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
}
}
else
{

View File

@@ -290,6 +290,40 @@ public class LuceneTest2 extends TestCase
mlText.addValue(new Locale("ru"), "банан");
mlText.addValue(new Locale("es"), "plátano");
testProperties.put(QName.createQName(TEST_NAMESPACE, "ml"), mlText);
// Any multivalued
ArrayList<Serializable> anyValues = new ArrayList<Serializable>();
anyValues.add(Integer.valueOf(100));
anyValues.add("anyValueAsString");
anyValues.add(new UnknownDataType());
testProperties.put(QName.createQName(TEST_NAMESPACE, "any-many-ista"), anyValues);
// Content multivalued
// - note only the first value is used from the collection
// - and it has to go in type d:any as d:content is not allowed to be multivalued
ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16"));
testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
// MLText multivalued
MLText mlText1 = new MLText();
mlText1.addValue(Locale.ENGLISH, "cabbage");
mlText1.addValue(Locale.FRENCH, "chou");
MLText mlText2 = new MLText();
mlText2.addValue(Locale.ENGLISH, "lemur");
mlText2.addValue(new Locale("ru"), "лемур");
ArrayList<Serializable> mlValues = new ArrayList<Serializable>();
mlValues.add(mlText1);
mlValues.add(mlText2);
testProperties.put(QName.createQName(TEST_NAMESPACE, "mltext-many-ista"), mlValues);
// null in multi valued
ArrayList<Object> testList = new ArrayList<Object>();
testList.add(null);
testProperties.put(QName.createQName(TEST_NAMESPACE, "nullList"), testList);
@@ -299,7 +333,14 @@ public class LuceneTest2 extends TestCase
n4 = nodeService.createNode(rootNodeRef, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}four"),
testType, testProperties).getChildRef();
ContentWriter multiWriter = contentService.getWriter(n4, QName.createQName(TEST_NAMESPACE, "content-many-ista"), true);
multiWriter.setEncoding( "UTF-16");
multiWriter.setMimetype("text/plain");
multiWriter.putContent("multicontent");
nodeService.getProperties(n1);
nodeService.getProperties(n2);
nodeService.getProperties(n3);
@@ -2427,6 +2468,80 @@ public class LuceneTest2 extends TestCase
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
results.close();
// d:any
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"100\"",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"anyValueAsString\"",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
results.close();
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"nintc\"",
null, null);
assertEquals(1, results.length());
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
results.close();
// multi ml text
QName multimlQName = QName.createQName(TEST_NAMESPACE, "mltext-many-ista");
SearchParameters sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":лемур");
sp.addLocale(new Locale("ru"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":lemur");
sp.addLocale(new Locale("en"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":chou");
sp.addLocale(new Locale("fr"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabbage");
sp.addLocale(new Locale("en"));
results = searcher.query(sp);
assertEquals(1, results.length());
results.close();
// multivalued content in type d:any
// This should not be indexed as we can not know what to do with content here.
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(QName.createQName(TEST_NAMESPACE, "content-many-ista").toString()) + ":multicontent");
results = searcher.query(sp);
assertEquals(0, results.length());
results.close();
// locale
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
@@ -2615,7 +2730,7 @@ public class LuceneTest2 extends TestCase
// Configuration of TEXT
SearchParameters sp = new SearchParameters();
sp = new SearchParameters();
sp.addStore(rootNodeRef.getStoreRef());
sp.setLanguage("lucene");
sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\"");
@@ -4592,4 +4707,14 @@ public class LuceneTest2 extends TestCase
// test.dictionaryService.getType(test.nodeService.getType(test.rootNodeRef)).getDefaultAspects();
}
public static class UnknownDataType implements Serializable
{
/**
*
*/
private static final long serialVersionUID = -6729690518573349055L;
}
}

View File

@@ -268,6 +268,38 @@
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<!-- Any -->
<property name="test:any-many-ista">
<type>d:any</type>
<mandatory>false</mandatory>
<multiple>true</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<!-- Complex multiples -->
<property name="test:content-many-ista">
<type>d:any</type>
<mandatory>false</mandatory>
<multiple>true</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
<property name="test:mltext-many-ista">
<type>d:mltext</type>
<mandatory>false</mandatory>
<multiple>true</multiple>
<index enabled="true">
<atomic>true</atomic>
<stored>true</stored>
<tokenised>true</tokenised>
</index>
</property>
</properties>
<mandatory-aspects>