mirror of
https://github.com/Alfresco/alfresco-community-repo.git
synced 2025-07-24 17:32:48 +00:00
Merged V2.0 to HEAD
svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5114 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5115 . - AR-942 svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5131 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5132 . - AR-1244 svn merge svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5127 svn://svn.alfresco.com:3691/alfresco/BRANCHES/V2.0@5128 . - AWC-1138 git-svn-id: https://svn.alfresco.com/repos/alfresco-enterprise/alfresco/HEAD/root@5165 c4b6b30b-aa2e-2d43-bbcb-ca4b014f7261
This commit is contained in:
@@ -21,6 +21,7 @@ import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.search.MLAnalysisMode;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.AlfrescoStandardAnalyser;
|
||||
import org.alfresco.repo.search.impl.lucene.analysis.LongAnalyser;
|
||||
@@ -37,9 +38,8 @@ import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
|
||||
/**
|
||||
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser
|
||||
* should not have been called when indexing properties that require no tokenisation. (tokenise should be set to false
|
||||
* when adding the field to the document)
|
||||
* Analyse properties according to the property definition. The default is to use the standard tokeniser. The tokeniser should not have been called when indexing properties that
|
||||
* require no tokenisation. (tokenise should be set to false when adding the field to the document)
|
||||
*
|
||||
* @author andyh
|
||||
*/
|
||||
@@ -47,7 +47,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
public class LuceneAnalyser extends Analyzer
|
||||
{
|
||||
private static Logger s_logger = Logger.getLogger(LuceneAnalyser.class);
|
||||
|
||||
|
||||
// Dictionary service to look up analyser classes by data type and locale.
|
||||
private DictionaryService dictionaryService;
|
||||
|
||||
@@ -156,35 +156,45 @@ public class LuceneAnalyser extends Analyzer
|
||||
else
|
||||
{
|
||||
QName propertyQName = QName.createQName(fieldName.substring(1));
|
||||
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
|
||||
if (propertyDef != null)
|
||||
// Temporary fix for person and user uids
|
||||
|
||||
if (propertyQName.equals(ContentModel.PROP_USER_USERNAME)
|
||||
|| propertyQName.equals(ContentModel.PROP_USERNAME))
|
||||
{
|
||||
if (propertyDef.isTokenisedInIndex())
|
||||
analyser = new VerbatimAnalyser(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
PropertyDefinition propertyDef = dictionaryService.getProperty(propertyQName);
|
||||
if (propertyDef != null)
|
||||
{
|
||||
DataTypeDefinition dataType = propertyDef.getDataType();
|
||||
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
|
||||
if (propertyDef.isTokenisedInIndex())
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
DataTypeDefinition dataType = propertyDef.getDataType();
|
||||
if (dataType.getName().equals(DataTypeDefinition.CONTENT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else if (dataType.getName().equals(DataTypeDefinition.TEXT))
|
||||
{
|
||||
analyser = new MLAnalayser(dictionaryService, MLAnalysisMode.ALL_ONLY);
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = loadAnalyzer(dataType);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = loadAnalyzer(dataType);
|
||||
analyser = new VerbatimAnalyser();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
analyser = new VerbatimAnalyser();
|
||||
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
|
||||
analyser = loadAnalyzer(dataType);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
DataTypeDefinition dataType = dictionaryService.getDataType(DataTypeDefinition.TEXT);
|
||||
analyser = loadAnalyzer(dataType);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -208,9 +218,9 @@ public class LuceneAnalyser extends Analyzer
|
||||
{
|
||||
Class<?> clazz = Class.forName(analyserClassName);
|
||||
Analyzer analyser = (Analyzer) clazz.newInstance();
|
||||
if(s_logger.isDebugEnabled())
|
||||
if (s_logger.isDebugEnabled())
|
||||
{
|
||||
s_logger.debug("Loaded "+analyserClassName+" for type "+dataType.getName());
|
||||
s_logger.debug("Loaded " + analyserClassName + " for type " + dataType.getName());
|
||||
}
|
||||
return analyser;
|
||||
}
|
||||
@@ -232,8 +242,7 @@ public class LuceneAnalyser extends Analyzer
|
||||
}
|
||||
|
||||
/**
|
||||
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different
|
||||
* languages etc.
|
||||
* For multilingual fields we separate the tokens for each instance to break phrase queries spanning different languages etc.
|
||||
*/
|
||||
@Override
|
||||
public int getPositionIncrementGap(String fieldName)
|
||||
|
@@ -65,6 +65,7 @@ import org.alfresco.service.cmr.repository.NodeService;
|
||||
import org.alfresco.service.cmr.repository.Path;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
|
||||
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
|
||||
import org.alfresco.service.cmr.search.ResultSetRow;
|
||||
import org.alfresco.service.cmr.search.SearchParameters;
|
||||
import org.alfresco.service.namespace.QName;
|
||||
@@ -97,6 +98,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
|
||||
public static final String NOT_INDEXED_CONTENT_MISSING = "nicm";
|
||||
|
||||
public static final String NOT_INDEXED_NO_TYPE_CONVERSION = "nintc";
|
||||
|
||||
private static Logger s_logger = Logger.getLogger(LuceneIndexerImpl2.class);
|
||||
|
||||
/**
|
||||
@@ -121,8 +124,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
private long maxAtomicTransformationTime = 20;
|
||||
|
||||
/**
|
||||
* A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO:
|
||||
* Consider if this information needs to be persisted for recovery
|
||||
* A list of all deletions we have made - at merge these deletions need to be made against the main index. TODO: Consider if this information needs to be persisted for recovery
|
||||
*/
|
||||
private Set<NodeRef> deletions = new LinkedHashSet<NodeRef>();
|
||||
|
||||
@@ -141,8 +143,8 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
private boolean isModified = false;
|
||||
|
||||
/**
|
||||
* Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just
|
||||
* fixing up non atomically indexed things from one or more other updates.
|
||||
* Flag to indicate if we are doing an in transactional delta or a batch update to the index. If true, we are just fixing up non atomically indexed things from one or more other
|
||||
* updates.
|
||||
*/
|
||||
|
||||
private Boolean isFTSUpdate = null;
|
||||
@@ -689,8 +691,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare to commit. At the moment this makes sure we have all the locks. TODO: This is not doing proper
|
||||
* serialisation against the index as would a database transaction.
|
||||
* Prepare to commit. At the moment this makes sure we have all the locks. TODO: This is not doing proper serialisation against the index as would a database transaction.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@@ -804,8 +805,7 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow
|
||||
* roll back.
|
||||
* Mark this index for roll back only. This action can not be reversed. It will reject all other work and only allow roll back.
|
||||
*/
|
||||
|
||||
public void setRollbackOnly()
|
||||
@@ -1534,7 +1534,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
// convert value to String
|
||||
for (Serializable serializableValue : DefaultTypeConverter.INSTANCE.getCollection(Serializable.class, value))
|
||||
{
|
||||
String strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
|
||||
String strValue = null;
|
||||
try
|
||||
{
|
||||
strValue = DefaultTypeConverter.INSTANCE.convert(String.class, serializableValue);
|
||||
}
|
||||
catch (TypeConversionException e)
|
||||
{
|
||||
doc.add(new Field(attributeName, NOT_INDEXED_NO_TYPE_CONVERSION, Field.Store.NO,
|
||||
Field.Index.UN_TOKENIZED, Field.TermVector.NO));
|
||||
continue;
|
||||
}
|
||||
if (strValue == null)
|
||||
{
|
||||
// nothing to index
|
||||
@@ -1727,6 +1737,12 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
}
|
||||
else if (isText)
|
||||
{
|
||||
// Temporary special case for uids
|
||||
if(propertyName.equals(ContentModel.PROP_USER_USERNAME) || propertyName.equals(ContentModel.PROP_USERNAME))
|
||||
{
|
||||
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
|
||||
}
|
||||
|
||||
// TODO: Use the node locale in preference to the system locale
|
||||
Locale locale = null;
|
||||
|
||||
@@ -1740,10 +1756,17 @@ public class LuceneIndexerImpl2 extends LuceneBase2 implements LuceneIndexer2
|
||||
{
|
||||
locale = Locale.getDefault();
|
||||
}
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
|
||||
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
|
||||
Field.TermVector.NO));
|
||||
if (tokenise)
|
||||
{
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("\u0000").append(locale.toString()).append("\u0000").append(strValue);
|
||||
doc.add(new Field(attributeName, builder.toString(), fieldStore, fieldIndex,
|
||||
Field.TermVector.NO));
|
||||
}
|
||||
else
|
||||
{
|
||||
doc.add(new Field(attributeName, strValue, fieldStore, fieldIndex, Field.TermVector.NO));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -290,6 +290,40 @@ public class LuceneTest2 extends TestCase
|
||||
mlText.addValue(new Locale("ru"), "банан");
|
||||
mlText.addValue(new Locale("es"), "plátano");
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "ml"), mlText);
|
||||
// Any multivalued
|
||||
ArrayList<Serializable> anyValues = new ArrayList<Serializable>();
|
||||
anyValues.add(Integer.valueOf(100));
|
||||
anyValues.add("anyValueAsString");
|
||||
anyValues.add(new UnknownDataType());
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "any-many-ista"), anyValues);
|
||||
// Content multivalued
|
||||
// - note only the first value is used from the collection
|
||||
// - and it has to go in type d:any as d:content is not allowed to be multivalued
|
||||
|
||||
ArrayList<Serializable> contentValues = new ArrayList<Serializable>();
|
||||
contentValues.add(new ContentData(null, "text/plain", 0L, "UTF-16"));
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "content-many-ista"), contentValues);
|
||||
|
||||
|
||||
|
||||
// MLText multivalued
|
||||
|
||||
MLText mlText1 = new MLText();
|
||||
mlText1.addValue(Locale.ENGLISH, "cabbage");
|
||||
mlText1.addValue(Locale.FRENCH, "chou");
|
||||
|
||||
MLText mlText2 = new MLText();
|
||||
mlText2.addValue(Locale.ENGLISH, "lemur");
|
||||
mlText2.addValue(new Locale("ru"), "лемур");
|
||||
|
||||
ArrayList<Serializable> mlValues = new ArrayList<Serializable>();
|
||||
mlValues.add(mlText1);
|
||||
mlValues.add(mlText2);
|
||||
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "mltext-many-ista"), mlValues);
|
||||
|
||||
// null in multi valued
|
||||
|
||||
ArrayList<Object> testList = new ArrayList<Object>();
|
||||
testList.add(null);
|
||||
testProperties.put(QName.createQName(TEST_NAMESPACE, "nullList"), testList);
|
||||
@@ -299,7 +333,14 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
n4 = nodeService.createNode(rootNodeRef, ContentModel.ASSOC_CHILDREN, QName.createQName("{namespace}four"),
|
||||
testType, testProperties).getChildRef();
|
||||
|
||||
|
||||
ContentWriter multiWriter = contentService.getWriter(n4, QName.createQName(TEST_NAMESPACE, "content-many-ista"), true);
|
||||
multiWriter.setEncoding( "UTF-16");
|
||||
multiWriter.setMimetype("text/plain");
|
||||
multiWriter.putContent("multicontent");
|
||||
|
||||
|
||||
nodeService.getProperties(n1);
|
||||
nodeService.getProperties(n2);
|
||||
nodeService.getProperties(n3);
|
||||
@@ -2427,6 +2468,80 @@ public class LuceneTest2 extends TestCase
|
||||
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "path-ista")));
|
||||
results.close();
|
||||
|
||||
// d:any
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
|
||||
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"100\"",
|
||||
null, null);
|
||||
assertEquals(1, results.length());
|
||||
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
|
||||
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"anyValueAsString\"",
|
||||
null, null);
|
||||
assertEquals(1, results.length());
|
||||
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
|
||||
results.close();
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
|
||||
+ escapeQName(QName.createQName(TEST_NAMESPACE, "any-many-ista")) + ":\"nintc\"",
|
||||
null, null);
|
||||
assertEquals(1, results.length());
|
||||
assertNotNull(results.getRow(0).getValue(QName.createQName(TEST_NAMESPACE, "any-many-ista")));
|
||||
results.close();
|
||||
|
||||
// multi ml text
|
||||
|
||||
QName multimlQName = QName.createQName(TEST_NAMESPACE, "mltext-many-ista");
|
||||
|
||||
SearchParameters sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":лемур");
|
||||
sp.addLocale(new Locale("ru"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":lemur");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":chou");
|
||||
sp.addLocale(new Locale("fr"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(multimlQName.toString()) + ":cabbage");
|
||||
sp.addLocale(new Locale("en"));
|
||||
results = searcher.query(sp);
|
||||
assertEquals(1, results.length());
|
||||
results.close();
|
||||
|
||||
// multivalued content in type d:any
|
||||
// This should not be indexed as we can not know what to do with content here.
|
||||
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(QName.createQName(TEST_NAMESPACE, "content-many-ista").toString()) + ":multicontent");
|
||||
results = searcher.query(sp);
|
||||
assertEquals(0, results.length());
|
||||
results.close();
|
||||
|
||||
// locale
|
||||
|
||||
results = searcher.query(rootNodeRef.getStoreRef(), "lucene", "\\@"
|
||||
@@ -2615,7 +2730,7 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
// Configuration of TEXT
|
||||
|
||||
SearchParameters sp = new SearchParameters();
|
||||
sp = new SearchParameters();
|
||||
sp.addStore(rootNodeRef.getStoreRef());
|
||||
sp.setLanguage("lucene");
|
||||
sp.setQuery("@" + LuceneQueryParser.escape(ContentModel.PROP_CONTENT.toString()) + ":\"fox\"");
|
||||
@@ -4592,4 +4707,14 @@ public class LuceneTest2 extends TestCase
|
||||
|
||||
// test.dictionaryService.getType(test.nodeService.getType(test.rootNodeRef)).getDefaultAspects();
|
||||
}
|
||||
|
||||
/**
* Serializable test value type that declares no fields or behaviour of its own.
* NOTE(review): presumably exists so the test can store a d:any value that has no String conversion,
* which the indexer is then expected to record with the "nintc" marker - confirm against the indexer.
*/
public static class UnknownDataType implements Serializable
|
||||
{
|
||||
|
||||
/**
|
||||
* Serialization version identifier for this class.
|
||||
*/
|
||||
private static final long serialVersionUID = -6729690518573349055L;
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -268,6 +268,38 @@
|
||||
<stored>true</stored>
|
||||
<tokenised>true</tokenised>
|
||||
</index>
|
||||
</property>
|
||||
<!-- Any -->
|
||||
<property name="test:any-many-ista">
|
||||
<type>d:any</type>
|
||||
<mandatory>false</mandatory>
|
||||
<multiple>true</multiple>
|
||||
<index enabled="true">
|
||||
<atomic>true</atomic>
|
||||
<stored>true</stored>
|
||||
<tokenised>true</tokenised>
|
||||
</index>
|
||||
</property>
|
||||
<!-- Complex multiples -->
|
||||
<property name="test:content-many-ista">
|
||||
<type>d:any</type>
|
||||
<mandatory>false</mandatory>
|
||||
<multiple>true</multiple>
|
||||
<index enabled="true">
|
||||
<atomic>true</atomic>
|
||||
<stored>true</stored>
|
||||
<tokenised>true</tokenised>
|
||||
</index>
|
||||
</property>
|
||||
<property name="test:mltext-many-ista">
|
||||
<type>d:mltext</type>
|
||||
<mandatory>false</mandatory>
|
||||
<multiple>true</multiple>
|
||||
<index enabled="true">
|
||||
<atomic>true</atomic>
|
||||
<stored>true</stored>
|
||||
<tokenised>true</tokenised>
|
||||
</index>
|
||||
</property>
|
||||
</properties>
|
||||
<mandatory-aspects>
|
||||
|
Reference in New Issue
Block a user