mirror of
https://github.com/Alfresco/SearchServices.git
synced 2025-09-24 14:31:29 +00:00
SEARCH-2: Switch to multi-value identifier field for fingerprint
This commit is contained in:
@@ -3087,26 +3087,21 @@ public class SolrInformationServer implements InformationServer
|
||||
|
||||
Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer();
|
||||
TokenStream ts = analyzer.tokenStream("min_hash", textContent);
|
||||
StringBuilder hashBuff = new StringBuilder();
|
||||
CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
|
||||
ts.reset();
|
||||
while (ts.incrementToken())
|
||||
{
|
||||
StringBuilder tokenBuff = new StringBuilder();
|
||||
if(hashBuff.length() > 0) {
|
||||
hashBuff.append(" ");
|
||||
}
|
||||
|
||||
char[] buff = termAttribute.buffer();
|
||||
|
||||
for(int i=0; i<termAttribute.length();i++) {
|
||||
tokenBuff.append(Integer.toHexString(buff[i]));
|
||||
}
|
||||
hashBuff.append(tokenBuff.toString());
|
||||
doc.addField(FINGERPRINT, tokenBuff.toString());
|
||||
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
doc.addField(FINGERPRINT, hashBuff.toString());
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
@@ -703,8 +703,7 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
||||
SolrInputField mh = solrDoc.getField("FINGERPRINT");
|
||||
if (mh != null)
|
||||
{
|
||||
String fingerprint = mh.getValue().toString();
|
||||
String[] tokens = fingerprint.split(" ");
|
||||
Collection values = mh.getValues();
|
||||
int bandSize = 1;
|
||||
float fraction = -1;
|
||||
float truePositive = 1;
|
||||
@@ -723,14 +722,14 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
||||
{
|
||||
truePositive /= 100;
|
||||
}
|
||||
bandSize = computeBandSize(tokens.length, fraction, truePositive);
|
||||
bandSize = computeBandSize(values.size(), fraction, truePositive);
|
||||
}
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
BooleanQuery.Builder childBuilder = new BooleanQuery.Builder();
|
||||
int rowInBand = 0;
|
||||
for (String token : tokens)
|
||||
for (Object token : values)
|
||||
{
|
||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token));
|
||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token.toString()));
|
||||
if (bandSize == 1)
|
||||
{
|
||||
builder.add(new ConstantScoreQuery(tq), Occur.SHOULD);
|
||||
@@ -751,9 +750,9 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
||||
// start
|
||||
if (childBuilder.build().clauses().size() > 0)
|
||||
{
|
||||
for (String token : tokens)
|
||||
for (Object token : values)
|
||||
{
|
||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token));
|
||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token.toString()));
|
||||
childBuilder.add(new ConstantScoreQuery(tq), Occur.MUST);
|
||||
rowInBand++;
|
||||
if (rowInBand == bandSize)
|
||||
@@ -767,7 +766,7 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
||||
builder.setDisableCoord(true);
|
||||
if (parts.length == 2)
|
||||
{
|
||||
builder.setMinimumNumberShouldMatch((int) (Math.ceil(tokens.length * fraction)));
|
||||
builder.setMinimumNumberShouldMatch((int) (Math.ceil(values.size() * fraction)));
|
||||
}
|
||||
Query q = builder.build();
|
||||
return q;
|
||||
|
@@ -564,7 +564,7 @@
|
||||
<fields>
|
||||
<!-- For SOLR cloud - should be the node version -->
|
||||
<field name="_version_" type="version" indexed="false" stored="true" docValues="true" required="true" />
|
||||
<field name="FINGERPRINT" type="text_plain" indexed="true" omitNorms="true" stored="false" multiValued="false" required="false" docValues="false"/>
|
||||
<!-- FINGERPRINT: multi-valued; the type name must be "identifier", matching the other identifier fields in this schema -->
<field name="FINGERPRINT" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="true" required="false" docValues="false"/>
|
||||
|
||||
<!-- For block join - currently not used -->
|
||||
<field name="_root_" type="identifier" indexed="true" stored="false"/>
|
||||
|
@@ -19,10 +19,7 @@
|
||||
|
||||
package org.alfresco.solr.query;
|
||||
|
||||
import org.alfresco.model.ContentModel;
|
||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||
import org.alfresco.service.cmr.repository.NodeRef;
|
||||
import org.alfresco.service.cmr.repository.StoreRef;
|
||||
import org.alfresco.solr.AbstractAlfrescoSolrTests;
|
||||
import org.alfresco.solr.client.*;
|
||||
import org.apache.commons.logging.Log;
|
||||
@@ -39,12 +36,9 @@ import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.*;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.ancestors;
|
||||
import static org.alfresco.solr.AlfrescoSolrUtils.createGUID;
|
||||
|
||||
@LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"})
|
||||
public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
||||
@@ -129,10 +123,10 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
||||
NodeMetaData nodeMetaData3 = getNodeMetaData(node3, txn, acl, "mike", null, false);
|
||||
NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "mike", null, false);
|
||||
|
||||
List content = list("aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj kkkk",
|
||||
"aaaa bbbb cccc dddd eeee ffff hhhh iiii",
|
||||
"aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj",
|
||||
"aaaa bbbb cccc dddd eeee ffff hhhh");
|
||||
List content = list("aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25",
|
||||
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20",
|
||||
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24",
|
||||
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14");
|
||||
|
||||
//Index the transaction, nodes, and nodeMetaDatas.
|
||||
//Note that the content is automatically created by the test framework.
|
||||
@@ -160,19 +154,54 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
||||
logger.info("#################### Passed Third Test ##############################");
|
||||
|
||||
|
||||
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add("q", "FINGERPRINT:" + node1.getId()); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||
params.add("qt", "/afts");
|
||||
params.add("fl", "DBID,score");
|
||||
params.add("start", "0");
|
||||
params.add("rows", "6");
|
||||
SolrServletRequest req = areq(params, null);
|
||||
assertQ(req, "*[count(//doc)=4]",
|
||||
"//result/doc[1]/long[@name='DBID'][.='" + node1.getId() + "']",
|
||||
"//result/doc[2]/long[@name='DBID'][.='" + node3.getId() + "']",
|
||||
"//result/doc[3]/long[@name='DBID'][.='" + node2.getId() + "']",
|
||||
"//result/doc[4]/long[@name='DBID'][.='" + node4.getId() + "']");
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "FINGERPRINT:" + node1.getId() + "_90"); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||
params.add("qt", "/afts");
|
||||
params.add("fl","DBID,score");
|
||||
params.add("start", "0");
|
||||
params.add("rows", "6");
|
||||
SolrServletRequest req = areq(params, null);
|
||||
assertQ(req, "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
||||
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']",
|
||||
"//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']");
|
||||
req = areq(params, null);
|
||||
assertQ(req, "*[count(//doc)= 2]",
|
||||
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']");
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "FINGERPRINT:" + node1.getId()+"_70"); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||
params.add("qt", "/afts");
|
||||
params.add("fl","DBID,score");
|
||||
params.add("start", "0");
|
||||
params.add("rows", "6");
|
||||
req = areq(params, null);
|
||||
assertQ(req, "*[count(//doc)= 3]",
|
||||
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
||||
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']");
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "FINGERPRINT:" + node1.getId()+"_40");
|
||||
params.add("qt", "/afts");
|
||||
params.add("fl","DBID,score");
|
||||
params.add("start", "0");
|
||||
params.add("rows", "6");
|
||||
req = areq(params, null);
|
||||
assertQ(req, "*[count(//doc)= 4]",
|
||||
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
||||
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']",
|
||||
"//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']");
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -571,7 +571,7 @@
|
||||
|
||||
<!-- Unique identifier - based on DBID -->
|
||||
<field name="id" type="identifier" indexed="true" omitNorms="true" stored="true" multiValued="false" required="true" docValues="true"/>
|
||||
<field name="FINGERPRINT" type="text_plain" indexed="true" omitNorms="true" stored="false" multiValued="false" required="false" docValues="false"/>
|
||||
<field name="FINGERPRINT" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="true" required="false" docValues="false"/>
|
||||
|
||||
<!-- Special fields -->
|
||||
<field name="LID" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="false" sortMissingLast="true" />
|
||||
|
Reference in New Issue
Block a user