mirror of
https://github.com/Alfresco/SearchServices.git
synced 2025-09-24 14:31:29 +00:00
SEARCH-2: Switch to multi-value identifier field for fingerprint
This commit is contained in:
@@ -3087,26 +3087,21 @@ public class SolrInformationServer implements InformationServer
|
|||||||
|
|
||||||
Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer();
|
Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer();
|
||||||
TokenStream ts = analyzer.tokenStream("min_hash", textContent);
|
TokenStream ts = analyzer.tokenStream("min_hash", textContent);
|
||||||
StringBuilder hashBuff = new StringBuilder();
|
|
||||||
CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken())
|
while (ts.incrementToken())
|
||||||
{
|
{
|
||||||
StringBuilder tokenBuff = new StringBuilder();
|
StringBuilder tokenBuff = new StringBuilder();
|
||||||
if(hashBuff.length() > 0) {
|
|
||||||
hashBuff.append(" ");
|
|
||||||
}
|
|
||||||
|
|
||||||
char[] buff = termAttribute.buffer();
|
char[] buff = termAttribute.buffer();
|
||||||
|
|
||||||
for(int i=0; i<termAttribute.length();i++) {
|
for(int i=0; i<termAttribute.length();i++) {
|
||||||
tokenBuff.append(Integer.toHexString(buff[i]));
|
tokenBuff.append(Integer.toHexString(buff[i]));
|
||||||
}
|
}
|
||||||
hashBuff.append(tokenBuff.toString());
|
doc.addField(FINGERPRINT, tokenBuff.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
ts.close();
|
||||||
doc.addField(FINGERPRINT, hashBuff.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
long end = System.nanoTime();
|
long end = System.nanoTime();
|
||||||
|
@@ -703,8 +703,7 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
|||||||
SolrInputField mh = solrDoc.getField("FINGERPRINT");
|
SolrInputField mh = solrDoc.getField("FINGERPRINT");
|
||||||
if (mh != null)
|
if (mh != null)
|
||||||
{
|
{
|
||||||
String fingerprint = mh.getValue().toString();
|
Collection values = mh.getValues();
|
||||||
String[] tokens = fingerprint.split(" ");
|
|
||||||
int bandSize = 1;
|
int bandSize = 1;
|
||||||
float fraction = -1;
|
float fraction = -1;
|
||||||
float truePositive = 1;
|
float truePositive = 1;
|
||||||
@@ -723,14 +722,14 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
|||||||
{
|
{
|
||||||
truePositive /= 100;
|
truePositive /= 100;
|
||||||
}
|
}
|
||||||
bandSize = computeBandSize(tokens.length, fraction, truePositive);
|
bandSize = computeBandSize(values.size(), fraction, truePositive);
|
||||||
}
|
}
|
||||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||||
BooleanQuery.Builder childBuilder = new BooleanQuery.Builder();
|
BooleanQuery.Builder childBuilder = new BooleanQuery.Builder();
|
||||||
int rowInBand = 0;
|
int rowInBand = 0;
|
||||||
for (String token : tokens)
|
for (Object token : values)
|
||||||
{
|
{
|
||||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token));
|
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token.toString()));
|
||||||
if (bandSize == 1)
|
if (bandSize == 1)
|
||||||
{
|
{
|
||||||
builder.add(new ConstantScoreQuery(tq), Occur.SHOULD);
|
builder.add(new ConstantScoreQuery(tq), Occur.SHOULD);
|
||||||
@@ -751,9 +750,9 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
|||||||
// start
|
// start
|
||||||
if (childBuilder.build().clauses().size() > 0)
|
if (childBuilder.build().clauses().size() > 0)
|
||||||
{
|
{
|
||||||
for (String token : tokens)
|
for (Object token : values)
|
||||||
{
|
{
|
||||||
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token));
|
TermQuery tq = new TermQuery(new Term("FINGERPRINT", token.toString()));
|
||||||
childBuilder.add(new ConstantScoreQuery(tq), Occur.MUST);
|
childBuilder.add(new ConstantScoreQuery(tq), Occur.MUST);
|
||||||
rowInBand++;
|
rowInBand++;
|
||||||
if (rowInBand == bandSize)
|
if (rowInBand == bandSize)
|
||||||
@@ -767,7 +766,7 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants
|
|||||||
builder.setDisableCoord(true);
|
builder.setDisableCoord(true);
|
||||||
if (parts.length == 2)
|
if (parts.length == 2)
|
||||||
{
|
{
|
||||||
builder.setMinimumNumberShouldMatch((int) (Math.ceil(tokens.length * fraction)));
|
builder.setMinimumNumberShouldMatch((int) (Math.ceil(values.size() * fraction)));
|
||||||
}
|
}
|
||||||
Query q = builder.build();
|
Query q = builder.build();
|
||||||
return q;
|
return q;
|
||||||
|
@@ -564,7 +564,7 @@
|
|||||||
<fields>
|
<fields>
|
||||||
<!-- For SOLR cloud - should be the node version -->
|
<!-- For SOLR cloud - should be the node version -->
|
||||||
<field name="_version_" type="version" indexed="false" stored="true" docValues="true" required="true" />
|
<field name="_version_" type="version" indexed="false" stored="true" docValues="true" required="true" />
|
||||||
<field name="FINGERPRINT" type="text_plain" indexed="true" omitNorms="true" stored="false" multiValued="false" required="false" docValues="false"/>
|
<!-- Fingerprint field: multi-valued, one value per min-hash token. Uses the "identifier" field type, the same type used by _root_ in this schema. -->
<field name="FINGERPRINT" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="true" required="false" docValues="false"/>
|
||||||
|
|
||||||
<!-- For block join - currently not used -->
|
<!-- For block join - currently not used -->
|
||||||
<field name="_root_" type="identifier" indexed="true" stored="false"/>
|
<field name="_root_" type="identifier" indexed="true" stored="false"/>
|
||||||
|
@@ -19,10 +19,7 @@
|
|||||||
|
|
||||||
package org.alfresco.solr.query;
|
package org.alfresco.solr.query;
|
||||||
|
|
||||||
import org.alfresco.model.ContentModel;
|
|
||||||
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
|
||||||
import org.alfresco.service.cmr.repository.NodeRef;
|
|
||||||
import org.alfresco.service.cmr.repository.StoreRef;
|
|
||||||
import org.alfresco.solr.AbstractAlfrescoSolrTests;
|
import org.alfresco.solr.AbstractAlfrescoSolrTests;
|
||||||
import org.alfresco.solr.client.*;
|
import org.alfresco.solr.client.*;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
@@ -39,12 +36,9 @@ import org.junit.Before;
|
|||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static org.alfresco.solr.AlfrescoSolrUtils.*;
|
import static org.alfresco.solr.AlfrescoSolrUtils.*;
|
||||||
import static org.alfresco.solr.AlfrescoSolrUtils.ancestors;
|
|
||||||
import static org.alfresco.solr.AlfrescoSolrUtils.createGUID;
|
|
||||||
|
|
||||||
@LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"})
|
@LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"})
|
||||||
public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
||||||
@@ -129,10 +123,10 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
|||||||
NodeMetaData nodeMetaData3 = getNodeMetaData(node3, txn, acl, "mike", null, false);
|
NodeMetaData nodeMetaData3 = getNodeMetaData(node3, txn, acl, "mike", null, false);
|
||||||
NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "mike", null, false);
|
NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "mike", null, false);
|
||||||
|
|
||||||
List content = list("aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj kkkk",
|
List content = list("aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25",
|
||||||
"aaaa bbbb cccc dddd eeee ffff hhhh iiii",
|
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20",
|
||||||
"aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj",
|
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24",
|
||||||
"aaaa bbbb cccc dddd eeee ffff hhhh");
|
"aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14");
|
||||||
|
|
||||||
//Index the transaction, nodes, and nodeMetaDatas.
|
//Index the transaction, nodes, and nodeMetaDatas.
|
||||||
//Note that the content is automatically created by the test framework.
|
//Note that the content is automatically created by the test framework.
|
||||||
@@ -160,19 +154,54 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests
|
|||||||
logger.info("#################### Passed Third Test ##############################");
|
logger.info("#################### Passed Third Test ##############################");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
params.add("q", "FINGERPRINT:" + node1.getId()); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
params.add("q", "FINGERPRINT:" + node1.getId()); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||||
params.add("qt", "/afts");
|
params.add("qt", "/afts");
|
||||||
|
params.add("fl", "DBID,score");
|
||||||
|
params.add("start", "0");
|
||||||
|
params.add("rows", "6");
|
||||||
|
SolrServletRequest req = areq(params, null);
|
||||||
|
assertQ(req, "*[count(//doc)=4]",
|
||||||
|
"//result/doc[1]/long[@name='DBID'][.='" + node1.getId() + "']",
|
||||||
|
"//result/doc[2]/long[@name='DBID'][.='" + node3.getId() + "']",
|
||||||
|
"//result/doc[3]/long[@name='DBID'][.='" + node2.getId() + "']",
|
||||||
|
"//result/doc[4]/long[@name='DBID'][.='" + node4.getId() + "']");
|
||||||
|
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.add("q", "FINGERPRINT:" + node1.getId() + "_90"); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||||
|
params.add("qt", "/afts");
|
||||||
params.add("fl","DBID,score");
|
params.add("fl","DBID,score");
|
||||||
params.add("start", "0");
|
params.add("start", "0");
|
||||||
params.add("rows", "6");
|
params.add("rows", "6");
|
||||||
SolrServletRequest req = areq(params, null);
|
req = areq(params, null);
|
||||||
assertQ(req, "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
assertQ(req, "*[count(//doc)= 2]",
|
||||||
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||||
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']",
|
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']");
|
||||||
"//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']");
|
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.add("q", "FINGERPRINT:" + node1.getId()+"_70"); //Query for an id in the content field. The node id is automatically populated into the content field by test framework
|
||||||
|
params.add("qt", "/afts");
|
||||||
|
params.add("fl","DBID,score");
|
||||||
|
params.add("start", "0");
|
||||||
|
params.add("rows", "6");
|
||||||
|
req = areq(params, null);
|
||||||
|
assertQ(req, "*[count(//doc)= 3]",
|
||||||
|
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||||
|
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
||||||
|
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']");
|
||||||
|
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.add("q", "FINGERPRINT:" + node1.getId()+"_40");
|
||||||
|
params.add("qt", "/afts");
|
||||||
|
params.add("fl","DBID,score");
|
||||||
|
params.add("start", "0");
|
||||||
|
params.add("rows", "6");
|
||||||
|
req = areq(params, null);
|
||||||
|
assertQ(req, "*[count(//doc)= 4]",
|
||||||
|
"//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']",
|
||||||
|
"//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']",
|
||||||
|
"//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']",
|
||||||
|
"//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -571,7 +571,7 @@
|
|||||||
|
|
||||||
<!-- Unique identifier - based on DBID -->
|
<!-- Unique identifier - based on DBID -->
|
||||||
<field name="id" type="identifier" indexed="true" omitNorms="true" stored="true" multiValued="false" required="true" docValues="true"/>
|
<field name="id" type="identifier" indexed="true" omitNorms="true" stored="true" multiValued="false" required="true" docValues="true"/>
|
||||||
<field name="FINGERPRINT" type="text_plain" indexed="true" omitNorms="true" stored="false" multiValued="false" required="false" docValues="false"/>
|
<field name="FINGERPRINT" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="true" required="false" docValues="false"/>
|
||||||
|
|
||||||
<!-- Special fields -->
|
<!-- Special fields -->
|
||||||
<field name="LID" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="false" sortMissingLast="true" />
|
<field name="LID" type="identifier" indexed="true" omitNorms="true" stored="false" multiValued="false" sortMissingLast="true" />
|
||||||
|
Reference in New Issue
Block a user