From b99aad037a08590f69c89445c8a43834ef116b18 Mon Sep 17 00:00:00 2001 From: Joel Date: Tue, 27 Sep 2016 15:49:43 -0400 Subject: [PATCH] SEARCH-2: Switch to multi-value identifier field for fingerprint --- .../alfresco/solr/SolrInformationServer.java | 9 +-- .../alfresco/solr/query/Solr4QueryParser.java | 15 +++-- .../instance/templates/rerank/conf/schema.xml | 2 +- .../query/AlfrescoSolrFingerprintTest.java | 63 ++++++++++++++----- .../collection1/conf/schema-fingerprint.xml | 2 +- 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/search-services/alfresco-solr/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-solr/src/main/java/org/alfresco/solr/SolrInformationServer.java index 62dd6f700..6bd4ccb14 100644 --- a/search-services/alfresco-solr/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-solr/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -3087,26 +3087,21 @@ public class SolrInformationServer implements InformationServer Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer(); TokenStream ts = analyzer.tokenStream("min_hash", textContent); - StringBuilder hashBuff = new StringBuilder(); CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { StringBuilder tokenBuff = new StringBuilder(); - if(hashBuff.length() > 0) { - hashBuff.append(" "); - } - char[] buff = termAttribute.buffer(); for(int i=0; i 0) { - for (String token : tokens) + for (Object token : values) { - TermQuery tq = new TermQuery(new Term("FINGERPRINT", token)); + TermQuery tq = new TermQuery(new Term("FINGERPRINT", token.toString())); childBuilder.add(new ConstantScoreQuery(tq), Occur.MUST); rowInBand++; if (rowInBand == bandSize) @@ -767,7 +766,7 @@ public class Solr4QueryParser extends QueryParser implements QueryConstants builder.setDisableCoord(true); if (parts.length == 2) { - 
builder.setMinimumNumberShouldMatch((int) (Math.ceil(tokens.length * fraction))); + builder.setMinimumNumberShouldMatch((int) (Math.ceil(values.size() * fraction))); } Query q = builder.build(); return q; diff --git a/search-services/alfresco-solr/src/main/resources/solr/instance/templates/rerank/conf/schema.xml b/search-services/alfresco-solr/src/main/resources/solr/instance/templates/rerank/conf/schema.xml index 71f727952..06bbbca76 100644 --- a/search-services/alfresco-solr/src/main/resources/solr/instance/templates/rerank/conf/schema.xml +++ b/search-services/alfresco-solr/src/main/resources/solr/instance/templates/rerank/conf/schema.xml @@ -564,7 +564,7 @@ - + diff --git a/search-services/alfresco-solr/src/test/java/org/alfresco/solr/query/AlfrescoSolrFingerprintTest.java b/search-services/alfresco-solr/src/test/java/org/alfresco/solr/query/AlfrescoSolrFingerprintTest.java index 23f5b90ef..280dc4010 100644 --- a/search-services/alfresco-solr/src/test/java/org/alfresco/solr/query/AlfrescoSolrFingerprintTest.java +++ b/search-services/alfresco-solr/src/test/java/org/alfresco/solr/query/AlfrescoSolrFingerprintTest.java @@ -19,10 +19,7 @@ package org.alfresco.solr.query; -import org.alfresco.model.ContentModel; import org.alfresco.repo.search.adaptor.lucene.QueryConstants; -import org.alfresco.service.cmr.repository.NodeRef; -import org.alfresco.service.cmr.repository.StoreRef; import org.alfresco.solr.AbstractAlfrescoSolrTests; import org.alfresco.solr.client.*; import org.apache.commons.logging.Log; @@ -39,12 +36,9 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import java.util.ArrayList; import java.util.List; import static org.alfresco.solr.AlfrescoSolrUtils.*; -import static org.alfresco.solr.AlfrescoSolrUtils.ancestors; -import static org.alfresco.solr.AlfrescoSolrUtils.createGUID; @LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", 
"Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"}) public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests @@ -129,10 +123,10 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests NodeMetaData nodeMetaData3 = getNodeMetaData(node3, txn, acl, "mike", null, false); NodeMetaData nodeMetaData4 = getNodeMetaData(node4, txn, acl, "mike", null, false); - List content = list("aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj kkkk", - "aaaa bbbb cccc dddd eeee ffff hhhh iiii", - "aaaa bbbb cccc dddd eeee ffff hhhh iiii jjjj", - "aaaa bbbb cccc dddd eeee ffff hhhh"); + List content = list("aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25", + "aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", + "aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24", + "aaaa 1 2 3 4 5 6 7 8 9 10 11 12 13 14"); //Index the transaction, nodes, and nodeMetaDatas. //Note that the content is automatically created by the test framework. @@ -160,19 +154,54 @@ public class AlfrescoSolrFingerprintTest extends AbstractAlfrescoSolrTests logger.info("#################### Passed Third Test ##############################"); - - ModifiableSolrParams params = new ModifiableSolrParams(); params.add("q", "FINGERPRINT:" + node1.getId()); //Query for an id in the content field. 
The node id is automatically populated into the content field by test framework params.add("qt", "/afts"); + params.add("fl", "DBID,score"); + params.add("start", "0"); + params.add("rows", "6"); + SolrServletRequest req = areq(params, null); + assertQ(req, "*[count(//doc)=4]", + "//result/doc[1]/long[@name='DBID'][.='" + node1.getId() + "']", + "//result/doc[2]/long[@name='DBID'][.='" + node3.getId() + "']", + "//result/doc[3]/long[@name='DBID'][.='" + node2.getId() + "']", + "//result/doc[4]/long[@name='DBID'][.='" + node4.getId() + "']"); + + params = new ModifiableSolrParams(); + params.add("q", "FINGERPRINT:" + node1.getId() + "_90"); //Query for an id in the content field. The node id is automatically populated into the content field by test framework + params.add("qt", "/afts"); params.add("fl","DBID,score"); params.add("start", "0"); params.add("rows", "6"); - SolrServletRequest req = areq(params, null); - assertQ(req, "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']", - "//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']", - "//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']", - "//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']"); + req = areq(params, null); + assertQ(req, "*[count(//doc)= 2]", + "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']", + "//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']"); + + params = new ModifiableSolrParams(); + params.add("q", "FINGERPRINT:" + node1.getId()+"_70"); //Query for an id in the content field. 
The node id is automatically populated into the content field by test framework + params.add("qt", "/afts"); + params.add("fl","DBID,score"); + params.add("start", "0"); + params.add("rows", "6"); + req = areq(params, null); + assertQ(req, "*[count(//doc)= 3]", + "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']", + "//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']", + "//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']"); + + params = new ModifiableSolrParams(); + params.add("q", "FINGERPRINT:" + node1.getId()+"_40"); + params.add("qt", "/afts"); + params.add("fl","DBID,score"); + params.add("start", "0"); + params.add("rows", "6"); + req = areq(params, null); + assertQ(req, "*[count(//doc)= 4]", + "//result/doc[1]/long[@name='DBID'][.='"+node1.getId()+"']", + "//result/doc[2]/long[@name='DBID'][.='"+node3.getId()+"']", + "//result/doc[3]/long[@name='DBID'][.='"+node2.getId()+"']", + "//result/doc[4]/long[@name='DBID'][.='"+node4.getId()+"']"); } } diff --git a/search-services/alfresco-solr/src/test/resources/test-files/collection1/conf/schema-fingerprint.xml b/search-services/alfresco-solr/src/test/resources/test-files/collection1/conf/schema-fingerprint.xml index b83c84886..5cdd086f0 100644 --- a/search-services/alfresco-solr/src/test/resources/test-files/collection1/conf/schema-fingerprint.xml +++ b/search-services/alfresco-solr/src/test/resources/test-files/collection1/conf/schema-fingerprint.xml @@ -571,7 +571,7 @@ - +