Mirror of https://github.com/Alfresco/SearchServices.git
[ SEARCH-1693 ] Minor refactoring on the Alfresco Highlighter
@@ -52,7 +52,7 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.stream.StreamSupport;
 
-import static java.util.Arrays.spliterator;
+import static java.lang.String.join;
 import static java.util.Arrays.stream;
 import static java.util.Optional.ofNullable;
 import static java.util.stream.Collectors.toList;
@@ -107,11 +107,12 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
     }
     }
 
-    public AlfrescoSolrHighlighter(SolrCore solrCore)
+    public AlfrescoSolrHighlighter(SolrCore core)
     {
-        super(solrCore);
+        super(core);
     }
 
+    // TODO: E' possibile fare a meno di questo? (Is it possible to do without this?)
     @Override
     protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request)
     {
@@ -119,82 +120,74 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
                 AlfrescoSolrDataModel.getInstance()
                     .mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request);
 
-        SolrParams params = request.getParams();
         Highlighter highlighter =
-                new Highlighter(getFormatter(
-                        requestFieldname, params),
-                        getEncoder(requestFieldname, params),
+                new Highlighter(
+                        getFormatter(requestFieldname, request.getParams()),
+                        getEncoder(requestFieldname, request.getParams()),
                         getQueryScorer(query,schemaFieldName, request));
 
-        highlighter.setTextFragmenter(getFragmenter(requestFieldname, params));
+        highlighter.setTextFragmenter(getFragmenter(requestFieldname, request.getParams()));
         return highlighter;
     }
 
     @Override
-    protected QueryScorer getSpanQueryScorer(Query query,
-            String requestFieldname,
-            TokenStream tokenStream, SolrQueryRequest request) {
-        String schemaFieldName = AlfrescoSolrDataModel.getInstance()
-                .mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request);
-        QueryScorer scorer = new QueryScorer(query,
-                request.getParams().getFieldBool(requestFieldname,
-                        HighlightParams.FIELD_MATCH, false) ? schemaFieldName : null);
-        scorer.setExpandMultiTermQuery(request.getParams().getBool(
-                HighlightParams.HIGHLIGHT_MULTI_TERM, true));
+    protected QueryScorer getSpanQueryScorer(Query query, String requestFieldname, TokenStream tokenStream, SolrQueryRequest request)
+    {
+        String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request);
+        QueryScorer scorer = new QueryScorer(query,request.getParams().getFieldBool(requestFieldname, HighlightParams.FIELD_MATCH, false) ? schemaFieldName : null);
+        scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
 
         boolean defaultPayloads = true;// overwritten below
-        try {
+        try
+        {
             // It'd be nice to know if payloads are on the tokenStream but the
             // presence of the attribute isn't a good
             // indicator.
-            final Terms terms = request.getSearcher().getSlowAtomicReader().fields()
-                    .terms(schemaFieldName);
-            if (terms != null) {
+            final Terms terms = request.getSearcher().getSlowAtomicReader().fields().terms(schemaFieldName);
+            if (terms != null)
+            {
                 defaultPayloads = terms.hasPayloads();
             }
-        } catch (IOException e) {
+        }
+        catch (IOException e)
+        {
             LOGGER.error("Couldn't check for existence of payloads", e);
         }
-        scorer.setUsePayloads(request.getParams().getFieldBool(requestFieldname,
-                HighlightParams.PAYLOADS, defaultPayloads));
+        scorer.setUsePayloads(request.getParams().getFieldBool(requestFieldname, HighlightParams.PAYLOADS, defaultPayloads));
         return scorer;
     }
 
     @SuppressWarnings("unchecked")
     @Override
-    public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest request, String[] defaultFields) throws IOException {
+    public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest request, String[] defaultFields) throws IOException
+    {
         final String idFieldName = request.getSchema().getUniqueKeyField().getName();
         final Set<String> idFields = Set.of(idFieldName, "DBID");
         final SolrParams originalRequestParameters = request.getParams();
 
-        // fields in the hl.fl parameter e.g. (content, name, title)
+        // raw fields in the hl.fl parameter (e.g. hl.fl=content, name, title)
        List<String> highlightFields = stream(super.getHighlightFields(query, request, defaultFields)).collect(toList());
 
         /*
-            The Alfresco Data Model is queried in order to retrieve the top-level choice mapping for the fields
-            collected above.
-            Top-level choice because for each simple field name (e.g. content) the Alfresco Data Model could provide more
-            than one mapping. At this time, we choose the first.
+            The Alfresco Data Model is queried in order to retrieve the top-level choice mapping for the fields collected above.
+            Top-level choice because for each incoming field name (e.g. content) the Alfresco Data Model could provide more
+            than one alternative. The first one which is tried is the cross language field.
 
             e.g.
             {
                 name => text@s___t@{http://www.alfresco.org/model/content/1.0}name,
                 title => mltext@m___t@{http://www.alfresco.org/model/content/1.0}title,
-                content = content@s___t@{http://www.alfresco.org/model/content/1.0}content
+                content => content@s___t@{http://www.alfresco.org/model/content/1.0}content
             }
 
+            Since at the end we need to restore (in the response) the original request(ed) fields names (e.g. content, name) used by requestor
+            we collect a map which associates each schema field (e.g. text@s___t@{http://www.alfresco.org/model/content/1.0}name)
+            with the corresponding request(ed) field (e.g. name).
         */
-        Map<String, String> mappings =
-                highlightFields.stream()
-                        .map(requestFieldName ->
-                                new AbstractMap.SimpleEntry<>(
-                                        AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldName, FieldUse.HIGHLIGHT, request),
-                                        requestFieldName))
-                        .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (prev, next) -> next, HashMap::new));
+        Map<String, String> mappings = withDebug(createInitialFieldMappings(request, highlightFields));
 
-        debugMappings(mappings);
-        // The identifiers map collects three documents identifiers per document (Lucene docid, Solr "id" and "DBID" fields).
-        // The keys of the map are Solr "id", the values a simple value object encapsulating all those three identifiers (for a specific document).
+        // The identifiers map collects three documents identifiers for each document (Lucene docid, Solr "id" and "DBID").
+        // Keys of the identifiers map are Solr "id", while values are simple value objects encapsulating all those three identifiers (for a specific document).
         Iterable<Integer> iterable = docs::iterator;
         Map<String, IdTriple> identifiers =
                 StreamSupport.stream(iterable.spliterator(), false)
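Note: the initial mapping built above (requested hl.fl field => top-level schema field) can be reproduced in isolation. The standalone sketch below only mirrors the shape of the createInitialFieldMappings helper introduced by this commit; the topLevelChoice lookup is a hypothetical stand-in for AlfrescoSolrDataModel.getInstance().mapProperty(field, FieldUse.HIGHLIGHT, request), and the field names are examples taken from the comments above rather than real model output.

import java.util.AbstractMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static java.util.stream.Collectors.toMap;

public class FieldMappingSketch
{
    public static void main(String[] args)
    {
        // Hypothetical stand-in for AlfrescoSolrDataModel.getInstance().mapProperty(field, FieldUse.HIGHLIGHT, request):
        // it returns the "top-level choice" (cross-language) schema field for each requested field.
        Map<String, String> topLevelChoice = Map.of(
                "name", "text@s___t@{http://www.alfresco.org/model/content/1.0}name",
                "title", "mltext@m___t@{http://www.alfresco.org/model/content/1.0}title",
                "content", "content@s___t@{http://www.alfresco.org/model/content/1.0}content");

        // Same shape as createInitialFieldMappings: key = schema field, value = requested field.
        List<String> highlightFields = List.of("name", "title", "content");
        Map<String, String> mappings =
                highlightFields.stream()
                        .map(requestFieldName -> new AbstractMap.SimpleEntry<>(topLevelChoice.get(requestFieldName), requestFieldName))
                        .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (prev, next) -> next, HashMap::new));

        mappings.forEach((schemaField, requestField) -> System.out.println(requestField + " => " + schemaField));
    }
}

The (prev, next) -> next merge function means that when two requested fields resolve to the same schema field the last one wins, while HashMap::new keeps the resulting map mutable so that the 2nd round can add entries to it later.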
@@ -202,25 +195,28 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
                         .filter(Objects::nonNull)
                         .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
 
-        // First round: call the Solr highlighting procedure
-        request.setParams(rewrite(originalRequestParameters, mappings, String.join(",", mappings.keySet())));
+        // First round: call the Solr highlighting procedure using the current fields mappings.
+        request.setParams(rewrite(originalRequestParameters, mappings, join(",", mappings.keySet())));
         NamedList<Object> highlightingResponse = super.doHighlighting(docs, query, request, defaultFields);
 
-        // Remember, in the first try we used the top-level mapping choice coming from Alfresco Data Model.
+        // Remember, in the first try we used the cross-language field coming from Alfresco Data Model.
         // Since it is possible that the stored content is not on that field (e.g. it could be on the localised version)
         // the highlight response for that document/field will be empty.
-        // For that reason, and for those documents / fields we will repeat the highlight call using the second choice (the
-        // localised version of the field).
+        // For that reason, and for those documents/fields we will repeat the highlight call using the second choice
+        // (i.e. the localised version of the field).
 
-        // Key = 2nd round fields (in the first try we didn't have any highlighting for those fields)
+        // Key = 2nd round fields got from Alfresco Data Model (i.e. localised fields)
         // Value = list of identifiers of documents that didn't provide the highlighting info in the first round (for the key field)
         Map<String, List<IdTriple>> missingHighlightedDocumentsByFields = new HashMap<>();
+
+        // Additional mappings coming from this 2nd round
         Map<String, String> additionalMappings = new HashMap<>();
 
         identifiers.keySet()
                 .forEach(id -> {
                     final NamedList<Object> docHighlighting = (NamedList<Object>) highlightingResponse.get(id);
                     mappings.entrySet().stream()
+                            // we want to process only those entries that didn't produce any result in the first round.
                             .filter(fieldEntry -> docHighlighting.indexOf(fieldEntry.getKey(), 0) == -1)
                             .map(fieldEntry -> {
                                 String solrFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldEntry.getValue(), FieldUse.HIGHLIGHT, request, 1);
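Note: the rewrite(...) helper invoked in the first round is pre-existing code that this diff does not touch or show. Purely as an illustration of the kind of parameter rewriting involved (pointing hl.fl at the mapped schema fields before delegating to DefaultSolrHighlighter), a minimal sketch using only standard Solr classes could look like the following; it is an assumption, not the actual implementation.

import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

final class HighlightParamsRewriteSketch
{
    // Illustrative only: replaces hl.fl with the comma-separated list of mapped schema fields,
    // e.g. hl.fl=content,name becomes hl.fl=content@s___t@{...}content,text@s___t@{...}name
    static SolrParams withMappedHighlightFields(SolrParams original, String mappedFieldList)
    {
        ModifiableSolrParams rewritten = new ModifiableSolrParams(original);
        rewritten.set(HighlightParams.FIELDS, mappedFieldList);
        return rewritten;
    }
}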
@@ -231,7 +227,7 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
                                 .forEach(docList -> docList.add(identifiers.get(id)));});
 
         mappings.putAll(additionalMappings);
-        debugMappings(mappings);
+        withDebug(mappings);
 
         // We are going to re-call the highlight for those documents/fields which didnt' produce any result in the
         // previous step. In order to do that we need
@@ -241,7 +237,9 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
         missingHighlightedDocumentsByFields.entrySet().stream()
                 .map(entry -> {
                     int [] docids = entry.getValue().stream().mapToInt(IdTriple::docid).toArray();
-                    return new AbstractMap.SimpleEntry<>(entry.getKey(), new DocSlice(0, docids.length, docids, null, docids.length, 1));})
+                    return new AbstractMap.SimpleEntry<>(
+                            entry.getKey(),
+                            new DocSlice(0, docids.length, docids, null, docids.length, 1));})
                 .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
 
         // For each field and corresponding document list, a new highlight request is executed
@@ -252,39 +250,35 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
                     DocList doclist = entry.getValue();
                     try
                     {
-                        // ModifiableSolrParams params =
-                        //         new ModifiableSolrParams(request.getParams())
-                        //                 .set(HighlightParams.FIELDS, fieldName);
-                        // rewriteLocalFieldParameters(params, originalRequestParameters, mappings.get(fieldName), fieldName);
-                        // request.setParams(params);
                         request.setParams(rewrite(originalRequestParameters, additionalMappings, fieldName));
                         return super.doHighlighting(doclist, query, request, defaultFields);
                     }
                     catch (Exception exception)
                     {
-                        // This is a child request so in that case we log the error but we still return something to
+                        // This is a "2nd round" request so in that case we log the error but we still return something to
                         // the requestor (i.e. the result of the first highlight call)
-                        LOGGER.error("Error during the execution of a child highlighting request. See the stacktrace below for further details.", exception);
+                        LOGGER.error("Error during the execution of a \"2nd round\" highlighting request. " +
+                                "See the stacktrace below for further details.", exception);
                         return null;
                     }})
+                .filter(Objects::nonNull)
                 .collect(toList());
 
-        // We need to combine (actually reduce) the highlight response coming from the first try, with each
+        // Combine (actually reduce) the highlight response coming from the first try, with each
         // partial highlight response coming from subsequent calls
         NamedList<Object> responseBeforeRenaming = partialHighlightingResponses.stream()
                 .reduce(highlightingResponse, (accumulator, partial) -> {
-                    partial.iterator().forEachRemaining(entry -> {
+                    partial.forEach(entry -> {
                         String id = entry.getKey();
-                        NamedList<Object> specificFieldsHighlighting = (NamedList<Object>) entry.getValue();
-                        NamedList<Object> preExistingDocHighlight = (NamedList<Object>) accumulator.get(id);
-                        // this document were never collected
-                        if (preExistingDocHighlight == null)
+                        NamedList<Object> specificFieldHighlighting = (NamedList<Object>) entry.getValue();
+                        NamedList<Object> preExistingDocHighlighting = (NamedList<Object>) accumulator.get(id);
+                        if (preExistingDocHighlighting == null) // this document were never collected
                         {
                             accumulator.add(id, entry.getValue());
                         }
                         else
                         {
-                            preExistingDocHighlight.addAll(specificFieldsHighlighting);
+                            preExistingDocHighlighting.addAll(specificFieldHighlighting);
                         }
                     });
                     return accumulator;
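Note: the reduce step above can be exercised in isolation with a minimal, self-contained sketch: the first-round response plays the role of the accumulator and each partial "2nd round" response is folded into it per document id. The document id, field names and snippets below are invented placeholders, not real Alfresco schema fields.

import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

public class HighlightMergeSketch
{
    @SuppressWarnings("unchecked")
    public static void main(String[] args)
    {
        // First-round response: doc-1 already has a snippet for one (placeholder) schema field.
        NamedList<Object> accumulator = new SimpleOrderedMap<>();
        NamedList<Object> firstRoundDoc = new SimpleOrderedMap<>();
        firstRoundDoc.add("cross-language-content-field", new String[] {"<em>alfresco</em> content snippet"});
        accumulator.add("doc-1", firstRoundDoc);

        // Partial "2nd round" response: doc-1 also gets a snippet for the localised variant of another field.
        NamedList<Object> partial = new SimpleOrderedMap<>();
        NamedList<Object> secondRoundDoc = new SimpleOrderedMap<>();
        secondRoundDoc.add("localised-name-field", new String[] {"<em>alfresco</em> name snippet"});
        partial.add("doc-1", secondRoundDoc);

        // Same merge logic as the reducer in doHighlighting.
        partial.forEach(entry -> {
            String id = entry.getKey();
            NamedList<Object> specificFieldHighlighting = (NamedList<Object>) entry.getValue();
            NamedList<Object> preExistingDocHighlighting = (NamedList<Object>) accumulator.get(id);
            if (preExistingDocHighlighting == null) // this document was never collected
            {
                accumulator.add(id, entry.getValue());
            }
            else
            {
                preExistingDocHighlighting.addAll(specificFieldHighlighting);
            }
        });

        System.out.println(accumulator); // doc-1 now carries both the first- and second-round snippets
    }
}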
@@ -295,8 +289,7 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
         // so we need to replace them with fields actually requested
         // In addition, beside the snippets we want to have the document DBID as well.
         NamedList<Object> response = new SimpleOrderedMap<>();
-        responseBeforeRenaming.iterator()
-                .forEachRemaining( entry -> {
+        responseBeforeRenaming.forEach( entry -> {
                     String id = entry.getKey();
                     NamedList<Object> documentHighlighting = (NamedList<Object>) entry.getValue();
                     NamedList<Object> renamedDocumentHighlighting = new SimpleOrderedMap<>();
@@ -406,11 +399,46 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
         return rewrittenParams;
     }
 
-    private void debugMappings(Map<String, String> mappings)
+    /**
+     * Debugs the content of the given mappings.
+     *
+     * @param mappings the fields mapping.
+     * @return the same input mappings instance.
+     */
+    private Map<String, String> withDebug(Map<String, String> mappings)
     {
         if (LOGGER.isDebugEnabled())
         {
             mappings.forEach( (solrField, requestField) -> LOGGER.debug("Request field {} has been mapped to {}", requestField, solrField));
         }
+        return mappings;
+    }
+
+    /**
+     * Starting from the input requested highlight fields (i.e. fields listed in {@link HighlightParams#FIELDS} parameter)
+     * we create a map which associates each member with the corresponding field in the Solr schema.
+     * For example:
+     *
+     * <pre>
+     * name => text@s___t@{http://www.alfresco.org/model/content/1.0}name,
+     * title => mltext@m___t@{http://www.alfresco.org/model/content/1.0}title,
+     * content => content@s___t@{http://www.alfresco.org/model/content/1.0}content
+     * </pre>
+     *
+     * IMPORTANT: although returned as {@link Map} interface, the returned data structure IS MUTABLE. This is needed
+     * because during the highlighting workflow we need to change its content by adding fields.
+     *
+     * @param request the current incoming client request.
+     * @param requestedHighlightFields a list of raw fields listed in {@link HighlightParams#FIELDS} parameter
+     * @return a map associating request(ed) fields with the corresponding schema fields.
+     */
+    private Map<String, String> createInitialFieldMappings(SolrQueryRequest request, List<String> requestedHighlightFields)
+    {
+        return requestedHighlightFields.stream()
+                .map(requestFieldName ->
+                        new AbstractMap.SimpleEntry<>(
+                                AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldName, FieldUse.HIGHLIGHT, request),
+                                requestFieldName))
+                .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (prev, next) -> next, HashMap::new));
     }
 }
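Note: the IMPORTANT remark in the new Javadoc can be checked directly: later in the workflow mappings.putAll(additionalMappings) is invoked, so createInitialFieldMappings has to hand back a mutable map, which is why HashMap::new is passed as the map supplier of the collector. A small sketch of what an immutable map would do instead (field names are placeholders):

import java.util.HashMap;
import java.util.Map;

public class MutableMappingsSketch
{
    public static void main(String[] args)
    {
        // Mutable map (what createInitialFieldMappings returns): putAll works as expected.
        Map<String, String> mutableMappings = new HashMap<>(Map.of("schema-field-A", "name"));
        mutableMappings.putAll(Map.of("schema-field-B", "title"));
        System.out.println(mutableMappings);

        // Immutable map: the same putAll call fails at runtime.
        Map<String, String> immutableMappings = Map.of("schema-field-A", "name");
        try
        {
            immutableMappings.putAll(Map.of("schema-field-B", "title"));
        }
        catch (UnsupportedOperationException expected)
        {
            System.out.println("putAll rejected on an immutable map: " + expected);
        }
    }
}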