[ SEARCH-1693 ] Minor refactoring on the Alfresco Highlighter

This commit is contained in:
agazzarini
2020-01-07 15:26:26 +01:00
parent 464c084f0b
commit a30cf20fcf

View File

@@ -52,7 +52,7 @@ import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import static java.util.Arrays.spliterator; import static java.lang.String.join;
import static java.util.Arrays.stream; import static java.util.Arrays.stream;
import static java.util.Optional.ofNullable; import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toList;
@@ -107,11 +107,12 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
} }
} }
public AlfrescoSolrHighlighter(SolrCore solrCore) public AlfrescoSolrHighlighter(SolrCore core)
{ {
super(solrCore); super(core);
} }
// TODO: Is it possible to do without this?
@Override @Override
protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request) protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request)
{ {
@@ -119,82 +120,74 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
AlfrescoSolrDataModel.getInstance() AlfrescoSolrDataModel.getInstance()
.mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request); .mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request);
SolrParams params = request.getParams();
Highlighter highlighter = Highlighter highlighter =
new Highlighter(getFormatter( new Highlighter(
requestFieldname, params), getFormatter(requestFieldname, request.getParams()),
getEncoder(requestFieldname, params), getEncoder(requestFieldname, request.getParams()),
getQueryScorer(query,schemaFieldName, request)); getQueryScorer(query,schemaFieldName, request));
highlighter.setTextFragmenter(getFragmenter(requestFieldname, params)); highlighter.setTextFragmenter(getFragmenter(requestFieldname, request.getParams()));
return highlighter; return highlighter;
} }
@Override @Override
protected QueryScorer getSpanQueryScorer(Query query, protected QueryScorer getSpanQueryScorer(Query query, String requestFieldname, TokenStream tokenStream, SolrQueryRequest request)
String requestFieldname, {
TokenStream tokenStream, SolrQueryRequest request) { String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request);
String schemaFieldName = AlfrescoSolrDataModel.getInstance() QueryScorer scorer = new QueryScorer(query,request.getParams().getFieldBool(requestFieldname, HighlightParams.FIELD_MATCH, false) ? schemaFieldName : null);
.mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request); scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
QueryScorer scorer = new QueryScorer(query,
request.getParams().getFieldBool(requestFieldname,
HighlightParams.FIELD_MATCH, false) ? schemaFieldName : null);
scorer.setExpandMultiTermQuery(request.getParams().getBool(
HighlightParams.HIGHLIGHT_MULTI_TERM, true));
boolean defaultPayloads = true;// overwritten below boolean defaultPayloads = true;// overwritten below
try { try
{
// It'd be nice to know if payloads are on the tokenStream but the // It'd be nice to know if payloads are on the tokenStream but the
// presence of the attribute isn't a good // presence of the attribute isn't a good
// indicator. // indicator.
final Terms terms = request.getSearcher().getSlowAtomicReader().fields() final Terms terms = request.getSearcher().getSlowAtomicReader().fields().terms(schemaFieldName);
.terms(schemaFieldName); if (terms != null)
if (terms != null) { {
defaultPayloads = terms.hasPayloads(); defaultPayloads = terms.hasPayloads();
} }
} catch (IOException e) { }
catch (IOException e)
{
LOGGER.error("Couldn't check for existence of payloads", e); LOGGER.error("Couldn't check for existence of payloads", e);
} }
scorer.setUsePayloads(request.getParams().getFieldBool(requestFieldname, scorer.setUsePayloads(request.getParams().getFieldBool(requestFieldname, HighlightParams.PAYLOADS, defaultPayloads));
HighlightParams.PAYLOADS, defaultPayloads));
return scorer; return scorer;
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Override @Override
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest request, String[] defaultFields) throws IOException { public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest request, String[] defaultFields) throws IOException
{
final String idFieldName = request.getSchema().getUniqueKeyField().getName(); final String idFieldName = request.getSchema().getUniqueKeyField().getName();
final Set<String> idFields = Set.of(idFieldName, "DBID"); final Set<String> idFields = Set.of(idFieldName, "DBID");
final SolrParams originalRequestParameters = request.getParams(); final SolrParams originalRequestParameters = request.getParams();
// fields in the hl.fl parameter e.g. (content, name, title) // raw fields in the hl.fl parameter (e.g. hl.fl=content, name, title)
List<String> highlightFields = stream(super.getHighlightFields(query, request, defaultFields)).collect(toList()); List<String> highlightFields = stream(super.getHighlightFields(query, request, defaultFields)).collect(toList());
/* /*
The Alfresco Data Model is queried in order to retrieve the top-level choice mapping for the fields The Alfresco Data Model is queried in order to retrieve the top-level choice mapping for the fields collected above.
collected above. Top-level choice because for each incoming field name (e.g. content) the Alfresco Data Model could provide more
Top-level choice because for each simple field name (e.g. content) the Alfresco Data Model could provide more than one alternative. The first one which is tried is the cross language field.
than one mapping. At this time, we choose the first.
e.g. e.g.
{ {
name => text@s___t@{http://www.alfresco.org/model/content/1.0}name, name => text@s___t@{http://www.alfresco.org/model/content/1.0}name,
title => mltext@m___t@{http://www.alfresco.org/model/content/1.0}title, title => mltext@m___t@{http://www.alfresco.org/model/content/1.0}title,
content = content@s___t@{http://www.alfresco.org/model/content/1.0}content content => content@s___t@{http://www.alfresco.org/model/content/1.0}content
} }
Since at the end we need to restore (in the response) the original request(ed) fields names (e.g. content, name) used by requestor
we collect a map which associates each schema field (e.g. text@s___t@{http://www.alfresco.org/model/content/1.0}name)
with the corresponding request(ed) field (e.g. name).
*/ */
Map<String, String> mappings = Map<String, String> mappings = withDebug(createInitialFieldMappings(request, highlightFields));
highlightFields.stream()
.map(requestFieldName ->
new AbstractMap.SimpleEntry<>(
AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldName, FieldUse.HIGHLIGHT, request),
requestFieldName))
.collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (prev, next) -> next, HashMap::new));
debugMappings(mappings); // The identifiers map collects three documents identifiers for each document (Lucene docid, Solr "id" and "DBID").
// Keys of the identifiers map are Solr "id", while values are simple value objects encapsulating all those three identifiers (for a specific document).
// The identifiers map collects three documents identifiers per document (Lucene docid, Solr "id" and "DBID" fields).
// The keys of the map are Solr "id", the values a simple value object encapsulating all those three identifiers (for a specific document).
Iterable<Integer> iterable = docs::iterator; Iterable<Integer> iterable = docs::iterator;
Map<String, IdTriple> identifiers = Map<String, IdTriple> identifiers =
StreamSupport.stream(iterable.spliterator(), false) StreamSupport.stream(iterable.spliterator(), false)
@@ -202,25 +195,28 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)); .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
// First round: call the Solr highlighting procedure // First round: call the Solr highlighting procedure using the current fields mappings.
request.setParams(rewrite(originalRequestParameters, mappings, String.join(",", mappings.keySet()))); request.setParams(rewrite(originalRequestParameters, mappings, join(",", mappings.keySet())));
NamedList<Object> highlightingResponse = super.doHighlighting(docs, query, request, defaultFields); NamedList<Object> highlightingResponse = super.doHighlighting(docs, query, request, defaultFields);
// Remember, in the first try we used the top-level mapping choice coming from Alfresco Data Model. // Remember, in the first try we used the cross-language field coming from Alfresco Data Model.
// Since it is possible that the stored content is not on that field (e.g. it could be on the localised version) // Since it is possible that the stored content is not on that field (e.g. it could be on the localised version)
// the highlight response for that document/field will be empty. // the highlight response for that document/field will be empty.
// For that reason, and for those documents / fields we will repeat the highlight call using the second choice (the // For that reason, and for those documents/fields we will repeat the highlight call using the second choice
// localised version of the field). // (i.e. the localised version of the field).
// Key = 2nd round fields (in the first try we didn't have any highlighting for those fields) // Key = 2nd round fields got from Alfresco Data Model (i.e. localised fields)
// Value = list of identifiers of documents that didn't provide the highlighting info in the first round (for the key field) // Value = list of identifiers of documents that didn't provide the highlighting info in the first round (for the key field)
Map<String, List<IdTriple>> missingHighlightedDocumentsByFields = new HashMap<>(); Map<String, List<IdTriple>> missingHighlightedDocumentsByFields = new HashMap<>();
// Additional mappings coming from this 2nd round
Map<String, String> additionalMappings = new HashMap<>(); Map<String, String> additionalMappings = new HashMap<>();
identifiers.keySet() identifiers.keySet()
.forEach(id -> { .forEach(id -> {
final NamedList<Object> docHighlighting = (NamedList<Object>) highlightingResponse.get(id); final NamedList<Object> docHighlighting = (NamedList<Object>) highlightingResponse.get(id);
mappings.entrySet().stream() mappings.entrySet().stream()
// we want to process only those entries that didn't produce any result in the first round.
.filter(fieldEntry -> docHighlighting.indexOf(fieldEntry.getKey(), 0) == -1) .filter(fieldEntry -> docHighlighting.indexOf(fieldEntry.getKey(), 0) == -1)
.map(fieldEntry -> { .map(fieldEntry -> {
String solrFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldEntry.getValue(), FieldUse.HIGHLIGHT, request, 1); String solrFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldEntry.getValue(), FieldUse.HIGHLIGHT, request, 1);
@@ -231,7 +227,7 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
.forEach(docList -> docList.add(identifiers.get(id)));}); .forEach(docList -> docList.add(identifiers.get(id)));});
mappings.putAll(additionalMappings); mappings.putAll(additionalMappings);
debugMappings(mappings); withDebug(mappings);
// We are going to re-call the highlight for those documents/fields which didn't produce any result in the // We are going to re-call the highlight for those documents/fields which didn't produce any result in the
// previous step. In order to do that we need // previous step. In order to do that we need
@@ -241,7 +237,9 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
missingHighlightedDocumentsByFields.entrySet().stream() missingHighlightedDocumentsByFields.entrySet().stream()
.map(entry -> { .map(entry -> {
int [] docids = entry.getValue().stream().mapToInt(IdTriple::docid).toArray(); int [] docids = entry.getValue().stream().mapToInt(IdTriple::docid).toArray();
return new AbstractMap.SimpleEntry<>(entry.getKey(), new DocSlice(0, docids.length, docids, null, docids.length, 1));}) return new AbstractMap.SimpleEntry<>(
entry.getKey(),
new DocSlice(0, docids.length, docids, null, docids.length, 1));})
.collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)); .collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
// For each field and corresponding document list, a new highlight request is executed // For each field and corresponding document list, a new highlight request is executed
@@ -252,39 +250,35 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
DocList doclist = entry.getValue(); DocList doclist = entry.getValue();
try try
{ {
// ModifiableSolrParams params =
// new ModifiableSolrParams(request.getParams())
// .set(HighlightParams.FIELDS, fieldName);
// rewriteLocalFieldParameters(params, originalRequestParameters, mappings.get(fieldName), fieldName);
// request.setParams(params);
request.setParams(rewrite(originalRequestParameters, additionalMappings, fieldName)); request.setParams(rewrite(originalRequestParameters, additionalMappings, fieldName));
return super.doHighlighting(doclist, query, request, defaultFields); return super.doHighlighting(doclist, query, request, defaultFields);
} }
catch (Exception exception) catch (Exception exception)
{ {
// This is a child request so in that case we log the error but we still return something to // This is a "2nd round" request so in that case we log the error but we still return something to
// the requestor (i.e. the result of the first highlight call) // the requestor (i.e. the result of the first highlight call)
LOGGER.error("Error during the execution of a child highlighting request. See the stacktrace below for further details.", exception); LOGGER.error("Error during the execution of a \"2nd round\" highlighting request. " +
"See the stacktrace below for further details.", exception);
return null; return null;
}}) }})
.filter(Objects::nonNull)
.collect(toList()); .collect(toList());
// We need to combine (actually reduce) the highlight response coming from the first try, with each // Combine (actually reduce) the highlight response coming from the first try, with each
// partial highlight response coming from subsequent calls // partial highlight response coming from subsequent calls
NamedList<Object> responseBeforeRenaming = partialHighlightingResponses.stream() NamedList<Object> responseBeforeRenaming = partialHighlightingResponses.stream()
.reduce(highlightingResponse, (accumulator, partial) -> { .reduce(highlightingResponse, (accumulator, partial) -> {
partial.iterator().forEachRemaining(entry -> { partial.forEach(entry -> {
String id = entry.getKey(); String id = entry.getKey();
NamedList<Object> specificFieldsHighlighting = (NamedList<Object>) entry.getValue(); NamedList<Object> specificFieldHighlighting = (NamedList<Object>) entry.getValue();
NamedList<Object> preExistingDocHighlight = (NamedList<Object>) accumulator.get(id); NamedList<Object> preExistingDocHighlighting = (NamedList<Object>) accumulator.get(id);
// this document was never collected if (preExistingDocHighlighting == null) // this document was never collected
if (preExistingDocHighlight == null)
{ {
accumulator.add(id, entry.getValue()); accumulator.add(id, entry.getValue());
} }
else else
{ {
preExistingDocHighlight.addAll(specificFieldsHighlighting); preExistingDocHighlighting.addAll(specificFieldHighlighting);
} }
}); });
return accumulator; return accumulator;
@@ -295,8 +289,7 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
// so we need to replace them with fields actually requested // so we need to replace them with fields actually requested
// In addition, besides the snippets we want to have the document DBID as well. // In addition, besides the snippets we want to have the document DBID as well.
NamedList<Object> response = new SimpleOrderedMap<>(); NamedList<Object> response = new SimpleOrderedMap<>();
responseBeforeRenaming.iterator() responseBeforeRenaming.forEach( entry -> {
.forEachRemaining( entry -> {
String id = entry.getKey(); String id = entry.getKey();
NamedList<Object> documentHighlighting = (NamedList<Object>) entry.getValue(); NamedList<Object> documentHighlighting = (NamedList<Object>) entry.getValue();
NamedList<Object> renamedDocumentHighlighting = new SimpleOrderedMap<>(); NamedList<Object> renamedDocumentHighlighting = new SimpleOrderedMap<>();
@@ -406,11 +399,46 @@ public class AlfrescoSolrHighlighter extends DefaultSolrHighlighter implements P
return rewrittenParams; return rewrittenParams;
} }
private void debugMappings(Map<String, String> mappings) /**
* Logs, at debug level, each entry of the given mappings (nothing is logged when debug is disabled).
*
* @param mappings the fields mapping.
* @return the same input mappings instance.
*/
private Map<String, String> withDebug(Map<String, String> mappings)
{ {
if (LOGGER.isDebugEnabled()) if (LOGGER.isDebugEnabled())
{ {
mappings.forEach( (solrField, requestField) -> LOGGER.debug("Request field {} has been mapped to {}", requestField, solrField)); mappings.forEach( (solrField, requestField) -> LOGGER.debug("Request field {} has been mapped to {}", requestField, solrField));
} }
return mappings;
}
/**
* Starting from the input requested highlight fields (i.e. fields listed in {@link HighlightParams#FIELDS} parameter)
* we create a map which associates each member with the corresponding field in the Solr schema.
* For example:
*
* <pre>
* name => text@s___t@{http://www.alfresco.org/model/content/1.0}name,
* title => mltext@m___t@{http://www.alfresco.org/model/content/1.0}title,
* content => content@s___t@{http://www.alfresco.org/model/content/1.0}content
* </pre>
*
* IMPORTANT: although returned as {@link Map} interface, the returned data structure IS MUTABLE. This is needed
* because during the highlighting workflow we need to change its content by adding fields.
*
* @param request the current incoming client request.
* @param requestedHighlightFields a list of raw fields listed in {@link HighlightParams#FIELDS} parameter
* @return a mutable map whose keys are the schema fields and whose values are the corresponding request(ed) fields.
*/
private Map<String, String> createInitialFieldMappings(SolrQueryRequest request, List<String> requestedHighlightFields)
{
return requestedHighlightFields.stream()
.map(requestFieldName ->
new AbstractMap.SimpleEntry<>(
AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldName, FieldUse.HIGHLIGHT, request),
requestFieldName))
.collect(toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (prev, next) -> next, HashMap::new));
} }
} }