Merge branch 'feature/SEARCH-2029_ContentResponseCompression' into 'master'

Use gzip-compressed streams to retrieve content text from the repository.

See merge request search_discovery/insightengine!320
This commit is contained in:
Angel Borroy
2020-01-17 13:26:30 +00:00
5 changed files with 63 additions and 12 deletions

View File

@@ -81,6 +81,7 @@ import java.util.Map.Entry;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.IntArrayList;
@@ -2500,6 +2501,10 @@ public class SolrInformationServer implements InformationServer
response); response);
InputStream ris = response.getContent(); InputStream ris = response.getContent();
// The Content-Encoding header is null whenever the repository answers uncompressed,
// which is the default (solr.request.content.compress=false), so the comparison
// must be null-safe. Content-coding tokens are case-insensitive, hence equalsIgnoreCase.
if ("gzip".equalsIgnoreCase(response.getContentEncoding()))
{
    // Wrap the raw stream so subsequent reads see the decompressed content.
    ris = new GZIPInputStream(ris);
}
String textContent = ""; String textContent = "";
try try
{ {

View File

@@ -174,6 +174,14 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling # -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600 solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
# #
# Limit the maximum text size of transformed content sent to the index - in bytes # Limit the maximum text size of transformed content sent to the index - in bytes
# #

View File

@@ -174,6 +174,13 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling # -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600 solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
# #
# Limit the maximum text size of transformed content sent to the index - in bytes # Limit the maximum text size of transformed content sent to the index - in bytes
# #

View File

@@ -30,6 +30,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
@@ -73,6 +74,7 @@ import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.net.URLCodec; import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.util.DateUtil; import org.apache.commons.httpclient.util.DateUtil;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
@@ -113,15 +115,32 @@ public class SOLRAPIClient
private JsonFactory jsonFactory; private JsonFactory jsonFactory;
private NamespaceDAO namespaceDAO; private NamespaceDAO namespaceDAO;
/**
* When {@code true}, the {@code Accept-Encoding: gzip} header is added to
* text content requests so that the repository may reply with a
* gzip-compressed response body. Compression must also be enabled in the
* Alfresco Repository Tomcat Connector or in the HTTP Web Proxy in front of it.
* Assigned in the constructor; the three-argument constructor passes {@code false}.
*/
private boolean compression;
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService, DictionaryService dictionaryService,
NamespaceDAO namespaceDAO) NamespaceDAO namespaceDAO)
{
this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
}
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService,
NamespaceDAO namespaceDAO,
boolean compression)
{ {
this.repositoryHttpClient = repositoryHttpClient; this.repositoryHttpClient = repositoryHttpClient;
this.dictionaryService = dictionaryService; this.dictionaryService = dictionaryService;
this.namespaceDAO = namespaceDAO; this.namespaceDAO = namespaceDAO;
this.deserializer = new SOLRDeserializer(namespaceDAO); this.deserializer = new SOLRDeserializer(namespaceDAO);
this.jsonFactory = new JsonFactory(); this.jsonFactory = new JsonFactory();
this.compression = compression;
} }
/** /**
@@ -1118,12 +1137,16 @@ public class SOLRAPIClient
GetRequest req = new GetRequest(url.toString()); GetRequest req = new GetRequest(url.toString());
Map<String, String> headers = new HashMap<>();
if(modifiedSince != null) if(modifiedSince != null)
{ {
Map<String, String> headers = new HashMap<String, String>(1, 1.0f);
headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
req.setHeaders(headers);
} }
// Ask for a gzip-compressed response body when compression has been enabled.
if (compression)
{
headers.put("Accept-Encoding", "gzip");
}
// Headers are applied unconditionally; the map may be empty at this point.
req.setHeaders(headers);
Response response = repositoryHttpClient.sendRequest(req); Response response = repositoryHttpClient.sendRequest(req);
@@ -1482,6 +1505,7 @@ public class SOLRAPIClient
private String transformException; private String transformException;
private String transformStatusStr; private String transformStatusStr;
private Long transformDuration; private Long transformDuration;
private String contentEncoding;
public GetTextContentResponse(Response response) throws IOException public GetTextContentResponse(Response response) throws IOException
{ {
@@ -1492,6 +1516,7 @@ public class SOLRAPIClient
this.transformException = response.getHeader("X-Alfresco-transformException"); this.transformException = response.getHeader("X-Alfresco-transformException");
String tmp = response.getHeader("X-Alfresco-transformDuration"); String tmp = response.getHeader("X-Alfresco-transformDuration");
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
this.contentEncoding = response.getHeader("Content-Encoding");
setStatus(); setStatus();
} }
@@ -1558,6 +1583,11 @@ public class SOLRAPIClient
{ {
return transformDuration; return transformDuration;
} }
/**
 * Returns the value of the {@code Content-Encoding} response header
 * captured when this response object was built.
 *
 * @return the content encoding reported by the response
 */
public String getContentEncoding()
{
    return this.contentEncoding;
}
} }
public void close() public void close()

View File

@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoHost = props.getProperty("alfresco.host", "localhost");
alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL); SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
if (client == null) if (client == null)
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
} }