diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index c5b07d69f..a4ea83035 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -81,6 +81,7 @@ import java.util.Map.Entry; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; import com.carrotsearch.hppc.IntArrayList; @@ -2498,8 +2499,13 @@ public class SolrInformationServer implements InformationServer response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response); - + InputStream ris = response.getContent(); + /* Null-safe, case-insensitive check: the Content-Encoding header may be absent when the response is not compressed */ + if ("gzip".equalsIgnoreCase(response.getContentEncoding())) + { + ris = new GZIPInputStream(ris); + } String textContent = ""; try { diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties index e91ba7ee8..a6a5a1fae 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties @@ -174,6 +174,14 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must also be enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + + # 
# Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 06f089370..fdf702da6 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -174,6 +174,13 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must also be enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 136d5ddde..ebc44dbd0 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -73,6 +74,7 @@ import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.httpclient.HttpStatus; import 
org.apache.commons.httpclient.util.DateUtil; +import org.apache.commons.io.IOUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -111,17 +113,34 @@ public class SOLRAPIClient private SOLRDeserializer deserializer; private DictionaryService dictionaryService; private JsonFactory jsonFactory; - private NamespaceDAO namespaceDAO; + private NamespaceDAO namespaceDAO; + + /** + * This option enables ("Accept-Encoding": "gzip") header for compression + * in GET_CONTENT requests. Additional configuration is required in + * Alfresco Repository Tomcat Connector or HTTP Web Proxy so that + * the responses are actually compressed. + */ + private boolean compression; + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, + DictionaryService dictionaryService, + NamespaceDAO namespaceDAO) + { + this(repositoryHttpClient, dictionaryService, namespaceDAO, false); + } + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, - NamespaceDAO namespaceDAO) + NamespaceDAO namespaceDAO, + boolean compression) { this.repositoryHttpClient = repositoryHttpClient; this.dictionaryService = dictionaryService; this.namespaceDAO = namespaceDAO; this.deserializer = new SOLRDeserializer(namespaceDAO); - this.jsonFactory = new JsonFactory(); + this.jsonFactory = new JsonFactory(); + this.compression = compression; } /** @@ -1118,20 +1137,24 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); + Map<String, String> headers = new HashMap<>(); if(modifiedSince != null) { - Map headers = new HashMap(1, 1.0f); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); - req.setHeaders(headers); } - + if (compression) + { + headers.put("Accept-Encoding", "gzip"); + } + req.setHeaders(headers); + Response response = repositoryHttpClient.sendRequest(req); if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && 
response.getStatus() != Status.STATUS_OK) { throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus()); - } - + } + return new GetTextContentResponse(response); } @@ -1481,7 +1504,8 @@ public class SOLRAPIClient private SolrApiContentStatus status; private String transformException; private String transformStatusStr; - private Long transformDuration; + private Long transformDuration; + private String contentEncoding; public GetTextContentResponse(Response response) throws IOException { @@ -1491,7 +1515,8 @@ public class SOLRAPIClient this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus"); this.transformException = response.getHeader("X-Alfresco-transformException"); String tmp = response.getHeader("X-Alfresco-transformDuration"); - this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.contentEncoding = response.getHeader("Content-Encoding"); setStatus(); } @@ -1557,6 +1582,11 @@ public class SOLRAPIClient public Long getTransformDuration() { return transformDuration; + } + + public String getContentEncoding() + { + return contentEncoding; } } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java index 0fbf5df7a..f467d3242 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java @@ -143,6 +143,7 @@ public class SOLRAPIClientFactory alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); + boolean compression = 
Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false")); SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL); if (client == null) @@ -171,7 +172,7 @@ public class SOLRAPIClientFactory maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); - client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); + client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); }