Merge branch 'feature/SEARCH-2029_ContentResponseCompression' into 'master'

Use gzip-compressed streams to retrieve content text from the repository.

See merge request search_discovery/insightengine!320
This commit is contained in:
Angel Borroy
2020-01-17 13:26:30 +00:00
5 changed files with 63 additions and 12 deletions

View File

@@ -81,6 +81,7 @@ import java.util.Map.Entry;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import com.carrotsearch.hppc.IntArrayList;
@@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer
response);
addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME,
response);
InputStream ris = response.getContent();
// getContentEncoding() exposes the "Content-Encoding" response header. That header is
// presumably absent (null) when the repository did not compress the body, e.g. when
// solr.request.content.compress is false - TODO confirm against Response.getHeader.
// Comparing from the literal side keeps the check null-safe, and equalsIgnoreCase is used
// because HTTP content-coding names are case-insensitive.
if ("gzip".equalsIgnoreCase(response.getContentEncoding()))
{
// Wrap the raw stream so downstream readers see the decompressed text.
ris = new GZIPInputStream(ris);
}
String textContent = "";
try
{

View File

@@ -174,6 +174,14 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#

View File

@@ -174,6 +174,13 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#

View File

@@ -30,6 +30,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@@ -73,6 +74,7 @@ import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.util.DateUtil;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@@ -111,17 +113,34 @@ public class SOLRAPIClient
private SOLRDeserializer deserializer;
private DictionaryService dictionaryService;
private JsonFactory jsonFactory;
private NamespaceDAO namespaceDAO;
private NamespaceDAO namespaceDAO;
/**
* This option enables the ("Accept-Encoding": "gzip") header in GET_CONTENT
* requests, asking the repository for gzip-compressed responses. Additional
* configuration is required in the Alfresco Repository Tomcat Connector or
* HTTP Web Proxy so that responses are actually compressed.
*/
private boolean compression;
/**
* Creates a client with content compression switched off.
* Delegates to the four-argument constructor, passing {@code false} as the compression flag.
*
* @param repositoryHttpClient HTTP client used to send requests to the repository
* @param dictionaryService dictionary service stored on the client
* @param namespaceDAO namespace DAO stored on the client and handed to the deserializer
*/
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, NamespaceDAO namespaceDAO)
{
this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
}
/**
* Creates a client, storing the supplied collaborators and the compression flag.
* A {@code SOLRDeserializer} is built from {@code namespaceDAO} and a fresh
* {@code JsonFactory} is created.
*
* NOTE(review): in this view the {@code namespaceDAO} parameter line and the
* {@code jsonFactory} assignment each appear twice; this looks like removed and
* added diff lines shown together rather than real duplication - confirm against
* the actual source file.
*
* @param repositoryHttpClient HTTP client used to send requests to the repository
* @param dictionaryService dictionary service stored on the client
* @param namespaceDAO namespace DAO stored on the client and handed to the deserializer
* @param compression when true, content requests add the "Accept-Encoding: gzip" header
*/
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService,
NamespaceDAO namespaceDAO)
NamespaceDAO namespaceDAO,
boolean compression)
{
this.repositoryHttpClient = repositoryHttpClient;
this.dictionaryService = dictionaryService;
this.namespaceDAO = namespaceDAO;
this.deserializer = new SOLRDeserializer(namespaceDAO);
this.jsonFactory = new JsonFactory();
this.jsonFactory = new JsonFactory();
this.compression = compression;
}
/**
@@ -1118,20 +1137,24 @@ public class SOLRAPIClient
GetRequest req = new GetRequest(url.toString());
Map<String, String> headers = new HashMap<>();
if(modifiedSince != null)
{
Map<String, String> headers = new HashMap<String, String>(1, 1.0f);
headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
req.setHeaders(headers);
}
if (compression)
{
headers.put("Accept-Encoding", "gzip");
}
req.setHeaders(headers);
Response response = repositoryHttpClient.sendRequest(req);
if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK)
{
throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus());
}
}
return new GetTextContentResponse(response);
}
@@ -1481,7 +1504,8 @@ public class SOLRAPIClient
private SolrApiContentStatus status;
private String transformException;
private String transformStatusStr;
private Long transformDuration;
private Long transformDuration;
private String contentEncoding;
public GetTextContentResponse(Response response) throws IOException
{
@@ -1491,7 +1515,8 @@ public class SOLRAPIClient
this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus");
this.transformException = response.getHeader("X-Alfresco-transformException");
String tmp = response.getHeader("X-Alfresco-transformDuration");
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
this.contentEncoding = response.getHeader("Content-Encoding");
setStatus();
}
@@ -1557,6 +1582,11 @@ public class SOLRAPIClient
/**
* Returns the value parsed from the "X-Alfresco-transformDuration" response header.
*
* @return the parsed duration, or {@code null} when the header was not present
*/
public Long getTransformDuration()
{
return this.transformDuration;
}
/**
* Returns the value of the "Content-Encoding" response header captured when this
* response object was built.
*
* @return the header value (for example "gzip"); presumably {@code null} when the
*         repository sent no such header - callers should compare null-safely
*/
public String getContentEncoding()
{
return this.contentEncoding;
}
}

View File

@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
alfrescoHost = props.getProperty("alfresco.host", "localhost");
alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
if (client == null)
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO);
client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
}