mirror of
https://github.com/Alfresco/SearchServices.git
synced 2025-10-08 14:51:20 +00:00
Merge branch 'feature/SEARCH-2029_ContentResponseCompression' into 'master'
Use gzipped compressed streams to recover content text from repository. See merge request search_discovery/insightengine!320
This commit is contained in:
@@ -81,6 +81,7 @@ import java.util.Map.Entry;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
|
||||
@@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer
|
||||
response);
|
||||
addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME,
|
||||
response);
|
||||
|
||||
|
||||
InputStream ris = response.getContent();
|
||||
// The Content-Encoding header is absent (null) when the repository replies uncompressed,
// and HTTP content-coding names are case-insensitive: compare null-safely, ignoring case.
if ("gzip".equalsIgnoreCase(response.getContentEncoding()))
|
||||
{
|
||||
ris = new GZIPInputStream(ris);
|
||||
}
|
||||
String textContent = "";
|
||||
try
|
||||
{
|
||||
|
@@ -174,6 +174,14 @@ solr.suggester.enabled=true
|
||||
# -1 to disable suggester build throttling
|
||||
solr.suggester.minSecsBetweenBuilds=3600
|
||||
|
||||
#
|
||||
# Request content text compression
|
||||
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
|
||||
# This setting can improve performance when network latency is high or the repository contains large documents
|
||||
#
|
||||
solr.request.content.compress=false
|
||||
|
||||
|
||||
#
|
||||
# Limit the maximum text size of transformed content sent to the index - in bytes
|
||||
#
|
||||
|
@@ -174,6 +174,13 @@ solr.suggester.enabled=true
|
||||
# -1 to disable suggester build throttling
|
||||
solr.suggester.minSecsBetweenBuilds=3600
|
||||
|
||||
#
|
||||
# Request content text compression
|
||||
# When this option is enabled, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
|
||||
# This setting can improve performance when network latency is high or the repository contains large documents
|
||||
#
|
||||
solr.request.content.compress=false
|
||||
|
||||
#
|
||||
# Limit the maximum text size of transformed content sent to the index - in bytes
|
||||
#
|
||||
|
@@ -30,6 +30,7 @@ import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
@@ -73,6 +74,7 @@ import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.net.URLCodec;
|
||||
import org.apache.commons.httpclient.HttpStatus;
|
||||
import org.apache.commons.httpclient.util.DateUtil;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@@ -111,17 +113,34 @@ public class SOLRAPIClient
|
||||
private SOLRDeserializer deserializer;
|
||||
private DictionaryService dictionaryService;
|
||||
private JsonFactory jsonFactory;
|
||||
private NamespaceDAO namespaceDAO;
|
||||
private NamespaceDAO namespaceDAO;
|
||||
|
||||
/**
|
||||
* This option enables the ("Accept-Encoding": "gzip") header for compression
|
||||
* in GET_CONTENT requests. Additional configuration is required in
|
||||
* Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal
|
||||
* with requests for compressed responses.
|
||||
*/
|
||||
private boolean compression;
|
||||
|
||||
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
|
||||
DictionaryService dictionaryService,
|
||||
NamespaceDAO namespaceDAO)
|
||||
{
|
||||
this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
|
||||
}
|
||||
|
||||
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
|
||||
DictionaryService dictionaryService,
|
||||
NamespaceDAO namespaceDAO)
|
||||
NamespaceDAO namespaceDAO,
|
||||
boolean compression)
|
||||
{
|
||||
this.repositoryHttpClient = repositoryHttpClient;
|
||||
this.dictionaryService = dictionaryService;
|
||||
this.namespaceDAO = namespaceDAO;
|
||||
this.deserializer = new SOLRDeserializer(namespaceDAO);
|
||||
this.jsonFactory = new JsonFactory();
|
||||
this.jsonFactory = new JsonFactory();
|
||||
this.compression = compression;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1118,20 +1137,24 @@ public class SOLRAPIClient
|
||||
|
||||
GetRequest req = new GetRequest(url.toString());
|
||||
|
||||
Map<String, String> headers = new HashMap<>();
|
||||
if(modifiedSince != null)
|
||||
{
|
||||
Map<String, String> headers = new HashMap<String, String>(1, 1.0f);
|
||||
headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
|
||||
req.setHeaders(headers);
|
||||
}
|
||||
|
||||
if (compression)
|
||||
{
|
||||
headers.put("Accept-Encoding", "gzip");
|
||||
}
|
||||
req.setHeaders(headers);
|
||||
|
||||
Response response = repositoryHttpClient.sendRequest(req);
|
||||
|
||||
if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK)
|
||||
{
|
||||
throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return new GetTextContentResponse(response);
|
||||
}
|
||||
|
||||
@@ -1481,7 +1504,8 @@ public class SOLRAPIClient
|
||||
private SolrApiContentStatus status;
|
||||
private String transformException;
|
||||
private String transformStatusStr;
|
||||
private Long transformDuration;
|
||||
private Long transformDuration;
|
||||
private String contentEncoding;
|
||||
|
||||
public GetTextContentResponse(Response response) throws IOException
|
||||
{
|
||||
@@ -1491,7 +1515,8 @@ public class SOLRAPIClient
|
||||
this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus");
|
||||
this.transformException = response.getHeader("X-Alfresco-transformException");
|
||||
String tmp = response.getHeader("X-Alfresco-transformDuration");
|
||||
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
|
||||
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
|
||||
this.contentEncoding = response.getHeader("Content-Encoding");
|
||||
setStatus();
|
||||
}
|
||||
|
||||
@@ -1557,6 +1582,11 @@ public class SOLRAPIClient
|
||||
public Long getTransformDuration()
|
||||
{
|
||||
return transformDuration;
|
||||
}
|
||||
|
||||
public String getContentEncoding()
|
||||
{
|
||||
return contentEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
|
||||
alfrescoHost = props.getProperty("alfresco.host", "localhost");
|
||||
alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
|
||||
alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
|
||||
boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
|
||||
|
||||
SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
|
||||
if (client == null)
|
||||
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
|
||||
maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
|
||||
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
|
||||
|
||||
client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO);
|
||||
client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
|
||||
setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user