From f29e3250fd2ba4dfbb8c415bd272d53c7a55ef18 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Tue, 7 Jan 2020 11:18:18 +0100 Subject: [PATCH 01/17] Skip long periods of time where repository is not ingesting new content. --- .../solr/tracker/MetadataTracker.java | 11 +++++ .../alfresco/solr/client/SOLRAPIClient.java | 49 ++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index 3862ebd30..7c0df7841 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -546,6 +546,17 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker { transactions = client.getTransactions(startTime, null, startTime + actualTimeStep, null, maxResults, shardstate); startTime += actualTimeStep; + + // If no transactions are found, advance the time window to the next available transaction commit time + if (transactions.getTransactions().size() == 0) + { + Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime); + if (nextTxCommitTime != -1) + { + log.info("Advancing transactions from startTime = " + startTime + " to " + nextTxCommitTime); + transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate); + } + } } while (((transactions.getTransactions().size() == 0) && (startTime < endTime)) || ((transactions.getTransactions().size() > 0) && alreadyFoundTransactions(txnsFound, transactions))); diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 136d5ddde..2be8197e1 
100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -103,7 +103,8 @@ public class SOLRAPIClient private static final String GET_NODES_URL = "api/solr/nodes"; private static final String GET_CONTENT = "api/solr/textContent"; private static final String GET_MODEL = "api/solr/model"; - private static final String GET_MODELS_DIFF = "api/solr/modelsdiff"; + private static final String GET_MODELS_DIFF = "api/solr/modelsdiff"; + private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction"; private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum"; @@ -1228,7 +1229,51 @@ public class SOLRAPIClient } return diffs; - } + } + + /** + * Returns the minimum and the maximum commit time for transactions in a node id range. + * + * @param coreName alfresco, archive + * @param fromCommitTime initial transaction commit time + * @return Time of the next transaction + * @throws IOException + * @throws AuthenticationException + * @throws Exception + */ + public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException + { + StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME); + url.append("?").append("fromCommitTime").append("=").append(fromCommitTime); + GetRequest get = new GetRequest(url.toString()); + Response response = null; + JSONObject json = null; + try + { + response = repositoryHttpClient.sendRequest(get); + if (response.getStatus() != HttpStatus.SC_OK) + { + throw new AlfrescoRuntimeException(coreName + " - GetNextTxCommitTime return status is " + + response.getStatus() + " when invoking " + url); + } + + Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8")); + json = new JSONObject(new JSONTokener(reader)); + } + finally + { + if (response != null) + { + response.release(); 
+ } + } + if (log.isDebugEnabled()) + { + log.debug(json.toString()); + } + + return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString()); + } /* * type conversions from serialized JSON values to SOLR-consumable objects From c3ab1fc817e30b94d0154e48b13bc651ee177643 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Wed, 8 Jan 2020 14:45:49 +0100 Subject: [PATCH 02/17] Use the feature only when nextTxCommitTime Web Script is available in the repository. --- .../solr/tracker/MetadataTracker.java | 24 +++++++++++++++++-- .../alfresco/solr/client/SOLRAPIClient.java | 1 - 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index 7c0df7841..a3a00fd2e 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -63,6 +63,16 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker private ConcurrentLinkedQueue nodesToIndex = new ConcurrentLinkedQueue<>(); private ConcurrentLinkedQueue nodesToPurge = new ConcurrentLinkedQueue<>(); private ConcurrentLinkedQueue queriesToReindex = new ConcurrentLinkedQueue<>(); + + /** + * Check if nextTxCommitTimeService is available in the repository. + * This service is used to find the next available transaction commit time from a given time, + * so periods of time where no document updating is happening can be skipped while getting + * pending transactions list. 
+ * + * {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME} + */ + private boolean nextTxCommitTimeServiceAvailable = false; public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName, InformationServer informationServer) @@ -71,6 +81,16 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100")); nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10")); threadHandler = new ThreadHandler(p, coreName, "MetadataTracker"); + + try + { + client.getNextTxCommitTime(coreName, 0l); + nextTxCommitTimeServiceAvailable = true; + } + catch (Exception e) + { + log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature", e); + } } MetadataTracker() @@ -548,12 +568,12 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker startTime += actualTimeStep; // If no transactions are found, advance the time window to the next available transaction commit time - if (transactions.getTransactions().size() == 0) + if (nextTxCommitTimeServiceAvailable && transactions.getTransactions().size() == 0) { Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime); if (nextTxCommitTime != -1) { - log.info("Advancing transactions from startTime = " + startTime + " to " + nextTxCommitTime); + log.info("Advancing transactions from {} to {}", startTime, nextTxCommitTime); transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate); } } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 2be8197e1..eb2142ff1 100644 --- 
a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -1239,7 +1239,6 @@ public class SOLRAPIClient * @return Time of the next transaction * @throws IOException * @throws AuthenticationException - * @throws Exception */ public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException { From 2ff2c47041ccc981a307cc3bb3446a4b89320dc8 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Thu, 9 Jan 2020 09:39:59 +0100 Subject: [PATCH 03/17] Add specific exception when Repository Service is not available to provide deeper information in the logs. --- .../org/alfresco/solr/tracker/MetadataTracker.java | 10 +++++++--- .../java/org/alfresco/solr/client/SOLRAPIClient.java | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index a3a00fd2e..6ddb64f2a 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -87,9 +87,13 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker client.getNextTxCommitTime(coreName, 0l); nextTxCommitTimeServiceAvailable = true; } + catch (NoSuchMethodException e) + { + log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage()); + } catch (Exception e) { - log.warn("nextTxCommitTimeService is not available. 
Upgrade your ACS Repository version in order to use this feature", e); + log.error("Checking nextTxCommitTimeService failed.", e); } } @@ -538,7 +542,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker } protected Transactions getSomeTransactions(BoundedDeque txnsFound, Long fromCommitTime, long timeStep, - int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException + int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException, NoSuchMethodException { long actualTimeStep = timeStep; @@ -995,7 +999,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker } public IndexHealthReport checkIndex(Long toTx, Long toAclTx, Long fromTime, Long toTime) - throws IOException, AuthenticationException, JSONException, EncoderException + throws IOException, AuthenticationException, JSONException, EncoderException, NoSuchMethodException { // DB TX Count long firstTransactionCommitTime = 0; diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index eb2142ff1..9fb3382ed 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -1239,8 +1239,9 @@ public class SOLRAPIClient * @return Time of the next transaction * @throws IOException * @throws AuthenticationException + * @throws NoSuchMethodException */ - public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException + public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException, NoSuchMethodException { StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME); 
url.append("?").append("fromCommitTime").append("=").append(fromCommitTime); @@ -1252,7 +1253,7 @@ public class SOLRAPIClient response = repositoryHttpClient.sendRequest(get); if (response.getStatus() != HttpStatus.SC_OK) { - throw new AlfrescoRuntimeException(coreName + " - GetNextTxCommitTime return status is " + throw new NoSuchMethodException(coreName + " - GetNextTxCommitTime return status is " + response.getStatus() + " when invoking " + url); } From 4059269ef0f022fd14b2f5413f859003f4e46c6b Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2020 22:13:28 +0000 Subject: [PATCH 04/17] Bump rm.version from 3.2.0 to 3.3.0 in /e2e-test Bumps `rm.version` from 3.2.0 to 3.3.0. Updates `alfresco-governance-services-automation-enterprise-rest-api` from 3.2.0 to 3.3.0 Updates `alfresco-governance-services-automation-community-rest-api` from 3.2.0 to 3.3.0 Signed-off-by: dependabot-preview[bot] --- e2e-test/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml index f0501aaf0..3bd320096 100644 --- a/e2e-test/pom.xml +++ b/e2e-test/pom.xml @@ -14,7 +14,7 @@ 1.26 1.12 3.0.17 - 3.2.0 + 3.3.0 src/test/resources/SearchSuite.xml From d28f05b19be51ef8d7a13e928db024814d4b3fe9 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Tue, 14 Jan 2020 16:50:16 +0100 Subject: [PATCH 05/17] Use gzipped compressed streams to recover content text from repository. This modification will also require Tomcat / NGINX configuration in order to accept HTTP GZIP requests. 
--- .../alfresco/solr/SolrInformationServer.java | 7 +++++- .../alfresco/solr/client/SOLRAPIClient.java | 24 +++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index c5b07d69f..7c8552cb5 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -81,6 +81,7 @@ import java.util.Map.Entry; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; import com.carrotsearch.hppc.IntArrayList; @@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response); - + InputStream ris = response.getContent(); + if (null != response.getContentEncoding() && response.getContentEncoding().equals("gzip")) + { + ris = new GZIPInputStream(ris); + } String textContent = ""; try { diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 136d5ddde..1bf4e9954 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -73,6 +74,7 
@@ import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.util.DateUtil; +import org.apache.commons.io.IOUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -1118,20 +1120,21 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); + Map headers = new HashMap(); if(modifiedSince != null) { - Map headers = new HashMap(1, 1.0f); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); - req.setHeaders(headers); } - + headers.put("Accept-Encoding", "gzip"); + req.setHeaders(headers); + Response response = repositoryHttpClient.sendRequest(req); if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK) { throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus()); - } - + } + return new GetTextContentResponse(response); } @@ -1481,7 +1484,8 @@ public class SOLRAPIClient private SolrApiContentStatus status; private String transformException; private String transformStatusStr; - private Long transformDuration; + private Long transformDuration; + private String contentEncoding; public GetTextContentResponse(Response response) throws IOException { @@ -1491,7 +1495,8 @@ public class SOLRAPIClient this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus"); this.transformException = response.getHeader("X-Alfresco-transformException"); String tmp = response.getHeader("X-Alfresco-transformDuration"); - this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.transformDuration = (tmp != null ? 
Long.valueOf(tmp) : null); + this.contentEncoding = response.getHeader("Content-Encoding"); setStatus(); } @@ -1557,6 +1562,11 @@ public class SOLRAPIClient public Long getTransformDuration() { return transformDuration; + } + + public String getContentEncoding() + { + return contentEncoding; } } From 8747a4827b5ef7c8b59d84a6a39856e5e7b666f2 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2020 22:13:50 +0000 Subject: [PATCH 06/17] Bump utility from 3.0.17 to 3.0.18 in /e2e-test Bumps [utility](https://github.com/Alfresco/alfresco-tas-utility) from 3.0.17 to 3.0.18. - [Release notes](https://github.com/Alfresco/alfresco-tas-utility/releases) - [Changelog](https://github.com/Alfresco/alfresco-tas-utility/blob/master/docs/CHANGELOG.md) - [Commits](https://github.com/Alfresco/alfresco-tas-utility/compare/utility-3.0.17...utility-3.0.18) Signed-off-by: dependabot-preview[bot] --- e2e-test/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml index 3bd320096..df6414653 100644 --- a/e2e-test/pom.xml +++ b/e2e-test/pom.xml @@ -13,7 +13,7 @@ 1.26 1.12 - 3.0.17 + 3.0.18 3.3.0 src/test/resources/SearchSuite.xml From 9ff35e7807dff85de07e6f99d79b1c2d6bbe77c9 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Thu, 16 Jan 2020 11:00:25 +0100 Subject: [PATCH 07/17] Add this feature to SOLR Core properties, in order to allow the user to switch on the compression depending on his environment and use case. 
--- .../noRerank/conf/solrcore.properties | 8 ++++++ .../templates/rerank/conf/solrcore.properties | 7 +++++ .../alfresco/solr/client/SOLRAPIClient.java | 28 ++++++++++++++++--- .../solr/client/SOLRAPIClientFactory.java | 3 +- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties index e91ba7ee8..a6a5a1fae 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties @@ -174,6 +174,14 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 06f089370..fdf702da6 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -174,6 +174,13 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat 
Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 1bf4e9954..b3278a273 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -113,17 +113,34 @@ public class SOLRAPIClient private SOLRDeserializer deserializer; private DictionaryService dictionaryService; private JsonFactory jsonFactory; - private NamespaceDAO namespaceDAO; + private NamespaceDAO namespaceDAO; + + /** + * This option enables ("Accept-Encoding": "gzip") header for compression + * in GET_CONTENT requests. Additional configuration is required in + * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal w + * with compressed requests. 
+ */ + private boolean compression; + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, + DictionaryService dictionaryService, + NamespaceDAO namespaceDAO) + { + this(repositoryHttpClient, dictionaryService, namespaceDAO, false); + } + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, - NamespaceDAO namespaceDAO) + NamespaceDAO namespaceDAO, + boolean compression) { this.repositoryHttpClient = repositoryHttpClient; this.dictionaryService = dictionaryService; this.namespaceDAO = namespaceDAO; this.deserializer = new SOLRDeserializer(namespaceDAO); - this.jsonFactory = new JsonFactory(); + this.jsonFactory = new JsonFactory(); + this.compression = compression; } /** @@ -1125,7 +1142,10 @@ public class SOLRAPIClient { headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); } - headers.put("Accept-Encoding", "gzip"); + if (compression) + { + headers.put("Accept-Encoding", "gzip"); + } req.setHeaders(headers); Response response = repositoryHttpClient.sendRequest(req); diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java index 0fbf5df7a..f467d3242 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java @@ -143,6 +143,7 @@ public class SOLRAPIClientFactory alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); + boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false")); SOLRAPIClient client = getCachedClient(alfrescoHost, 
alfrescoPort, alfrescoPortSSL); if (client == null) @@ -171,7 +172,7 @@ public class SOLRAPIClientFactory maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); - client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); + client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); } From de1b47a2e0885afac89c198719b81c59242e3d2b Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2020 22:13:51 +0000 Subject: [PATCH 08/17] Bump cmis from 1.12 to 1.13 in /e2e-test Bumps [cmis](https://github.com/Alfresco/alfresco-tas-cmis) from 1.12 to 1.13. - [Release notes](https://github.com/Alfresco/alfresco-tas-cmis/releases) - [Changelog](https://github.com/Alfresco/alfresco-tas-cmis/blob/master/docs/CHANGELOG.md) - [Commits](https://github.com/Alfresco/alfresco-tas-cmis/compare/v1.12...v1.13) Signed-off-by: dependabot-preview[bot] --- e2e-test/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml index 3bd320096..4fcb122b4 100644 --- a/e2e-test/pom.xml +++ b/e2e-test/pom.xml @@ -12,7 +12,7 @@ Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share 1.26 - 1.12 + 1.13 3.0.17 3.3.0 src/test/resources/SearchSuite.xml From 73e465567c5044d1d0f1c89b44a6f67df48caff2 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2020 22:14:25 +0000 Subject: [PATCH 09/17] Bump restapi from 1.26 to 1.28 in /e2e-test Bumps [restapi](https://github.com/Alfresco/alfresco-tas-restapi) from 1.26 to 1.28. 
- [Release notes](https://github.com/Alfresco/alfresco-tas-restapi/releases) - [Commits](https://github.com/Alfresco/alfresco-tas-restapi/compare/v1.26...v1.28) Signed-off-by: dependabot-preview[bot] --- e2e-test/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml index 3bd320096..dca709884 100644 --- a/e2e-test/pom.xml +++ b/e2e-test/pom.xml @@ -11,7 +11,7 @@ Search Analytics E2E Tests Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share - 1.26 + 1.28 1.12 3.0.17 3.3.0 From 2c5ab74dc59750ffa3be5e20e9f36840d5176781 Mon Sep 17 00:00:00 2001 From: Tom Page Date: Fri, 17 Jan 2020 08:51:38 +0000 Subject: [PATCH 10/17] Update URL for maven-restlet repository. Restlet were acquired by Talend in 2017 and the old URL no longer works. --- search-services/alfresco-search/pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/search-services/alfresco-search/pom.xml b/search-services/alfresco-search/pom.xml index 2916a884e..752f9768b 100644 --- a/search-services/alfresco-search/pom.xml +++ b/search-services/alfresco-search/pom.xml @@ -182,6 +182,14 @@ + + + maven-restlet + Public online Restlet repository + http://maven.restlet.talend.com + + + alfresco-solr From 66e7cb9ff2a8131ee2b44ab19e0c52afac17cb66 Mon Sep 17 00:00:00 2001 From: Tom Page Date: Fri, 17 Jan 2020 09:27:19 +0000 Subject: [PATCH 11/17] SEARCH-2054 Decrease the priority of the restlet repository. This ensures that artifacts are only downloaded from maven-restlet if they aren't found in an alfresco or central repo. 
--- search-services/alfresco-search/pom.xml | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/search-services/alfresco-search/pom.xml b/search-services/alfresco-search/pom.xml index 752f9768b..1bffd3a2b 100644 --- a/search-services/alfresco-search/pom.xml +++ b/search-services/alfresco-search/pom.xml @@ -183,6 +183,35 @@ + + alfresco-public + https://artifacts.alfresco.com/nexus/content/groups/public + + true + + + false + + + + alfresco-public-snapshots + https://artifacts.alfresco.com/nexus/content/groups/public-snapshots + + false + + + true + + + + central + Central Repository + https://repo.maven.apache.org/maven2 + default + + false + + maven-restlet Public online Restlet repository From ee21af7b2706b084e3b87bf56e9443325b52b57c Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Fri, 17 Jan 2020 14:25:59 +0100 Subject: [PATCH 12/17] Fix changes from review. --- .../main/java/org/alfresco/solr/SolrInformationServer.java | 2 +- .../src/main/java/org/alfresco/solr/client/SOLRAPIClient.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index 7c8552cb5..a4ea83035 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -2501,7 +2501,7 @@ public class SolrInformationServer implements InformationServer response); InputStream ris = response.getContent(); - if (null != response.getContentEncoding() && response.getContentEncoding().equals("gzip")) + if (response.getContentEncoding().equals("gzip")) { ris = new GZIPInputStream(ris); } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java 
b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index b3278a273..ebc44dbd0 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -118,7 +118,7 @@ public class SOLRAPIClient /** * This option enables ("Accept-Encoding": "gzip") header for compression * in GET_CONTENT requests. Additional configuration is required in - * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal w + * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal * with compressed requests. */ private boolean compression; @@ -1137,7 +1137,7 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); - Map headers = new HashMap(); + Map headers = new HashMap<>(); if(modifiedSince != null) { headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); From 8dd9daa1996f90042e56ec0019ca1770707f608a Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Fri, 17 Jan 2020 15:24:57 +0000 Subject: [PATCH 13/17] Revert "Merge branch 'feature/SEARCH-2029_ContentResponseCompression' into 'master'" This reverts merge request !320 --- .../alfresco/solr/SolrInformationServer.java | 7 +-- .../noRerank/conf/solrcore.properties | 8 --- .../templates/rerank/conf/solrcore.properties | 7 --- .../alfresco/solr/client/SOLRAPIClient.java | 50 ++++--------------- .../solr/client/SOLRAPIClientFactory.java | 3 +- 5 files changed, 12 insertions(+), 63 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index a4ea83035..c5b07d69f 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ 
b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -81,7 +81,6 @@ import java.util.Map.Entry; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.zip.GZIPInputStream; import com.carrotsearch.hppc.IntArrayList; @@ -2499,12 +2498,8 @@ public class SolrInformationServer implements InformationServer response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response); - + InputStream ris = response.getContent(); - if (response.getContentEncoding().equals("gzip")) - { - ris = new GZIPInputStream(ris); - } String textContent = ""; try { diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties index a6a5a1fae..e91ba7ee8 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties @@ -174,14 +174,6 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 -# -# Request content text compression -# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled -# This setting can improve performance when having high network latency or large documents in the repository -# -solr.request.content.compress=false - - # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 
fdf702da6..06f089370 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -174,13 +174,6 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 -# -# Request content text compression -# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled -# This setting can improve performance when having high network latency or large documents in the repository -# -solr.request.content.compress=false - # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index ebc44dbd0..136d5ddde 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -74,7 +73,6 @@ import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.util.DateUtil; -import org.apache.commons.io.IOUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -113,34 +111,17 @@ public class SOLRAPIClient private SOLRDeserializer deserializer; private DictionaryService dictionaryService; private JsonFactory jsonFactory; - private 
NamespaceDAO namespaceDAO; - - /** - * This option enables ("Accept-Encoding": "gzip") header for compression - * in GET_CONTENT requests. Additional configuration is required in - * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal - * with compressed requests. - */ - private boolean compression; + private NamespaceDAO namespaceDAO; - public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, - DictionaryService dictionaryService, - NamespaceDAO namespaceDAO) - { - this(repositoryHttpClient, dictionaryService, namespaceDAO, false); - } - public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, - NamespaceDAO namespaceDAO, - boolean compression) + NamespaceDAO namespaceDAO) { this.repositoryHttpClient = repositoryHttpClient; this.dictionaryService = dictionaryService; this.namespaceDAO = namespaceDAO; this.deserializer = new SOLRDeserializer(namespaceDAO); - this.jsonFactory = new JsonFactory(); - this.compression = compression; + this.jsonFactory = new JsonFactory(); } /** @@ -1137,24 +1118,20 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); - Map headers = new HashMap<>(); if(modifiedSince != null) { + Map headers = new HashMap(1, 1.0f); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); + req.setHeaders(headers); } - if (compression) - { - headers.put("Accept-Encoding", "gzip"); - } - req.setHeaders(headers); - + Response response = repositoryHttpClient.sendRequest(req); if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK) { throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus()); - } - + } + return new GetTextContentResponse(response); } @@ -1504,8 +1481,7 @@ public class SOLRAPIClient private SolrApiContentStatus status; private String transformException; private String 
transformStatusStr; - private Long transformDuration; - private String contentEncoding; + private Long transformDuration; public GetTextContentResponse(Response response) throws IOException { @@ -1515,8 +1491,7 @@ public class SOLRAPIClient this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus"); this.transformException = response.getHeader("X-Alfresco-transformException"); String tmp = response.getHeader("X-Alfresco-transformDuration"); - this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); - this.contentEncoding = response.getHeader("Content-Encoding"); + this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); setStatus(); } @@ -1582,11 +1557,6 @@ public class SOLRAPIClient public Long getTransformDuration() { return transformDuration; - } - - public String getContentEncoding() - { - return contentEncoding; } } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java index f467d3242..0fbf5df7a 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java @@ -143,7 +143,6 @@ public class SOLRAPIClientFactory alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); - boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false")); SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL); if (client == null) @@ -172,7 +171,7 @@ public class SOLRAPIClientFactory maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); 
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); - client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression); + client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); } From 641bb26dcc0a16f56689b239c8bcc6fe4c6ba202 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Mon, 20 Jan 2020 10:18:19 +0100 Subject: [PATCH 14/17] Fix null pointer comparison --- .../src/main/java/org/alfresco/solr/SolrInformationServer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index a4ea83035..182067271 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -2501,7 +2501,7 @@ public class SolrInformationServer implements InformationServer response); InputStream ris = response.getContent(); - if (response.getContentEncoding().equals("gzip")) + if (Objects.equals(response.getContentEncoding(), "gzip")) { ris = new GZIPInputStream(ris); } From c1e660319b6eff8c230bb9979c90fda4b1e4ca17 Mon Sep 17 00:00:00 2001 From: Tom Page Date: Mon, 20 Jan 2020 09:58:42 +0000 Subject: [PATCH 15/17] Revert "Revert "Merge branch 'feature/SEARCH-2029_ContentResponseCompression' into 'master'"" This reverts commit 221567ba8c0ef24eb70062f8182c14227fa6a789. 
--- .../alfresco/solr/SolrInformationServer.java | 3 +- .../noRerank/conf/solrcore.properties | 8 +++ .../templates/rerank/conf/solrcore.properties | 7 +++ .../alfresco/solr/client/SOLRAPIClient.java | 50 +++++++++++++++---- .../solr/client/SOLRAPIClientFactory.java | 3 +- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index 1f5a616c7..182067271 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -81,6 +81,7 @@ import java.util.Map.Entry; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; import com.carrotsearch.hppc.IntArrayList; @@ -2498,7 +2499,7 @@ public class SolrInformationServer implements InformationServer response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response); - + InputStream ris = response.getContent(); if (Objects.equals(response.getContentEncoding(), "gzip")) { diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties index e91ba7ee8..a6a5a1fae 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties @@ -174,6 +174,14 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When 
enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 06f089370..fdf702da6 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -174,6 +174,13 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 136d5ddde..ebc44dbd0 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import 
java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -73,6 +74,7 @@ import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.util.DateUtil; +import org.apache.commons.io.IOUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -111,17 +113,34 @@ public class SOLRAPIClient private SOLRDeserializer deserializer; private DictionaryService dictionaryService; private JsonFactory jsonFactory; - private NamespaceDAO namespaceDAO; + private NamespaceDAO namespaceDAO; + + /** + * This option enables ("Accept-Encoding": "gzip") header for compression + * in GET_CONTENT requests. Additional configuration is required in + * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal + * with compressed requests. + */ + private boolean compression; + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, + DictionaryService dictionaryService, + NamespaceDAO namespaceDAO) + { + this(repositoryHttpClient, dictionaryService, namespaceDAO, false); + } + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, - NamespaceDAO namespaceDAO) + NamespaceDAO namespaceDAO, + boolean compression) { this.repositoryHttpClient = repositoryHttpClient; this.dictionaryService = dictionaryService; this.namespaceDAO = namespaceDAO; this.deserializer = new SOLRDeserializer(namespaceDAO); - this.jsonFactory = new JsonFactory(); + this.jsonFactory = new JsonFactory(); + this.compression = compression; } /** @@ -1118,20 +1137,24 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); + Map headers = new HashMap<>(); if(modifiedSince != null) { - Map headers = new HashMap(1, 1.0f); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); - req.setHeaders(headers); } - + if (compression) + { + 
headers.put("Accept-Encoding", "gzip"); + } + req.setHeaders(headers); + Response response = repositoryHttpClient.sendRequest(req); if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK) { throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus()); - } - + } + return new GetTextContentResponse(response); } @@ -1481,7 +1504,8 @@ public class SOLRAPIClient private SolrApiContentStatus status; private String transformException; private String transformStatusStr; - private Long transformDuration; + private Long transformDuration; + private String contentEncoding; public GetTextContentResponse(Response response) throws IOException { @@ -1491,7 +1515,8 @@ public class SOLRAPIClient this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus"); this.transformException = response.getHeader("X-Alfresco-transformException"); String tmp = response.getHeader("X-Alfresco-transformDuration"); - this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.transformDuration = (tmp != null ? 
Long.valueOf(tmp) : null); + this.contentEncoding = response.getHeader("Content-Encoding"); setStatus(); } @@ -1557,6 +1582,11 @@ public class SOLRAPIClient public Long getTransformDuration() { return transformDuration; + } + + public String getContentEncoding() + { + return contentEncoding; } } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java index 0fbf5df7a..f467d3242 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java @@ -143,6 +143,7 @@ public class SOLRAPIClientFactory alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); + boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false")); SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL); if (client == null) @@ -171,7 +172,7 @@ public class SOLRAPIClientFactory maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); - client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); + client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); } From 37e612047bf41f48a7de9412ab3220cc8706e233 Mon Sep 17 00:00:00 2001 From: Angel Borroy Date: Tue, 21 Jan 2020 16:13:59 +0100 Subject: [PATCH 16/17] Skipping transactions for DB_ID_RANGE Shard method. 
--- .../solr/tracker/MetadataTracker.java | 67 ++++++++++++++++++- .../alfresco/solr/client/SOLRAPIClient.java | 55 +++++++++++++-- 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index 6ddb64f2a..c1390bf3d 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -40,6 +40,7 @@ import org.alfresco.solr.client.Node.SolrApiNodeStatus; import org.alfresco.solr.client.SOLRAPIClient; import org.alfresco.solr.client.Transaction; import org.alfresco.solr.client.Transactions; +import org.alfresco.util.Pair; import org.apache.commons.codec.EncoderException; import org.json.JSONException; import org.slf4j.Logger; @@ -73,6 +74,15 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker * {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME} */ private boolean nextTxCommitTimeServiceAvailable = false; + + /** + * Check if txIntervalCommitTimeService is available in the repository. + * This service returns the minimum and the maximum commit time for transactions in a node id range, + * so method sharding DB_ID_RANGE can skip transactions not relevant for the DB ID range.
+ * + * {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME} + */ + private boolean txIntervalCommitTimeServiceAvailable = false; public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName, InformationServer informationServer) @@ -82,6 +92,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10")); threadHandler = new ThreadHandler(p, coreName, "MetadataTracker"); + // Try invoking getNextTxCommitTime service try { client.getNextTxCommitTime(coreName, 0l); @@ -95,6 +106,23 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker { log.error("Checking nextTxCommitTimeService failed.", e); } + + // Try invoking txIntervalCommitTime service + try + { + client.getTxIntervalCommitTime(coreName, 0l, 0l); + txIntervalCommitTimeServiceAvailable = true; + } + catch (NoSuchMethodException e) + { + log.warn("txIntervalCommitTimeServiceAvailable is not available. If you are using DB_ID_RANGE shard method, " + + "upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage()); + } + catch (Exception e) + { + log.error("Checking txIntervalCommitTimeServiceAvailable failed.", e); + } + } MetadataTracker() @@ -640,9 +668,46 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker * */ - Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex()); + Long fromCommitTime = getTxFromCommitTime(txnsFound, + state.getLastIndexedTxCommitTime() == 0 ? 
state.getLastGoodTxCommitTimeInIndex() : state.getLastIndexedTxCommitTime()); log.debug("#### Check txnsFound : " + txnsFound.size()); log.debug("======= fromCommitTime: " + fromCommitTime); + + // When using DB_ID_RANGE, fromCommitTime cannot be before the commit time of the first transaction + // for the DB_ID_RANGE to be indexed and commit time of the last transaction cannot be lower than fromCommitTime. + // When there isn't nodes in that range, -1 is returned as commit times + if (docRouter instanceof DBIDRangeRouter && txIntervalCommitTimeServiceAvailable) + { + + DBIDRangeRouter dbIdRangeRouter = (DBIDRangeRouter) docRouter; + Pair commitTimes = client.getTxIntervalCommitTime(coreName, + dbIdRangeRouter.getStartRange(), dbIdRangeRouter.getEndRange()); + Long shardMinCommitTime = commitTimes.getFirst(); + Long shardMaxCommitTime = commitTimes.getSecond(); + + // Node Range it's not still available in repository + if (shardMinCommitTime == -1) + { + log.debug("#### [DB_ID_RANGE] No nodes in range [" + dbIdRangeRouter.getStartRange() + "-" + + dbIdRangeRouter.getEndRange() + "] " + + "exist in the repository. Skipping metadata tracking."); + return; + } + if (fromCommitTime > shardMaxCommitTime) + { + log.debug("#### [DB_ID_RANGE] Last commit time is greater that max commit time in in range [" + + dbIdRangeRouter.getStartRange() + "-" + dbIdRangeRouter.getEndRange() + "]. 
" + + "Skipping metadata tracking."); + return; + } + // Initial commit time for Node Range is greater than calculated from commit time + if (fromCommitTime < shardMinCommitTime) + { + log.debug("#### [DB_ID_RANGE] SKIPPING TRANSACTIONS FROM " + fromCommitTime + " TO " + + shardMinCommitTime); + fromCommitTime = shardMinCommitTime; + } + } log.debug("#### Get txn from commit time: " + fromCommitTime); transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000, diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 55762b340..1556f7cb6 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -74,7 +73,6 @@ import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.util.DateUtil; -import org.apache.commons.io.IOUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -106,7 +104,8 @@ public class SOLRAPIClient private static final String GET_CONTENT = "api/solr/textContent"; private static final String GET_MODEL = "api/solr/model"; private static final String GET_MODELS_DIFF = "api/solr/modelsdiff"; - private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction"; + private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction"; + private static final String 
GET_TX_INTERVAL_COMMIT_TIME = "api/solr/transactionInterval"; private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum"; @@ -1296,7 +1295,55 @@ public class SOLRAPIClient } return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString()); - } + } + + /** + * Returns the minimum and the maximum commit time for transactions in a node id range. + * + * @param coreName alfresco, archive + * @param fromNodeId Id of the initial node + * @param toNodeId Id of the final node + * @return Time of the first transaction, time of the last transaction + * @throws IOException + * @throws AuthenticationException + * @throws NoSuchMethodException + */ + public Pair getTxIntervalCommitTime(String coreName, Long fromNodeId, Long toNodeId) + throws AuthenticationException, IOException, NoSuchMethodException + { + StringBuilder url = new StringBuilder(GET_TX_INTERVAL_COMMIT_TIME); + url.append("?").append("fromNodeId").append("=").append(fromNodeId); + url.append("&").append("toNodeId").append("=").append(toNodeId); + GetRequest get = new GetRequest(url.toString()); + Response response = null; + JSONObject json = null; + try + { + response = repositoryHttpClient.sendRequest(get); + if (response.getStatus() != HttpStatus.SC_OK) + { + throw new NoSuchMethodException(coreName + " - GetTxIntervalCommitTime return status is " + + response.getStatus() + " when invoking " + url); + } + + Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8")); + json = new JSONObject(new JSONTokener(reader)); + } + finally + { + if (response != null) + { + response.release(); + } + } + if (log.isDebugEnabled()) + { + log.debug(json.toString()); + } + + return new Pair(Long.parseLong(json.get("minTransactionCommitTimeMs").toString()), + Long.parseLong(json.get("maxTransactionCommitTimeMs").toString())); + } /* * type conversions from serialized JSON values to SOLR-consumable objects From f708f89bb22fb6a66f6321293ea7fcc9e95d2eba Mon Sep 17 
00:00:00 2001 From: Tom Page Date: Wed, 22 Jan 2020 15:01:07 +0000 Subject: [PATCH 17/17] SEARCH-2067 Try fixing integration test order as alphabetical. Some of our integration tests fail if they are run after other tests. --- pom.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pom.xml b/pom.xml index 9af7a2537..f21eb44c8 100644 --- a/pom.xml +++ b/pom.xml @@ -66,6 +66,9 @@ + + alphabetical +