diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml index f0501aaf0..1e48fef17 100644 --- a/e2e-test/pom.xml +++ b/e2e-test/pom.xml @@ -11,10 +11,11 @@ Search Analytics E2E Tests Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share + 1.28 1.26 - 1.12 - 3.0.17 - 3.2.0 + 1.13 + 3.0.18 + 3.3.0 src/test/resources/SearchSuite.xml diff --git a/pom.xml b/pom.xml index 9af7a2537..f21eb44c8 100644 --- a/pom.xml +++ b/pom.xml @@ -66,6 +66,9 @@ + + alphabetical + diff --git a/search-services/alfresco-search/pom.xml b/search-services/alfresco-search/pom.xml index 2916a884e..1bffd3a2b 100644 --- a/search-services/alfresco-search/pom.xml +++ b/search-services/alfresco-search/pom.xml @@ -182,6 +182,43 @@ + + + alfresco-public + https://artifacts.alfresco.com/nexus/content/groups/public + + true + + + false + + + + alfresco-public-snapshots + https://artifacts.alfresco.com/nexus/content/groups/public-snapshots + + false + + + true + + + + central + Central Repository + https://repo.maven.apache.org/maven2 + default + + false + + + + maven-restlet + Public online Restlet repository + http://maven.restlet.talend.com + + + alfresco-solr diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java index c5b07d69f..182067271 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java @@ -81,6 +81,7 @@ import java.util.Map.Entry; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; import com.carrotsearch.hppc.IntArrayList; @@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer response); addContentPropertyMetadata(doc, propertyQName, 
AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response); - + InputStream ris = response.getContent(); + if (Objects.equals(response.getContentEncoding(), "gzip")) + { + ris = new GZIPInputStream(ris); + } String textContent = ""; try { diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java index 3862ebd30..c1390bf3d 100644 --- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java +++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java @@ -40,6 +40,7 @@ import org.alfresco.solr.client.Node.SolrApiNodeStatus; import org.alfresco.solr.client.SOLRAPIClient; import org.alfresco.solr.client.Transaction; import org.alfresco.solr.client.Transactions; +import org.alfresco.util.Pair; import org.apache.commons.codec.EncoderException; import org.json.JSONException; import org.slf4j.Logger; @@ -63,6 +64,25 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker private ConcurrentLinkedQueue nodesToIndex = new ConcurrentLinkedQueue<>(); private ConcurrentLinkedQueue nodesToPurge = new ConcurrentLinkedQueue<>(); private ConcurrentLinkedQueue queriesToReindex = new ConcurrentLinkedQueue<>(); + + /** + * Check if nextTxCommitTimeService is available in the repository. + * This service is used to find the next available transaction commit time from a given time, + * so periods of time where no document updating is happening can be skipped while getting + * pending transactions list. + * + * {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME} + */ + private boolean nextTxCommitTimeServiceAvailable = false; + + /** + * Check if txIntervalCommitTimeService is available in the repository. 
+ * This service returns the minimum and the maximum commit time for transactions in a node id range, + * so method sharding DB_ID_RANGE can skip transactions not relevant for the DB ID range. + * + * {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME} + */ + private boolean txIntervalCommitTimeServiceAvailable = false; public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName, InformationServer informationServer) @@ -71,6 +91,38 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100")); nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10")); threadHandler = new ThreadHandler(p, coreName, "MetadataTracker"); + + // Try invoking getNextTxCommitTime service + try + { + client.getNextTxCommitTime(coreName, 0l); + nextTxCommitTimeServiceAvailable = true; + } + catch (NoSuchMethodException e) + { + log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage()); + } + catch (Exception e) + { + log.error("Checking nextTxCommitTimeService failed.", e); + } + + // Try invoking txIntervalCommitTime service + try + { + client.getTxIntervalCommitTime(coreName, 0l, 0l); + txIntervalCommitTimeServiceAvailable = true; + } + catch (NoSuchMethodException e) + { + log.warn("txIntervalCommitTimeServiceAvailable is not available. 
If you are using DB_ID_RANGE shard method, " + + "upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage()); + } + catch (Exception e) + { + log.error("Checking txIntervalCommitTimeServiceAvailable failed.", e); + } + } MetadataTracker() @@ -518,7 +570,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker } protected Transactions getSomeTransactions(BoundedDeque txnsFound, Long fromCommitTime, long timeStep, - int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException + int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException, NoSuchMethodException { long actualTimeStep = timeStep; @@ -546,6 +598,17 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker { transactions = client.getTransactions(startTime, null, startTime + actualTimeStep, null, maxResults, shardstate); startTime += actualTimeStep; + + // If no transactions are found, advance the time window to the next available transaction commit time + if (nextTxCommitTimeServiceAvailable && transactions.getTransactions().size() == 0) + { + Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime); + if (nextTxCommitTime != -1) + { + log.info("Advancing transactions from {} to {}", startTime, nextTxCommitTime); + transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate); + } + } } while (((transactions.getTransactions().size() == 0) && (startTime < endTime)) || ((transactions.getTransactions().size() > 0) && alreadyFoundTransactions(txnsFound, transactions))); @@ -605,9 +668,46 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker * */ - Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex()); + Long fromCommitTime = getTxFromCommitTime(txnsFound, + state.getLastIndexedTxCommitTime() == 0 ? 
state.getLastGoodTxCommitTimeInIndex() : state.getLastIndexedTxCommitTime()); log.debug("#### Check txnsFound : " + txnsFound.size()); log.debug("======= fromCommitTime: " + fromCommitTime); + + // When using DB_ID_RANGE, fromCommitTime cannot be before the commit time of the first transaction + // for the DB_ID_RANGE to be indexed and commit time of the last transaction cannot be lower than fromCommitTime. + // When there are no nodes in that range, -1 is returned as the commit times + if (docRouter instanceof DBIDRangeRouter && txIntervalCommitTimeServiceAvailable) + { + + DBIDRangeRouter dbIdRangeRouter = (DBIDRangeRouter) docRouter; + Pair commitTimes = client.getTxIntervalCommitTime(coreName, + dbIdRangeRouter.getStartRange(), dbIdRangeRouter.getEndRange()); + Long shardMinCommitTime = commitTimes.getFirst(); + Long shardMaxCommitTime = commitTimes.getSecond(); + + // Node Range is not yet available in the repository + if (shardMinCommitTime == -1) + { + log.debug("#### [DB_ID_RANGE] No nodes in range [" + dbIdRangeRouter.getStartRange() + "-" + + dbIdRangeRouter.getEndRange() + "] " + + "exist in the repository. Skipping metadata tracking."); + return; + } + if (fromCommitTime > shardMaxCommitTime) + { + log.debug("#### [DB_ID_RANGE] Last commit time is greater that max commit time in in range [" + + dbIdRangeRouter.getStartRange() + "-" + dbIdRangeRouter.getEndRange() + "]. 
" + + "Skipping metadata tracking."); + return; + } + // Initial commit time for Node Range is greater than calculated from commit time + if (fromCommitTime < shardMinCommitTime) + { + log.debug("#### [DB_ID_RANGE] SKIPPING TRANSACTIONS FROM " + fromCommitTime + " TO " + + shardMinCommitTime); + fromCommitTime = shardMinCommitTime; + } + } log.debug("#### Get txn from commit time: " + fromCommitTime); transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000, @@ -964,7 +1064,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker } public IndexHealthReport checkIndex(Long toTx, Long toAclTx, Long fromTime, Long toTime) - throws IOException, AuthenticationException, JSONException, EncoderException + throws IOException, AuthenticationException, JSONException, EncoderException, NoSuchMethodException { // DB TX Count long firstTransactionCommitTime = 0; diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties index e91ba7ee8..a6a5a1fae 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties @@ -174,6 +174,14 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git 
a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties index 06f089370..fdf702da6 100644 --- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties +++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties @@ -174,6 +174,13 @@ solr.suggester.enabled=true # -1 to disable suggester build throttling solr.suggester.minSecsBetweenBuilds=3600 +# +# Request content text compression +# When enabling this option, Tomcat Connector or HTTP Web Proxy (NGINX, Apache) compression must be also enabled +# This setting can improve performance when having high network latency or large documents in the repository +# +solr.request.content.compress=false + # # Limit the maximum text size of transformed content sent to the index - in bytes # diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java index 136d5ddde..1556f7cb6 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java @@ -103,7 +103,9 @@ public class SOLRAPIClient private static final String GET_NODES_URL = "api/solr/nodes"; private static final String GET_CONTENT = "api/solr/textContent"; private static final String GET_MODEL = "api/solr/model"; - private static final String GET_MODELS_DIFF = "api/solr/modelsdiff"; + private static final String GET_MODELS_DIFF = "api/solr/modelsdiff"; + private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction"; + private static final String GET_TX_INTERVAL_COMMIT_TIME = 
"api/solr/transactionInterval"; private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum"; @@ -111,17 +113,34 @@ public class SOLRAPIClient private SOLRDeserializer deserializer; private DictionaryService dictionaryService; private JsonFactory jsonFactory; - private NamespaceDAO namespaceDAO; + private NamespaceDAO namespaceDAO; + + /** + * This option enables ("Accept-Encoding": "gzip") header for compression + * in GET_CONTENT requests. Additional configuration is required in + * Alfresco Repository Tomcat Connector or HTTP Web Proxy to deal + * with compressed requests. + */ + private boolean compression; + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, + DictionaryService dictionaryService, + NamespaceDAO namespaceDAO) + { + this(repositoryHttpClient, dictionaryService, namespaceDAO, false); + } + public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient, DictionaryService dictionaryService, - NamespaceDAO namespaceDAO) + NamespaceDAO namespaceDAO, + boolean compression) { this.repositoryHttpClient = repositoryHttpClient; this.dictionaryService = dictionaryService; this.namespaceDAO = namespaceDAO; this.deserializer = new SOLRDeserializer(namespaceDAO); - this.jsonFactory = new JsonFactory(); + this.jsonFactory = new JsonFactory(); + this.compression = compression; } /** @@ -1118,20 +1137,24 @@ public class SOLRAPIClient GetRequest req = new GetRequest(url.toString()); + Map headers = new HashMap<>(); if(modifiedSince != null) { - Map headers = new HashMap(1, 1.0f); headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince)))); - req.setHeaders(headers); } - + if (compression) + { + headers.put("Accept-Encoding", "gzip"); + } + req.setHeaders(headers); + Response response = repositoryHttpClient.sendRequest(req); if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK) { throw new 
AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus()); - } - + } + return new GetTextContentResponse(response); } @@ -1228,7 +1251,99 @@ public class SOLRAPIClient } return diffs; - } + } + + /** + * Returns the commit time of the next transaction after the given commit time. + * + * @param coreName alfresco, archive + * @param fromCommitTime initial transaction commit time + * @return Time of the next transaction + * @throws IOException + * @throws AuthenticationException + * @throws NoSuchMethodException + */ + public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException, NoSuchMethodException + { + StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME); + url.append("?").append("fromCommitTime").append("=").append(fromCommitTime); + GetRequest get = new GetRequest(url.toString()); + Response response = null; + JSONObject json = null; + try + { + response = repositoryHttpClient.sendRequest(get); + if (response.getStatus() != HttpStatus.SC_OK) + { + throw new NoSuchMethodException(coreName + " - GetNextTxCommitTime return status is " + + response.getStatus() + " when invoking " + url); + } + + Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8")); + json = new JSONObject(new JSONTokener(reader)); + } + finally + { + if (response != null) + { + response.release(); + } + } + if (log.isDebugEnabled()) + { + log.debug(json.toString()); + } + + return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString()); + } + + /** + * Returns the minimum and the maximum commit time for transactions in a node id range. 
+ * + * @param coreName alfresco, archive + * @param fromNodeId Id of the initial node + * @param toNodeId Id of the final node + * @return Time of the first transaction, time of the last transaction + * @throws IOException + * @throws AuthenticationException + * @throws NoSuchMethodException + */ + public Pair getTxIntervalCommitTime(String coreName, Long fromNodeId, Long toNodeId) + throws AuthenticationException, IOException, NoSuchMethodException + { + StringBuilder url = new StringBuilder(GET_TX_INTERVAL_COMMIT_TIME); + url.append("?").append("fromNodeId").append("=").append(fromNodeId); + url.append("&").append("toNodeId").append("=").append(toNodeId); + GetRequest get = new GetRequest(url.toString()); + Response response = null; + JSONObject json = null; + try + { + response = repositoryHttpClient.sendRequest(get); + if (response.getStatus() != HttpStatus.SC_OK) + { + throw new NoSuchMethodException(coreName + " - GetTxIntervalCommitTime return status is " + + response.getStatus() + " when invoking " + url); + } + + Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8")); + json = new JSONObject(new JSONTokener(reader)); + } + finally + { + if (response != null) + { + response.release(); + } + } + if (log.isDebugEnabled()) + { + log.debug(json.toString()); + } + + return new Pair(Long.parseLong(json.get("minTransactionCommitTimeMs").toString()), + Long.parseLong(json.get("maxTransactionCommitTimeMs").toString())); + } /* * type conversions from serialized JSON values to SOLR-consumable objects @@ -1481,7 +1596,8 @@ public class SOLRAPIClient private SolrApiContentStatus status; private String transformException; private String transformStatusStr; - private Long transformDuration; + private Long transformDuration; + private String contentEncoding; public GetTextContentResponse(Response response) throws IOException { @@ -1491,7 +1607,8 @@ public class SOLRAPIClient this.transformStatusStr = 
response.getHeader("X-Alfresco-transformStatus"); this.transformException = response.getHeader("X-Alfresco-transformException"); String tmp = response.getHeader("X-Alfresco-transformDuration"); - this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null); + this.contentEncoding = response.getHeader("Content-Encoding"); setStatus(); } @@ -1557,6 +1674,11 @@ public class SOLRAPIClient public Long getTransformDuration() { return transformDuration; + } + + public String getContentEncoding() + { + return contentEncoding; } } diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java index 0fbf5df7a..f467d3242 100644 --- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java +++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java @@ -143,6 +143,7 @@ public class SOLRAPIClientFactory alfrescoHost = props.getProperty("alfresco.host", "localhost"); alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080")); alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443")); + boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false")); SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL); if (client == null) @@ -171,7 +172,7 @@ public class SOLRAPIClientFactory maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40")); socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000")); - client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO); + client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, 
namespaceDAO, compression); setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client); }