diff --git a/e2e-test/pom.xml b/e2e-test/pom.xml
index f0501aaf0..1e48fef17 100644
--- a/e2e-test/pom.xml
+++ b/e2e-test/pom.xml
@@ -11,10 +11,11 @@
Search Analytics E2E Tests
Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share
+ 1.28
1.26
- 1.12
- 3.0.17
- 3.2.0
+ 1.13
+ 3.0.18
+ 3.3.0
src/test/resources/SearchSuite.xml
diff --git a/pom.xml b/pom.xml
index 9af7a2537..f21eb44c8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,6 +66,9 @@
+
+ alphabetical
+
diff --git a/search-services/alfresco-search/pom.xml b/search-services/alfresco-search/pom.xml
index 2916a884e..1bffd3a2b 100644
--- a/search-services/alfresco-search/pom.xml
+++ b/search-services/alfresco-search/pom.xml
@@ -182,6 +182,43 @@
+
+
+ alfresco-public
+ https://artifacts.alfresco.com/nexus/content/groups/public
+
+ true
+
+
+ false
+
+
+
+ alfresco-public-snapshots
+ https://artifacts.alfresco.com/nexus/content/groups/public-snapshots
+
+ false
+
+
+ true
+
+
+
+ central
+ Central Repository
+ https://repo.maven.apache.org/maven2
+ default
+
+ false
+
+
+
+ maven-restlet
+ Public online Restlet repository
+ http://maven.restlet.talend.com
+
+
+
alfresco-solr
diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java
index c5b07d69f..182067271 100644
--- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java
+++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/SolrInformationServer.java
@@ -81,6 +81,7 @@ import java.util.Map.Entry;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
import com.carrotsearch.hppc.IntArrayList;
@@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer
response);
addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME,
response);
-
+
InputStream ris = response.getContent();
+ if (Objects.equals(response.getContentEncoding(), "gzip"))
+ {
+ ris = new GZIPInputStream(ris);
+ }
String textContent = "";
try
{
diff --git a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java
index 3862ebd30..c1390bf3d 100644
--- a/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java
+++ b/search-services/alfresco-search/src/main/java/org/alfresco/solr/tracker/MetadataTracker.java
@@ -40,6 +40,7 @@ import org.alfresco.solr.client.Node.SolrApiNodeStatus;
import org.alfresco.solr.client.SOLRAPIClient;
import org.alfresco.solr.client.Transaction;
import org.alfresco.solr.client.Transactions;
+import org.alfresco.util.Pair;
import org.apache.commons.codec.EncoderException;
import org.json.JSONException;
import org.slf4j.Logger;
@@ -63,6 +64,25 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
private ConcurrentLinkedQueue nodesToIndex = new ConcurrentLinkedQueue<>();
private ConcurrentLinkedQueue nodesToPurge = new ConcurrentLinkedQueue<>();
private ConcurrentLinkedQueue queriesToReindex = new ConcurrentLinkedQueue<>();
+
+ /**
+ * Check if nextTxCommitTimeService is available in the repository.
+ * This service is used to find the next available transaction commit time from a given time,
+ * so periods of time where no document updating is happening can be skipped while getting
+ * the pending transactions list.
+ *
+ * {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME}
+ */
+ private boolean nextTxCommitTimeServiceAvailable = false;
+
+ /**
+ * Check if txIntervalCommitTimeService is available in the repository.
+ * This service returns the minimum and the maximum commit time for transactions in a node id range,
+ * so the DB_ID_RANGE sharding method can skip transactions that are not relevant for the DB ID range.
+ *
+ * {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME}
+ */
+ private boolean txIntervalCommitTimeServiceAvailable = false;
public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName,
InformationServer informationServer)
@@ -71,6 +91,38 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100"));
nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10"));
threadHandler = new ThreadHandler(p, coreName, "MetadataTracker");
+
+ // Try invoking getNextTxCommitTime service
+ try
+ {
+ client.getNextTxCommitTime(coreName, 0l);
+ nextTxCommitTimeServiceAvailable = true;
+ }
+ catch (NoSuchMethodException e)
+ {
+ log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
+ }
+ catch (Exception e)
+ {
+ log.error("Checking nextTxCommitTimeService failed.", e);
+ }
+
+ // Try invoking txIntervalCommitTime service
+ try
+ {
+ client.getTxIntervalCommitTime(coreName, 0l, 0l);
+ txIntervalCommitTimeServiceAvailable = true;
+ }
+ catch (NoSuchMethodException e)
+ {
+ log.warn("txIntervalCommitTimeServiceAvailable is not available. If you are using DB_ID_RANGE shard method, "
+ + "upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
+ }
+ catch (Exception e)
+ {
+ log.error("Checking txIntervalCommitTimeServiceAvailable failed.", e);
+ }
+
}
MetadataTracker()
@@ -518,7 +570,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
}
protected Transactions getSomeTransactions(BoundedDeque txnsFound, Long fromCommitTime, long timeStep,
- int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException
+ int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException, NoSuchMethodException
{
long actualTimeStep = timeStep;
@@ -546,6 +598,17 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
{
transactions = client.getTransactions(startTime, null, startTime + actualTimeStep, null, maxResults, shardstate);
startTime += actualTimeStep;
+
+ // If no transactions are found, advance the time window to the next available transaction commit time
+ if (nextTxCommitTimeServiceAvailable && transactions.getTransactions().size() == 0)
+ {
+ Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime);
+ if (nextTxCommitTime != -1)
+ {
+ log.info("Advancing transactions from {} to {}", startTime, nextTxCommitTime);
+ transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate);
+ }
+ }
} while (((transactions.getTransactions().size() == 0) && (startTime < endTime))
|| ((transactions.getTransactions().size() > 0) && alreadyFoundTransactions(txnsFound, transactions)));
@@ -605,9 +668,46 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
*
*/
- Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex());
+ Long fromCommitTime = getTxFromCommitTime(txnsFound,
+ state.getLastIndexedTxCommitTime() == 0 ? state.getLastGoodTxCommitTimeInIndex() : state.getLastIndexedTxCommitTime());
log.debug("#### Check txnsFound : " + txnsFound.size());
log.debug("======= fromCommitTime: " + fromCommitTime);
+
+ // When using DB_ID_RANGE, fromCommitTime cannot be before the commit time of the first transaction
+ // for the DB_ID_RANGE to be indexed, and the commit time of the last transaction cannot be lower than fromCommitTime.
+ // When there are no nodes in that range, -1 is returned as the commit times
+ if (docRouter instanceof DBIDRangeRouter && txIntervalCommitTimeServiceAvailable)
+ {
+
+ DBIDRangeRouter dbIdRangeRouter = (DBIDRangeRouter) docRouter;
+ Pair commitTimes = client.getTxIntervalCommitTime(coreName,
+ dbIdRangeRouter.getStartRange(), dbIdRangeRouter.getEndRange());
+ Long shardMinCommitTime = commitTimes.getFirst();
+ Long shardMaxCommitTime = commitTimes.getSecond();
+
+ // Node range is not yet available in the repository
+ if (shardMinCommitTime == -1)
+ {
+ log.debug("#### [DB_ID_RANGE] No nodes in range [" + dbIdRangeRouter.getStartRange() + "-"
+ + dbIdRangeRouter.getEndRange() + "] "
+ + "exist in the repository. Skipping metadata tracking.");
+ return;
+ }
+ if (fromCommitTime > shardMaxCommitTime)
+ {
+ log.debug("#### [DB_ID_RANGE] Last commit time is greater that max commit time in in range ["
+ + dbIdRangeRouter.getStartRange() + "-" + dbIdRangeRouter.getEndRange() + "]. "
+ + "Skipping metadata tracking.");
+ return;
+ }
+ // Initial commit time for the node range is greater than the calculated fromCommitTime
+ if (fromCommitTime < shardMinCommitTime)
+ {
+ log.debug("#### [DB_ID_RANGE] SKIPPING TRANSACTIONS FROM " + fromCommitTime + " TO "
+ + shardMinCommitTime);
+ fromCommitTime = shardMinCommitTime;
+ }
+ }
log.debug("#### Get txn from commit time: " + fromCommitTime);
transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000,
@@ -964,7 +1064,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
}
public IndexHealthReport checkIndex(Long toTx, Long toAclTx, Long fromTime, Long toTime)
- throws IOException, AuthenticationException, JSONException, EncoderException
+ throws IOException, AuthenticationException, JSONException, EncoderException, NoSuchMethodException
{
// DB TX Count
long firstTransactionCommitTime = 0;
diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties
index e91ba7ee8..a6a5a1fae 100644
--- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties
+++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/noRerank/conf/solrcore.properties
@@ -174,6 +174,14 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
+#
+# Request content text compression
+# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
+# This setting can improve performance when having high network latency or large documents in the repository
+#
+solr.request.content.compress=false
+
+
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#
diff --git a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties
index 06f089370..fdf702da6 100644
--- a/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties
+++ b/search-services/alfresco-search/src/main/resources/solr/instance/templates/rerank/conf/solrcore.properties
@@ -174,6 +174,13 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
+#
+# Request content text compression
+# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP Web Proxy (NGINX, Apache)
+# This setting can improve performance when having high network latency or large documents in the repository
+#
+solr.request.content.compress=false
+
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#
diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java
index 136d5ddde..1556f7cb6 100644
--- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java
+++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClient.java
@@ -103,7 +103,9 @@ public class SOLRAPIClient
private static final String GET_NODES_URL = "api/solr/nodes";
private static final String GET_CONTENT = "api/solr/textContent";
private static final String GET_MODEL = "api/solr/model";
- private static final String GET_MODELS_DIFF = "api/solr/modelsdiff";
+ private static final String GET_MODELS_DIFF = "api/solr/modelsdiff";
+ private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction";
+ private static final String GET_TX_INTERVAL_COMMIT_TIME = "api/solr/transactionInterval";
private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum";
@@ -111,17 +113,34 @@ public class SOLRAPIClient
private SOLRDeserializer deserializer;
private DictionaryService dictionaryService;
private JsonFactory jsonFactory;
- private NamespaceDAO namespaceDAO;
+ private NamespaceDAO namespaceDAO;
+
+ /**
+ * This option adds the "Accept-Encoding: gzip" header to GET_CONTENT
+ * requests so that content can be transferred compressed. Additional
+ * configuration is required in the Alfresco Repository Tomcat Connector
+ * or HTTP Web Proxy to serve compressed responses.
+ */
+ private boolean compression;
+ public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
+ DictionaryService dictionaryService,
+ NamespaceDAO namespaceDAO)
+ {
+ this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
+ }
+
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService,
- NamespaceDAO namespaceDAO)
+ NamespaceDAO namespaceDAO,
+ boolean compression)
{
this.repositoryHttpClient = repositoryHttpClient;
this.dictionaryService = dictionaryService;
this.namespaceDAO = namespaceDAO;
this.deserializer = new SOLRDeserializer(namespaceDAO);
- this.jsonFactory = new JsonFactory();
+ this.jsonFactory = new JsonFactory();
+ this.compression = compression;
}
/**
@@ -1118,20 +1137,24 @@ public class SOLRAPIClient
GetRequest req = new GetRequest(url.toString());
+ Map headers = new HashMap<>();
if(modifiedSince != null)
{
- Map headers = new HashMap(1, 1.0f);
headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
- req.setHeaders(headers);
}
-
+ if (compression)
+ {
+ headers.put("Accept-Encoding", "gzip");
+ }
+ req.setHeaders(headers);
+
Response response = repositoryHttpClient.sendRequest(req);
if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK)
{
throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus());
- }
-
+ }
+
return new GetTextContentResponse(response);
}
@@ -1228,7 +1251,99 @@ public class SOLRAPIClient
}
return diffs;
- }
+ }
+
+ /**
+ * Returns the commit time of the next transaction after the given commit time.
+ *
+ * @param coreName alfresco, archive
+ * @param fromCommitTime initial transaction commit time
+ * @return Commit time of the next transaction, or -1 if there is none
+ * @throws IOException
+ * @throws AuthenticationException
+ * @throws NoSuchMethodException
+ */
+ public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException, NoSuchMethodException
+ {
+ StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME);
+ url.append("?").append("fromCommitTime").append("=").append(fromCommitTime);
+ GetRequest get = new GetRequest(url.toString());
+ Response response = null;
+ JSONObject json = null;
+ try
+ {
+ response = repositoryHttpClient.sendRequest(get);
+ if (response.getStatus() != HttpStatus.SC_OK)
+ {
+ throw new NoSuchMethodException(coreName + " - GetNextTxCommitTime return status is "
+ + response.getStatus() + " when invoking " + url);
+ }
+
+ Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
+ json = new JSONObject(new JSONTokener(reader));
+ }
+ finally
+ {
+ if (response != null)
+ {
+ response.release();
+ }
+ }
+ if (log.isDebugEnabled())
+ {
+ log.debug(json.toString());
+ }
+
+ return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString());
+ }
+
+ /**
+ * Returns the minimum and the maximum commit time for transactions in a node id range.
+ *
+ * @param coreName alfresco, archive
+ * @param fromNodeId Id of the initial node
+ * @param toNodeId Id of the final node
+ * @return Time of the first transaction, time of the last transaction
+ * @throws IOException
+ * @throws AuthenticationException
+ * @throws NoSuchMethodException
+ */
+ public Pair getTxIntervalCommitTime(String coreName, Long fromNodeId, Long toNodeId)
+ throws AuthenticationException, IOException, NoSuchMethodException
+ {
+ StringBuilder url = new StringBuilder(GET_TX_INTERVAL_COMMIT_TIME);
+ url.append("?").append("fromNodeId").append("=").append(fromNodeId);
+ url.append("&").append("toNodeId").append("=").append(toNodeId);
+ GetRequest get = new GetRequest(url.toString());
+ Response response = null;
+ JSONObject json = null;
+ try
+ {
+ response = repositoryHttpClient.sendRequest(get);
+ if (response.getStatus() != HttpStatus.SC_OK)
+ {
+ throw new NoSuchMethodException(coreName + " - GetTxIntervalCommitTime return status is "
+ + response.getStatus() + " when invoking " + url);
+ }
+
+ Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
+ json = new JSONObject(new JSONTokener(reader));
+ }
+ finally
+ {
+ if (response != null)
+ {
+ response.release();
+ }
+ }
+ if (log.isDebugEnabled())
+ {
+ log.debug(json.toString());
+ }
+
+ return new Pair(Long.parseLong(json.get("minTransactionCommitTimeMs").toString()),
+ Long.parseLong(json.get("maxTransactionCommitTimeMs").toString()));
+ }
/*
* type conversions from serialized JSON values to SOLR-consumable objects
@@ -1481,7 +1596,8 @@ public class SOLRAPIClient
private SolrApiContentStatus status;
private String transformException;
private String transformStatusStr;
- private Long transformDuration;
+ private Long transformDuration;
+ private String contentEncoding;
public GetTextContentResponse(Response response) throws IOException
{
@@ -1491,7 +1607,8 @@ public class SOLRAPIClient
this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus");
this.transformException = response.getHeader("X-Alfresco-transformException");
String tmp = response.getHeader("X-Alfresco-transformDuration");
- this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
+ this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
+ this.contentEncoding = response.getHeader("Content-Encoding");
setStatus();
}
@@ -1557,6 +1674,11 @@ public class SOLRAPIClient
public Long getTransformDuration()
{
return transformDuration;
+ }
+
+ public String getContentEncoding()
+ {
+ return contentEncoding;
}
}
diff --git a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java
index 0fbf5df7a..f467d3242 100644
--- a/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java
+++ b/search-services/alfresco-solrclient-lib/src/main/java/org/alfresco/solr/client/SOLRAPIClientFactory.java
@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
alfrescoHost = props.getProperty("alfresco.host", "localhost");
alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
+ boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
if (client == null)
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
- client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO);
+ client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
}