Merge master into dependabot/maven/search-services/com.googlecode.maven-download-plugin-download-maven-plugin-1.5.0.

Tom Page committed 2020-01-23 09:15:53 +00:00
9 changed files with 304 additions and 20 deletions

File: pom.xml (Search Analytics E2E Tests)

@@ -11,10 +11,11 @@
     <name>Search Analytics E2E Tests</name>
     <description>Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share</description>
     <properties>
+        <tas.rest.api.version>1.28</tas.rest.api.version>
         <tas.rest.api.version>1.26</tas.rest.api.version>
-        <tas.cmis.api.version>1.12</tas.cmis.api.version>
+        <tas.cmis.api.version>1.13</tas.cmis.api.version>
-        <tas.utility.version>3.0.17</tas.utility.version>
+        <tas.utility.version>3.0.18</tas.utility.version>
-        <rm.version>3.2.0</rm.version>
+        <rm.version>3.3.0</rm.version>
         <suiteXmlFile>src/test/resources/SearchSuite.xml</suiteXmlFile>
         <test.exclude></test.exclude>
         <test.include></test.include>

File: pom.xml

@@ -66,6 +66,9 @@
                 </goals>
             </execution>
         </executions>
+        <configuration>
+            <runOrder>alphabetical</runOrder>
+        </configuration>
     </plugin>
 </plugins>
 </build>

File: pom.xml (alfresco-solr)

@@ -182,6 +182,43 @@
         </dependency>
     </dependencies>
+    <repositories>
+        <repository>
+            <id>alfresco-public</id>
+            <url>https://artifacts.alfresco.com/nexus/content/groups/public</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>alfresco-public-snapshots</id>
+            <url>https://artifacts.alfresco.com/nexus/content/groups/public-snapshots</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>central</id>
+            <name>Central Repository</name>
+            <url>https://repo.maven.apache.org/maven2</url>
+            <layout>default</layout>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>maven-restlet</id>
+            <name>Public online Restlet repository</name>
+            <url>http://maven.restlet.talend.com</url>
+        </repository>
+    </repositories>
     <build>
         <finalName>alfresco-solr</finalName>
         <plugins>

File: SolrInformationServer.java

@@ -81,6 +81,7 @@ import java.util.Map.Entry;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
 import com.carrotsearch.hppc.IntArrayList;
@@ -2500,6 +2501,10 @@ public class SolrInformationServer implements InformationServer
             response);
     InputStream ris = response.getContent();
+    if (Objects.equals(response.getContentEncoding(), "gzip"))
+    {
+        ris = new GZIPInputStream(ris);
+    }
     String textContent = "";
     try
     {
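
The decompression added above is the standard java.util.zip idiom: wrap the raw stream only when the response declares gzip content encoding. A minimal self-contained sketch of the same pattern (class and variable names here are illustrative, not part of this commit):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.Objects;
    import java.util.zip.GZIPInputStream;
    import java.util.zip.GZIPOutputStream;

    public class GzipResponseDemo
    {
        // Wrap the raw stream only when the server declared gzip content encoding.
        static InputStream decode(InputStream raw, String contentEncoding) throws IOException
        {
            return Objects.equals(contentEncoding, "gzip") ? new GZIPInputStream(raw) : raw;
        }

        public static void main(String[] args) throws IOException
        {
            // Simulate a gzip-compressed response body.
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            try (GZIPOutputStream gz = new GZIPOutputStream(buf))
            {
                gz.write("some text content".getBytes(StandardCharsets.UTF_8));
            }
            try (InputStream in = decode(new ByteArrayInputStream(buf.toByteArray()), "gzip"))
            {
                System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8));
            }
        }
    }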

File: MetadataTracker.java

@@ -40,6 +40,7 @@ import org.alfresco.solr.client.Node.SolrApiNodeStatus;
 import org.alfresco.solr.client.SOLRAPIClient;
 import org.alfresco.solr.client.Transaction;
 import org.alfresco.solr.client.Transactions;
+import org.alfresco.util.Pair;
 import org.apache.commons.codec.EncoderException;
 import org.json.JSONException;
 import org.slf4j.Logger;
@@ -64,6 +65,25 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
     private ConcurrentLinkedQueue<Long> nodesToPurge = new ConcurrentLinkedQueue<>();
     private ConcurrentLinkedQueue<String> queriesToReindex = new ConcurrentLinkedQueue<>();
+    /**
+     * Whether nextTxCommitTimeService is available in the repository.
+     * This service finds the next transaction commit time after a given time,
+     * so periods in which no documents were updated can be skipped while building
+     * the list of pending transactions.
+     *
+     * {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME}
+     */
+    private boolean nextTxCommitTimeServiceAvailable = false;
+    /**
+     * Whether txIntervalCommitTimeService is available in the repository.
+     * This service returns the minimum and maximum commit times for transactions in a node id range,
+     * so the DB_ID_RANGE sharding method can skip transactions that are not relevant to the range.
+     *
+     * {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME}
+     */
+    private boolean txIntervalCommitTimeServiceAvailable = false;
     public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName,
             InformationServer informationServer)
     {
@@ -71,6 +91,38 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
     transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100"));
     nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10"));
     threadHandler = new ThreadHandler(p, coreName, "MetadataTracker");
+    // Try invoking the getNextTxCommitTime service
+    try
+    {
+        client.getNextTxCommitTime(coreName, 0L);
+        nextTxCommitTimeServiceAvailable = true;
+    }
+    catch (NoSuchMethodException e)
+    {
+        log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
+    }
+    catch (Exception e)
+    {
+        log.error("Checking nextTxCommitTimeService failed.", e);
+    }
+    // Try invoking the txIntervalCommitTime service
+    try
+    {
+        client.getTxIntervalCommitTime(coreName, 0L, 0L);
+        txIntervalCommitTimeServiceAvailable = true;
+    }
+    catch (NoSuchMethodException e)
+    {
+        log.warn("txIntervalCommitTimeService is not available. If you are using the DB_ID_RANGE shard method, "
+                + "upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
+    }
+    catch (Exception e)
+    {
+        log.error("Checking txIntervalCommitTimeService failed.", e);
+    }
 }
 MetadataTracker()
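
Both flags are set with a probe-once pattern: call the endpoint once at construction time, treat NoSuchMethodException as "feature missing", and degrade gracefully afterwards. A stripped-down sketch of the idea (RemoteApi and FeatureProbe are stand-ins, not the real classes):

    // RemoteApi stands in for SOLRAPIClient; only the probe pattern matters here.
    interface RemoteApi
    {
        long getNextTxCommitTime(String core, long fromTime) throws NoSuchMethodException;
    }

    class FeatureProbe
    {
        private boolean nextTxCommitTimeServiceAvailable = false;

        FeatureProbe(RemoteApi api)
        {
            try
            {
                api.getNextTxCommitTime("alfresco", 0L); // cheap call just to see whether the endpoint exists
                nextTxCommitTimeServiceAvailable = true;
            }
            catch (NoSuchMethodException e)
            {
                // Older repository: remember the feature is missing and fall back to old behaviour.
                nextTxCommitTimeServiceAvailable = false;
            }
        }

        boolean isAvailable() { return nextTxCommitTimeServiceAvailable; }

        public static void main(String[] args)
        {
            // Simulate an old repository that lacks the endpoint.
            FeatureProbe probe = new FeatureProbe((core, t) -> { throw new NoSuchMethodException("no such endpoint"); });
            System.out.println("available: " + probe.isAvailable());
        }
    }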
@@ -518,7 +570,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
     }
     protected Transactions getSomeTransactions(BoundedDeque<Transaction> txnsFound, Long fromCommitTime, long timeStep,
-            int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException
+            int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException, NoSuchMethodException
     {
         long actualTimeStep = timeStep;
@@ -547,6 +599,17 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
         transactions = client.getTransactions(startTime, null, startTime + actualTimeStep, null, maxResults, shardstate);
         startTime += actualTimeStep;
+        // If no transactions are found, advance the time window to the next available transaction commit time
+        if (nextTxCommitTimeServiceAvailable && transactions.getTransactions().size() == 0)
+        {
+            Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime);
+            if (nextTxCommitTime != -1)
+            {
+                log.info("Advancing transactions from {} to {}", startTime, nextTxCommitTime);
+                transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate);
+            }
+        }
     } while (((transactions.getTransactions().size() == 0) && (startTime < endTime))
             || ((transactions.getTransactions().size() > 0) && alreadyFoundTransactions(txnsFound, transactions)));
@@ -605,10 +668,47 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
      *
      */
-    Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex());
+    Long fromCommitTime = getTxFromCommitTime(txnsFound,
+            state.getLastIndexedTxCommitTime() == 0 ? state.getLastGoodTxCommitTimeInIndex() : state.getLastIndexedTxCommitTime());
     log.debug("#### Check txnsFound : " + txnsFound.size());
     log.debug("======= fromCommitTime: " + fromCommitTime);
+    // When using DB_ID_RANGE, fromCommitTime cannot be earlier than the commit time of the first transaction
+    // in the DB ID range to be indexed, and the commit time of the last transaction cannot be lower than fromCommitTime.
+    // When there are no nodes in that range, -1 is returned as both commit times.
+    if (docRouter instanceof DBIDRangeRouter && txIntervalCommitTimeServiceAvailable)
+    {
+        DBIDRangeRouter dbIdRangeRouter = (DBIDRangeRouter) docRouter;
+        Pair<Long, Long> commitTimes = client.getTxIntervalCommitTime(coreName,
+                dbIdRangeRouter.getStartRange(), dbIdRangeRouter.getEndRange());
+        Long shardMinCommitTime = commitTimes.getFirst();
+        Long shardMaxCommitTime = commitTimes.getSecond();
+        // The node range is not yet available in the repository
+        if (shardMinCommitTime == -1)
+        {
+            log.debug("#### [DB_ID_RANGE] No nodes in range [" + dbIdRangeRouter.getStartRange() + "-"
+                    + dbIdRangeRouter.getEndRange() + "] "
+                    + "exist in the repository. Skipping metadata tracking.");
+            return;
+        }
+        if (fromCommitTime > shardMaxCommitTime)
+        {
+            log.debug("#### [DB_ID_RANGE] Last commit time is greater than max commit time in range ["
+                    + dbIdRangeRouter.getStartRange() + "-" + dbIdRangeRouter.getEndRange() + "]. "
+                    + "Skipping metadata tracking.");
+            return;
+        }
+        // The initial commit time for the node range is later than the calculated fromCommitTime
+        if (fromCommitTime < shardMinCommitTime)
+        {
+            log.debug("#### [DB_ID_RANGE] SKIPPING TRANSACTIONS FROM " + fromCommitTime + " TO "
+                    + shardMinCommitTime);
+            fromCommitTime = shardMinCommitTime;
+        }
+    }
     log.debug("#### Get txn from commit time: " + fromCommitTime);
     transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000,
             state.getTimeToStopIndexing());
@@ -964,7 +1064,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
     }
     public IndexHealthReport checkIndex(Long toTx, Long toAclTx, Long fromTime, Long toTime)
-            throws IOException, AuthenticationException, JSONException, EncoderException
+            throws IOException, AuthenticationException, JSONException, EncoderException, NoSuchMethodException
     {
         // DB TX Count
         long firstTransactionCommitTime = 0;
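
The DB_ID_RANGE branch above reduces to a small clamping rule: skip tracking when the range has no transactions yet (-1) or when tracking has already passed the range's last commit time, and otherwise fast-forward fromCommitTime to the range's first commit time. An illustrative sketch of just that rule (not the committed code; -1 is reused here as the "skip" signal, and the shardMin/shardMax inputs would come from getTxIntervalCommitTime):

    final class CommitTimeWindow
    {
        // Returns the adjusted fromCommitTime, or -1 when metadata tracking should be skipped.
        static long adjust(long fromCommitTime, long shardMinCommitTime, long shardMaxCommitTime)
        {
            if (shardMinCommitTime == -1)             // no nodes in the DB ID range yet
            {
                return -1;
            }
            if (fromCommitTime > shardMaxCommitTime)  // already past the last relevant transaction
            {
                return -1;
            }
            // Skip straight to the first transaction that touches the range.
            return Math.max(fromCommitTime, shardMinCommitTime);
        }

        public static void main(String[] args)
        {
            System.out.println(adjust(1_000L, 5_000L, 9_000L));  // 5000: jump forward
            System.out.println(adjust(9_500L, 5_000L, 9_000L));  // -1: nothing left to track
        }
    }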

File: solrcore.properties

@@ -174,6 +174,14 @@ solr.suggester.enabled=true
 # -1 to disable suggester build throttling
 solr.suggester.minSecsBetweenBuilds=3600
+#
+# Request content text compression
+# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP web proxy (NGINX, Apache)
+# This setting can improve performance when network latency is high or the repository contains large documents
+#
+solr.request.content.compress=false
 #
 # Limit the maximum text size of transformed content sent to the index - in bytes
 #

File: solrcore.properties (second configuration template)

@@ -174,6 +174,13 @@ solr.suggester.enabled=true
 # -1 to disable suggester build throttling
 solr.suggester.minSecsBetweenBuilds=3600
+#
+# Request content text compression
+# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP web proxy (NGINX, Apache)
+# This setting can improve performance when network latency is high or the repository contains large documents
+#
+solr.request.content.compress=false
 #
 # Limit the maximum text size of transformed content sent to the index - in bytes
 #

File: SOLRAPIClient.java

@@ -104,6 +104,8 @@ public class SOLRAPIClient
     private static final String GET_CONTENT = "api/solr/textContent";
     private static final String GET_MODEL = "api/solr/model";
     private static final String GET_MODELS_DIFF = "api/solr/modelsdiff";
+    private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction";
+    private static final String GET_TX_INTERVAL_COMMIT_TIME = "api/solr/transactionInterval";
     private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum";
@@ -113,15 +115,32 @@ public class SOLRAPIClient
     private JsonFactory jsonFactory;
     private NamespaceDAO namespaceDAO;
+    /**
+     * This option enables the "Accept-Encoding: gzip" request header for compression
+     * in GET_CONTENT requests. Additional configuration is required in the
+     * Alfresco Repository Tomcat Connector or HTTP web proxy to handle
+     * compressed responses.
+     */
+    private boolean compression;
     public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
             DictionaryService dictionaryService,
             NamespaceDAO namespaceDAO)
+    {
+        this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
+    }
+    public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
+            DictionaryService dictionaryService,
+            NamespaceDAO namespaceDAO,
+            boolean compression)
     {
         this.repositoryHttpClient = repositoryHttpClient;
         this.dictionaryService = dictionaryService;
         this.namespaceDAO = namespaceDAO;
         this.deserializer = new SOLRDeserializer(namespaceDAO);
         this.jsonFactory = new JsonFactory();
+        this.compression = compression;
     }
     /**
@@ -1118,12 +1137,16 @@ public class SOLRAPIClient
     GetRequest req = new GetRequest(url.toString());
+    Map<String, String> headers = new HashMap<>();
     if(modifiedSince != null)
     {
-        Map<String, String> headers = new HashMap<String, String>(1, 1.0f);
         headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
-        req.setHeaders(headers);
     }
+    if (compression)
+    {
+        headers.put("Accept-Encoding", "gzip");
+    }
+    req.setHeaders(headers);
     Response response = repositoryHttpClient.sendRequest(req);
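
The request side is plain HTTP content negotiation: send "Accept-Encoding: gzip", then check the Content-Encoding response header before unwrapping. A generic sketch using java.net.http rather than the Alfresco HTTP client (the host and URL prefix are placeholders; only the api/solr/textContent path comes from this diff; note that Java's HttpClient does not decompress automatically, hence the manual GZIPInputStream):

    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.util.zip.GZIPInputStream;

    public class CompressedGetDemo
    {
        public static void main(String[] args) throws IOException, InterruptedException
        {
            HttpClient client = HttpClient.newHttpClient();
            HttpRequest request = HttpRequest.newBuilder(URI.create("http://localhost:8080/alfresco/service/api/solr/textContent"))
                    .header("Accept-Encoding", "gzip")  // ask the server (or a fronting proxy) to compress
                    .build();
            HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());

            InputStream body = response.body();
            // Unwrap only when the server actually compressed the body.
            if (response.headers().firstValue("Content-Encoding").map("gzip"::equals).orElse(false))
            {
                body = new GZIPInputStream(body);
            }
            System.out.println("Read " + body.readAllBytes().length + " bytes of text content");
            body.close();
        }
    }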
@@ -1230,6 +1253,98 @@ public class SOLRAPIClient
         return diffs;
     }
+    /**
+     * Returns the commit time of the next transaction at or after the given commit time.
+     *
+     * @param coreName alfresco, archive
+     * @param fromCommitTime initial transaction commit time
+     * @return Time of the next transaction
+     * @throws IOException
+     * @throws AuthenticationException
+     * @throws NoSuchMethodException
+     */
+    public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException, NoSuchMethodException
+    {
+        StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME);
+        url.append("?").append("fromCommitTime").append("=").append(fromCommitTime);
+        GetRequest get = new GetRequest(url.toString());
+        Response response = null;
+        JSONObject json = null;
+        try
+        {
+            response = repositoryHttpClient.sendRequest(get);
+            if (response.getStatus() != HttpStatus.SC_OK)
+            {
+                throw new NoSuchMethodException(coreName + " - GetNextTxCommitTime returned status "
+                        + response.getStatus() + " when invoking " + url);
+            }
+            Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
+            json = new JSONObject(new JSONTokener(reader));
+        }
+        finally
+        {
+            if (response != null)
+            {
+                response.release();
+            }
+        }
+        if (log.isDebugEnabled())
+        {
+            log.debug(json.toString());
+        }
+        return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString());
+    }
+    /**
+     * Returns the minimum and the maximum commit time for transactions in a node id range.
+     *
+     * @param coreName alfresco, archive
+     * @param fromNodeId Id of the initial node
+     * @param toNodeId Id of the final node
+     * @return Time of the first transaction, time of the last transaction
+     * @throws IOException
+     * @throws AuthenticationException
+     * @throws NoSuchMethodException
+     */
+    public Pair<Long, Long> getTxIntervalCommitTime(String coreName, Long fromNodeId, Long toNodeId)
+            throws AuthenticationException, IOException, NoSuchMethodException
+    {
+        StringBuilder url = new StringBuilder(GET_TX_INTERVAL_COMMIT_TIME);
+        url.append("?").append("fromNodeId").append("=").append(fromNodeId);
+        url.append("&").append("toNodeId").append("=").append(toNodeId);
+        GetRequest get = new GetRequest(url.toString());
+        Response response = null;
+        JSONObject json = null;
+        try
+        {
+            response = repositoryHttpClient.sendRequest(get);
+            if (response.getStatus() != HttpStatus.SC_OK)
+            {
+                throw new NoSuchMethodException(coreName + " - GetTxIntervalCommitTime returned status "
+                        + response.getStatus() + " when invoking " + url);
+            }
+            Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
+            json = new JSONObject(new JSONTokener(reader));
+        }
+        finally
+        {
+            if (response != null)
+            {
+                response.release();
+            }
+        }
+        if (log.isDebugEnabled())
+        {
+            log.debug(json.toString());
+        }
+        return new Pair<Long, Long>(Long.parseLong(json.get("minTransactionCommitTimeMs").toString()),
+                Long.parseLong(json.get("maxTransactionCommitTimeMs").toString()));
+    }
     /*
      * type conversions from serialized JSON values to SOLR-consumable objects
      */
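
A brief usage sketch for the two new client methods; the wrapping class, the "alfresco" core name, and the import path for AuthenticationException are assumptions, while SOLRAPIClient, Pair, and the method signatures come from this diff:

    import java.io.IOException;
    import org.alfresco.httpclient.AuthenticationException;  // assumed import path
    import org.alfresco.solr.client.SOLRAPIClient;
    import org.alfresco.util.Pair;

    class TxCommitTimeUsage
    {
        // Finds the commit-time window for a DB ID range, returning null when
        // the repository does not expose the endpoint yet.
        static Pair<Long, Long> commitTimeWindow(SOLRAPIClient client, long fromNodeId, long toNodeId)
                throws AuthenticationException, IOException
        {
            try
            {
                return client.getTxIntervalCommitTime("alfresco", fromNodeId, toNodeId);
            }
            catch (NoSuchMethodException e)
            {
                return null;  // older ACS repository: caller should track without range clamping
            }
        }
    }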
@@ -1482,6 +1597,7 @@ public class SOLRAPIClient
     private String transformException;
     private String transformStatusStr;
     private Long transformDuration;
+    private String contentEncoding;
     public GetTextContentResponse(Response response) throws IOException
     {
@@ -1492,6 +1608,7 @@ public class SOLRAPIClient
         this.transformException = response.getHeader("X-Alfresco-transformException");
         String tmp = response.getHeader("X-Alfresco-transformDuration");
         this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
+        this.contentEncoding = response.getHeader("Content-Encoding");
         setStatus();
     }
@@ -1558,6 +1675,11 @@ public class SOLRAPIClient
     {
         return transformDuration;
     }
+    public String getContentEncoding()
+    {
+        return contentEncoding;
+    }
 }
 public void close()

File: SOLRAPIClientFactory.java

@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
     alfrescoHost = props.getProperty("alfresco.host", "localhost");
     alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
     alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
+    boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
     SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
     if (client == null)
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
         maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
         socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
-        client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO);
+        client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
         setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
     }
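
End to end, the new flag travels from solrcore.properties through SOLRAPIClientFactory into SOLRAPIClient. A condensed illustration of that flow (the property name and default come from this diff; the demo class does not):

    import java.util.Properties;

    class CompressionWiringDemo
    {
        public static void main(String[] args)
        {
            Properties props = new Properties();
            props.setProperty("solr.request.content.compress", "true");  // as set in solrcore.properties

            // The factory reads the flag, defaulting to false as in SOLRAPIClientFactory...
            boolean compression = Boolean.parseBoolean(
                    props.getProperty("solr.request.content.compress", "false"));

            // ...and passes it to the new 4-arg SOLRAPIClient constructor, which then sends
            // "Accept-Encoding: gzip" on GET_CONTENT requests, while SolrInformationServer
            // unwraps gzip-encoded bodies with GZIPInputStream.
            System.out.println("compression enabled: " + compression);
        }
    }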