Merge master into dependabot/maven/search-services/com.googlecode.maven-download-plugin-download-maven-plugin-1.5.0.

Tom Page
2020-01-23 09:15:53 +00:00
9 changed files with 304 additions and 20 deletions

View File

@@ -11,10 +11,11 @@
<name>Search Analytics E2E Tests</name>
<description>Test Project to test Search Service and Analytics Features on a complete setup of Alfresco, Share</description>
<properties>
-<tas.rest.api.version>1.26</tas.rest.api.version>
-<tas.cmis.api.version>1.12</tas.cmis.api.version>
-<tas.utility.version>3.0.17</tas.utility.version>
-<rm.version>3.2.0</rm.version>
+<tas.rest.api.version>1.28</tas.rest.api.version>
+<tas.cmis.api.version>1.13</tas.cmis.api.version>
+<tas.utility.version>3.0.18</tas.utility.version>
+<rm.version>3.3.0</rm.version>
<suiteXmlFile>src/test/resources/SearchSuite.xml</suiteXmlFile>
<test.exclude></test.exclude>
<test.include></test.include>

View File

@@ -66,6 +66,9 @@
</goals>
</execution>
</executions>
<configuration>
<runOrder>alphabetical</runOrder>
</configuration>
</plugin>
</plugins>
</build>

View File

@@ -182,6 +182,43 @@
</dependency>
</dependencies>
<repositories>
<repository>
<id>alfresco-public</id>
<url>https://artifacts.alfresco.com/nexus/content/groups/public</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>alfresco-public-snapshots</id>
<url>https://artifacts.alfresco.com/nexus/content/groups/public-snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>central</id>
<name>Central Repository</name>
<url>https://repo.maven.apache.org/maven2</url>
<layout>default</layout>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>maven-restlet</id>
<name>Public online Restlet repository</name>
<url>http://maven.restlet.talend.com</url>
</repository>
</repositories>
<build>
<finalName>alfresco-solr</finalName>
<plugins>

View File

@@ -81,6 +81,7 @@ import java.util.Map.Entry;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import com.carrotsearch.hppc.IntArrayList;
@@ -2498,8 +2499,12 @@ public class SolrInformationServer implements InformationServer
response);
addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME,
response);
InputStream ris = response.getContent();
if (Objects.equals(response.getContentEncoding(), "gzip"))
{
ris = new GZIPInputStream(ris);
}
String textContent = "";
try
{

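The hunk above switches decoding on the Content-Encoding reported by the repository. A minimal standalone sketch of the same pattern (the helper class below is illustrative, not part of the Alfresco code):

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Objects;
    import java.util.zip.GZIPInputStream;

    class ContentStreams
    {
        // Wrap the raw response stream in a GZIPInputStream when the server
        // declared "Content-Encoding: gzip"; otherwise return it unchanged.
        static InputStream decode(InputStream raw, String contentEncoding) throws IOException
        {
            return Objects.equals(contentEncoding, "gzip") ? new GZIPInputStream(raw) : raw;
        }
    }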
View File

@@ -40,6 +40,7 @@ import org.alfresco.solr.client.Node.SolrApiNodeStatus;
import org.alfresco.solr.client.SOLRAPIClient;
import org.alfresco.solr.client.Transaction;
import org.alfresco.solr.client.Transactions;
import org.alfresco.util.Pair;
import org.apache.commons.codec.EncoderException;
import org.json.JSONException;
import org.slf4j.Logger;
@@ -63,6 +64,25 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
private ConcurrentLinkedQueue<Long> nodesToIndex = new ConcurrentLinkedQueue<>();
private ConcurrentLinkedQueue<Long> nodesToPurge = new ConcurrentLinkedQueue<>();
private ConcurrentLinkedQueue<String> queriesToReindex = new ConcurrentLinkedQueue<>();
/**
* Whether nextTxCommitTimeService is available in the repository.
* This service finds the next transaction commit time after a given time,
* so periods in which no documents were updated can be skipped when building
* the pending transactions list.
*
* {@link org.alfresco.solr.client.SOLRAPIClient#GET_NEXT_TX_COMMIT_TIME}
*/
private boolean nextTxCommitTimeServiceAvailable = false;
/**
* Whether txIntervalCommitTimeService is available in the repository.
* This service returns the minimum and maximum commit times for transactions in a node id range,
* so the DB_ID_RANGE sharding method can skip transactions that are not relevant for the range.
*
* {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME}
*/
private boolean txIntervalCommitTimeServiceAvailable = false;
public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName,
InformationServer informationServer)
@@ -71,6 +91,38 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100"));
nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10"));
threadHandler = new ThreadHandler(p, coreName, "MetadataTracker");
// Try invoking getNextTxCommitTime service
try
{
client.getNextTxCommitTime(coreName, 0L);
nextTxCommitTimeServiceAvailable = true;
}
catch (NoSuchMethodException e)
{
log.warn("nextTxCommitTimeService is not available. Upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
}
catch (Exception e)
{
log.error("Checking nextTxCommitTimeService failed.", e);
}
// Try invoking txIntervalCommitTime service
try
{
client.getTxIntervalCommitTime(coreName, 0L, 0L);
txIntervalCommitTimeServiceAvailable = true;
}
catch (NoSuchMethodException e)
{
log.warn("txIntervalCommitTimeServiceAvailable is not available. If you are using DB_ID_RANGE shard method, "
+ "upgrade your ACS Repository version in order to use this feature: {} ", e.getMessage());
}
catch (Exception e)
{
log.error("Checking txIntervalCommitTimeServiceAvailable failed.", e);
}
}
MetadataTracker()
@@ -518,7 +570,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
}
protected Transactions getSomeTransactions(BoundedDeque<Transaction> txnsFound, Long fromCommitTime, long timeStep,
-int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException
+int maxResults, long endTime) throws AuthenticationException, IOException, JSONException, EncoderException, NoSuchMethodException
{
long actualTimeStep = timeStep;
@@ -546,6 +598,17 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
{
transactions = client.getTransactions(startTime, null, startTime + actualTimeStep, null, maxResults, shardstate);
startTime += actualTimeStep;
// If no transactions are found, advance the time window to the next available transaction commit time
if (nextTxCommitTimeServiceAvailable && transactions.getTransactions().size() == 0)
{
Long nextTxCommitTime = client.getNextTxCommitTime(coreName, startTime);
if (nextTxCommitTime != -1)
{
log.info("Advancing transactions from {} to {}", startTime, nextTxCommitTime);
transactions = client.getTransactions(nextTxCommitTime, null, nextTxCommitTime + actualTimeStep, null, maxResults, shardstate);
}
}
} while (((transactions.getTransactions().size() == 0) && (startTime < endTime))
|| ((transactions.getTransactions().size() > 0) && alreadyFoundTransactions(txnsFound, transactions)));
@@ -605,9 +668,46 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
*
*/
-Long fromCommitTime = getTxFromCommitTime(txnsFound, state.getLastGoodTxCommitTimeInIndex());
+Long fromCommitTime = getTxFromCommitTime(txnsFound,
+state.getLastIndexedTxCommitTime() == 0 ? state.getLastGoodTxCommitTimeInIndex() : state.getLastIndexedTxCommitTime());
log.debug("#### Check txnsFound : " + txnsFound.size());
log.debug("======= fromCommitTime: " + fromCommitTime);
// When using DB_ID_RANGE, fromCommitTime cannot be earlier than the commit time of the first
// transaction in the DB ID range, nor later than the commit time of the last transaction in it.
// When there are no nodes in the range, -1 is returned for both commit times.
if (docRouter instanceof DBIDRangeRouter && txIntervalCommitTimeServiceAvailable)
{
DBIDRangeRouter dbIdRangeRouter = (DBIDRangeRouter) docRouter;
Pair<Long, Long> commitTimes = client.getTxIntervalCommitTime(coreName,
dbIdRangeRouter.getStartRange(), dbIdRangeRouter.getEndRange());
Long shardMinCommitTime = commitTimes.getFirst();
Long shardMaxCommitTime = commitTimes.getSecond();
// Node range is not yet available in the repository
if (shardMinCommitTime == -1)
{
log.debug("#### [DB_ID_RANGE] No nodes in range [" + dbIdRangeRouter.getStartRange() + "-"
+ dbIdRangeRouter.getEndRange() + "] "
+ "exist in the repository. Skipping metadata tracking.");
return;
}
if (fromCommitTime > shardMaxCommitTime)
{
log.debug("#### [DB_ID_RANGE] Last commit time is greater that max commit time in in range ["
+ dbIdRangeRouter.getStartRange() + "-" + dbIdRangeRouter.getEndRange() + "]. "
+ "Skipping metadata tracking.");
return;
}
// Initial commit time for the node range is greater than the calculated fromCommitTime
if (fromCommitTime < shardMinCommitTime)
{
log.debug("#### [DB_ID_RANGE] SKIPPING TRANSACTIONS FROM " + fromCommitTime + " TO "
+ shardMinCommitTime);
fromCommitTime = shardMinCommitTime;
}
}
log.debug("#### Get txn from commit time: " + fromCommitTime);
transactions = getSomeTransactions(txnsFound, fromCommitTime, TIME_STEP_1_HR_IN_MS, 2000,
@@ -964,7 +1064,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
}
public IndexHealthReport checkIndex(Long toTx, Long toAclTx, Long fromTime, Long toTime)
-throws IOException, AuthenticationException, JSONException, EncoderException
+throws IOException, AuthenticationException, JSONException, EncoderException, NoSuchMethodException
{
// DB TX Count
long firstTransactionCommitTime = 0;

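Taken together, the tracker changes probe the two new repository services once at construction time and, when they are available, use them to skip idle periods and out-of-range transactions. A hedged sketch of the window-advance strategy, using a hypothetical TxSource interface in place of SOLRAPIClient:

    import java.util.List;

    // Hypothetical stand-in for the repository API; not the Alfresco interface.
    interface TxSource
    {
        List<Long> getTransactions(long fromTime, long toTime);
        long getNextTxCommitTime(long fromTime); // -1 when no later transaction exists
    }

    class WindowScan
    {
        // Fetch one batch of transactions; on an empty window, jump straight
        // to the next commit time instead of stepping through empty windows.
        static List<Long> nextBatch(TxSource source, long start, long step)
        {
            List<Long> txns = source.getTransactions(start, start + step);
            if (txns.isEmpty())
            {
                long next = source.getNextTxCommitTime(start + step);
                if (next != -1)
                {
                    txns = source.getTransactions(next, next + step);
                }
            }
            return txns;
        }
    }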
View File

@@ -174,6 +174,14 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP web proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#

View File

@@ -174,6 +174,13 @@ solr.suggester.enabled=true
# -1 to disable suggester build throttling
solr.suggester.minSecsBetweenBuilds=3600
#
# Request content text compression
# When enabling this option, compression must also be enabled in the Tomcat Connector or HTTP web proxy (NGINX, Apache)
# This setting can improve performance when network latency is high or the repository contains large documents
#
solr.request.content.compress=false
#
# Limit the maximum text size of transformed content sent to the index - in bytes
#

View File

@@ -103,7 +103,9 @@ public class SOLRAPIClient
private static final String GET_NODES_URL = "api/solr/nodes";
private static final String GET_CONTENT = "api/solr/textContent";
private static final String GET_MODEL = "api/solr/model";
private static final String GET_MODELS_DIFF = "api/solr/modelsdiff";
private static final String GET_NEXT_TX_COMMIT_TIME = "api/solr/nextTransaction";
private static final String GET_TX_INTERVAL_COMMIT_TIME = "api/solr/transactionInterval";
private static final String CHECKSUM_HEADER = "XAlfresco-modelChecksum";
@@ -111,17 +113,34 @@ public class SOLRAPIClient
private SOLRDeserializer deserializer;
private DictionaryService dictionaryService;
private JsonFactory jsonFactory;
private NamespaceDAO namespaceDAO;
/**
* When enabled, an "Accept-Encoding: gzip" header is sent with
* GET_CONTENT requests. Additional configuration is required in the
* Alfresco Repository Tomcat Connector or HTTP web proxy to serve
* compressed responses.
*/
private boolean compression;
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService,
NamespaceDAO namespaceDAO)
{
this(repositoryHttpClient, dictionaryService, namespaceDAO, false);
}
public SOLRAPIClient(AlfrescoHttpClient repositoryHttpClient,
DictionaryService dictionaryService,
-NamespaceDAO namespaceDAO)
+NamespaceDAO namespaceDAO,
+boolean compression)
{
this.repositoryHttpClient = repositoryHttpClient;
this.dictionaryService = dictionaryService;
this.namespaceDAO = namespaceDAO;
this.deserializer = new SOLRDeserializer(namespaceDAO);
this.jsonFactory = new JsonFactory();
this.compression = compression;
}
/**
@@ -1118,20 +1137,24 @@ public class SOLRAPIClient
GetRequest req = new GetRequest(url.toString());
+Map<String, String> headers = new HashMap<>();
if(modifiedSince != null)
{
-Map<String, String> headers = new HashMap<String, String>(1, 1.0f);
headers.put("If-Modified-Since", String.valueOf(DateUtil.formatDate(new Date(modifiedSince))));
-req.setHeaders(headers);
}
+if (compression)
+{
+headers.put("Accept-Encoding", "gzip");
+}
+req.setHeaders(headers);
Response response = repositoryHttpClient.sendRequest(req);
if(response.getStatus() != Status.STATUS_NOT_MODIFIED && response.getStatus() != Status.STATUS_NO_CONTENT && response.getStatus() != Status.STATUS_OK)
{
throw new AlfrescoRuntimeException("GetTextContentResponse return status is " + response.getStatus());
}
return new GetTextContentResponse(response);
}
@@ -1228,7 +1251,99 @@ public class SOLRAPIClient
}
return diffs;
}
/**
* Returns the commit time of the next transaction after the given commit time.
*
* @param coreName alfresco, archive
* @param fromCommitTime initial transaction commit time
* @return Commit time of the next transaction, or -1 if there is none
* @throws IOException
* @throws AuthenticationException
* @throws NoSuchMethodException
*/
public Long getNextTxCommitTime(String coreName, Long fromCommitTime) throws AuthenticationException, IOException, NoSuchMethodException
{
StringBuilder url = new StringBuilder(GET_NEXT_TX_COMMIT_TIME);
url.append("?").append("fromCommitTime").append("=").append(fromCommitTime);
GetRequest get = new GetRequest(url.toString());
Response response = null;
JSONObject json = null;
try
{
response = repositoryHttpClient.sendRequest(get);
if (response.getStatus() != HttpStatus.SC_OK)
{
throw new NoSuchMethodException(coreName + " - GetNextTxCommitTime return status is "
+ response.getStatus() + " when invoking " + url);
}
Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
json = new JSONObject(new JSONTokener(reader));
}
finally
{
if (response != null)
{
response.release();
}
}
if (log.isDebugEnabled())
{
log.debug(json.toString());
}
return Long.parseLong(json.get("nextTransactionCommitTimeMs").toString());
}
/**
* Returns the minimum and the maximum commit time for transactions in a node id range.
*
* @param coreName alfresco, archive
* @param fromNodeId Id of the initial node
* @param toNodeId Id of the final node
* @return Commit times of the first and the last transaction in the range (-1 values when the range has no nodes)
* @throws IOException
* @throws AuthenticationException
* @throws NoSuchMethodException
*/
public Pair<Long, Long> getTxIntervalCommitTime(String coreName, Long fromNodeId, Long toNodeId)
throws AuthenticationException, IOException, NoSuchMethodException
{
StringBuilder url = new StringBuilder(GET_TX_INTERVAL_COMMIT_TIME);
url.append("?").append("fromNodeId").append("=").append(fromNodeId);
url.append("&").append("toNodeId").append("=").append(toNodeId);
GetRequest get = new GetRequest(url.toString());
Response response = null;
JSONObject json = null;
try
{
response = repositoryHttpClient.sendRequest(get);
if (response.getStatus() != HttpStatus.SC_OK)
{
throw new NoSuchMethodException(coreName + " - GetTxIntervalCommitTime return status is "
+ response.getStatus() + " when invoking " + url);
}
Reader reader = new BufferedReader(new InputStreamReader(response.getContentAsStream(), "UTF-8"));
json = new JSONObject(new JSONTokener(reader));
}
finally
{
if (response != null)
{
response.release();
}
}
if (log.isDebugEnabled())
{
log.debug(json.toString());
}
return new Pair<Long, Long>(Long.parseLong(json.get("minTransactionCommitTimeMs").toString()),
Long.parseLong(json.get("maxTransactionCommitTimeMs").toString()));
}
/*
* type conversions from serialized JSON values to SOLR-consumable objects
@@ -1481,7 +1596,8 @@ public class SOLRAPIClient
private SolrApiContentStatus status;
private String transformException;
private String transformStatusStr;
private Long transformDuration;
private String contentEncoding;
public GetTextContentResponse(Response response) throws IOException
{
@@ -1491,7 +1607,8 @@ public class SOLRAPIClient
this.transformStatusStr = response.getHeader("X-Alfresco-transformStatus");
this.transformException = response.getHeader("X-Alfresco-transformException");
String tmp = response.getHeader("X-Alfresco-transformDuration");
this.transformDuration = (tmp != null ? Long.valueOf(tmp) : null);
this.contentEncoding = response.getHeader("Content-Encoding");
setStatus();
}
@@ -1557,6 +1674,11 @@ public class SOLRAPIClient
public Long getTransformDuration()
{
return transformDuration;
}
public String getContentEncoding()
{
return contentEncoding;
}
}

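The two new endpoints signal absence of data with -1 and signal an unsupported repository with a non-200 status, which the client surfaces as NoSuchMethodException. A hedged usage sketch for the interval call (assumes an already-built SOLRAPIClient named client; the range bounds here are arbitrary):

    // Query the commit-time interval for a DB ID range and skip work when
    // the range holds no nodes yet. Sketch only; error handling trimmed.
    void trackRange(SOLRAPIClient client) throws Exception
    {
        Pair<Long, Long> interval = client.getTxIntervalCommitTime("alfresco", 0L, 1000L);
        Long minCommitTime = interval.getFirst();   // -1 when the range has no nodes
        Long maxCommitTime = interval.getSecond();
        if (minCommitTime != -1)
        {
            // Only transactions between minCommitTime and maxCommitTime matter here.
        }
    }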
View File

@@ -143,6 +143,7 @@ public class SOLRAPIClientFactory
alfrescoHost = props.getProperty("alfresco.host", "localhost");
alfrescoPort = Integer.parseInt(props.getProperty("alfresco.port", "8080"));
alfrescoPortSSL = Integer.parseInt(props.getProperty("alfresco.port.ssl", "8443"));
boolean compression = Boolean.parseBoolean(props.getProperty("solr.request.content.compress", "false"));
SOLRAPIClient client = getCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL);
if (client == null)
@@ -171,7 +172,7 @@ public class SOLRAPIClientFactory
maxHostConnections = Integer.parseInt(props.getProperty("alfresco.maxHostConnections", "40"));
socketTimeout = Integer.parseInt(props.getProperty("alfresco.socketTimeout", "60000"));
-client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO);
+client = new SOLRAPIClient(getRepoClient(keyResourceLoader), dictionaryService, namespaceDAO, compression);
setCachedClient(alfrescoHost, alfrescoPort, alfrescoPortSSL, client);
}
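For reference, a minimal sketch of how the factory change wires the new flag through; props, httpClient, dictionaryService, and namespaceDAO stand for collaborators built elsewhere in the factory:

    boolean compression = Boolean.parseBoolean(
            props.getProperty("solr.request.content.compress", "false"));

    // New four-argument constructor: enables the gzip Accept-Encoding header.
    SOLRAPIClient compressed = new SOLRAPIClient(httpClient, dictionaryService, namespaceDAO, compression);

    // The existing three-argument constructor keeps old callers compiling;
    // it delegates with compression = false.
    SOLRAPIClient plain = new SOLRAPIClient(httpClient, dictionaryService, namespaceDAO);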