Merge branch 'feature/SEARCH_1687_ContentStore_Removal' of git.alfresco.com:search_discovery/insightengine into feature/SEARCH_1687_ThreadScope_1

eliaporciani
2020-02-04 10:11:10 +01:00
29 changed files with 738 additions and 199 deletions

View File

@@ -55,7 +55,7 @@ import org.testng.annotations.BeforeSuite;
public abstract class AbstractE2EFunctionalTest extends AbstractTestNGSpringContextTests public abstract class AbstractE2EFunctionalTest extends AbstractTestNGSpringContextTests
{ {
/** The number of retries that a query will be tried before giving up. */ /** The number of retries that a query will be tried before giving up. */
private static final int SEARCH_MAX_ATTEMPTS = 6; protected static final int SEARCH_MAX_ATTEMPTS = 6;
private static final Logger LOGGER = LogFactory.getLogger(); private static final Logger LOGGER = LogFactory.getLogger();

View File

@@ -61,7 +61,7 @@ public abstract class AbstractCmisE2ETest extends AbstractE2EFunctionalTest
protected boolean waitForIndexing(String query, long expectedCountResults) protected boolean waitForIndexing(String query, long expectedCountResults)
{ {
for (int searchCount = 1; searchCount <= 3; searchCount++) for (int searchCount = 1; searchCount <= SEARCH_MAX_ATTEMPTS; searchCount++)
{ {
try try
@@ -71,7 +71,7 @@ public abstract class AbstractCmisE2ETest extends AbstractE2EFunctionalTest
} }
catch (AssertionError ae) catch (AssertionError ae)
{ {
LOGGER.debug(ae.toString()); LOGGER.info(String.format("WaitForIndexing in Progress: %s", ae.toString()));
} }

View File

@@ -7,6 +7,7 @@ import org.alfresco.utility.data.provider.XMLTestDataProvider;
import org.alfresco.utility.model.FileModel; import org.alfresco.utility.model.FileModel;
import org.alfresco.utility.model.FolderModel; import org.alfresco.utility.model.FolderModel;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -135,6 +136,7 @@ public class SolrSearchByAspectTests extends AbstractCmisE2ETest
.replace("NODE_REF[f1]", tasFolder1.getNodeRef()) .replace("NODE_REF[f1]", tasFolder1.getNodeRef())
.replace("NODE_REF[s1]", siteDoclibNodeRef); .replace("NODE_REF[s1]", siteDoclibNodeRef);
cmisApi.authenticateUser(testUser).withQuery(currentQuery).assertResultsCount().equals(query.getResults()); cmisApi.authenticateUser(testUser);
Assert.assertTrue(waitForIndexing(currentQuery, query.getResults()), String.format("Result count not as expected for query: %s", currentQuery));
} }
} }

View File

@@ -7,6 +7,7 @@ import org.alfresco.utility.data.provider.XMLTestDataProvider;
import org.alfresco.utility.model.FileModel; import org.alfresco.utility.model.FileModel;
import org.alfresco.utility.model.FolderModel; import org.alfresco.utility.model.FolderModel;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -87,7 +88,7 @@ public class SolrSearchByIdTests extends AbstractCmisE2ETest
@Test(dataProviderClass = XMLTestDataProvider.class, dataProvider = "getQueriesData") @Test(dataProviderClass = XMLTestDataProvider.class, dataProvider = "getQueriesData")
@XMLDataConfig(file = "src/test/resources/testdata/search-by-id.xml") @XMLDataConfig(file = "src/test/resources/testdata/search-by-id.xml")
public void executeSearchByAspect(QueryModel query) throws Exception public void executeSearchById(QueryModel query) throws Exception
{ {
String currentQuery = query.getValue() String currentQuery = query.getValue()
.replace("NODE_REF[siteId]", siteDoclibNodeRef) .replace("NODE_REF[siteId]", siteDoclibNodeRef)
@@ -96,6 +97,7 @@ public class SolrSearchByIdTests extends AbstractCmisE2ETest
.replace("NODE_REF[f1]", tasFolder1.getNodeRef()) .replace("NODE_REF[f1]", tasFolder1.getNodeRef())
.replace("NODE_REF[f1-1]", tasSubFolder1.getNodeRef()); .replace("NODE_REF[f1-1]", tasSubFolder1.getNodeRef());
cmisApi.authenticateUser(testUser).withQuery(currentQuery).assertResultsCount().equals(query.getResults()); cmisApi.authenticateUser(testUser);
Assert.assertTrue(waitForIndexing(currentQuery, query.getResults()), String.format("Result count not as expected for query: %s", currentQuery));
} }
} }

View File

@@ -7,6 +7,7 @@ import org.alfresco.utility.data.provider.XMLTestDataProvider;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass; import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -51,6 +52,8 @@ public class SolrSearchByPathTests extends AbstractCmisE2ETest
@XMLDataConfig(file = "src/test/resources/testdata/search-by-path.xml") @XMLDataConfig(file = "src/test/resources/testdata/search-by-path.xml")
public void executeSearchByPathQueries(QueryModel query) public void executeSearchByPathQueries(QueryModel query)
{ {
cmisApi.withQuery(query.getValue()).assertResultsCount().equals(query.getResults()); cmisApi.authenticateUser(testUser);
Assert.assertTrue(waitForIndexing(query.getValue(), query.getResults()), String.format("Result count not as expected for query: %s", query.getValue()));
} }
} }

View File

@@ -10,6 +10,7 @@ import org.alfresco.utility.data.provider.XMLTestDataProvider;
import org.alfresco.utility.model.FileModel; import org.alfresco.utility.model.FileModel;
import org.alfresco.utility.model.FolderModel; import org.alfresco.utility.model.FolderModel;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -104,7 +105,8 @@ public class SolrSearchByPropertyTests extends AbstractCmisE2ETest
.addProperty("tas:IntPropertyC", 2223)); .addProperty("tas:IntPropertyC", 2223));
// wait for solr index // wait for solr index
Utility.waitToLoopTime(getSolrWaitTimeInSeconds()); cmisApi.authenticateUser(testUser);
waitForIndexing("SELECT * FROM tas:document where cmis:name = 'testc3.txt'", 1);
} }
@Test @Test

View File

@@ -7,6 +7,7 @@ import org.alfresco.utility.model.FileModel;
import org.alfresco.utility.model.FileType; import org.alfresco.utility.model.FileType;
import org.alfresco.utility.model.FolderModel; import org.alfresco.utility.model.FolderModel;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.testng.Assert;
import org.testng.annotations.AfterClass; import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -55,6 +56,7 @@ public class SolrSearchInFolderTests extends AbstractCmisE2ETest
public void executeCMISQuery(QueryModel query) throws Exception public void executeCMISQuery(QueryModel query) throws Exception
{ {
String currentQuery = String.format(query.getValue(), parentFolder.getNodeRef()); String currentQuery = String.format(query.getValue(), parentFolder.getNodeRef());
cmisApi.withQuery(currentQuery).assertResultsCount().equals(query.getResults()); cmisApi.authenticateUser(testUser);
Assert.assertTrue(waitForIndexing(currentQuery, query.getResults()), String.format("Result count not as expected for query: %s", currentQuery));
} }
} }

View File

@@ -7,6 +7,7 @@ import org.alfresco.utility.model.FileModel;
import org.alfresco.utility.model.FileType; import org.alfresco.utility.model.FileType;
import org.alfresco.utility.model.FolderModel; import org.alfresco.utility.model.FolderModel;
import org.alfresco.utility.model.QueryModel; import org.alfresco.utility.model.QueryModel;
import org.testng.Assert;
import org.testng.annotations.AfterClass; import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -59,7 +60,7 @@ public class SolrSearchInTreeTests extends AbstractCmisE2ETest
public void executeCMISQuery(QueryModel query) throws Exception public void executeCMISQuery(QueryModel query) throws Exception
{ {
String currentQuery = String.format(query.getValue(), parentFolder.getNodeRef()); String currentQuery = String.format(query.getValue(), parentFolder.getNodeRef());
cmisApi.withQuery(currentQuery) cmisApi.authenticateUser(testUser);
.assertResultsCount().equals(query.getResults()); Assert.assertTrue(waitForIndexing(currentQuery, query.getResults()), String.format("Result count not as expected for query: %s", currentQuery));
} }
} }

View File

@@ -9,6 +9,7 @@ import org.alfresco.utility.data.provider.XMLTestDataProvider;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass; import org.testng.annotations.AfterClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@@ -54,8 +55,8 @@ public class SolrSearchScoreQueryTests extends AbstractCmisE2ETest
this.testData = testData; this.testData = testData;
this.testData.createUsers(dataUser); this.testData.createUsers(dataUser);
this.testData.createSitesStructure(dataSite, dataContent, dataUser); this.testData.createSitesStructure(dataSite, dataContent, dataUser);
cmisApi.authenticateUser(dataUser.getCurrentUser()); testUser = dataUser.getCurrentUser();
cmisApi.authenticateUser(testUser);
} }
/** /**
@@ -71,66 +72,48 @@ public class SolrSearchScoreQueryTests extends AbstractCmisE2ETest
+ "WHERE CONTAINS('Quidditch') " + "WHERE CONTAINS('Quidditch') "
+ "ORDER BY orderCriteria"; + "ORDER BY orderCriteria";
if (waitForIndexing(query, 3)) Assert.assertTrue(waitForIndexing(query, 3), String.format("Result count not as expected for query: %s", query));
{
cmisApi cmisApi.withQuery(query).assertColumnIsOrdered().isOrderedAsc("orderCriteria");
.withQuery(query)
.assertColumnIsOrdered().isOrderedAsc("orderCriteria");
}
else
{
throw new AssertionError("Wait for indexing has failed!");
}
} }
/** /**
* Verify that results are inverse ordered * Verify that results are inverse ordered
* @throws Exception *
*/ * @throws Exception
@Test(dependsOnMethods = "prepareDataForScoreSearch") */
@Test(dependsOnMethods = "prepareDataForScoreSearch")
public void scoreQueryOrderedDesc() throws Exception public void scoreQueryOrderedDesc() throws Exception
{ {
String query = "SELECT cmis:objectId, SCORE() AS orderCriteria " String query = "SELECT cmis:objectId, SCORE() AS orderCriteria "
+ "FROM cmis:document " + "FROM cmis:document "
+ "WHERE CONTAINS('Quidditch') " + "WHERE CONTAINS('Quidditch') "
+ "ORDER BY orderCriteria DESC"; + "ORDER BY orderCriteria DESC";
if (waitForIndexing(query, 3)) Assert.assertTrue(waitForIndexing(query, 3), String.format("Result count not as expected for query: %s", query));
{
cmisApi cmisApi.withQuery(query).assertColumnIsOrdered().isOrderedDesc("orderCriteria");
.withQuery(query).assertColumnIsOrdered().isOrderedDesc("orderCriteria");
}
else
{
throw new AssertionError("Wait for indexing has failed!");
}
} }
/** /**
* Verify that all SCORE results are between 0 and 1 * Verify that all SCORE results are between 0 and 1
* @throws Exception *
*/ * @throws Exception
@Test(groups = { TestGroup.ACS_62n }, dependsOnMethods = "prepareDataForScoreSearch") */
@Test(groups = { TestGroup.ACS_62n }, dependsOnMethods = "prepareDataForScoreSearch")
public void scoreQueryInRange() throws Exception public void scoreQueryInRange() throws Exception
{ {
String query = "SELECT cmis:objectId, SCORE() " String query = "SELECT cmis:objectId, SCORE() "
+ "FROM cmis:document " + "FROM cmis:document "
+ "WHERE CONTAINS('Quidditch')"; + "WHERE CONTAINS('Quidditch')";
if (waitForIndexing(query, 3)) Assert.assertTrue(waitForIndexing(query, 3), String.format("Result count not as expected for query: %s", query));
{
cmisApi cmisApi.withQuery(query).assertColumnValuesRange().isReturningValuesInRange("SEARCH_SCORE", BigDecimal.ZERO, BigDecimal.ONE);
.withQuery(query)
.assertColumnValuesRange().isReturningValuesInRange("SEARCH_SCORE", BigDecimal.ZERO, BigDecimal.ONE);
}
else
{
throw new AssertionError("Wait for indexing has failed!");
}
} }
@@ -143,44 +126,32 @@ public class SolrSearchScoreQueryTests extends AbstractCmisE2ETest
{ {
String query = "SELECT cmis:objectId, SCORE() AS orderCriteria " String query = "SELECT cmis:objectId, SCORE() AS orderCriteria "
+ "FROM cmis:document " + "FROM cmis:document "
+ "WHERE CONTAINS('Quidditch')"; + "WHERE CONTAINS('Quidditch')";
if (waitForIndexing(query, 3)) Assert.assertTrue(waitForIndexing(query, 3), String.format("Result count not as expected for query: %s", query));
{
cmisApi cmisApi.withQuery(query).assertColumnValuesRange().isReturningValuesInRange("orderCriteria", BigDecimal.ZERO, BigDecimal.ONE);
.withQuery(query)
.assertColumnValuesRange().isReturningValuesInRange("orderCriteria", BigDecimal.ZERO, BigDecimal.ONE);
}
else
{
throw new AssertionError("Wait for indexing has failed!");
}
} }
/** /**
* Verify that SCORE is valid name for an alias * Verify that SCORE is valid name for an alias
* Currently only supported with double quotes * Currently only supported with double quotes
* @throws Exception *
*/ * @throws Exception
@Test(dependsOnMethods = "prepareDataForScoreSearch") */
@Test(dependsOnMethods = "prepareDataForScoreSearch")
public void scoreQueryScoreAsAlias() throws Exception public void scoreQueryScoreAsAlias() throws Exception
{ {
String query = "SELECT cmis:objectId, SCORE() AS \"score\" " String query = "SELECT cmis:objectId, SCORE() AS \"score\" "
+ "FROM cmis:document " + "FROM cmis:document "
+ "WHERE CONTAINS('Quidditch')"; + "WHERE CONTAINS('Quidditch')";
if (waitForIndexing(query, 3)) Assert.assertTrue(waitForIndexing(query, 3), String.format("Result count not as expected for query: %s", query));
{
cmisApi cmisApi.withQuery(query).assertResultsCount().equals(3);
.withQuery(query).assertResultsCount().equals(3);
}
else
{
throw new AssertionError("Wait for indexing has failed!");
}
} }

View File

@@ -101,6 +101,7 @@ public class ShardInfoTest extends AbstractE2EFunctionalTest
RestShardInfoModel model = shardInfoModel.getModel(); RestShardInfoModel model = shardInfoModel.getModel();
assertEquals(model.getTemplate(), "rerank"); assertEquals(model.getTemplate(), "rerank");
assertEquals(model.getShardMethod(), "DB_ID"); assertEquals(model.getShardMethod(), "DB_ID");
assertEquals(model.getMode(), "MIXED");
assertTrue(model.getHasContent()); assertTrue(model.getHasContent());
assertTrue(stores.contains(model.getStores())); assertTrue(stores.contains(model.getStores()));

View File

@@ -31,7 +31,7 @@ import static org.testng.Assert.assertTrue;
* @author Alessandro Benedetti * @author Alessandro Benedetti
* @author Meenal Bhave * @author Meenal Bhave
*/ */
public class CascadingTrackerIntegrationTest extends AbstractE2EFunctionalTest public class CascadingIntegrationTest extends AbstractE2EFunctionalTest
{ {
@Autowired @Autowired
protected DataContent dataContent; protected DataContent dataContent;

View File

@@ -0,0 +1,394 @@
## SolrContentStore Removal
### Status
![Completeness Badge](https://img.shields.io/badge/Document_Level-Completed-green.svg?style=flat-square)
### Context
SearchServices is a set of Alfresco-specific customisations built on top of Apache Solr, a highly reliable, scalable and
fault-tolerant search platform.
Apache Solr provides efficient search services on a given set of data composed of atomic units of work called "documents".
Data managed in Alfresco that needs to be "searchable" must be sent to Solr for "indexing". In the indexing phase, Solr
stores the incoming data and organises it in an immutable data structure called an "inverted index", plus some additional data
structures needed for other complementary services offered by the platform (e.g. highlighting, storage, More Like This).
The following picture illustrates the content of the Solr data directory
![Apache Solr Data Directory](solr.data.dir.png)
SearchServices adds a complementary and auxiliary data organisation structure, based on the filesystem, called the **Solr Content Store**.
The following picture illustrates the content and the structure of the Solr Content Store.
![SolrContentStore](solrcontentstore.png)
### What is the SolrContentStore?
The _SolrContentStore_ is a logical extension of the Apache Solr index used by SearchServices for maintaining a verbatim copy of
all incoming data. It is a local folder, organised by tenant, which contains all input documents indexed in Solr.
Within that folder, each file
- is organised hierarchically, under a root folder called "contentstore", and per tenant
- represents a single document sent to Solr for indexing
- is compressed (.gz) and serialised. Specifically, it consists of the serialised form of a _SolrInputDocument_ instance,
the Java class used by Solr for representing an incoming document that is going to be indexed.
Data that needs to be indexed is retrieved from Alfresco (_Node_ is the composite class representing the main Alfresco
Domain Object) and then
- each _Node_ instance is converted to a _SolrInputDocument_ instance
- each _SolrInputDocument_ instance is serialised, compressed and then stored in the content store (as sketched below)
- each _SolrInputDocument_ instance is sent to Solr
![SolrContentStore](stored_content_in_searchservices.png)
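A minimal sketch of this write flow, assuming the documents are serialised with Solr's _JavaBinCodec_ and compressed one file per document (the class and the path layout are illustrative, not the actual SearchServices implementation):

```java
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.GZIPOutputStream;

import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.JavaBinCodec;

public class ContentStoreWriteSketch
{
    /** Serialises, compresses and stores a document in the content store, keyed by its DBID. */
    public void store(Path contentStoreRoot, long dbid, SolrInputDocument document) throws Exception
    {
        Path target = contentStoreRoot.resolve(dbid + ".gz");
        try (OutputStream out = new GZIPOutputStream(Files.newOutputStream(target)))
        {
            new JavaBinCodec().marshal(document, out);
        }
    }
}
```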
With that flow in mind, at a given time T, the main difference between a document D in the content store and the same document in the Solr
index is that:
- the content store file represents a verbatim copy of the _SolrInputDocument_ created from the corresponding _Node_
- it can easily be located, because it corresponds to a single file in the content store; the Solr document definition, instead,
doesn't have a "single" representation in the filesystem, because it has passed through the text analysis process.
### Apache Solr Domain Model
In order to understand why the content store approach was adopted up to SearchServices 1.4.x, we need to
briefly describe how Solr manages document fields.
In Solr, the configuration file where fields are declared and configured is called "schema.xml". Each field can have
different attributes that define
- how it is internally organised
- what search features are enabled (for that specific field)
In this context we are interested in two specific attributes: "indexed" and "stored". A field in the Solr schema can be declared
as "stored" and/or "indexed":
- if the field is indexed (i.e. indexed="true"), search features are enabled for that field (e.g. search, faceting, sorting)
- if the field is stored (i.e. stored="true"), a verbatim copy of the incoming field value is stored and can be returned as part of the search results.
In the SearchServices 1.4.x schema:
- all fields are marked as indexed: this is quite obvious, because we want to enable search features on them
- 99% of fields are marked as **non-stored**: this is because SearchServices **retrieves the stored content from the Solr Content Store**
There are actually only three fields marked as stored: id, DBID and _version_. The last one is a Solr internal field used for
features like atomic updates and optimistic locking (neither of which is used in SearchServices 1.4.x).
### When the Solr Content Store is used
As described above, SearchServices doesn't make use of Solr's storage capabilities, so the rule is: the Solr
Content Store is involved in every interaction which requires the stored content. That includes:
- **Fields retrieval**: Solr stores only the id, DBID and _version_ fields; in search results we also want to be able to retrieve
other fields (e.g. name, title, LID, PATH)
- **Highlighting**: highlighted snippets are built using the stored field value(s)
- **Clustering**: runtime cluster generation uses the stored field value(s) as well
- **Fingerprint**: the Fingerprint (custom) SearchComponent returns the (stored value of the) MINHASH field computed from the text content associated
with a given document
- **Text Content Management**: this is strictly connected with how the _ContentTracker_ works. See this [ADR](../trackers/00001-content-tracker.md) for a detailed explanation of the text content lifecycle in SearchServices.
### Read/Write Path on the Solr Content Store
Every time a search request involves one of the points listed in the previous section, we need to interact
- with the Solr index
- with the Solr Content Store
The Solr Content Store interaction can have two purposes:
- **Read only**: we need to read the stored fields associated with one or more documents
- **Read/Write**: we need to read and update the document definition (i.e. some field has been updated)
Executing these two paths requires additional I/O and CPU work on top of what Solr already does. Specifically:
The **Read Path** consists of the following steps (remember, this needs to be done for each match produced by a query):
- Locate the .gz file corresponding to a given DBID
- Uncompress the .gz file
- Deserialise the file in a _SolrInputDocument_ instance
- Use the fields values in the instance in order to perform the required task (e.g. fields retrieval, highlighting)
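The same steps, expressed as a minimal sketch (the inverse of the write sketch above, with the same assumptions about codec and file layout):

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.GZIPInputStream;

import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.JavaBinCodec;

public class ContentStoreReadSketch
{
    /** Executes the read path above for a single query match, identified by its DBID. */
    public SolrInputDocument load(Path contentStoreRoot, long dbid) throws Exception
    {
        // 1. locate the .gz file corresponding to the given DBID
        Path source = contentStoreRoot.resolve(dbid + ".gz");
        // 2. uncompress and 3. deserialise the whole file, even if just one field is needed
        try (InputStream in = new GZIPInputStream(Files.newInputStream(source)))
        {
            return (SolrInputDocument) new JavaBinCodec().unmarshal(in);
        }
    }
}
```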
A first important point about the read path above: it is not possible to load into memory only the fields we need.
Every time document D is needed (even if our interaction requires just one field), the whole document definition is
- located (file seek)
- uncompressed
- deserialised
- read
Such a capability is instead available in Lucene: the IndexSearcher class can load a partial document definition which
contains only the fields actually needed. For example, if we want to highlight search terms in two fields, let's say
"name" and "title":
- the _AlfrescoHighlighter_ loads the whole document in memory
- the _SolrHighlighter_ loads only those two fields
This can make a significant difference in a context where the field cardinality per document is high, or where
we have one or more big (and unneeded) fields with a lot of text content.
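For comparison, a sketch of the Lucene capability mentioned above, using the standard _IndexSearcher_ API (field names are illustrative):

```java
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;

public class PartialLoadSketch
{
    /** Materialises only the two stored fields needed for highlighting, not the whole document. */
    public Document loadNameAndTitle(IndexSearcher searcher, int docId) throws IOException
    {
        return searcher.doc(docId, new HashSet<>(Arrays.asList("name", "title")));
    }
}
```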
The **Write Path** is even worse, because it adds the following steps to the list above:
- Update the _SolrInputDocument_ instance with updated values
- Delete the old compressed file in the filesystem
- Serialise the updated _SolrInputDocument_ instance
- Compress the serialised image
- Write a new .gz file
### Solr Content Store Removal Benefits
#### Use Solr built-in capabilities as much as possible
The main reason why an open source platform is chosen as the underlying framework is its popularity. That means a lot of
advantages in terms of
- community and non-commercial support
- product improvements with short iterations (e.g. enhancements, bug fixes)
Although the underlying reasons for introducing a customisation may be perfectly acceptable, it's important to keep in
mind that increasing the customisation level necessarily creates a gap, a distance from the open source product.
On one side, the customisation allows implementing some functional requirement not covered by the open source version;
on the other side, the same customisation won't have the corresponding support from the community.
The initial approach to this task consisted of a verification [Spike](https://issues.alfresco.com/jira/browse/SEARCH-1669) where
we investigated the pros and cons of keeping or removing the _SolrContentStore_.
In summary, the outcome was in favour of removal, because the Solr storage capabilities are definitely more efficient
than the approach adopted in the _SolrContentStore_.
#### Fewer Solr customisations
This is a direct consequence of the preceding point. As you can read below, where we describe the major components affected
by the removal task, some customised components (e.g. Clustering) have been removed entirely, while others (e.g. the Highlighter)
have been greatly simplified, leveraging the Solr built-in capabilities as much as possible.
### Only Solr data files
SearchServices no longer has to manage external files or folders. In SearchServices 1.4.x the content store required a
significant effort for [customising](https://issues.alfresco.com/jira/browse/SEARCH-1669) the built-in Solr replication
mechanism, which doesn't take the Alfresco SolrContentStore into account.
![Solr Replication](replication.png)
Note that this customisation has been removed in this branch and replaced by the built-in Solr ReplicationHandler:
the whole stored content management has been centralised in Solr; as a consequence, the read/write paths described above
are no longer valid.
### Better compression
Compressing at the single-document level is not very efficient because of the small amount of data available. Moving this task
to the Solr level can deliver very good results for two main reasons:
- data cardinality is higher, which means the compression algorithm can work with more representative and efficient statistics
- data compression and index organisation are areas where the Solr community has dedicated, and continues to dedicate, a considerable amount of effort
### Less, and more efficient, I/O and CPU (compress/decompress) resource usage
This is again related to the read/write paths described above: once the _SolrContentStore_ has been removed, we no longer have to
deal with external files and folders, and the read, write, compress, uncompress, serialise and deserialise tasks are no longer needed.
### Better OS Page Cache usage
The OS page cache is used for storing and caching files required by the application processes running on a given machine.
In an ideal scenario the OS would put the entire Solr index in the page cache, so that every further read operation wouldn't require any disk seek.
Unfortunately, the cache size is usually smaller than that, so a certain amount of time is spent by the OS loading/unloading the
requested files.
In such a context, the fewer files we have to manage, the better: a component like the content store,
which requires a significant amount of I/O operations, has a considerable impact on the hardware resources (e.g. disk, CPU)
and leads to a less efficient usage of the OS page cache (e.g. the OS could unload the Solr data files in order to work with Solr content store files).
## Major Changes
This section provides a high-level description of the components/areas that have been affected by the SolrContentStore removal.
### Solr Schema
Jira Ticket: [SEARCH-1707](https://issues.alfresco.com/jira/browse/SEARCH-1707)
The Solr schema (schema.xml) includes the following changes:
- **stored fields**: every field is marked as stored. Since this is something we want to apply to all fields, the stored
attribute has been defined at field type level.
- **cleanup and new field types**: there are several new field types that declare the default values applied to a field.
The naming is quite intuitive (e.g. "long" is a single-valued numeric field, "longs" is for multiValued numeric fields).
That change allows clearer field definitions (i.e. field definitions that don't override default values are very short and concise)
![Field Types](schema_field_types.png)
- **comments and examples**: sometimes it is very hard to understand the purpose of a field and what its runtime content is.
For each field, the current schema provides a description of its intent and one or more examples.
![Comments in schema](field_content_example.png)
### Highlighting
Jira Ticket: [SEARCH-1693](https://issues.alfresco.com/jira/browse/SEARCH-1693)
Before the content store removal, the _AlfrescoSolrHighlighter_ class was a custom "copy" of the _DefaultSolrHighlighter_.
Instead of extending the Solr component, at the time of writing that class had been
- copied
- renamed to _AlfrescoSolrHighlighter_
- customised
As a consequence, the class was a mix of Alfresco and Solr code. Specifically, the custom code (and this is valid for all the customised
components mentioned in this document) was there mainly for two reasons:
- SolrContentStore interaction: every time the component needed to access the stored content of a document
- field renaming/mapping between Alfresco and Solr: for example, a "cm_name" or "name" Alfresco field in the highlighting
request needs to be translated into the corresponding Solr field (e.g. text@s___t@....@name)
The new _AlfrescoSolrHighlighter_
- removes any interaction with the content store
- extends the _DefaultSolrHighlighter_
- consists for about 95% of Alfresco-specific logic (mainly related to the field mapping/renaming). Each time it needs to execute
the highlighting logic, it delegates to the Solr superclass.
- still contains about 5% of code copied from the superclass. That is because sometimes it hasn't been possible to decorate the
Solr methods from the superclass (see the _getSpanQueryScorer_ or _getHighlighter_ methods)
The field mapping/renaming didn't allow us to completely remove the custom component. However, the refactoring described above could be
a first step towards externalising that logic (in an intermediate REST layer). Once that is done, the custom highlighter could be removed and replaced with
the plain Solr built-in component.
### Clustering
Jira Ticket: [SEARCH-1688](https://issues.alfresco.com/jira/browse/SEARCH-1688)
The _AlfrescoClusteringComponent_ has been removed because it was a raw copy of the corresponding Solr component,
and the only customisation was related to the content store interaction.
### Fingerprint
Jira Ticket: [SEARCH-1694](https://issues.alfresco.com/jira/browse/SEARCH-1694)
Two components have been affected by the content store removal:
- the [_Solr4QueryParser_](https://issues.alfresco.com/jira/browse/SEARCH-1694?focusedCommentId=622599&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-622599): the component in charge of parsing incoming queries (FINGERPRINT queries in this case; example query strings below)
- the _FingerprintComponent_: a custom _SearchComponent_ which accepts a node identifier as input and returns a response consisting of the corresponding fingerprint (i.e. the MINHASH multiValued field). Note that the MINHASH value(s) are not computed on the fly; they are computed at index time, when the text content is indexed.
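For reference, here are FINGERPRINT query strings of the kind parsed by the _Solr4QueryParser_; the syntax follows the Alfresco FTS documentation, and the DBID 1234 is just an example:

```java
public class FingerprintQueryExamples
{
    // nodes whose text content overlaps with the content of node 1234
    public static final String SIMILAR = "FINGERPRINT:1234";

    // same, but requiring an overlap of at least 20%
    public static final String SIMILAR_AT_LEAST_20_PERCENT = "FINGERPRINT:1234_20";
}
```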
### CachedDocTransformer
Jira Ticket: [SEARCH-1689](https://issues.alfresco.com/jira/browse/SEARCH-1689)
We said above that, before the content store removal, we had only three stored fields: id, DBID and _version_.
While that is perfectly reasonable from a "search" execution perspective, because we don't need stored fields at all in the
matching and scoring phases, it becomes a problem when we have to return the search results to the caller:
- a search client would probably need some other fields, like a title or a name
- we couldn't use Solr for retrieving those fields/values because we didn't store them
The only place where we had the stored content was the _SolrContentStore_, but Solr didn't know how to interact with it.
For this reason Alfresco introduced a custom _DocTransformer_. A _DocTransformer_ is an extension point provided by Solr for
introducing custom transformation logic at document level. Once the search has been executed, the transformer is invoked for
each matching document and can manipulate it.
This was one of the customisations strictly tied to the content store. Even after the content store removal, the doc
transformer is still there, because the field mapping/renaming executed at document/field level is crucial for decoupling
the Alfresco Data Model from the Solr schema.
The _DocTransformer_ could be referenced using the "[cached]" mnemonic code, which no longer communicates the new purpose.
For that reason a new alias, "[fmap]", has been introduced. The old "[cached]" code still works but will be deprecated.
The same consideration made for the highlighter is valid for this component as well: if the field mapping/renaming is
moved outside Solr, this component could easily be removed.
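As a usage illustration, here is a SolrJ query which requests the transformer through its new alias (a sketch: the query and the field list are illustrative):

```java
import org.apache.solr.client.solrj.SolrQuery;

public class FieldMappingTransformerUsage
{
    /** Builds a query whose result documents pass through the Alfresco field mapping transformer. */
    public SolrQuery buildQuery()
    {
        SolrQuery query = new SolrQuery("cm_name:report");

        // "[fmap]" is the new alias; "[cached]" would still work but will be deprecated
        query.setFields("DBID", "score", "[fmap]");
        return query;
    }
}
```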
### DateFormatDocTransformer
Jira Ticket: [SEARCH-2044](https://issues.alfresco.com/jira/browse/SEARCH-2044)
This is a new _DocTransformer_ introduced for maintaining backward compatibility with the date/datetime fields management
in InsightEngine.
The InsightEngine SQL interface uses a hybrid language for expressing queries. Specifically, while most of the
query language is plain standard SQL, everything related to date/datetime fields or expressions follows the Solr semantics.
For example, the Solr DateMath expressions can be used in SQL queries:
- select cm_created_month, count(*) as ct from alfresco where cm_owner = 'jimmy' and cm_created >= **'NOW/MONTH-6MONTHS'** group by cm_created_month
- select cm_created_year, count(*) as ct from alfresco where cm_owner = 'morton' and cm_created >= **'NOW/YEAR-4YEARS'** group by cm_created_year
Those expressions are not valid in SQL, so we must force the Calcite parser to treat them as "opaque" values.
In other words, everything related to date/datetime fields/expressions is considered an (opaque) string and isn't parsed by the
Calcite SQL parser: it is forwarded directly to Solr.
A _SolrInputDocument_ instance in the content store was composed of a set of fields whose values were exclusively a string
or a list of strings. After the content store removal SearchServices retrieves the stored content from Solr, and if a field
is declared as having a Date or DateTime field type, Solr will return its value as a _java.util.Date_.
The _DateFormatDocTransformer_ is a simple transformer which replaces the Date value of such fields with the corresponding UTC
string representation.
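A minimal sketch of the kind of replacement the transformer performs (this is not the actual implementation, and the field handling is simplified):

```java
import java.time.format.DateTimeFormatter;
import java.util.Date;

import org.apache.solr.common.SolrDocument;

public class DateFormatSketch
{
    /** Replaces a java.util.Date field value with its UTC (ISO-8601) string representation. */
    public void normalise(SolrDocument document, String fieldName)
    {
        Object value = document.getFieldValue(fieldName);
        if (value instanceof Date)
        {
            document.setField(fieldName, DateTimeFormatter.ISO_INSTANT.format(((Date) value).toInstant()));
        }
    }
}
```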
### SolrInformationServer
Jira Ticket: [SEARCH-1702](https://issues.alfresco.com/jira/browse/SEARCH-1702)
The _SolrInformationServer_ is a kind of Mediator/Facade between SearchServices and the underlying search platform. It is a
huge class which contains all the methods for manipulating the index. Those methods are mainly called by the tracker
subsystem.
It had a strong connection/interaction with the content store because it represents the central point where the three
different representations of the same data
- the incoming Node representing new or updated data which will create the "updated" version of document D
- the document D in the content store
- the document D in the Solr index
are managed, manipulated, updated or deleted, and finally indexed.
A first big change affecting the _SolrInformationServer_ was the removal of all interactions with the content store.
#### Atomic Updates
An important change has been the introduction of partial/atomic updates.
Imagine an update path:
- an incoming _Node_ arrives, containing data that needs to be updated
- the _Node_ is not an exact copy of the Solr document; for example, some fields have been computed at indexing time (e.g. MINHASH)
- the _SolrContentStore_ contains the exact copy of the last version of that Solr document previously sent to the index
- that document is loaded from the content store
- it is merged/updated with the data in the incoming _Node_
- the entry in the content store is overwritten
- the updated document is then sent to Solr
Without the _SolrContentStore_ that path is no longer possible, and it has been simplified a lot by the introduction of atomic updates.
Atomic updates are a way to execute indexing commands on the client side using an "update" semantic, by applying/indexing
a document which represents a partial state of a domain object (the incoming _Node_, in our case).
One of the main reasons why the _SolrInformationServer_ code has been widely changed is the introduction of atomic
updates. More information about this change can be found [here](https://sease.io/2020/01/apache-solr-atomic-updates-polymorphic-approach.html).
Note that enabling atomic updates also requires a major change in the configuration: the **UpdateLog**
must be enabled in order to make sure the updates are always applied to the latest version of the indexed document.
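To illustrate the semantics, here is a minimal SolrJ-style atomic update; this is only a sketch (field names and values are illustrative, and it does not show the polymorphic approach actually used in _SolrInformationServer_):

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.solr.common.SolrInputDocument;

public class AtomicUpdateSketch
{
    /** Builds a partial document which only updates the cm_title field of the target node. */
    public SolrInputDocument partialUpdate(String nodeId, String newTitle)
    {
        SolrInputDocument partial = new SolrInputDocument();
        partial.addField("id", nodeId); // the unique key of the document to update

        Map<String, Object> modifier = new HashMap<>();
        modifier.put("set", newTitle);  // "set" replaces the stored value; "add", "inc", ... also exist
        partial.addField("cm_title", modifier);
        return partial;
    }
}
```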
##### Dirty Text Content Detection
Another change introduced in the _SolrInformationServer_ class is related to how SearchServices (specifically
the _ContentTracker_) detects the documents whose text content needs to be updated.
Previously, we had a field in the Solr schema called **FTSSTATUS** that could take the following values:
- **Clean**: the text content of the document is in sync, no update is needed
- **New**: the document has just been created and still has to be updated with the corresponding text content
- **Dirty**: the text content of the document changed, so the new content needs to be retrieved and the document updated
After the content store removal, the FTSSTATUS field has been removed. This is because the field value was set depending on
the document state in the content store:
- if the incoming node didn't have a corresponding entry in the content store, it was set to **New**
- if the incoming node had a corresponding entry in the content store, the DOCID field value was compared between the node and the stored document; in case
the two values were different, the FTSSTATUS was set to **Dirty**
- once the _ContentTracker_ updated the document with the new text content, the FTSSTATUS was set to **Clean**
We no longer have the content store, so the comparison above cannot be done. For example, when a _Node_ arrives we
cannot know whether it corresponds to an existing document or whether it is the first time we see it.
We could request that information from Solr, but that would mean one query for each incoming _Node_, and that wouldn't be efficient.
The new approach uses two fields, as sketched below:
- **LATEST_APPLIED_CONTENT_VERSION_ID**: the identifier of the latest applied content property id
(content@s__docid@* or content@m__docid@*). It can be null (i.e. the incoming node doesn't have a value for that property,
even if it requires content indexing)
- **LAST_INCOMING_CONTENT_VERSION_ID**: if this field has the same value as the previous one (or it is equal to _SolrInformationServer.CONTENT_UPDATED_MARKER_),
then the content is supposed to be in sync. Otherwise (i.e. the value is different and it is not _SolrInformationServer.CONTENT_UPDATED_MARKER_,
or it is _SolrInformationServer.CONTENT_OUTDATED_MARKER_), the content is considered outdated and will therefore
be selected (later) by the _ContentTracker_.
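A sketch of the resulting check (the marker constant is passed in as a parameter and stands for _SolrInformationServer.CONTENT_UPDATED_MARKER_):

```java
import java.util.Objects;

public class ContentSyncCheckSketch
{
    /** Returns true if the text content of a document is considered in sync. */
    public boolean contentInSync(String lastIncomingContentVersionId,
                                 String latestAppliedContentVersionId,
                                 String contentUpdatedMarker)
    {
        return Objects.equals(lastIncomingContentVersionId, latestAppliedContentVersionId)
                || contentUpdatedMarker.equals(lastIncomingContentVersionId);
    }
}
```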
### AlfrescoReplicationHandler
This set of components, [introduced in SearchServices 1.4.x](https://issues.alfresco.com/jira/browse/SEARCH-1850) for including the content store in the Solr replication mechanism, has been removed
because we no longer have any external folders/files to be synced between master and slave(s). As a consequence,
the built-in Solr ReplicationHandler is used.
### Content Store Package and Tests
Jira Tickets: [SEARCH-1692](https://issues.alfresco.com/jira/browse/SEARCH-1692), [SEARCH-2025](https://issues.alfresco.com/jira/browse/SEARCH-2025)
Once the content store references have been removed from the components listed in the sections above, the _org.alfresco.solr.content_
package has been completely removed.

Binary files not shown: six new images added (138 KiB, 797 KiB, 298 KiB, 95 KiB, 383 KiB and 162 KiB).

View File

@@ -532,7 +532,7 @@ public class AlfrescoSolrDataModel implements QueryConstants
} }
catch (IOException e) catch (IOException e)
{ {
log.info("Failed to read shared properties fat " + propertiesFile.getAbsolutePath()); log.info("Failed to read shared properties at " + propertiesFile.getAbsolutePath());
} }
return props; return props;

View File

@@ -88,7 +88,7 @@ public interface InformationServer extends InformationServerCollectionProvider
void indexNode(Node node, boolean overwrite) throws IOException, AuthenticationException, JSONException; void indexNode(Node node, boolean overwrite) throws IOException, AuthenticationException, JSONException;
void indexNodes(List<Node> nodes, boolean overwrite, boolean cascade) throws IOException, AuthenticationException, JSONException; void indexNodes(List<Node> nodes, boolean overwrite) throws IOException, AuthenticationException, JSONException;
void cascadeNodes(List<NodeMetaData> nodes, boolean overwrite) throws IOException, AuthenticationException, JSONException; void cascadeNodes(List<NodeMetaData> nodes, boolean overwrite) throws IOException, AuthenticationException, JSONException;
@@ -183,4 +183,11 @@ public interface InformationServer extends InformationServerCollectionProvider
String getHostName(); String getHostName();
String getBaseUrl(); String getBaseUrl();
/**
* Check if cascade tracking is enabled.
*
* @return true if cascade tracking is enabled (note that this is the default behaviour if not specified in the properties file).
*/
boolean cascadeTrackingEnabled();
} }

View File

@@ -80,8 +80,23 @@ import java.io.PrintWriter;
import java.io.StringWriter; import java.io.StringWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.util.*; import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;
import java.util.function.Function; import java.util.function.Function;
@@ -92,9 +107,9 @@ import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor; import com.carrotsearch.hppc.cursors.LongCursor;
import org.alfresco.httpclient.AuthenticationException; import org.alfresco.httpclient.AuthenticationException;
import org.alfresco.model.ContentModel; import org.alfresco.model.ContentModel;
import org.alfresco.opencmis.dictionary.CMISStrictDictionaryService; import org.alfresco.opencmis.dictionary.CMISStrictDictionaryService;
@@ -104,6 +119,7 @@ import org.alfresco.repo.dictionary.NamespaceDAO;
import org.alfresco.repo.search.adaptor.lucene.QueryConstants; import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
import org.alfresco.service.cmr.dictionary.AspectDefinition; import org.alfresco.service.cmr.dictionary.AspectDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition; import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter; import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.security.AuthorityType; import org.alfresco.service.cmr.security.AuthorityType;
import org.alfresco.service.namespace.QName; import org.alfresco.service.namespace.QName;
@@ -148,7 +164,19 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
@@ -333,6 +361,8 @@ public class SolrInformationServer implements InformationServer
*/ */
private static final int BATCH_FACET_TXS = 4096; private static final int BATCH_FACET_TXS = 4096;
private static final String FINGERPRINT_FIELD = "MINHASH"; private static final String FINGERPRINT_FIELD = "MINHASH";
/** Shared property to determine if the cascade tracking is enabled. */
public static final String CASCADE_TRACKER_ENABLED = "alfresco.cascade.tracker.enabled";
private final static Function<String, List<Object>> LAZY_EMPTY_MUTABLE_LIST = key -> new ArrayList<>(); private final static Function<String, List<Object>> LAZY_EMPTY_MUTABLE_LIST = key -> new ArrayList<>();
@@ -559,6 +589,14 @@ public class SolrInformationServer implements InformationServer
return this.adminHandler; return this.adminHandler;
} }
@Override
public boolean cascadeTrackingEnabled()
{
return ofNullable((String) props.get(CASCADE_TRACKER_ENABLED))
.map(Boolean::parseBoolean)
.orElse(true);
}
@Override @Override
public synchronized void initSkippingDescendantDocs() public synchronized void initSkippingDescendantDocs()
{ {
@@ -661,6 +699,7 @@ public class SolrInformationServer implements InformationServer
String query = FIELD_ACLID + ":" + aclid + AND + FIELD_DOC_TYPE + ":" + DOC_TYPE_ACL; String query = FIELD_ACLID + ":" + aclid + AND + FIELD_DOC_TYPE + ":" + DOC_TYPE_ACL;
long count = this.getDocListSize(query); long count = this.getDocListSize(query);
aclReport.setIndexedAclDocCount(count); aclReport.setIndexedAclDocCount(count);
return aclReport; return aclReport;
} }
@@ -875,8 +914,6 @@ public class SolrInformationServer implements InformationServer
IntArrayList docList = docListCollector.getDocs(); IntArrayList docList = docListCollector.getDocs();
int size = docList.size(); int size = docList.size();
List<Long> processedTxns = new ArrayList<>(); List<Long> processedTxns = new ArrayList<>();
for (int i = 0; i < size; ++i) for (int i = 0; i < size; ++i)
{ {
@@ -1507,7 +1544,10 @@ public class SolrInformationServer implements InformationServer
public void dirtyTransaction(long txnId) public void dirtyTransaction(long txnId)
{ {
this.cleanContentCache.remove(txnId); this.cleanContentCache.remove(txnId);
this.cleanCascadeCache.remove(txnId); if (cascadeTrackingEnabled())
{
this.cleanCascadeCache.remove(txnId);
}
} }
@Override @Override
@@ -1594,16 +1634,16 @@ public class SolrInformationServer implements InformationServer
LOGGER.debug("Incoming Node {} with Status {}", node.getId(), node.getStatus()); LOGGER.debug("Incoming Node {} with Status {}", node.getId(), node.getStatus());
if ((node.getStatus() == SolrApiNodeStatus.DELETED) if ((node.getStatus() == SolrApiNodeStatus.DELETED)
|| (node.getStatus() == SolrApiNodeStatus.NON_SHARD_DELETED) || (node.getStatus() == SolrApiNodeStatus.UNKNOWN)
|| (node.getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED) || cascadeTrackingEnabled() && ((node.getStatus() == SolrApiNodeStatus.NON_SHARD_DELETED)
|| (node.getStatus() == SolrApiNodeStatus.UNKNOWN)) || (node.getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED)))
{ {
deleteNode(processor, request, node); deleteNode(processor, request, node);
} }
if ((node.getStatus() == SolrApiNodeStatus.UPDATED) if (node.getStatus() == SolrApiNodeStatus.UPDATED
|| (node.getStatus() == SolrApiNodeStatus.UNKNOWN) || node.getStatus() == SolrApiNodeStatus.UNKNOWN
|| (node.getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED)) || (cascadeTrackingEnabled() && node.getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED))
{ {
LOGGER.debug("Node {} is being updated", node.getId()); LOGGER.debug("Node {} is being updated", node.getId());
@@ -1844,7 +1884,7 @@ public class SolrInformationServer implements InformationServer
@Override @Override
public void indexNodes(List<Node> nodes, boolean overwrite, boolean cascade) throws IOException, JSONException public void indexNodes(List<Node> nodes, boolean overwrite) throws IOException, JSONException
{ {
UpdateRequestProcessor processor = null; UpdateRequestProcessor processor = null;
try (SolrQueryRequest request = newSolrQueryRequest()) try (SolrQueryRequest request = newSolrQueryRequest())
@@ -1857,13 +1897,54 @@ public class SolrInformationServer implements InformationServer
categorizeNodes(nodes, nodeIdsToNodes, nodeStatusToNodeIds); categorizeNodes(nodes, nodeIdsToNodes, nodeStatusToNodeIds);
List<Long> deletedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.DELETED)); List<Long> deletedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.DELETED));
List<Long> shardDeletedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.NON_SHARD_DELETED)); List<Long> shardDeletedNodeIds = Collections.emptyList();
List<Long> shardUpdatedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.NON_SHARD_UPDATED)); List<Long> shardUpdatedNodeIds = Collections.emptyList();
if (cascadeTrackingEnabled())
{
shardDeletedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.NON_SHARD_DELETED));
shardUpdatedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.NON_SHARD_UPDATED));
}
List<Long> unknownNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.UNKNOWN)); List<Long> unknownNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.UNKNOWN));
List<Long> updatedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.UPDATED)); List<Long> updatedNodeIds = notNullOrEmpty(nodeStatusToNodeIds.get(SolrApiNodeStatus.UPDATED));
if (!deletedNodeIds.isEmpty() || !shardDeletedNodeIds.isEmpty() || !shardUpdatedNodeIds.isEmpty() || !unknownNodeIds.isEmpty()) if (!deletedNodeIds.isEmpty() || !shardDeletedNodeIds.isEmpty() || !shardUpdatedNodeIds.isEmpty() || !unknownNodeIds.isEmpty())
{ {
// fix up any secondary paths
List<NodeMetaData> nodeMetaDatas = new ArrayList<>();
// For all deleted nodes, fake the node metadata
for (Long deletedNodeId : deletedNodeIds)
{
Node node = nodeIdsToNodes.get(deletedNodeId);
NodeMetaData nodeMetaData = createDeletedNodeMetaData(node);
nodeMetaDatas.add(nodeMetaData);
}
if (!unknownNodeIds.isEmpty())
{
NodeMetaDataParameters nmdp = new NodeMetaDataParameters();
nmdp.setNodeIds(unknownNodeIds);
// When deleting nodes, no additional information is required
nmdp.setIncludeChildIds(false);
nmdp.setIncludeChildAssociations(false);
nmdp.setIncludeAspects(false);
nmdp.setIncludePaths(false);
nmdp.setIncludeParentAssociations(false);
nodeMetaDatas.addAll(repositoryClient.getNodesMetaData(nmdp, Integer.MAX_VALUE));
}
for (NodeMetaData nodeMetaData : nodeMetaDatas)
{
Node node = nodeIdsToNodes.get(nodeMetaData.getId());
if (nodeMetaData.getTxnId() > node.getTxnId())
{
// the node has moved on to a later transaction
// it will be indexed later
continue;
}
}
LOGGER.debug("Deleting");
DeleteUpdateCommand delDocCmd = new DeleteUpdateCommand(request); DeleteUpdateCommand delDocCmd = new DeleteUpdateCommand(request);
String query = this.cloud.getQuery(FIELD_DBID, OR, deletedNodeIds, shardDeletedNodeIds, shardUpdatedNodeIds, unknownNodeIds); String query = this.cloud.getQuery(FIELD_DBID, OR, deletedNodeIds, shardDeletedNodeIds, shardUpdatedNodeIds, unknownNodeIds);
delDocCmd.setQuery(query); delDocCmd.setQuery(query);
@@ -1878,6 +1959,8 @@ public class SolrInformationServer implements InformationServer
nodeIds.addAll(unknownNodeIds); nodeIds.addAll(unknownNodeIds);
nodeIds.addAll(shardUpdatedNodeIds); nodeIds.addAll(shardUpdatedNodeIds);
nmdp.setNodeIds(nodeIds); nmdp.setNodeIds(nodeIds);
nmdp.setIncludeChildIds(false);
nmdp.setIncludeChildAssociations(false);
// Fetches bulk metadata // Fetches bulk metadata
List<NodeMetaData> nodeMetaDatas = repositoryClient.getNodesMetaData(nmdp, Integer.MAX_VALUE); List<NodeMetaData> nodeMetaDatas = repositoryClient.getNodesMetaData(nmdp, Integer.MAX_VALUE);
@@ -1895,12 +1978,12 @@ public class SolrInformationServer implements InformationServer
continue; continue;
} }
if (nodeIdsToNodes.get(nodeMetaData.getId()).getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED) if (cascadeTrackingEnabled() && nodeIdsToNodes.get(nodeMetaData.getId()).getStatus() == SolrApiNodeStatus.NON_SHARD_UPDATED)
{
if (nodeMetaData.getProperties().get(ContentModel.PROP_CASCADE_TX) != null)
{ {
indexNonShardCascade(nodeMetaData); if (nodeMetaData.getProperties().get(ContentModel.PROP_CASCADE_TX) != null)
} {
indexNonShardCascade(nodeMetaData);
}
continue; continue;
} }
@@ -2182,7 +2265,6 @@ public class SolrInformationServer implements InformationServer
private void deleteErrorNode(UpdateRequestProcessor processor, SolrQueryRequest request, Node node) throws IOException private void deleteErrorNode(UpdateRequestProcessor processor, SolrQueryRequest request, Node node) throws IOException
{ {
String errorDocId = PREFIX_ERROR + node.getId(); String errorDocId = PREFIX_ERROR + node.getId();
// Try finding the node before performing removal operation // Try finding the node before performing removal operation
@@ -2194,7 +2276,6 @@ public class SolrInformationServer implements InformationServer
delErrorDocCmd.setId(errorDocId); delErrorDocCmd.setId(errorDocId);
processor.processDelete(delErrorDocCmd); processor.processDelete(delErrorDocCmd);
} }
} }
private void deleteNode(UpdateRequestProcessor processor, SolrQueryRequest request, Node node) throws IOException private void deleteNode(UpdateRequestProcessor processor, SolrQueryRequest request, Node node) throws IOException
@@ -2220,7 +2301,6 @@ public class SolrInformationServer implements InformationServer
delDocCmd.setQuery(FIELD_DBID + ":" + dbid); delDocCmd.setQuery(FIELD_DBID + ":" + dbid);
processor.processDelete(delDocCmd); processor.processDelete(delDocCmd);
} }
} }
private boolean isContentIndexedForNode(Map<QName, PropertyValue> properties) private boolean isContentIndexedForNode(Map<QName, PropertyValue> properties)
@@ -2594,7 +2674,10 @@ public class SolrInformationServer implements InformationServer
input.addField(FIELD_INTXID, txn.getId()); input.addField(FIELD_INTXID, txn.getId());
input.addField(FIELD_TXCOMMITTIME, txn.getCommitTimeMs()); input.addField(FIELD_TXCOMMITTIME, txn.getCommitTimeMs());
input.addField(FIELD_DOC_TYPE, DOC_TYPE_TX); input.addField(FIELD_DOC_TYPE, DOC_TYPE_TX);
input.addField(FIELD_CASCADE_FLAG, 0); if (cascadeTrackingEnabled())
{
input.addField(FIELD_CASCADE_FLAG, 0);
}
cmd.solrDoc = input; cmd.solrDoc = input;
processor.processAdd(cmd); processor.processAdd(cmd);
} }
@@ -2635,8 +2718,11 @@ public class SolrInformationServer implements InformationServer
input.addField(FIELD_S_TXID, info.getId()); input.addField(FIELD_S_TXID, info.getId());
input.addField(FIELD_S_TXCOMMITTIME, info.getCommitTimeMs()); input.addField(FIELD_S_TXCOMMITTIME, info.getCommitTimeMs());
//Set the cascade flag to 1. This means cascading updates have not been done yet. if (cascadeTrackingEnabled())
input.addField(FIELD_CASCADE_FLAG, 1); {
//Set the cascade flag to 1. This means cascading updates have not been done yet.
input.addField(FIELD_CASCADE_FLAG, 1);
}
cmd.solrDoc = input; cmd.solrDoc = input;
processor.processAdd(cmd); processor.processAdd(cmd);
@@ -2862,7 +2948,6 @@ public class SolrInformationServer implements InformationServer
         {
             activeTrackerThreadsLock.writeLock().unlock();
         }
     }

     @Override

@@ -2907,13 +2992,13 @@ public class SolrInformationServer implements InformationServer
             if(batch.size() >= 200)
             {
-                indexNodes(batch, true, true);
+                indexNodes(batch, true);
                 batch.clear();
             }
         }
         if(batch.size() > 0)
         {
-            indexNodes(batch, true, true);
+            indexNodes(batch, true);
             batch.clear();
         }
     }

@@ -3340,7 +3425,6 @@ public class SolrInformationServer implements InformationServer
             SolrQueryRequest request, UpdateRequestProcessor processor, LinkedHashSet<Long> stack)
             throws AuthenticationException, IOException, JSONException
     {
         // skipDescendantDocsForSpecificAspects is initialised in a synchronised method, so access must also be synchronised
         synchronized (this)
         {
@@ -3801,4 +3885,14 @@ public class SolrInformationServer implements InformationServer
     {
         stream(notNullOrEmpty(fields)).forEach(document::removeField);
     }
+
+    private NodeMetaData createDeletedNodeMetaData(Node node)
+    {
+        NodeMetaData nodeMetaData = new NodeMetaData();
+        nodeMetaData.setId(node.getId());
+        nodeMetaData.setType(ContentModel.TYPE_DELETED);
+        nodeMetaData.setNodeRef(new NodeRef(node.getNodeRef()));
+        nodeMetaData.setTxnId(node.getTxnId());
+        return nodeMetaData;
+    }
 }
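
The new createDeletedNodeMetaData helper builds a minimal tombstone NodeMetaData straight from a Node, presumably so deleted nodes can be indexed without an extra metadata fetch. A standalone sketch of the same pattern, using simplified stand-ins for the Alfresco client classes:

    // Simplified stand-ins for the Alfresco client classes, illustrating the
    // tombstone-building pattern introduced above.
    public class DeletedNodeDemo
    {
        static class Node
        {
            long id; long txnId; String nodeRef;
        }

        static class NodeMetaData
        {
            long id; long txnId; String nodeRef; String type;
        }

        static NodeMetaData createDeletedNodeMetaData(Node node)
        {
            NodeMetaData meta = new NodeMetaData();
            meta.id = node.id;
            meta.type = "sys:deleted"; // ContentModel.TYPE_DELETED in the real code
            meta.nodeRef = node.nodeRef;
            meta.txnId = node.txnId;
            return meta;
        }

        public static void main(String[] args)
        {
            Node node = new Node();
            node.id = 42L; node.txnId = 7L; node.nodeRef = "workspace://SpacesStore/deleted-node";
            System.out.println(createDeletedNodeMetaData(node).type); // sys:deleted
        }
    }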


@@ -21,6 +21,15 @@ package org.alfresco.solr.lifecycle;
 import static java.util.Arrays.asList;
 import static java.util.Optional.ofNullable;
+import static org.alfresco.solr.SolrInformationServer.CASCADE_TRACKER_ENABLED;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+import java.util.function.Function;
+import java.util.function.Predicate;

 import org.alfresco.opencmis.dictionary.CMISStrictDictionaryService;
 import org.alfresco.solr.AlfrescoCoreAdminHandler;
 import org.alfresco.solr.AlfrescoSolrDataModel;

@@ -53,13 +62,6 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Properties;
-import java.util.function.Function;
-import java.util.function.Predicate;
-
 /**
  * Listeners for *FIRST SEARCHER* events in order to prepare and register the SolrContentStore and the Tracking Subsystem.
  *
@@ -254,19 +256,27 @@ public class SolrCoreLoadListener extends AbstractSolrEventListener
                 trackerRegistry,
                 scheduler);

-        CascadeTracker cascadeTracker =
-                registerAndSchedule(
-                        new CascadeTracker(props, repositoryClient, core.getName(), srv),
-                        core,
-                        props,
-                        trackerRegistry,
-                        scheduler);
+        List<Tracker> trackers = new ArrayList<>();
+
+        String cascadeTrackerEnabledProp = ofNullable((String) props.get(CASCADE_TRACKER_ENABLED)).orElse("true");
+        if (Boolean.valueOf(cascadeTrackerEnabledProp))
+        {
+            CascadeTracker cascadeTracker =
+                    registerAndSchedule(
+                            new CascadeTracker(props, repositoryClient, core.getName(), srv),
+                            core,
+                            props,
+                            trackerRegistry,
+                            scheduler);
+            trackers.add(cascadeTracker);
+        }

         //The CommitTracker will acquire these locks in order
         //The ContentTracker will likely have the longest runs so put it first to ensure the MetadataTracker is not paused while
         //waiting for the ContentTracker to release it's lock.
         //The aclTracker will likely have the shortest runs so put it last.
-        return asList(cascadeTracker, contentTracker, metadataTracker, aclTracker);
+        trackers.addAll(asList(contentTracker, metadataTracker, aclTracker));
+        return trackers;
     }

     /**
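
The enablement check above defaults to true when the core property is missing, so existing cores keep their cascade tracker. A self-contained sketch of that defaulting logic, assuming CASCADE_TRACKER_ENABLED matches the alfresco.cascade.tracker.enabled key added to solrcore.properties further down (the demo class itself is hypothetical):

    import java.util.Optional;
    import java.util.Properties;

    // Standalone illustration of the defaulting logic: cascade tracking stays on
    // unless the property is explicitly set to something other than "true".
    public class CascadePropertyDemo
    {
        static final String CASCADE_TRACKER_ENABLED = "alfresco.cascade.tracker.enabled";

        static boolean cascadeEnabled(Properties props)
        {
            String value = Optional.ofNullable((String) props.get(CASCADE_TRACKER_ENABLED)).orElse("true");
            return Boolean.valueOf(value);
        }

        public static void main(String[] args)
        {
            Properties props = new Properties();
            System.out.println(cascadeEnabled(props));  // true: missing property defaults to enabled
            props.put(CASCADE_TRACKER_ENABLED, "false");
            System.out.println(cascadeEnabled(props));  // false: the tracker would be skipped
        }
    }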


@@ -19,7 +19,11 @@
 package org.alfresco.solr.tracker;

+import static java.util.Optional.empty;
+import static java.util.Optional.ofNullable;
+
 import java.util.List;
+import java.util.Optional;
 import java.util.Properties;
 import java.util.concurrent.atomic.AtomicInteger;

@@ -41,7 +45,8 @@ public class CommitTracker extends AbstractTracker
     private MetadataTracker metadataTracker;
     private AclTracker aclTracker;
     private ContentTracker contentTracker;
-    private CascadeTracker cascadeTracker;
+    /** The cascade tracker. Note that this may be empty if cascade tracking is disabled. */
+    private Optional<CascadeTracker> cascadeTracker = empty();
     private AtomicInteger rollbackCount = new AtomicInteger(0);

     protected final static Logger log = LoggerFactory.getLogger(CommitTracker.class);

@@ -71,7 +76,7 @@ public class CommitTracker extends AbstractTracker
         } else if(tracker instanceof ContentTracker) {
             this.contentTracker = (ContentTracker)tracker;
         } else if(tracker instanceof CascadeTracker) {
-            this.cascadeTracker = (CascadeTracker)tracker;
+            this.cascadeTracker = ofNullable((CascadeTracker) tracker);
         }
     }
@@ -178,8 +183,11 @@ public class CommitTracker extends AbstractTracker
         contentTracker.getWriteLock().acquire();
         assert(contentTracker.getWriteLock().availablePermits() == 0);

-        cascadeTracker.getWriteLock().acquire();
-        assert(cascadeTracker.getWriteLock().availablePermits() == 0);
+        if (cascadeTracker.isPresent())
+        {
+            cascadeTracker.get().getWriteLock().acquire();
+            assert (cascadeTracker.get().getWriteLock().availablePermits() == 0);
+        }

         infoSrv.rollback();
     }
@@ -202,12 +210,12 @@ public class CommitTracker extends AbstractTracker
         contentTracker.invalidateState();

         //Reset cascadeTracker
-        cascadeTracker.setRollback(false);
-        cascadeTracker.invalidateState();
+        cascadeTracker.ifPresent(c -> c.setRollback(false));
+        cascadeTracker.ifPresent(c -> c.invalidateState());

         //Release the locks
         contentTracker.getWriteLock().release();
-        cascadeTracker.getWriteLock().release();
+        cascadeTracker.ifPresent(c -> c.getWriteLock().release());

         rollbackCount.incrementAndGet();
     }
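
With the field now an Optional, every rollback step becomes a no-op when cascade tracking is off instead of failing on a null tracker. A minimal standalone sketch of the guard pattern (the nested Tracker class is a simplified placeholder, not the real tracker API):

    import java.util.Optional;
    import java.util.concurrent.Semaphore;

    // Minimal stand-in showing the Optional guard pattern used above.
    public class OptionalGuardDemo
    {
        static class Tracker
        {
            final Semaphore writeLock = new Semaphore(1);
            void invalidateState() { System.out.println("state invalidated"); }
        }

        public static void main(String[] args) throws InterruptedException
        {
            Optional<Tracker> cascadeTracker = Optional.empty(); // cascade tracking disabled

            // Guarded acquire, as in the rollback path above:
            if (cascadeTracker.isPresent())
            {
                cascadeTracker.get().writeLock.acquire();
            }

            // Guarded reset/release: a no-op when empty, where a plain null field
            // would have thrown a NullPointerException.
            cascadeTracker.ifPresent(Tracker::invalidateState);
            cascadeTracker.ifPresent(t -> t.writeLock.release());
        }
    }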


@@ -32,6 +32,7 @@ import org.alfresco.repo.index.shard.ShardState;
 import org.alfresco.solr.BoundedDeque;
 import org.alfresco.solr.InformationServer;
 import org.alfresco.solr.NodeReport;
+import org.alfresco.solr.SolrInformationServer;
 import org.alfresco.solr.TrackerState;
 import org.alfresco.solr.adapters.IOpenBitSet;
 import org.alfresco.solr.client.GetNodesParameters;

@@ -83,6 +84,8 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
      * {@link org.alfresco.solr.client.SOLRAPIClient#GET_TX_INTERVAL_COMMIT_TIME}
      */
     private boolean txIntervalCommitTimeServiceAvailable = false;
+    /** Whether cascade tracking is enabled. */
+    private boolean cascadeTrackerEnabled = true;

     public MetadataTracker(final boolean isMaster, Properties p, SOLRAPIClient client, String coreName,
                 InformationServer informationServer)

@@ -107,6 +110,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
         transactionDocsBatchSize = Integer.parseInt(p.getProperty("alfresco.transactionDocsBatchSize", "100"));
         nodeBatchSize = Integer.parseInt(p.getProperty("alfresco.nodeBatchSize", "10"));
         threadHandler = new ThreadHandler(p, coreName, "MetadataTracker");
+        cascadeTrackerEnabled = informationServer.cascadeTrackingEnabled();

         // In order to apply performance optimizations, checking the availability of Repo Web Scripts is required.
         // As these services are available from ACS 6.2
@@ -957,7 +961,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
         List<Node> filteredNodes = filterNodes(nodes);
         if(filteredNodes.size() > 0)
         {
-            this.infoServer.indexNodes(filteredNodes, true, false);
+            this.infoServer.indexNodes(filteredNodes, true);
         }
     }
@@ -976,7 +980,7 @@ public class MetadataTracker extends CoreStatePublisher implements Tracker
         {
             filteredList.add(node);
         }
-        else
+        else if (cascadeTrackerEnabled)
         {
             if(node.getStatus() == SolrApiNodeStatus.UPDATED)
             {
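
A behavioural consequence of the new else-if: when cascade tracking is disabled, nodes that fall outside this shard simply drop out of filterNodes() rather than being re-categorised for cascade processing. A simplified sketch under that reading (stand-in types; the re-categorisation branch is reduced to a comment):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Behavioural sketch with stand-in types: "inShard" replaces the real
    // shard-membership check.
    public class FilterNodesDemo
    {
        static class Node
        {
            final long id;
            final boolean inShard;
            Node(long id, boolean inShard) { this.id = id; this.inShard = inShard; }
        }

        static List<Node> filterNodes(List<Node> nodes, boolean cascadeTrackerEnabled)
        {
            List<Node> filtered = new ArrayList<>();
            for (Node node : nodes)
            {
                if (node.inShard)
                {
                    filtered.add(node);
                }
                else if (cascadeTrackerEnabled)
                {
                    // Real tracker: re-categorise the node (e.g. NON_SHARD_UPDATED)
                    // so cascade bookkeeping still reaches this shard.
                }
                // else: cascade tracking disabled, the out-of-shard node is ignored
            }
            return filtered;
        }

        public static void main(String[] args)
        {
            List<Node> nodes = Arrays.asList(new Node(1, true), new Node(2, false));
            System.out.println(filterNodes(nodes, false).size()); // 1
        }
    }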


@@ -31,3 +31,6 @@ alfresco.cross.locale.datatype.1={http://www.alfresco.org/model/dictionary/1.0}content
 alfresco.cross.locale.datatype.2={http://www.alfresco.org/model/dictionary/1.0}mltext
 alfresco.model.tracker.cron=0/10 * * * * ? *
+
+# Whether cascade tracking is enabled. Path queries rely on cascade tracking.
+alfresco.cascade.tracker.enabled=true


@@ -787,25 +787,25 @@
     <!--
        These two fields are used after the content store removal for detecting the content indexing status:

        LATEST_APPLIED_CONTENT_VERSION_ID
            The identifier of the latest applied content property id (content@s__docid@* or content@m__docid@*).
            Note: it can be null (i.e. the incoming node doesn't have a value for that property, even if it requires content indexing).

        LAST_INCOMING_CONTENT_VERSION_ID
            Used for detecting documents that are outdated or still require content indexing.
            If this field has the same value as the previous one (or it has SolrInformationServer.CONTENT_UPDATED_MARKER as a value),
            then the content is up to date. Otherwise (the value is different and is not SolrInformationServer.CONTENT_UPDATED_MARKER,
            or it is SolrInformationServer.CONTENT_OUTDATED_MARKER), the content is considered outdated and will
            be managed (later) by the ContentTracker.

        They can have the following values:

        -10 (or SolrInformationServer.CONTENT_OUTDATED_MARKER)
            Whatever the content version in the incoming node is, this constant marks a document as OUTDATED (i.e. it will
            be picked up later by the ContentTracker, which starts the content update process).
            This is the default value of the LAST_INCOMING_CONTENT_VERSION_ID field.

        -20 (or SolrInformationServer.CONTENT_UPDATED_MARKER)
            This value marks a document/node as updated.
    -->
     <field name="LATEST_APPLIED_CONTENT_VERSION_ID" type="long_without_precision_step"/>
     <field name="LAST_INCOMING_CONTENT_VERSION_ID" type="long_without_precision_step" default="-10"/>


@@ -18,6 +18,30 @@
  */
 package org.alfresco.solr.lifecycle;

+import static java.util.Arrays.asList;
+import static org.alfresco.solr.SolrInformationServer.CASCADE_TRACKER_ENABLED;
+import static org.alfresco.solr.tracker.Tracker.Type.ACL;
+import static org.alfresco.solr.tracker.Tracker.Type.CASCADE;
+import static org.alfresco.solr.tracker.Tracker.Type.CONTENT;
+import static org.alfresco.solr.tracker.Tracker.Type.METADATA;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.same;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.stream.Collectors;
+
 import org.alfresco.solr.SolrInformationServer;
 import org.alfresco.solr.client.SOLRAPIClient;
 import org.alfresco.solr.tracker.AclTracker;
@@ -26,6 +50,7 @@ import org.alfresco.solr.tracker.ContentTracker;
 import org.alfresco.solr.tracker.MetadataTracker;
 import org.alfresco.solr.tracker.SolrTrackerScheduler;
 import org.alfresco.solr.tracker.Tracker;
+import org.alfresco.solr.tracker.Tracker.Type;
 import org.alfresco.solr.tracker.TrackerRegistry;
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
@@ -36,20 +61,6 @@ import org.mockito.Mock;
 import org.mockito.junit.MockitoJUnitRunner;
 import org.xml.sax.InputSource;

-import java.util.List;
-import java.util.Properties;
-
-import static java.util.Arrays.asList;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.eq;
-import static org.mockito.ArgumentMatchers.same;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
 /**
  * Unit tests for the {@link SolrCoreLoadListener}.
  *
@@ -83,6 +94,8 @@
     @Before
     public void setUp()
     {
+        initMocks(this);
+
         listener = new SolrCoreLoadListener(core);

         when(core.getName()).thenReturn(coreName);
@@ -104,7 +117,29 @@
         verify(scheduler).schedule(any(MetadataTracker.class), eq(coreName), same(coreProperties));
         verify(scheduler).schedule(any(CascadeTracker.class), eq(coreName), same(coreProperties));

-        assertEquals(4, coreTrackers.size());
+        Set<Type> trackerTypes = coreTrackers.stream().map(Tracker::getType).collect(Collectors.toSet());
+        assertEquals("Unexpected trackers found.", Set.of(ACL, CONTENT, METADATA, CASCADE), trackerTypes);
+    }
+
+    @Test
+    public void testDisabledCascadeTracking()
+    {
+        coreProperties.put(CASCADE_TRACKER_ENABLED, "false");
+
+        List<Tracker> coreTrackers = listener.createAndScheduleCoreTrackers(core, registry, coreProperties, scheduler, api, informationServer);
+
+        verify(registry).register(eq(coreName), any(AclTracker.class));
+        verify(registry).register(eq(coreName), any(ContentTracker.class));
+        verify(registry).register(eq(coreName), any(MetadataTracker.class));
+        verify(registry, never()).register(eq(coreName), any(CascadeTracker.class));
+
+        verify(scheduler).schedule(any(AclTracker.class), eq(coreName), same(coreProperties));
+        verify(scheduler).schedule(any(ContentTracker.class), eq(coreName), same(coreProperties));
+        verify(scheduler).schedule(any(MetadataTracker.class), eq(coreName), same(coreProperties));
+        verify(scheduler, never()).schedule(any(CascadeTracker.class), eq(coreName), same(coreProperties));
+
+        Set<Type> trackerTypes = coreTrackers.stream().map(Tracker::getType).collect(Collectors.toSet());
+        assertEquals("Unexpected trackers found.", Set.of(ACL, CONTENT, METADATA), trackerTypes);
     }

     @Test


@@ -52,7 +52,7 @@ import java.util.List;
 @LuceneTestCase.SuppressCodecs({"Appending","Lucene3x","Lucene40","Lucene41","Lucene42","Lucene43", "Lucene44", "Lucene45","Lucene46","Lucene47","Lucene48","Lucene49"})
 @SolrTestCaseJ4.SuppressSSL
-public class CascadeTrackerIT extends AbstractAlfrescoSolrIT
+public class CascadingIT extends AbstractAlfrescoSolrIT
 {
     private static long MAX_WAIT_TIME = 80000;


@@ -120,7 +120,7 @@ public class MetadataTrackerTest
         this.metadataTracker.doTrack();

         InOrder inOrder = inOrder(srv);
-        inOrder.verify(srv).indexNodes(nodes, true, false);
+        inOrder.verify(srv).indexNodes(nodes, true);
         inOrder.verify(srv).indexTransaction(tx, true);
         inOrder.verify(srv).commit();
     }