From 8c5806f3d28e9bc32b0a2ffc052234d993209268 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 8 Jan 2026 14:34:53 -0500 Subject: [PATCH 1/5] indexing performance improvement --- .../search/SolrIndexServiceBean.java | 51 +++++++++++-------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index aec352a615b..fee5195c209 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -365,8 +365,20 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) indexPermissionsForOneDvObject(definitionPoint); numObjects++; - // Process the dataset's files in a new transaction - self.indexDatasetFilesInNewTransaction(dataset.getId(), counter, fileQueryMin); + // Prepare the data needed for the new transaction. + // This ensures lazy-loaded collections are fetched here. + Map desiredCards = searchPermissionsService.getDesiredCards(dataset); + List versionsToIndex = new ArrayList<>(); + for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { + if (desiredCards.get(version.getVersionState())) { + // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction + version.getFileMetadatas().size(); + versionsToIndex.add(version); + } + } + + // Process the dataset's files in a new transaction, passing the pre-loaded data + self.indexDatasetFilesInNewTransaction(versionsToIndex, counter, fileQueryMin); } else { // For other types (like files), just index in a new transaction indexPermissionsForOneDvObject(definitionPoint); @@ -399,15 +411,11 @@ public void indexDatasetBatchInNewTransaction(List datasetIds, final int[] } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void indexDatasetFilesInNewTransaction(Long datasetId, final int[] fileCounter, int fileQueryMin) { - Dataset dataset = datasetService.find(datasetId); - if (dataset != null) { - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); - for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { - if (desiredCards.get(version.getVersionState())) { - processDatasetVersionFiles(version, fileCounter, fileQueryMin); - } - } + public void indexDatasetFilesInNewTransaction(List versions, final int[] fileCounter, int fileQueryMin) { + for (DatasetVersion version : versions) { + // The version object is detached, but its fileMetadatas collection is already loaded. + // We only need its ID and state, which are available. + processDatasetVersionFiles(version, fileCounter, fileQueryMin); } } @@ -423,18 +431,19 @@ private void processDatasetVersionFiles(DatasetVersion version, if (version.getFileMetadatas().size() > fileQueryMin) { // For large datasets, use a more efficient SQL query - Stream fileStream = getDataFileInfoForPermissionIndexing(version.getId()); + try (Stream fileStream = getDataFileInfoForPermissionIndexing(version.getId())) { - // Process files in batches to avoid memory issues - fileStream.forEach(fileInfo -> { - filesToReindexAsBatch.add(fileInfo); - fileCounter[0]++; + // Process files in batches to avoid memory issues + fileStream.forEach(fileInfo -> { + filesToReindexAsBatch.add(fileInfo); + fileCounter[0]++; - if (filesToReindexAsBatch.size() >= batchSize) { - reindexFilesInBatches(filesToReindexAsBatch, cachedPerms, versionId, solrIdEnd); - filesToReindexAsBatch.clear(); - } - }); + if (filesToReindexAsBatch.size() >= batchSize) { + reindexFilesInBatches(filesToReindexAsBatch, cachedPerms, versionId, solrIdEnd); + filesToReindexAsBatch.clear(); + } + }); + } } else { // For smaller datasets, process files directly for (FileMetadata fmd : version.getFileMetadatas()) { From a403cb0f27971cae90ccdc10e2614869b77cc18f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jan 2026 15:17:10 -0500 Subject: [PATCH 2/5] documentation, improve perm only performance --- .../search/SolrIndexServiceBean.java | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index fee5195c209..7ae239d9a2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.search; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; @@ -53,6 +54,8 @@ public class SolrIndexServiceBean { @EJB SearchPermissionsServiceBean searchPermissionsService; @EJB + DataFileServiceBean dataFileService; + @EJB DataverseServiceBean dataverseService; @EJB DatasetServiceBean datasetService; @@ -365,14 +368,24 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) indexPermissionsForOneDvObject(definitionPoint); numObjects++; - // Prepare the data needed for the new transaction. - // This ensures lazy-loaded collections are fetched here. + /** + * Prepare the data needed for the new transaction. For performance reasons, indexDatasetFilesInNewTransaction does not merge the dataset or versions into the new transaction (we only read info, there + * are no changes to write). However, there are two ways the code here is used. In one case, indexing content and permissions, the versions and fileMetadatas in them are already loaded. In the other + * case, indexing permissions only, the fileMetadatas are not yet loaded, and we may need them, but only if there are fewer than fileQueryMin. For each version that will get reindexed (at most two of + * them), the code below does a lightweight query to see how many fileMetadatas exist in it and, if it is equal to or below fileQueryMin, calls getFileMetadatas().size() to assure they are loaded + * (before we pass the version into a new transaction where it will be detached and fileMetadatas can't be loaded). Calling getFileMetadas.size() should be lightweight when the fileMetadatas are + * loaded (first case) and done only when needed for the second case. + * + **/ Map desiredCards = searchPermissionsService.getDesiredCards(dataset); List versionsToIndex = new ArrayList<>(); for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { if (desiredCards.get(version.getVersionState())) { - // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction - version.getFileMetadatas().size(); + int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue(); + if (fileCount >= fileQueryMin) { + // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction + version.getFileMetadatas().size(); + } versionsToIndex.add(version); } } @@ -429,7 +442,7 @@ private void processDatasetVersionFiles(DatasetVersion version, // Process files in batches of 100 int batchSize = 100; - if (version.getFileMetadatas().size() > fileQueryMin) { + if (dataFileService.findCountByDatasetVersionId(version.getId()).intValue() > fileQueryMin) { // For large datasets, use a more efficient SQL query try (Stream fileStream = getDataFileInfoForPermissionIndexing(version.getId())) { @@ -446,6 +459,7 @@ private void processDatasetVersionFiles(DatasetVersion version, } } else { // For smaller datasets, process files directly + // We only call getFileMetadatas() in the case where we know they have already been loaded for (FileMetadata fmd : version.getFileMetadatas()) { DataFileProxy fileProxy = new DataFileProxy(fmd); filesToReindexAsBatch.add(fileProxy); From b4fc8220cf2063a06f813317f0276dc54c6205c6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jan 2026 15:57:10 -0500 Subject: [PATCH 3/5] release note --- doc/release-notes/12082-permission-indexing-improvements.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release-notes/12082-permission-indexing-improvements.md diff --git a/doc/release-notes/12082-permission-indexing-improvements.md b/doc/release-notes/12082-permission-indexing-improvements.md new file mode 100644 index 00000000000..38d71d3f28f --- /dev/null +++ b/doc/release-notes/12082-permission-indexing-improvements.md @@ -0,0 +1,6 @@ +Changes in v6.9 that significantly improved re-indexing performance and lowered memory use in situations +such as when a user's role on the root collection were changed, also slowed reindexing of individual +datasets ater editing and publication. + +This release restores/improves the individual dataset reindexing performance while retaining the +benefits of the earlier update. From 5018dd5117f0fba5345f38daaa7ff873a281d089 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 15 Jan 2026 10:29:09 -0500 Subject: [PATCH 4/5] simplify --- .../edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 7ae239d9a2a..ee160b428a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -412,16 +412,12 @@ public void indexDatasetBatchInNewTransaction(List datasetIds, final int[] indexPermissionsForOneDvObject(dataset); // Process files for this dataset - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); - for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { - if (desiredCards.get(version.getVersionState())) { processDatasetVersionFiles(version, fileCounter, fileQueryMin); } } } } - } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void indexDatasetFilesInNewTransaction(List versions, final int[] fileCounter, int fileQueryMin) { From 9ded209165c4b02754950056245232db9b24b56b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 15 Jan 2026 16:13:25 -0500 Subject: [PATCH 5/5] simplify2 --- .../search/SearchPermissionsServiceBean.java | 46 ------ .../search/SolrIndexServiceBean.java | 96 +++++------- .../search/SolrIndexServiceBeanTest.java | 140 ++++++++++++++++++ 3 files changed, 181 insertions(+), 101 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchPermissionsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchPermissionsServiceBean.java index c25a462efab..921bf97496f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchPermissionsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchPermissionsServiceBean.java @@ -92,52 +92,6 @@ public List findDvObjectPerms(DvObject dvObject) { return permStrings; } - public Map getDesiredCards(Dataset dataset) { - Map desiredCards = new LinkedHashMap<>(); - DatasetVersion latestVersion = dataset.getLatestVersion(); - DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - boolean atLeastOnePublishedVersion = false; - if (releasedVersion != null) { - atLeastOnePublishedVersion = true; - } else { - atLeastOnePublishedVersion = false; - } - - if (atLeastOnePublishedVersion == false) { - if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { - desiredCards.put(DatasetVersion.VersionState.DRAFT, true); - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - } else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true); - desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - } else { - String msg = "No-op. Unexpected condition reached: There is no published version and the latest published version is neither " + DatasetVersion.VersionState.DRAFT + " nor " + DatasetVersion.VersionState.DEACCESSIONED + ". Its state is " + latestVersionState + "."; - logger.info(msg); - } - } else if (atLeastOnePublishedVersion == true) { - if (latestVersionState.equals(DatasetVersion.VersionState.RELEASED) - || latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { - desiredCards.put(DatasetVersion.VersionState.RELEASED, true); - desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - } else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { - desiredCards.put(DatasetVersion.VersionState.DRAFT, true); - desiredCards.put(DatasetVersion.VersionState.RELEASED, true); - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - } else { - String msg = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft"; - logger.info(msg); - } - } else { - String msg = "No-op. Unexpected condition reached: Has a version been published or not?"; - logger.info(msg); - } - return desiredCards; - } - private boolean hasBeenPublished(Dataverse dataverse) { return dataverse.isReleased(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index ee160b428a6..e2e307e3ced 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -86,15 +86,14 @@ public List determineSolrDocs(DvObject dvObject) { solrDocs.addAll(datasetSolrDocs); } else if (dvObject.isInstanceofDataFile()) { DataFile datafile = (DataFile) dvObject; - Map desiredCards = searchPermissionsService.getDesiredCards(datafile.getOwner()); Set datasetVersions = datasetVersionsToBuildCardsFor(datafile.getOwner()); for (DatasetVersion version : datasetVersions) { - if(desiredCards.containsKey(version.getVersionState()) && desiredCards.get(version.getVersionState()) && datafile.isInDatasetVersion(version)) { - List cachedPerms = searchPermissionsService.findDatasetVersionPerms(version); - String solrIdEnd = getDatasetOrDataFileSolrEnding(version.getVersionState()); - Long versionId = version.getId(); - DvObjectSolrDoc fileSolrDoc = constructDatafileSolrDoc(new DataFileProxy(datafile.getFileMetadata()), cachedPerms, versionId, solrIdEnd); - solrDocs.add(fileSolrDoc); + if (datafile.isInDatasetVersion(version)) { + List cachedPerms = searchPermissionsService.findDatasetVersionPerms(version); + String solrIdEnd = getDatasetOrDataFileSolrEnding(version.getVersionState()); + Long versionId = version.getId(); + DvObjectSolrDoc fileSolrDoc = constructDatafileSolrDoc(new DataFileProxy(datafile.getFileMetadata()), cachedPerms, versionId, solrIdEnd); + solrDocs.add(fileSolrDoc); } } } else { @@ -136,13 +135,9 @@ private DvObjectSolrDoc constructDataverseSolrDoc(Dataverse dataverse) { private List constructDatasetSolrDocs(Dataset dataset) { List emptyList = new ArrayList<>(); List solrDocs = emptyList; - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) { - boolean cardShouldExist = desiredCards.get(version.getVersionState()); - if (cardShouldExist) { - DvObjectSolrDoc datasetSolrDoc = makeDatasetSolrDoc(version); - solrDocs.add(datasetSolrDoc); - } + DvObjectSolrDoc datasetSolrDoc = makeDatasetSolrDoc(version); + solrDocs.add(datasetSolrDoc); } return solrDocs; } @@ -161,39 +156,45 @@ private DvObjectSolrDoc constructDatafileSolrDoc(DataFileProxy fileProxy, List constructDatafileSolrDocsFromDataset(Dataset dataset) { List datafileSolrDocs = new ArrayList<>(); - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { - boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); - if (cardShouldExist) { - List perms = new ArrayList<>(); - if (datasetVersionFileIsAttachedTo.isReleased()) { - perms.add(IndexServiceBean.getPublicGroupString()); - } else { - perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); - } + List perms = new ArrayList<>(); + if (datasetVersionFileIsAttachedTo.isReleased()) { + perms.add(IndexServiceBean.getPublicGroupString()); + } else { + perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); + } - for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { - Long fileId = fileMetadata.getDataFile().getId(); - String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId; - String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); - String solrId = solrIdStart + solrIdEnd; - DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); - logger.finest("adding fileid " + fileId); - datafileSolrDocs.add(dataFileSolrDoc); - } + for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { + Long fileId = fileMetadata.getDataFile().getId(); + String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId; + String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); + String solrId = solrIdStart + solrIdEnd; + DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); + logger.finest("adding fileid " + fileId); + datafileSolrDocs.add(dataFileSolrDoc); } } return datafileSolrDocs; } + /** Find the versions to index. The overall logic is + * If there is only one version, or no released version (all non-draft versions are deaccessioned) + * then index it regardless of it's versionstate + * If there are released versions + * then index the latest released version and a draft version if one exists + * Hence - the latest deaccessioned version is only indexed if there is no released version + * @param dataset + * @return the set of versions to build cards for + */ private Set datasetVersionsToBuildCardsFor(Dataset dataset) { Set datasetVersions = new HashSet<>(); DatasetVersion latest = dataset.getLatestVersion(); - if (latest != null) { + DatasetVersion released = dataset.getReleasedVersion(); + if (latest != null && (released == null || latest.isDraft())) { datasetVersions.add(latest); } - DatasetVersion released = dataset.getReleasedVersion(); if (released != null) { + //May be the same as the latest version - only one copy will be in the set in that case datasetVersions.add(released); } return datasetVersions; @@ -377,17 +378,14 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) * loaded (first case) and done only when needed for the second case. * **/ - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); List versionsToIndex = new ArrayList<>(); - for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { - if (desiredCards.get(version.getVersionState())) { - int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue(); - if (fileCount >= fileQueryMin) { - // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction - version.getFileMetadatas().size(); - } - versionsToIndex.add(version); + for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) { + int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue(); + if (fileCount >= fileQueryMin) { + // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction + version.getFileMetadatas().size(); } + versionsToIndex.add(version); } // Process the dataset's files in a new transaction, passing the pre-loaded data @@ -412,7 +410,7 @@ public void indexDatasetBatchInNewTransaction(List datasetIds, final int[] indexPermissionsForOneDvObject(dataset); // Process files for this dataset - for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) { + for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) { processDatasetVersionFiles(version, fileCounter, fileQueryMin); } } @@ -496,18 +494,6 @@ private void reindexFilesInBatches(List filesToReindexAsBatch, Li } } - private List versionsToReIndexPermissionsFor(Dataset dataset) { - List versionsToReindexPermissionsFor = new ArrayList<>(); - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); - for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) { - boolean cardShouldExist = desiredCards.get(version.getVersionState()); - if (cardShouldExist) { - versionsToReindexPermissionsFor.add(version); - } - } - return versionsToReindexPermissionsFor; - } - public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { if (solrIdsToDelete.isEmpty()) { return new IndexResponse("nothing to delete"); diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java new file mode 100644 index 00000000000..f04d5a7896c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java @@ -0,0 +1,140 @@ + +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.InjectMocks; +import org.mockito.MockitoAnnotations; + +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +public class SolrIndexServiceBeanTest { + + @InjectMocks + private SolrIndexServiceBean solrIndexServiceBean; + + @BeforeEach + public void setUp() { + MockitoAnnotations.openMocks(this); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyDraft() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion draftVersion = createMockVersion(1L, DatasetVersion.VersionState.DRAFT, true); + + when(dataset.getLatestVersion()).thenReturn(draftVersion); + when(dataset.getReleasedVersion()).thenReturn(null); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(draftVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyDeaccessioned() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion deaccessionedVersion = createMockVersion(1L, DatasetVersion.VersionState.DEACCESSIONED, false); + + when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion); + when(dataset.getReleasedVersion()).thenReturn(null); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(deaccessionedVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyReleased() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + + when(dataset.getLatestVersion()).thenReturn(releasedVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(releasedVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_ReleasedAndDraft() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + DatasetVersion draftVersion = createMockVersion(2L, DatasetVersion.VersionState.DRAFT, true); + + when(dataset.getLatestVersion()).thenReturn(draftVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(2, result.size()); + assertTrue(result.contains(releasedVersion)); + assertTrue(result.contains(draftVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_ReleasedAndDeaccessioned() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + DatasetVersion deaccessionedVersion = createMockVersion(2L, DatasetVersion.VersionState.DEACCESSIONED, false); + + // Latest is deaccessioned, but there's a released version + when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + // Should only return the released version, not the deaccessioned one + assertEquals(1, result.size()); + assertTrue(result.contains(releasedVersion)); + assertFalse(result.contains(deaccessionedVersion)); + } + + // Helper method to create mock DatasetVersion + private DatasetVersion createMockVersion(Long id, DatasetVersion.VersionState state, boolean isDraft) { + DatasetVersion version = mock(DatasetVersion.class); + when(version.getId()).thenReturn(id); + when(version.getVersionState()).thenReturn(state); + when(version.isDraft()).thenReturn(isDraft); + when(version.isReleased()).thenReturn(state == DatasetVersion.VersionState.RELEASED); + when(version.isDeaccessioned()).thenReturn(state == DatasetVersion.VersionState.DEACCESSIONED); + return version; + } + + // Helper method to invoke the private method using reflection + @SuppressWarnings("unchecked") + private Set invokeDatasetVersionsToBuildCardsFor(Dataset dataset) { + try { + java.lang.reflect.Method method = SolrIndexServiceBean.class.getDeclaredMethod( + "datasetVersionsToBuildCardsFor", Dataset.class); + method.setAccessible(true); + return (Set) method.invoke(solrIndexServiceBean, dataset); + } catch (Exception e) { + throw new RuntimeException("Failed to invoke private method", e); + } + } +} \ No newline at end of file