6 changes: 6 additions & 0 deletions doc/release-notes/12082-permission-indexing-improvements.md
@@ -0,0 +1,6 @@
Changes in v6.9 that significantly improved re-indexing performance and lowered memory use in situations
such as when a user's role on the root collection is changed also slowed the reindexing of individual
datasets after editing and publication.

This release restores/improves the individual dataset reindexing performance while retaining the
benefits of the earlier update.
2 changes: 2 additions & 0 deletions doc/release-notes/12094permission-indexing-improvements3.md
@@ -0,0 +1,2 @@
(Assuming the earlier PRs have been merged, there will already be a section on indexing improvements.)
This release also avoids creating unused Solr entries for files in drafts of new versions of published datasets (decreasing the Solr db size and thereby improving performance).
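
For anyone who wants to observe the effect, a minimal sketch (not part of this PR) that counts draft datafile docs in Solr before and after reindexing; the core name "collection1" and the "datafile_<id>_draft" id pattern are assumptions based on a default Dataverse installation:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.Http2SolrClient;

public class DraftDocCount {
    public static void main(String[] args) throws Exception {
        // Hypothetical check: count draft-suffixed datafile docs to observe
        // the reduction in unused Solr entries described above.
        try (SolrClient solr = new Http2SolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
            SolrQuery q = new SolrQuery("id:datafile_*_draft");
            q.setRows(0); // only the hit count is needed
            long found = solr.query(q).getResults().getNumFound();
            System.out.println("draft file docs: " + found);
        }
    }
}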
8 changes: 4 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/FileMetadata.java
@@ -67,15 +67,15 @@
*/
@Table(indexes = {@Index(columnList="datafile_id"), @Index(columnList="datasetversion_id")} )
@NamedNativeQuery(
name = "FileMetadata.compareFileMetadata",
name = "FileMetadata.getDatafilesWithChangedMetadata",
query = "WITH fm_categories AS (" +
" SELECT fmd.filemetadatas_id, " +
" STRING_AGG(dfc.name, ',' ORDER BY dfc.name) AS categories " +
" FROM FileMetadata_DataFileCategory fmd " +
" JOIN DataFileCategory dfc ON fmd.filecategories_id = dfc.id " +
" GROUP BY fmd.filemetadatas_id " +
") " +
"SELECT fm1.id " +
"SELECT fm1.datafile_id AS id " +
"FROM FileMetadata fm1 " +
"LEFT JOIN FileMetadata fm2 ON fm1.datafile_id = fm2.datafile_id " +
" AND fm2.datasetversion_id = ?1 " +
@@ -93,11 +93,11 @@
" ) " +
" ) " +
" )",
resultSetMapping = "IdToLongMapping"
resultSetMapping = "IdToIntegerMapping"
)
/* When this mapping was to Long.class, Postgres was still returning an Integer, causing indexing failures - see #11776 */
@SqlResultSetMapping(
name = "IdToLongMapping",
name = "IdToIntegerMapping",
columns = @ColumnResult(name = "id", type = Integer.class)
)
@Entity
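
For context on the query above: the fm_categories CTE collapses each filemetadata's category names into one sorted, comma-joined string, so the category sets of two versions can be compared with a single equality test, and the outer SELECT now returns datafile ids (mapped to Integer, matching what Postgres actually returns) rather than filemetadata ids. A small Java analog of the STRING_AGG comparison, for illustration only:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class CategoryCompare {
    public static void main(String[] args) {
        // Analog of STRING_AGG(dfc.name, ',' ORDER BY dfc.name):
        // sort, then join, so category sets compare as a single string.
        List<String> released = new ArrayList<>(List.of("Documentation", "Data"));
        List<String> draft = new ArrayList<>(List.of("Data", "Documentation"));
        Collections.sort(released);
        Collections.sort(draft);
        String releasedAgg = String.join(",", released); // "Data,Documentation"
        String draftAgg = String.join(",", draft);       // "Data,Documentation"
        System.out.println(releasedAgg.equals(draftAgg)); // true: categories unchanged
    }
}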
57 changes: 23 additions & 34 deletions src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -602,10 +602,24 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
writeDebugInfo(debug, dataset);
}
if (doNormalSolrDocCleanUp) {
List<String> solrIdsOfPermissionDocsToDelete = new ArrayList<>();
try {
solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId());
logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete));
if (!solrIdsOfDocsToDelete.isEmpty()) {
if (!latestVersion.isDraft()) {
// After publication, we need to delete old draft perm docs
// For the first draft, a perm doc will exist for each file
// For subsequent drafts, perm docs should only exist for new files/those with changed metadata
// This code adds the ids of draft perm docs for all files - if the docs don't exist, Solr will just ignore them
for (String fileDocId : solrIdsOfDocsToDelete) {
if (!fileDocId.endsWith(draftSuffix)) {
solrIdsOfPermissionDocsToDelete.add(fileDocId + draftSuffix + discoverabilityPermissionSuffix);
}
}

logger.fine("Existing permission docs: " + String.join(", ", solrIdsOfPermissionDocsToDelete));
}
// We keep the latest version's docs unless it is deaccessioned and there is no
// published/released version
// So skip the loop removing those docs from the delete list except in that case
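
To make the id construction above concrete, a minimal sketch, assuming draftSuffix is "_draft" and discoverabilityPermissionSuffix is "_permission" (the actual constants are defined elsewhere in IndexServiceBean and are not shown in this diff):

// Hypothetical values; the real constants live elsewhere in IndexServiceBean.
String draftSuffix = "_draft";
String discoverabilityPermissionSuffix = "_permission";

String fileDocId = "datafile_42"; // Solr doc id of a published file
// After publication, the corresponding draft perm doc (if one was created) is:
String draftPermDocId = fileDocId + draftSuffix + discoverabilityPermissionSuffix;
// -> "datafile_42_draft_permission"; deleting an id that never existed is a no-op in Solr.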
@@ -649,7 +663,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete));

if (!solrIdsOfDocsToDelete.isEmpty()) {
List<String> solrIdsOfPermissionDocsToDelete = new ArrayList<>();

for (String file : solrIdsOfDocsToDelete) {
// Also remove associated permission docs
solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix);
@@ -1416,7 +1430,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
long maxSize = maxFTIndexingSize != null ? maxFTIndexingSize.longValue() : Long.MAX_VALUE;

List<String> filesIndexed = new ArrayList<>();
final List<Long> changedFileMetadataIds = new ArrayList<>();
final List<Long> changedFileIds = new ArrayList<>();
if (datasetVersion != null) {
List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
List<FileMetadata> rfm = new ArrayList<>();
@@ -1427,42 +1441,17 @@
fileMap.put(released.getDataFile().getId(), released);
}

Query query = em.createNamedQuery("FileMetadata.compareFileMetadata", Long.class);
query.setParameter(1, dataset.getReleasedVersion().getId());
query.setParameter(2, datasetVersion.getId());

/*
* When the query was configured to return Long, it was returning Integer. The query has been changed to return Integer now. The code here is robust if that changes in the future.
*/
List<Object> queryResults = query.getResultList();
for (Object result : queryResults) {
if (result != null) {
// Ensure we're adding Long objects to the list
if (result instanceof Integer intResult) {
logger.finest("Converted Integer result to Long: " + result);
changedFileMetadataIds.add(Long.valueOf(intResult));
} else if (result instanceof Long longResult) {
// Already a Long, add directly
logger.finest("Added existing Long to list: " + result);
changedFileMetadataIds.add(longResult);
} else {
// If it's not a Long, convert it to one via String
try {
changedFileMetadataIds.add(Long.valueOf(result.toString()));
logger.finest("Converted non-Long result to Long: " + result + " of type " + result.getClass().getName());
} catch (NumberFormatException e) {
logger.warning("Could not convert query result to Long: " + result);
}
}
}
}
solrIndexService.populateChangedFileIds(
dataset.getReleasedVersion().getId(),
datasetVersion.getId(),
changedFileIds);
logger.fine(
"We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
} else if (datasetVersion.isDraft()) {
// Add all file metadata ids to changedFileMetadataIds
changedFileMetadataIds.addAll(
changedFileIds.addAll(
fileMetadatas.stream()
.map(FileMetadata::getId)
.map(fm -> fm.getDataFile().getId())
.collect(Collectors.toList())
);
}
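
The body of solrIndexService.populateChangedFileIds is not shown in this diff; a plausible sketch, assuming it simply wraps the renamed FileMetadata.getDatafilesWithChangedMetadata query and widens the Integer results to Long (the signature and the injected EntityManager are guesses):

// Hypothetical sketch; the real method lives in SolrIndexServiceBean.
public void populateChangedFileIds(Long releasedVersionId, Long draftVersionId, List<Long> changedFileIds) {
    Query query = em.createNamedQuery("FileMetadata.getDatafilesWithChangedMetadata");
    query.setParameter(1, releasedVersionId);
    query.setParameter(2, draftVersionId);
    for (Object result : query.getResultList()) {
        // IdToIntegerMapping yields Integer datafile ids; widen to Long.
        changedFileIds.add(Long.valueOf((Integer) result));
    }
}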
@@ -1526,7 +1515,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
}
boolean indexThisFile = false;

if (indexThisMetadata && (isReleasedVersion || changedFileMetadataIds.contains(fileMetadata.getId()))) {
if (indexThisMetadata && (isReleasedVersion || changedFileIds.contains(datafile.getId()))) {
indexThisFile = true;
} else if (indexThisMetadata) {
// Draft version, file is not new or all file metadata matches the released version
@@ -92,52 +92,6 @@ public List<String> findDvObjectPerms(DvObject dvObject) {
return permStrings;
}

public Map<DatasetVersion.VersionState, Boolean> getDesiredCards(Dataset dataset) {
Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>();
DatasetVersion latestVersion = dataset.getLatestVersion();
DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
DatasetVersion releasedVersion = dataset.getReleasedVersion();
boolean atLeastOnePublishedVersion = false;
if (releasedVersion != null) {
atLeastOnePublishedVersion = true;
} else {
atLeastOnePublishedVersion = false;
}

if (atLeastOnePublishedVersion == false) {
if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
} else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true);
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
} else {
String msg = "No-op. Unexpected condition reached: There is no published version and the latest published version is neither " + DatasetVersion.VersionState.DRAFT + " nor " + DatasetVersion.VersionState.DEACCESSIONED + ". Its state is " + latestVersionState + ".";
logger.info(msg);
}
} else if (atLeastOnePublishedVersion == true) {
if (latestVersionState.equals(DatasetVersion.VersionState.RELEASED)
|| latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
} else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
} else {
String msg = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft";
logger.info(msg);
}
} else {
String msg = "No-op. Unexpected condition reached: Has a version been published or not?";
logger.info(msg);
}
return desiredCards;
}

private boolean hasBeenPublished(Dataverse dataverse) {
return dataverse.isReleased();
}