5 changes: 5 additions & 0 deletions doc/release-notes/12081-reExportAll-improvements.md
@@ -0,0 +1,5 @@
This release includes several improvements to the admin/metadata/reExportAll API call:

- Fatal runtime problems, such as a problem with the database connection (seen at DANS), are now logged to the export log before the job terminates, making it easier to see when there has been a problem
- A new optional ?olderThan= parameter allows incremental reExportAll: the job can be run in smaller chunks, restricted to datasets last exported before some exporter change occurred, or restarted after a failure (use olderThan=<YYYY-MM-DD>; see the sketch below)
- Performance and memory use have been improved
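
As a minimal sketch of how an admin might script the new incremental call (the base URL and cutoff date below are illustrative assumptions; the curl commands in the admin guide remain the canonical usage), a plain Java 11+ HttpClient request could look like this:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ReExportOlderThan {
    public static void main(String[] args) throws Exception {
        // Re-export only datasets last exported before 2024-01-01 (illustrative date).
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/api/admin/metadata/reExportAll?olderThan=2024-01-01"))
                .GET()
                .build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
        // Expect {"status":"WORKFLOW_IN_PROGRESS"} while the asynchronous job runs;
        // progress is written to the export_[timestamp].log file on the server.
        System.out.println(response.body());
    }
}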
5 changes: 4 additions & 1 deletion doc/sphinx-guides/source/admin/metadataexport.rst
@@ -22,12 +22,15 @@ In addition to the automated exports, a Dataverse installation admin can start a

``curl http://localhost:8080/api/admin/metadata/reExportAll``

``curl http://localhost:8080/api/admin/metadata/reExportAll?olderThan=<YYYY-MM-DD>``

``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps``

``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000``

The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet.
The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
With the optional olderThan query parameter, the second will *force* a re-export of all published, local datasets that were never exported or were last exported before the olderThan date.

The first three calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``.

49 changes: 33 additions & 16 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -698,6 +698,13 @@ public void reExportAllAsync() {
exportAllDatasets(true);
}

// reExportAll with a date *forces* a re-export of all published datasets that were never exported or were last exported before that date.
@Asynchronous
public void reExportAllAsync(Date reExportDate) {
exportAllDatasets(true, reExportDate);

}

public void reExportAll() {
exportAllDatasets(true);
}
@@ -715,30 +722,27 @@ public void exportAll() {
exportAllDatasets(false);
}

public void exportAllDatasets(boolean forceReExport) {
private void exportAllDatasets(boolean b) {
exportAllDatasets(b, null);
}

@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
private void exportAllDatasets(boolean forceReExport, Date reExportDate) {
Integer countAll = 0;
Integer countSuccess = 0;
Integer countError = 0;
String logTimestamp = logFormatter.format(new Date());
Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp);
String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "export_" + logTimestamp + ".log";
FileHandler fileHandler;
boolean fileHandlerSuceeded;
try {
fileHandler = new FileHandler(logFileName);
exportLogger.setUseParentHandlers(false);
fileHandlerSuceeded = true;
} catch (IOException | SecurityException ex) {
Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex);
return;
}

if (fileHandlerSuceeded) {
exportLogger.addHandler(fileHandler);
} else {
exportLogger = logger;
}

exportLogger.info("Starting an export all job");

for (Long datasetId : findAllLocalDatasetIds()) {
@@ -757,9 +761,17 @@

// can't trust dataset.getPublicationDate(), no.
Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :)
if (forceReExport || (publicationDate != null
&& (dataset.getLastExportTime() == null
|| dataset.getLastExportTime().before(publicationDate)))) {
/**
 * Three cases:
 * 1. force is true and no date given - re-export every dataset
 * 2. force is true and a reExport date is given - re-export datasets last exported before that date
 * 3. force is false, reExportDate ignored - re-export datasets last exported before they were last published
 */
if ((forceReExport && reExportDate == null)
|| (forceReExport && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(reExportDate)))
|| (forceReExport == false
&& (publicationDate != null && (dataset.getLastExportTime() == null
|| dataset.getLastExportTime().before(publicationDate))))) {
countAll++;
try {
recordService.exportAllFormatsInNewTransaction(dataset);
@@ -768,6 +780,13 @@
} catch (Exception ex) {
exportLogger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex);
countError++;
} catch (Throwable t) {
exportLogger.log(Level.SEVERE, "Fatal error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + t.getClass().getName() + ": " + t.getMessage(), t);
exportLogger.info("Datasets processed before fatal error: " + countAll.toString());
exportLogger.info("Datasets exported successfully: " + countSuccess.toString());
exportLogger.info("Datasets failures: " + countError.toString());
fileHandler.close();
throw t;
}
}
}
@@ -778,10 +797,7 @@ public void exportAllDatasets(boolean forceReExport) {
exportLogger.info("Datasets failures: " + countError.toString());
exportLogger.info("Finished export-all job.");

if (fileHandlerSuceeded) {
fileHandler.close();
}

fileHandler.close();
}

@Asynchronous
@@ -1140,4 +1156,5 @@ public void saveStorageQuota(Dataset target, Long allocation) {
}
em.flush();
}

}
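
The combined condition in exportAllDatasets packs the three cases from the comment into one boolean expression. As a minimal sketch (not part of this changeset; the extracted helper shouldReExport and its parameters are hypothetical), the same decision reads as:

import java.util.Date;

final class ReExportPolicy {
    // Hypothetical extraction of the decision made inside exportAllDatasets; not from the PR.
    static boolean shouldReExport(boolean force, Date reExportDate, Date lastExport, Date publicationDate) {
        if (force && reExportDate == null) {
            // Case 1: forced run, no cutoff - re-export everything.
            return true;
        }
        if (force) {
            // Case 2: forced run with a cutoff - re-export datasets never exported
            // or last exported before the cutoff date.
            return lastExport == null || lastExport.before(reExportDate);
        }
        // Case 3: normal run - re-export datasets whose last export predates their publication.
        return publicationDate != null && (lastExport == null || lastExport.before(publicationDate));
    }
}

Case 2 relies on short-circuiting: when reExportDate is null the first branch has already returned, so before(reExportDate) never sees a null cutoff.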
18 changes: 13 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
@@ -8,11 +8,9 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetServiceBean;

import java.util.Date;
import java.util.logging.Logger;
import jakarta.ejb.EJB;
import jakarta.json.Json;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObjectBuilder;
import jakarta.ws.rs.*;

import jakarta.ws.rs.core.Response;
@@ -57,8 +55,18 @@ public Response exportAll() {
@GET
@Path("/reExportAll")
@Produces("application/json")
public Response reExportAll() {
datasetService.reExportAllAsync();
public Response reExportAll(@QueryParam(value = "olderThan") String olderThan) {
Date reExportDate = null;
if (olderThan != null && !olderThan.isEmpty()) {
try {
java.text.SimpleDateFormat dateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd");
dateFormat.setLenient(false);
reExportDate = dateFormat.parse(olderThan);
} catch (java.text.ParseException e) {
return error(Response.Status.BAD_REQUEST, "Invalid date format for olderThan parameter. Expected format: YYYY-MM-DD");
}
}
datasetService.reExportAllAsync(reExportDate);
return this.accepted();
}

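One note on the date handling above: SimpleDateFormat with setLenient(false) gives the desired strict YYYY-MM-DD validation, and constructing it per request avoids the class's thread-safety pitfalls. As a minimal sketch of an alternative (an assumption, not what this PR uses; the helper name parseOlderThan is hypothetical), java.time offers strict parsing without a lenient flag:

import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeParseException;
import java.util.Date;

final class OlderThanParser {
    // Returns null on malformed input; the endpoint would map that to a 400 Bad Request.
    static Date parseOlderThan(String olderThan) {
        try {
            LocalDate cutoff = LocalDate.parse(olderThan); // strict ISO-8601, i.e. YYYY-MM-DD
            return Date.from(cutoff.atStartOfDay(ZoneId.systemDefault()).toInstant());
        } catch (DateTimeParseException e) {
            return null;
        }
    }
}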