From 6a2c63ea479a6d2f50064fffab101a71d54efa2d Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 19:10:26 +0200 Subject: [PATCH 01/11] wip --- .../controller/SubmissionController.kt | 12 +- .../org/loculus/backend/model/SubmitModel.kt | 154 ++++++++--- .../service/submission/CompressionService.kt | 3 +- .../submission/SequenceEntriesTable.kt | 2 +- .../submission/UploadDatabaseService.kt | 243 ++++++++++++------ .../loculus/backend/utils/ParseFastaHeader.kt | 2 +- 6 files changed, 294 insertions(+), 122 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt index 80e27fc556..73d3e535e6 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt @@ -106,14 +106,14 @@ open class SubmissionController( ) @RequestParam restrictedUntil: String?, @Parameter(description = FILE_MAPPING_DESCRIPTION) @RequestPart(required = false) fileMapping: String?, ): List { - var innerDataUseTermsType = DataUseTermsType.OPEN - if (backendConfig.dataUseTerms.enabled) { - if (dataUseTermsType == null) { - throw BadRequestException("the 'dataUseTermsType' needs to be provided.") + val innerDataUseTermsType = + if (backendConfig.dataUseTerms.enabled) { + dataUseTermsType ?: throw BadRequestException( + "the 'dataUseTermsType' needs to be provided." + ) } else { - innerDataUseTermsType = dataUseTermsType + DataUseTermsType.OPEN } - } val fileMappingParsed = parseFileMapping(fileMapping, organism) val params = SubmissionParams.OriginalSubmissionParams( diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index cf447175b7..980e99f281 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -25,14 +25,19 @@ import org.loculus.backend.service.submission.SubmissionIdFilesMappingPreconditi import org.loculus.backend.service.submission.UploadDatabaseService import org.loculus.backend.utils.DateProvider import org.loculus.backend.utils.FastaReader +import org.loculus.backend.utils.MetadataEntry +import org.loculus.backend.utils.ParseFastaHeader import org.loculus.backend.utils.metadataEntryStreamAsSequence import org.loculus.backend.utils.revisionEntryStreamAsSequence import org.springframework.stereotype.Service import org.springframework.transaction.annotation.Transactional import org.springframework.web.multipart.MultipartFile import java.io.BufferedInputStream +import java.io.Closeable import java.io.File import java.io.InputStream +import java.util.UUID + const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES = "id" const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES_ALTERNATE_FOR_BACKCOMPAT = "submissionId" @@ -91,6 +96,7 @@ class SubmitModel( private val dateProvider: DateProvider, private val backendConfig: BackendConfig, private val s3Service: S3Service, + private val parseFastaHeader: ParseFastaHeader, ) { companion object AcceptedFileTypes { @@ -122,37 +128,55 @@ class SubmitModel( .validateMultipartUploads(submissionParams.files) .validateFilesExist(submissionParams.files) - insertDataIntoAux( - uploadId, - submissionParams, - batchSize, - ) + if (submissionParams is SubmissionParams.OriginalSubmissionParams) { + groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup( + submissionParams.groupId, + submissionParams.authenticatedUser, + ) + dataUseTermsPreconditionValidator.checkThatRestrictedUntilIsAllowed(submissionParams.dataUseTerms) + } + + // 1. Parse metadata and sequences + // 2. Do validations + // 3. Insert into sequenceEntries + + val metadata = parseMetadataFile(submissionParams) + val rawSequences = parseSequenceFile(submissionParams) + val sequences = groupSequencesById(rawSequences, submissionParams.organism) - val metadataSubmissionIds = uploadDatabaseService.getMetadataUploadSubmissionIds(uploadId).toSet() if (requiresConsensusSequenceFile(submissionParams.organism)) { log.debug { "Validating submission with uploadId $uploadId" } - val sequenceSubmissionIds = uploadDatabaseService.getSequenceUploadSubmissionIds(uploadId).toSet() - validateSubmissionIdSetsForConsensusSequences(metadataSubmissionIds, sequenceSubmissionIds) - } - - if (submissionParams is SubmissionParams.RevisionSubmissionParams) { - log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" } - uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( - uploadId, - submissionParams.organism, - submissionParams.authenticatedUser, - ) + validateSubmissionIdSetsForConsensusSequences(metadata.keys, sequences.keys) } submissionParams.files?.let { submittedFiles -> - val fileSubmissionIds = submittedFiles.keys - validateSubmissionIdSetsForFiles(metadataSubmissionIds, fileSubmissionIds) + validateSubmissionIdSetsForFiles(metadata.keys, submittedFiles.keys) validateFileExistenceAndGroupOwnership(submittedFiles, submissionParams, uploadId) } - if (submissionParams is SubmissionParams.OriginalSubmissionParams) { - log.info { "Generating new accessions for uploaded sequence data with uploadId $uploadId" } - uploadDatabaseService.generateNewAccessionsForOriginalUpload(uploadId) + + when (submissionParams) { + is SubmissionParams.OriginalSubmissionParams -> { + val submissionIdToAccession = uploadDatabaseService.generateNewAccessions(metadata.keys) + + // Actually put the sequences into the db + uploadDatabaseService.createNewSequenceEntries( + metadata, + sequences, + files, + submissionParams + + ) + } + + is SubmissionParams.RevisionSubmissionParams -> { + log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" } + uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( + uploadId, + submissionParams.organism, + submissionParams.authenticatedUser, + ) + } } log.debug { "Persisting submission with uploadId $uploadId" } @@ -161,22 +185,68 @@ class SubmitModel( uploadDatabaseService.deleteUploadData(uploadId) } + private fun parseMetadataFile(submissionParams: SubmissionParams): Map { + return MaybeFile().use { metadataTempFileToDelete -> + val metadataStream = getStreamFromFile( + submissionParams.metadataFile, + metadataFileTypes, + metadataTempFileToDelete, + ) + // Collect the metadataStream into a map so we can do validations + val metadata: Map = metadataEntryStreamAsSequence(metadataStream) + .associateBy { it.submissionId } + + metadata + } + } + + private fun parseSequenceFile(submissionParams: SubmissionParams): Map { + val sequenceFile = submissionParams.sequenceFile + if (sequenceFile == null) { + if (requiresConsensusSequenceFile(submissionParams.organism)) { + throw BadRequestException( + "Submissions for organism ${submissionParams.organism.name} require a sequence file.", + ) + } + return emptyMap() + } + if (!requiresConsensusSequenceFile(submissionParams.organism)) { + throw BadRequestException( + "Sequence uploads are not allowed for organism ${submissionParams.organism.name}.", + ) + } + + return MaybeFile().use { sequenceTempFileToDelete -> + val sequenceStream = getStreamFromFile( + sequenceFile, + sequenceFileTypes, + sequenceTempFileToDelete, + ) + FastaReader(sequenceStream).asSequence().associate { it.sampleName to it.sequence } + } + } + + private fun groupSequencesById( + sequences: Map, + organism: Organism, + ): Map> { + val sequencesById = mutableMapOf>() + sequences.forEach { (header, sequence) -> + val (submissionId, segmentName) = parseFastaHeader.parse(header, organism) + val segmentsForId = sequencesById.getOrPut(submissionId) { mutableMapOf() } + segmentsForId[segmentName] = sequence + } + return sequencesById + } + + /** * Inserts the uploaded metadata (and sequence data) into the 'aux' tables in the database. */ private fun insertDataIntoAux(uploadId: String, submissionParams: SubmissionParams, batchSize: Int) { - if (submissionParams is SubmissionParams.OriginalSubmissionParams) { - groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup( - submissionParams.groupId, - submissionParams.authenticatedUser, - ) - dataUseTermsPreconditionValidator.checkThatRestrictedUntilIsAllowed(submissionParams.dataUseTerms) - } - val metadataTempFileToDelete = MaybeFile() val metadataStream = getStreamFromFile( submissionParams.metadataFile, - uploadId, metadataFileTypes, metadataTempFileToDelete, ) @@ -204,7 +274,6 @@ class SubmitModel( try { val sequenceStream = getStreamFromFile( sequenceFile, - uploadId, sequenceFileTypes, sequenceTempFileToDelete, ) @@ -215,23 +284,26 @@ class SubmitModel( } } - class MaybeFile { + class MaybeFile : Closeable { var file: File? = null fun delete() { file?.delete() } + + override fun close() { + delete() + } } private fun getStreamFromFile( file: MultipartFile, - uploadId: String, dataType: ValidExtension, maybeFileToDelete: MaybeFile, ): InputStream = when (getFileType(file, dataType)) { CompressionAlgorithm.ZIP -> { val tempFile = File.createTempFile( "upload_" + dataType.displayName.replace(" ", ""), - uploadId, + UUID.randomUUID().toString(), ) maybeFileToDelete.file = tempFile @@ -260,7 +332,7 @@ class SubmitModel( ) { log.debug { "intermediate storing uploaded metadata of type ${submissionParams.uploadType.name} " + - "from $submissionParams.submitter with UploadId $uploadId" + "from $submissionParams.submitter with UploadId $uploadId" } val now = dateProvider.getCurrentDateTime() try { @@ -343,8 +415,8 @@ class SubmitModel( .flatMap { it.value }.joinToString(", .") throw BadRequestException( "${expectedFileType.displayName} has wrong extension. Must be " + - ".${expectedFileType.validExtensions.joinToString(", .")} for uncompressed submissions or " + - ".$allowedCompressionFormats for compressed submissions", + ".${expectedFileType.validExtensions.joinToString(", .")} for uncompressed submissions or " + + ".$allowedCompressionFormats for compressed submissions", ) } @@ -358,13 +430,13 @@ class SubmitModel( if (metadataKeysNotInSequences.isNotEmpty() || sequenceKeysNotInMetadata.isNotEmpty()) { val metadataNotPresentErrorText = if (metadataKeysNotInSequences.isNotEmpty()) { "Metadata file contains ${metadataKeysNotInSequences.size} ids that are not present " + - "in the sequence file: " + metadataKeysNotInSequences.toList().joinToString(limit = 10) + "; " + "in the sequence file: " + metadataKeysNotInSequences.toList().joinToString(limit = 10) + "; " } else { "" } val sequenceNotPresentErrorText = if (sequenceKeysNotInMetadata.isNotEmpty()) { "Sequence file contains ${sequenceKeysNotInMetadata.size} ids that are not present " + - "in the metadata file: " + sequenceKeysNotInMetadata.toList().joinToString(limit = 10) + "in the metadata file: " + sequenceKeysNotInMetadata.toList().joinToString(limit = 10) } else { "" } @@ -377,7 +449,7 @@ class SubmitModel( if (filesKeysNotInMetadata.isNotEmpty()) { throw UnprocessableEntityException( "File upload contains ${filesKeysNotInMetadata.size} submissionIds that are not present in the " + - "metadata file: " + filesKeysNotInMetadata.toList().joinToString(limit = 10), + "metadata file: " + filesKeysNotInMetadata.toList().joinToString(limit = 10), ) } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt index f5ff4f02b7..7007c29fba 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt @@ -5,6 +5,7 @@ import org.loculus.backend.api.GeneticSequence import org.loculus.backend.api.Organism import org.loculus.backend.api.OriginalData import org.loculus.backend.api.ProcessedData +import org.loculus.backend.api.SegmentName import org.loculus.backend.config.BackendConfig import org.springframework.stereotype.Service import java.nio.charset.StandardCharsets @@ -27,7 +28,7 @@ class CompressionService(private val backendConfig: BackendConfig) { fun compressNucleotideSequence( uncompressedSequence: GeneticSequence, - segmentName: String, + segmentName: SegmentName, organism: Organism, ): CompressedSequence = compress( uncompressedSequence, diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt index 1643e16533..13c52b7d57 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt @@ -25,7 +25,7 @@ object SequenceEntriesTable : Table(SEQUENCE_ENTRIES_TABLE_NAME) { val organismColumn = varchar("organism", 255) val submissionIdColumn = varchar("submission_id", 255) val submitterColumn = varchar("submitter", 255) - val approverColumn = varchar("approver", 255) + val approverColumn = varchar("approver", 255).nullable() val groupIdColumn = integer("group_id") val submittedAtTimestampColumn = datetime("submitted_at") val releasedAtTimestampColumn = datetime("released_at").nullable() diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index ca9cef8919..c4f04246b1 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -4,36 +4,37 @@ import kotlinx.datetime.LocalDateTime import mu.KotlinLogging import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq import org.jetbrains.exposed.sql.VarCharColumnType -import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.batchInsert import org.jetbrains.exposed.sql.deleteWhere import org.jetbrains.exposed.sql.statements.StatementType import org.jetbrains.exposed.sql.transactions.transaction -import org.jetbrains.exposed.sql.update +import org.loculus.backend.api.FileIdAndName import org.loculus.backend.api.Organism +import org.loculus.backend.api.OriginalData import org.loculus.backend.api.Status import org.loculus.backend.api.SubmissionIdFilesMap import org.loculus.backend.api.SubmissionIdMapping import org.loculus.backend.auth.AuthenticatedUser import org.loculus.backend.log.AuditLogger +import org.loculus.backend.model.SegmentName import org.loculus.backend.model.SubmissionId import org.loculus.backend.model.SubmissionParams import org.loculus.backend.service.GenerateAccessionFromNumberService import org.loculus.backend.service.datauseterms.DataUseTermsDatabaseService -import org.loculus.backend.service.submission.MetadataUploadAuxTable.accessionColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.filesColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.groupIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.metadataColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.organismColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.submissionIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.submitterColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.uploadIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.uploadedAtColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.compressedSequenceDataColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.segmentNameColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.sequenceSubmissionIdColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.sequenceUploadIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionCommentColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.accessionColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.organismColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submissionIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submitterColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.approverColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.groupIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submittedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.releasedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.originalDataColumn +import org.loculus.backend.utils.Accession import org.loculus.backend.utils.DatabaseConstants +import org.loculus.backend.utils.DateProvider import org.loculus.backend.utils.FastaEntry import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader @@ -48,8 +49,10 @@ private val log = KotlinLogging.logger { } private const val SEQUENCE_INSERT_COLUMNS = 4 private const val METADATA_INSERT_COLUMNS = 8 +private const val SEQUENCE_ENTRIES_INSERT_COLUMNS = 10 private const val SEQUENCE_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_INSERT_COLUMNS private const val METADATA_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / METADATA_INSERT_COLUMNS +private const val SEQUENCE_ENTRIES_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_ENTRIES_INSERT_COLUMNS @Service @Transactional @@ -60,6 +63,7 @@ class UploadDatabaseService( private val dataUseTermsDatabaseService: DataUseTermsDatabaseService, private val generateAccessionFromNumberService: GenerateAccessionFromNumberService, private val auditLogger: AuditLogger, + private val dateProvider: DateProvider, ) { fun batchInsertMetadataInAuxTable( @@ -107,44 +111,157 @@ class UploadDatabaseService( } } - fun batchInsertSequencesInAuxTable( - uploadId: String, - submittedOrganism: Organism, - uploadedSequencesBatch: List, - ) { - uploadedSequencesBatch.chunkedForDatabase({ batch -> - SequenceUploadAuxTable.batchInsert(batch) { - val (submissionId, segmentName) = parseFastaHeader.parse(it.sampleName, submittedOrganism) - this[sequenceSubmissionIdColumn] = submissionId - this[segmentNameColumn] = segmentName - this[sequenceUploadIdColumn] = uploadId - this[compressedSequenceDataColumn] = compressor.compressNucleotideSequence( - it.sequence, - segmentName, - submittedOrganism, - ) - } - emptyList() - }, SEQUENCE_INSERT_COLUMNS) - } - fun getMetadataUploadSubmissionIds(uploadId: String): List = MetadataUploadAuxTable - .select( - uploadIdColumn, - submissionIdColumn, + data class SequenceEntry( + val accession: Accession, + val version: Long, + val versionComment: String?, + val organism: Organism, + val submissionId: SubmissionId, + val submitter: String, + val approver: String?, + val groupId: Int, + val submittedAtTimestamp: LocalDateTime, + val releasedAtTimestamp: LocalDateTime?, + val originalData: OriginalData, + ) + + fun createNewSequenceEntries( + metadata: Map, + sequences: Map>, + files: Map>>, + submissionParams: SubmissionParams.OriginalSubmissionParams, + ): List { + + log.debug { "Creating new sequence entries" } + + val now = dateProvider.getCurrentDateTime() + + val newAccessions = generateNewAccessions(metadata.keys) + + val newEntries = newAccessions.map { (submissionId, accession) -> + SequenceEntry( + accession = accession, + version = 1, + versionComment = null, + organism = submissionParams.organism, + submissionId = submissionId, + submitter = submissionParams.authenticatedUser.username, + approver = null, + groupId = submissionParams.groupId, + submittedAtTimestamp = now, + releasedAtTimestamp = null, + originalData = OriginalData( + metadata = metadata[submissionId]?.metadata ?: emptyMap(), + files = files[submissionId], + unalignedNucleotideSequences = sequences[submissionId]?.mapValues { (segmentName, sequence) -> + compressor.compressNucleotideSequence( + sequence, + segmentName, + submissionParams.organism, + ) + } ?: emptyMap(), + ), + ) + } + + newEntries.chunkedForDatabase( + { batch -> + SequenceEntriesTable.batchInsert(batch) { + this[accessionColumn] = it.accession + this[versionColumn] = it.version + this[versionCommentColumn] = it.versionComment + this[organismColumn] = it.organism.name + this[submissionIdColumn] = it.submissionId + this[submitterColumn] = it.submitter + this[approverColumn] = it.approver + this[groupIdColumn] = it.groupId + this[submittedAtTimestampColumn] = it.submittedAtTimestamp + this[releasedAtTimestampColumn] = it.releasedAtTimestamp + this[originalDataColumn] = it.originalData + } + emptyList() + }, + SEQUENCE_ENTRIES_INSERT_COLUMNS ) - .where { uploadIdColumn eq uploadId } - .map { it[submissionIdColumn] } - fun getSequenceUploadSubmissionIds(uploadId: String): List = SequenceUploadAuxTable - .select( - sequenceUploadIdColumn, - sequenceSubmissionIdColumn, + return newAccessions.map { (submissionId, accession) -> + SubmissionIdMapping( + accession = accession, + version = 1, + submissionId = submissionId, + ) + } + + } + + fun createRevisionEntries( + metadata: Map, + sequences: Map>, + files: Map>>, + submissionParams: SubmissionParams.RevisionSubmissionParams, + ): List { + + log.debug { "Creating revision entries" } + + val now = dateProvider.getCurrentDateTime() + + val newEntries = metadata.values.map { entry -> + val latestVersion = accessionPreconditionValidator.getLatestVersion(entry.accession) + SequenceEntry( + accession = entry.accession, + version = latestVersion + 1, + versionComment = submissionParams.versionComment, + organism = submissionParams.organism, + submissionId = entry.submissionId, + submitter = submissionParams.authenticatedUser.username, + approver = null, + groupId = submissionParams.groupId, + submittedAtTimestamp = now, + releasedAtTimestamp = null, + originalData = OriginalData( + metadata = entry.metadata, + files = files[entry.submissionId], + unalignedNucleotideSequences = sequences[entry.submissionId]?.mapValues { (segmentName, sequence) -> + compressor.compressNucleotideSequence( + sequence, + segmentName, + submissionParams.organism, + ) + } ?: emptyMap(), + ), + ) + } + + newEntries.chunkedForDatabase( + { batch -> + SequenceEntriesTable.batchInsert(batch) { + this[accessionColumn] = it.accession + this[versionColumn] = it.version + this[versionCommentColumn] = it.versionComment + this[organismColumn] = it.organism.name + this[submissionIdColumn] = it.submissionId + this[submitterColumn] = it.submitter + this[approverColumn] = it.approver + this[groupIdColumn] = it.groupId + this[submittedAtTimestampColumn] = it.submittedAtTimestamp + this[releasedAtTimestampColumn] = it.releasedAtTimestamp + this[originalDataColumn] = it.originalData + } + emptyList() + }, + SEQUENCE_ENTRIES_INSERT_COLUMNS ) - .where { sequenceUploadIdColumn eq uploadId } - .map { - it[sequenceSubmissionIdColumn] + + return metadata.values.map { entry -> + SubmissionIdMapping( + accession = entry.accession, + version = accessionPreconditionValidator.getLatestVersion(entry.accession), + submissionId = entry.submissionId, + ) } + } + ) fun mapAndCopy(uploadId: String, submissionParams: SubmissionParams): List = transaction { log.debug { @@ -229,7 +346,7 @@ class UploadDatabaseService( auditLogger.log( username = submissionParams.authenticatedUser.username, description = "Submitted or revised ${insertionResult.size} sequences: " + - insertionResult.joinToString { it.displayAccessionVersion() }, + insertionResult.joinToString { it.displayAccessionVersion() }, ) return@transaction insertionResult @@ -288,13 +405,9 @@ class UploadDatabaseService( .where { uploadIdColumn eq uploadId } .associate { Pair(it[submissionIdColumn], it[groupIdColumn]!!) } - fun generateNewAccessionsForOriginalUpload(uploadId: String) { - val submissionIds = - MetadataUploadAuxTable - .select(submissionIdColumn) - .where { uploadIdColumn eq uploadId } - .map { it[submissionIdColumn] } - + // Returns a list of pairs (submissionId, accession) + fun generateNewAccessions(submissionIds: Collection): List> { + log.info { "Generating ${submissionIds.size} new accessions" } val nextAccessions = getNextSequenceNumbers("accession_sequence", submissionIds.size).map { generateAccessionFromNumberService.generateCustomId(it) } @@ -305,21 +418,7 @@ class UploadDatabaseService( ) } - val submissionIdToAccessionMap = submissionIds.zip(nextAccessions) - - log.info { - "Generated ${submissionIdToAccessionMap.size} new accessions for original upload with UploadId $uploadId:" - } - - submissionIdToAccessionMap.forEach { (submissionId, accession) -> - MetadataUploadAuxTable.update( - where = { - (submissionIdColumn eq submissionId) and (uploadIdColumn eq uploadId) - }, - ) { - it[accessionColumn] = accession - it[versionColumn] = 1 - } - } + log.info { "Generated ${submissionIds.size} new accessions" } + return submissionIds.zip(nextAccessions) } } diff --git a/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt b/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt index 4815e61e2b..1bcf8b60a4 100644 --- a/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt +++ b/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt @@ -26,7 +26,7 @@ class ParseFastaHeader(private val backendConfig: BackendConfig) { " segment name in the format <$HEADER_TO_CONNECT_METADATA_AND_SEQUENCES>_", ) } - val isolateId = submissionId.substring(0, lastDelimiter) + val isolateId = submissionId.take(lastDelimiter) val segmentId = submissionId.substring(lastDelimiter + 1) if (!validSegmentIds.contains(segmentId)) { throw BadRequestException( From 6015ca345b6ff839c360dc848c07b6f5e8997e64 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 20:14:48 +0200 Subject: [PATCH 02/11] Finish first pass --- .../org/loculus/backend/model/SubmitModel.kt | 22 +- .../submission/UploadDatabaseService.kt | 308 ++++-------------- 2 files changed, 84 insertions(+), 246 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index 980e99f281..057a336d93 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -143,13 +143,14 @@ class SubmitModel( val metadata = parseMetadataFile(submissionParams) val rawSequences = parseSequenceFile(submissionParams) val sequences = groupSequencesById(rawSequences, submissionParams.organism) + val files = submissionParams.files if (requiresConsensusSequenceFile(submissionParams.organism)) { log.debug { "Validating submission with uploadId $uploadId" } validateSubmissionIdSetsForConsensusSequences(metadata.keys, sequences.keys) } - submissionParams.files?.let { submittedFiles -> + files?.let { submittedFiles -> validateSubmissionIdSetsForFiles(metadata.keys, submittedFiles.keys) validateFileExistenceAndGroupOwnership(submittedFiles, submissionParams, uploadId) } @@ -157,25 +158,28 @@ class SubmitModel( when (submissionParams) { is SubmissionParams.OriginalSubmissionParams -> { - val submissionIdToAccession = uploadDatabaseService.generateNewAccessions(metadata.keys) - // Actually put the sequences into the db uploadDatabaseService.createNewSequenceEntries( metadata, sequences, files, - submissionParams - + submissionParams, ) } is SubmissionParams.RevisionSubmissionParams -> { log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" } - uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( - uploadId, - submissionParams.organism, - submissionParams.authenticatedUser, + uploadDatabaseService.createRevisionEntries( + metadata, + sequences, + files, + submissionParams ) +// uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( +// uploadId, +// submissionParams.organism, +// submissionParams.authenticatedUser, +// ) } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index c4f04246b1..c42a0b524d 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -2,19 +2,13 @@ package org.loculus.backend.service.submission import kotlinx.datetime.LocalDateTime import mu.KotlinLogging -import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq -import org.jetbrains.exposed.sql.VarCharColumnType import org.jetbrains.exposed.sql.batchInsert -import org.jetbrains.exposed.sql.deleteWhere -import org.jetbrains.exposed.sql.statements.StatementType -import org.jetbrains.exposed.sql.transactions.transaction -import org.loculus.backend.api.FileIdAndName +import org.jetbrains.exposed.sql.max import org.loculus.backend.api.Organism import org.loculus.backend.api.OriginalData import org.loculus.backend.api.Status import org.loculus.backend.api.SubmissionIdFilesMap import org.loculus.backend.api.SubmissionIdMapping -import org.loculus.backend.auth.AuthenticatedUser import org.loculus.backend.log.AuditLogger import org.loculus.backend.model.SegmentName import org.loculus.backend.model.SubmissionId @@ -34,11 +28,11 @@ import org.loculus.backend.service.submission.SequenceEntriesTable.releasedAtTim import org.loculus.backend.service.submission.SequenceEntriesTable.originalDataColumn import org.loculus.backend.utils.Accession import org.loculus.backend.utils.DatabaseConstants +import org.loculus.backend.utils.DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT import org.loculus.backend.utils.DateProvider -import org.loculus.backend.utils.FastaEntry import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader -import org.loculus.backend.utils.RevisionEntry +import org.loculus.backend.utils.Version import org.loculus.backend.utils.chunkedForDatabase import org.loculus.backend.utils.getNextSequenceNumbers import org.loculus.backend.utils.processInDatabaseSafeChunks @@ -52,7 +46,8 @@ private const val METADATA_INSERT_COLUMNS = 8 private const val SEQUENCE_ENTRIES_INSERT_COLUMNS = 10 private const val SEQUENCE_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_INSERT_COLUMNS private const val METADATA_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / METADATA_INSERT_COLUMNS -private const val SEQUENCE_ENTRIES_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_ENTRIES_INSERT_COLUMNS +private const val SEQUENCE_ENTRIES_BATCH_SIZE = + DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_ENTRIES_INSERT_COLUMNS @Service @Transactional @@ -65,53 +60,6 @@ class UploadDatabaseService( private val auditLogger: AuditLogger, private val dateProvider: DateProvider, ) { - - fun batchInsertMetadataInAuxTable( - uploadId: String, - authenticatedUser: AuthenticatedUser, - groupId: Int, - submittedOrganism: Organism, - uploadedMetadataBatch: List, - uploadedAt: LocalDateTime, - files: SubmissionIdFilesMap?, - ) { - uploadedMetadataBatch.chunked(METADATA_BATCH_SIZE).forEach { batch -> - MetadataUploadAuxTable.batchInsert(batch) { - this[submitterColumn] = authenticatedUser.username - this[groupIdColumn] = groupId - this[uploadedAtColumn] = uploadedAt - this[submissionIdColumn] = it.submissionId - this[metadataColumn] = it.metadata - this[filesColumn] = files?.get(it.submissionId) - this[organismColumn] = submittedOrganism.name - this[uploadIdColumn] = uploadId - } - } - } - - fun batchInsertRevisedMetadataInAuxTable( - uploadId: String, - authenticatedUser: AuthenticatedUser, - submittedOrganism: Organism, - uploadedRevisedMetadataBatch: List, - uploadedAt: LocalDateTime, - files: SubmissionIdFilesMap?, - ) { - uploadedRevisedMetadataBatch.chunked(METADATA_BATCH_SIZE).forEach { batch -> - MetadataUploadAuxTable.batchInsert(batch) { - this[accessionColumn] = it.accession - this[submitterColumn] = authenticatedUser.username - this[uploadedAtColumn] = uploadedAt - this[submissionIdColumn] = it.submissionId - this[metadataColumn] = it.metadata - this[filesColumn] = files?.get(it.submissionId) - this[organismColumn] = submittedOrganism.name - this[uploadIdColumn] = uploadId - } - } - } - - data class SequenceEntry( val accession: Accession, val version: Long, @@ -129,7 +77,7 @@ class UploadDatabaseService( fun createNewSequenceEntries( metadata: Map, sequences: Map>, - files: Map>>, + files: SubmissionIdFilesMap?, submissionParams: SubmissionParams.OriginalSubmissionParams, ): List { @@ -153,7 +101,7 @@ class UploadDatabaseService( releasedAtTimestamp = null, originalData = OriginalData( metadata = metadata[submissionId]?.metadata ?: emptyMap(), - files = files[submissionId], + files = files?.get(submissionId), unalignedNucleotideSequences = sequences[submissionId]?.mapValues { (segmentName, sequence) -> compressor.compressNucleotideSequence( sequence, @@ -165,63 +113,88 @@ class UploadDatabaseService( ) } - newEntries.chunkedForDatabase( - { batch -> - SequenceEntriesTable.batchInsert(batch) { - this[accessionColumn] = it.accession - this[versionColumn] = it.version - this[versionCommentColumn] = it.versionComment - this[organismColumn] = it.organism.name - this[submissionIdColumn] = it.submissionId - this[submitterColumn] = it.submitter - this[approverColumn] = it.approver - this[groupIdColumn] = it.groupId - this[submittedAtTimestampColumn] = it.submittedAtTimestamp - this[releasedAtTimestampColumn] = it.releasedAtTimestamp - this[originalDataColumn] = it.originalData - } - emptyList() - }, - SEQUENCE_ENTRIES_INSERT_COLUMNS + val submissionIdMapping = insertEntries(newEntries) + + dataUseTermsDatabaseService.setNewDataUseTerms( + submissionParams.authenticatedUser, + newAccessions.map { it.second }, + submissionParams.dataUseTerms, ) - return newAccessions.map { (submissionId, accession) -> - SubmissionIdMapping( - accession = accession, - version = 1, - submissionId = submissionId, - ) - } + return submissionIdMapping } fun createRevisionEntries( metadata: Map, sequences: Map>, - files: Map>>, + files: SubmissionIdFilesMap?, submissionParams: SubmissionParams.RevisionSubmissionParams, ): List { log.debug { "Creating revision entries" } + val submissionIdToAccession = metadata.map { (submissionId, entry) -> + val accession = entry.metadata["accession"] + ?: throw IllegalStateException("Metadata for submissionId $submissionId does not contain an accession") + Pair(submissionId, accession) + }.toMap() + + submissionIdToAccession.values.processInDatabaseSafeChunks { chunk -> + accessionPreconditionValidator.validate { + thatAccessionsExist(chunk) + .andThatUserIsAllowedToEditSequenceEntries(submissionParams.authenticatedUser) + .andThatSequenceEntriesAreInStates(listOf(Status.APPROVED_FOR_RELEASE)) + .andThatOrganismIs(submissionParams.organism) + } + } + + data class SequenceInfo( + val latestVersion: Version, + val groupId: Int, + ) + + val sequenceInfo = + submissionIdToAccession.values.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> + SequenceEntriesTable.select( + accessionColumn, + versionColumn.max(), + groupIdColumn, + ) + .where { accessionColumn inList chunk } + .groupBy(accessionColumn) + .associate { + it[accessionColumn] to + SequenceInfo( + latestVersion = it[versionColumn], + groupId = it[groupIdColumn], + ) + } + }.flatMap(Map::toList).toMap() + val now = dateProvider.getCurrentDateTime() val newEntries = metadata.values.map { entry -> - val latestVersion = accessionPreconditionValidator.getLatestVersion(entry.accession) + val accession = submissionIdToAccession[entry.submissionId] ?: throw IllegalStateException( + "Metadata for submissionId ${entry.submissionId} does not contain an accession", + ) + val info = sequenceInfo[accession] + ?: throw IllegalStateException("Could not find latest version for accession $accession") + val newVersion = info.latestVersion + 1 SequenceEntry( - accession = entry.accession, - version = latestVersion + 1, - versionComment = submissionParams.versionComment, + accession = accession, + version = newVersion, + versionComment = entry.metadata["versionComment"], organism = submissionParams.organism, submissionId = entry.submissionId, submitter = submissionParams.authenticatedUser.username, approver = null, - groupId = submissionParams.groupId, + groupId = info.groupId, submittedAtTimestamp = now, releasedAtTimestamp = null, originalData = OriginalData( metadata = entry.metadata, - files = files[entry.submissionId], + files = files?.get(entry.submissionId), unalignedNucleotideSequences = sequences[entry.submissionId]?.mapValues { (segmentName, sequence) -> compressor.compressNucleotideSequence( sequence, @@ -233,6 +206,10 @@ class UploadDatabaseService( ) } + return insertEntries(newEntries) + } + + private fun insertEntries(newEntries: List): List { newEntries.chunkedForDatabase( { batch -> SequenceEntriesTable.batchInsert(batch) { @@ -250,160 +227,17 @@ class UploadDatabaseService( } emptyList() }, - SEQUENCE_ENTRIES_INSERT_COLUMNS + SEQUENCE_ENTRIES_INSERT_COLUMNS, ) - return metadata.values.map { entry -> + return newEntries.map { entry -> SubmissionIdMapping( accession = entry.accession, - version = accessionPreconditionValidator.getLatestVersion(entry.accession), + version = entry.version, submissionId = entry.submissionId, ) } } - ) - - fun mapAndCopy(uploadId: String, submissionParams: SubmissionParams): List = transaction { - log.debug { - "mapping and copying sequences with UploadId $uploadId and uploadType: $submissionParams.uploadType" - } - - val mapAndCopySql = """ - INSERT INTO sequence_entries ( - accession, - version, - organism, - submission_id, - submitter, - group_id, - submitted_at, - original_data - ) - SELECT - metadata_upload_aux_table.accession, - metadata_upload_aux_table.version, - metadata_upload_aux_table.organism, - metadata_upload_aux_table.submission_id, - metadata_upload_aux_table.submitter, - metadata_upload_aux_table.group_id, - metadata_upload_aux_table.uploaded_at, - jsonb_build_object( - 'metadata', metadata_upload_aux_table.metadata, - 'files', metadata_upload_aux_table.files, - 'unalignedNucleotideSequences', - COALESCE( - jsonb_object_agg( - sequence_upload_aux_table.segment_name, - sequence_upload_aux_table.compressed_sequence_data::jsonb - ) FILTER (WHERE sequence_upload_aux_table.segment_name IS NOT NULL), - '{}'::jsonb - ) - ) - FROM - metadata_upload_aux_table - LEFT JOIN - sequence_upload_aux_table - ON metadata_upload_aux_table.upload_id = sequence_upload_aux_table.upload_id - AND metadata_upload_aux_table.submission_id = sequence_upload_aux_table.submission_id - WHERE metadata_upload_aux_table.upload_id = ? - GROUP BY - metadata_upload_aux_table.upload_id, - metadata_upload_aux_table.organism, - metadata_upload_aux_table.submission_id, - metadata_upload_aux_table.submitter, - metadata_upload_aux_table.group_id, - metadata_upload_aux_table.uploaded_at - RETURNING accession, version, submission_id; - """.trimIndent() - val insertionResult = exec( - mapAndCopySql, - listOf( - Pair(VarCharColumnType(), uploadId), - ), - explicitStatementType = StatementType.SELECT, - ) { rs -> - val result = mutableListOf() - while (rs.next()) { - result += SubmissionIdMapping( - rs.getString("accession"), - rs.getLong("version"), - rs.getString("submission_id"), - ) - } - result.toList() - } ?: emptyList() - - if (submissionParams is SubmissionParams.OriginalSubmissionParams) { - log.debug { "Setting data use terms for submission $uploadId to ${submissionParams.dataUseTerms}" } - val accessions = insertionResult.map { it.accession } - dataUseTermsDatabaseService.setNewDataUseTerms( - submissionParams.authenticatedUser, - accessions, - submissionParams.dataUseTerms, - ) - } - - auditLogger.log( - username = submissionParams.authenticatedUser.username, - description = "Submitted or revised ${insertionResult.size} sequences: " + - insertionResult.joinToString { it.displayAccessionVersion() }, - ) - - return@transaction insertionResult - } - - fun deleteUploadData(uploadId: String) { - log.debug { "deleting upload data with UploadId $uploadId" } - - MetadataUploadAuxTable.deleteWhere { uploadIdColumn eq uploadId } - SequenceUploadAuxTable.deleteWhere { sequenceUploadIdColumn eq uploadId } - } - - fun associateRevisedDataWithExistingSequenceEntries( - uploadId: String, - organism: Organism, - authenticatedUser: AuthenticatedUser, - ) { - val accessions = - MetadataUploadAuxTable - .select(accessionColumn) - .where { uploadIdColumn eq uploadId } - .map { it[accessionColumn]!! } - - accessions.processInDatabaseSafeChunks { chunk -> - accessionPreconditionValidator.validate { - thatAccessionsExist(chunk) - .andThatUserIsAllowedToEditSequenceEntries(authenticatedUser) - .andThatSequenceEntriesAreInStates(listOf(Status.APPROVED_FOR_RELEASE)) - .andThatOrganismIs(organism) - } - } - - val updateSql = """ - UPDATE metadata_upload_aux_table m - SET - version = sequence_entries.version + 1, - group_id = sequence_entries.group_id - FROM sequence_entries - WHERE - m.upload_id = ? - AND m.accession = sequence_entries.accession - AND ${SequenceEntriesTable.isMaxVersion} - """.trimIndent() - transaction { - exec( - updateSql, - listOf( - Pair(VarCharColumnType(), uploadId), - ), - ) - } - } - - fun getSubmissionIdToGroupMapping(uploadId: String): Map = MetadataUploadAuxTable - .select(submissionIdColumn, groupIdColumn) - .where { uploadIdColumn eq uploadId } - .associate { Pair(it[submissionIdColumn], it[groupIdColumn]!!) } // Returns a list of pairs (submissionId, accession) fun generateNewAccessions(submissionIds: Collection): List> { From f4960c1ddae15d15b50819e20e48a4b0843a5b20 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 21:01:57 +0200 Subject: [PATCH 03/11] It compiles --- .../org/loculus/backend/model/SubmitModel.kt | 197 +++--------------- .../submission/UploadDatabaseService.kt | 72 +++++-- .../GetOriginalMetadataEndpointTest.kt | 49 ++--- 3 files changed, 105 insertions(+), 213 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index 057a336d93..e8bcc8b9e0 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -3,7 +3,6 @@ package org.loculus.backend.model import mu.KotlinLogging import org.apache.commons.compress.archivers.zip.ZipFile import org.apache.commons.compress.compressors.CompressorStreamFactory -import org.jetbrains.exposed.exceptions.ExposedSQLException import org.loculus.backend.api.DataUseTerms import org.loculus.backend.api.Organism import org.loculus.backend.api.SubmissionIdFilesMap @@ -12,11 +11,9 @@ import org.loculus.backend.api.getAllFileIds import org.loculus.backend.auth.AuthenticatedUser import org.loculus.backend.config.BackendConfig import org.loculus.backend.controller.BadRequestException -import org.loculus.backend.controller.DuplicateKeyException import org.loculus.backend.controller.UnprocessableEntityException import org.loculus.backend.service.datauseterms.DataUseTermsPreconditionValidator import org.loculus.backend.service.files.FilesDatabaseService -import org.loculus.backend.service.files.S3Service import org.loculus.backend.service.groupmanagement.GroupManagementPreconditionValidator import org.loculus.backend.service.submission.CompressionAlgorithm import org.loculus.backend.service.submission.MetadataUploadAuxTable @@ -28,7 +25,6 @@ import org.loculus.backend.utils.FastaReader import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader import org.loculus.backend.utils.metadataEntryStreamAsSequence -import org.loculus.backend.utils.revisionEntryStreamAsSequence import org.springframework.stereotype.Service import org.springframework.transaction.annotation.Transactional import org.springframework.web.multipart.MultipartFile @@ -95,7 +91,6 @@ class SubmitModel( private val submissionIdFilesMappingPreconditionValidator: SubmissionIdFilesMappingPreconditionValidator, private val dateProvider: DateProvider, private val backendConfig: BackendConfig, - private val s3Service: S3Service, private val parseFastaHeader: ParseFastaHeader, ) { @@ -117,7 +112,7 @@ class SubmitModel( uploadId: String, submissionParams: SubmissionParams, batchSize: Int = 1000, - ): List = try { + ): List { log.info { "Processing submission (type: ${submissionParams.uploadType.name}) with uploadId $uploadId" } @@ -136,10 +131,6 @@ class SubmitModel( dataUseTermsPreconditionValidator.checkThatRestrictedUntilIsAllowed(submissionParams.dataUseTerms) } - // 1. Parse metadata and sequences - // 2. Do validations - // 3. Insert into sequenceEntries - val metadata = parseMetadataFile(submissionParams) val rawSequences = parseSequenceFile(submissionParams) val sequences = groupSequencesById(rawSequences, submissionParams.organism) @@ -150,15 +141,13 @@ class SubmitModel( validateSubmissionIdSetsForConsensusSequences(metadata.keys, sequences.keys) } - files?.let { submittedFiles -> - validateSubmissionIdSetsForFiles(metadata.keys, submittedFiles.keys) - validateFileExistenceAndGroupOwnership(submittedFiles, submissionParams, uploadId) + files?.let { + validateSubmissionIdSetsForFiles(metadata.keys, it.keys) + validateFileExistenceAndGroupOwnership(it, submissionParams, metadata) } - - when (submissionParams) { + return when (submissionParams) { is SubmissionParams.OriginalSubmissionParams -> { - // Actually put the sequences into the db uploadDatabaseService.createNewSequenceEntries( metadata, sequences, @@ -168,25 +157,16 @@ class SubmitModel( } is SubmissionParams.RevisionSubmissionParams -> { - log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" } uploadDatabaseService.createRevisionEntries( metadata, sequences, files, submissionParams ) -// uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( -// uploadId, -// submissionParams.organism, -// submissionParams.authenticatedUser, -// ) } - } - log.debug { "Persisting submission with uploadId $uploadId" } - uploadDatabaseService.mapAndCopy(uploadId, submissionParams) - } finally { - uploadDatabaseService.deleteUploadData(uploadId) + else -> throw IllegalStateException("Unknown submission type: ${submissionParams.uploadType}") + } } private fun parseMetadataFile(submissionParams: SubmissionParams): Map { @@ -196,7 +176,6 @@ class SubmitModel( metadataFileTypes, metadataTempFileToDelete, ) - // Collect the metadataStream into a map so we can do validations val metadata: Map = metadataEntryStreamAsSequence(metadataStream) .associateBy { it.submissionId } @@ -244,50 +223,6 @@ class SubmitModel( } - /** - * Inserts the uploaded metadata (and sequence data) into the 'aux' tables in the database. - */ - private fun insertDataIntoAux(uploadId: String, submissionParams: SubmissionParams, batchSize: Int) { - val metadataTempFileToDelete = MaybeFile() - val metadataStream = getStreamFromFile( - submissionParams.metadataFile, - metadataFileTypes, - metadataTempFileToDelete, - ) - try { - uploadMetadata(uploadId, submissionParams, metadataStream, batchSize) - } finally { - metadataTempFileToDelete.delete() - } - - val sequenceFile = submissionParams.sequenceFile - if (sequenceFile == null) { - if (requiresConsensusSequenceFile(submissionParams.organism)) { - throw BadRequestException( - "Submissions for organism ${submissionParams.organism.name} require a sequence file.", - ) - } - } else { - if (!requiresConsensusSequenceFile(submissionParams.organism)) { - throw BadRequestException( - "Sequence uploads are not allowed for organism ${submissionParams.organism.name}.", - ) - } - - val sequenceTempFileToDelete = MaybeFile() - try { - val sequenceStream = getStreamFromFile( - sequenceFile, - sequenceFileTypes, - sequenceTempFileToDelete, - ) - uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism) - } finally { - sequenceTempFileToDelete.delete() - } - } - } - class MaybeFile : Closeable { var file: File? = null fun delete() { @@ -328,81 +263,6 @@ class SubmitModel( ) } - private fun uploadMetadata( - uploadId: String, - submissionParams: SubmissionParams, - metadataStream: InputStream, - batchSize: Int, - ) { - log.debug { - "intermediate storing uploaded metadata of type ${submissionParams.uploadType.name} " + - "from $submissionParams.submitter with UploadId $uploadId" - } - val now = dateProvider.getCurrentDateTime() - try { - when (submissionParams) { - is SubmissionParams.OriginalSubmissionParams -> { - metadataEntryStreamAsSequence(metadataStream) - .chunked(batchSize) - .forEach { batch -> - uploadDatabaseService.batchInsertMetadataInAuxTable( - uploadId = uploadId, - authenticatedUser = submissionParams.authenticatedUser, - groupId = submissionParams.groupId, - submittedOrganism = submissionParams.organism, - uploadedMetadataBatch = batch, - uploadedAt = now, - files = submissionParams.files, - ) - } - } - - is SubmissionParams.RevisionSubmissionParams -> { - revisionEntryStreamAsSequence(metadataStream) - .chunked(batchSize) - .forEach { batch -> - uploadDatabaseService.batchInsertRevisedMetadataInAuxTable( - uploadId = uploadId, - authenticatedUser = submissionParams.authenticatedUser, - submittedOrganism = submissionParams.organism, - uploadedRevisedMetadataBatch = batch, - uploadedAt = now, - files = submissionParams.files, - ) - } - } - } - } catch (e: ExposedSQLException) { - if (e.sqlState == UNIQUE_CONSTRAINT_VIOLATION_SQL_STATE) { - throw DuplicateKeyException( - "Metadata file contains at least one duplicate submissionId: ${e.cause?.cause}", - ) - } - throw e - } - } - - private fun uploadSequences(uploadId: String, sequenceStream: InputStream, batchSize: Int, organism: Organism) { - log.info { - "intermediate storing uploaded sequence data with UploadId $uploadId" - } - FastaReader(sequenceStream).asSequence().chunked(batchSize).forEach { batch -> - try { - uploadDatabaseService.batchInsertSequencesInAuxTable( - uploadId, - organism, - batch, - ) - } catch (e: ExposedSQLException) { - if (e.sqlState == UNIQUE_CONSTRAINT_VIOLATION_SQL_STATE) { - throw DuplicateKeyException( - "Sequence file contains at least one duplicate submissionId: ${e.cause?.cause}", - ) - } - throw e - } - } - } private fun getFileType(file: MultipartFile, expectedFileType: ValidExtension): CompressionAlgorithm { val originalFilename = file.originalFilename @@ -461,13 +321,13 @@ class SubmitModel( private fun validateFileExistenceAndGroupOwnership( submittedFiles: SubmissionIdFilesMap, submissionParams: SubmissionParams, - uploadId: String, + metadata: Map, ) { val usedFileIds = submittedFiles.getAllFileIds() - val fileGroups = filesDatabaseService.getGroupIds(usedFileIds) + val fileToGroupId = filesDatabaseService.getGroupIds(usedFileIds) log.debug { "Validating that all submitted file IDs exist." } - val notExistingIds = usedFileIds.subtract(fileGroups.keys) + val notExistingIds = usedFileIds.subtract(fileToGroupId.keys) if (notExistingIds.isNotEmpty()) { throw BadRequestException("The File IDs $notExistingIds do not exist.") } @@ -475,28 +335,31 @@ class SubmitModel( log.debug { "Validating that submitted files belong to the group that their associated submission belongs to." } - if (submissionParams is SubmissionParams.OriginalSubmissionParams) { - fileGroups.forEach { - if (it.value != submissionParams.groupId) { - throw BadRequestException( - "The File ${it.key} does not belong to group ${submissionParams.groupId}.", - ) - } - } - } else if (submissionParams is SubmissionParams.RevisionSubmissionParams) { - val submissionIdGroups = uploadDatabaseService.getSubmissionIdToGroupMapping(uploadId) - submittedFiles.forEach { - val submissionGroup = submissionIdGroups[it.key] - val associatedFileIds = it.value.values.flatten().map { it.fileId } - associatedFileIds.forEach { fileId -> - val fileGroup = fileGroups[fileId] - if (fileGroup != submissionGroup) { + when (submissionParams) { + is SubmissionParams.OriginalSubmissionParams -> { + fileToGroupId.forEach { + if (it.value != submissionParams.groupId) { throw BadRequestException( - "File $fileId does not belong to group $submissionGroup.", + "The File ${it.key} does not belong to group ${submissionParams.groupId}.", ) } } } + + is SubmissionParams.RevisionSubmissionParams -> { + val submissionIdToGroup = uploadDatabaseService.submissionIdToGroup(metadata) + submittedFiles.forEach { (submissionId, filesMap) -> + val submissionGroup = submissionIdToGroup[submissionId] + filesMap.values.flatten().map { it.fileId }.forEach { fileId -> + val fileGroup = fileToGroupId[fileId] + if (fileGroup != submissionGroup) { + throw BadRequestException( + "File $fileId does not belong to group $submissionGroup.", + ) + } + } + } + } } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index c42a0b524d..13a4144b0c 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -74,6 +74,11 @@ class UploadDatabaseService( val originalData: OriginalData, ) + data class SequenceInfo( + val latestVersion: Version, + val groupId: Int, + ) + fun createNewSequenceEntries( metadata: Map, sequences: Map>, @@ -149,28 +154,7 @@ class UploadDatabaseService( } } - data class SequenceInfo( - val latestVersion: Version, - val groupId: Int, - ) - - val sequenceInfo = - submissionIdToAccession.values.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> - SequenceEntriesTable.select( - accessionColumn, - versionColumn.max(), - groupIdColumn, - ) - .where { accessionColumn inList chunk } - .groupBy(accessionColumn) - .associate { - it[accessionColumn] to - SequenceInfo( - latestVersion = it[versionColumn], - groupId = it[groupIdColumn], - ) - } - }.flatMap(Map::toList).toMap() + val sequenceInfo = accessionToSequenceInfo(submissionIdToAccession.values) val now = dateProvider.getCurrentDateTime() @@ -255,4 +239,48 @@ class UploadDatabaseService( log.info { "Generated ${submissionIds.size} new accessions" } return submissionIds.zip(nextAccessions) } + + fun accessionToSequenceInfo(accessions: Collection): Map { + return accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) + { chunk -> + SequenceEntriesTable.select( + accessionColumn, + versionColumn.max(), + groupIdColumn, + ) + .where { accessionColumn inList chunk } + .groupBy(accessionColumn) + .associate { + it[accessionColumn] to + SequenceInfo( + latestVersion = it[versionColumn], + groupId = it[groupIdColumn], + ) + } + }.flatMap(Map::toList).toMap() + } + + fun submissionIdToGroup(metadata: Map): Map { + val submissionIdToAccession = metadata.map { (submissionId, entry) -> + (submissionId to entry.metadata["accession"]!!) + } + + val accessionToGroup = submissionIdToAccession.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) + { chunk -> + SequenceEntriesTable.select( + accessionColumn, + groupIdColumn, + ) + .where { accessionColumn inList chunk.map { it.second } } + .associate { + it[accessionColumn] to it[groupIdColumn] + } + }.flatMap(Map::toList).toMap() + + return submissionIdToAccession.associate { (submissionId, accession) -> + submissionId to (accessionToGroup[accession] + ?: throw IllegalStateException("Could not find groupId for accession $accession")) + } + } } + diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt index 82caf133bf..d66dc16f75 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt @@ -165,30 +165,31 @@ class GetOriginalMetadataEndpointTest( assertThat(responseAccessionVersions, `is`(expectedAccessionVersions)) } - @Test - fun `GIVEN there are sequences currently being uploaded THEN returns locked`() { - val uploadId = "upload id" - val mockUser = mockk() - every { mockUser.username }.returns("username") - - uploadDatabaseService.batchInsertMetadataInAuxTable( - uploadId = uploadId, - authenticatedUser = mockUser, - groupId = 1, - submittedOrganism = Organism("organism"), - uploadedMetadataBatch = listOf(MetadataEntry("submission id", mapOf("key" to "value"))), - uploadedAt = LocalDateTime(2024, 1, 1, 1, 1, 1), - null, - ) - - submissionControllerClient.getOriginalMetadata() - .andExpect(status().isLocked) - - uploadDatabaseService.deleteUploadData(uploadId) - - submissionControllerClient.getOriginalMetadata() - .andExpect(status().isOk) - } + // This test no longer works because we don't use the Aux table anymore +// @Test +// fun `GIVEN there are sequences currently being uploaded THEN returns locked`() { +// val uploadId = "upload id" +// val mockUser = mockk() +// every { mockUser.username }.returns("username") +// +// uploadDatabaseService.batchInsertMetadataInAuxTable( +// uploadId = uploadId, +// authenticatedUser = mockUser, +// groupId = 1, +// submittedOrganism = Organism("organism"), +// uploadedMetadataBatch = listOf(MetadataEntry("submission id", mapOf("key" to "value"))), +// uploadedAt = LocalDateTime(2024, 1, 1, 1, 1, 1), +// null, +// ) +// +// submissionControllerClient.getOriginalMetadata() +// .andExpect(status().isLocked) +// +// uploadDatabaseService.deleteUploadData(uploadId) +// +// submissionControllerClient.getOriginalMetadata() +// .andExpect(status().isOk) +// } // Regression test for https://github.com/loculus-project/loculus/issues/4036 @Test From e767ffe744a62bcb294d740004d975a302331401 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 21:04:53 +0200 Subject: [PATCH 04/11] Bye aux tables --- .../submission/MetadataUploadAuxTable.kt | 24 ------------------- .../submission/SequenceUploadAuxTable.kt | 15 ------------ 2 files changed, 39 deletions(-) delete mode 100644 backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt delete mode 100644 backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt deleted file mode 100644 index 4293131fb8..0000000000 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt +++ /dev/null @@ -1,24 +0,0 @@ -package org.loculus.backend.service.submission - -import org.jetbrains.exposed.sql.Table -import org.jetbrains.exposed.sql.kotlin.datetime.datetime -import org.loculus.backend.api.FileCategoryFilesMap -import org.loculus.backend.service.jacksonSerializableJsonb - -const val METADATA_UPLOAD_AUX_TABLE_NAME = "metadata_upload_aux_table" - -object MetadataUploadAuxTable : Table(METADATA_UPLOAD_AUX_TABLE_NAME) { - val accessionColumn = varchar("accession", 255).nullable() - val versionColumn = long("version").nullable() - val uploadIdColumn = varchar("upload_id", 255) - val organismColumn = varchar("organism", 255) - val submissionIdColumn = varchar("submission_id", 255) - val submitterColumn = varchar("submitter", 255) - val groupIdColumn = integer("group_id").nullable() - val uploadedAtColumn = datetime("uploaded_at") - val metadataColumn = - jacksonSerializableJsonb>("metadata").nullable() - val filesColumn = - jacksonSerializableJsonb("files").nullable() - override val primaryKey = PrimaryKey(uploadIdColumn, submissionIdColumn) -} diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt deleted file mode 100644 index 9f931ac91b..0000000000 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt +++ /dev/null @@ -1,15 +0,0 @@ -package org.loculus.backend.service.submission - -import org.jetbrains.exposed.sql.Table -import org.loculus.backend.service.jacksonSerializableJsonb - -const val SEQUENCE_UPLOAD_AUX_TABLE_NAME = "sequence_upload_aux_table" - -object SequenceUploadAuxTable : Table(SEQUENCE_UPLOAD_AUX_TABLE_NAME) { - val sequenceUploadIdColumn = varchar("upload_id", 255) - val sequenceSubmissionIdColumn = varchar("submission_id", 255) - val segmentNameColumn = varchar("segment_name", 255) - val compressedSequenceDataColumn = jacksonSerializableJsonb("compressed_sequence_data") - - override val primaryKey = PrimaryKey(sequenceUploadIdColumn, sequenceSubmissionIdColumn, segmentNameColumn) -} From 8b2a0dd65f6be06f558d68678b4306abd6c1fa10 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 21:10:15 +0200 Subject: [PATCH 05/11] Remove orphaned mentions of aux tables --- .../kotlin/org/loculus/backend/model/SubmitModel.kt | 11 ----------- .../service/debug/DeleteSequenceDataService.kt | 4 ---- 2 files changed, 15 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index e8bcc8b9e0..789012bf9d 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -16,8 +16,6 @@ import org.loculus.backend.service.datauseterms.DataUseTermsPreconditionValidato import org.loculus.backend.service.files.FilesDatabaseService import org.loculus.backend.service.groupmanagement.GroupManagementPreconditionValidator import org.loculus.backend.service.submission.CompressionAlgorithm -import org.loculus.backend.service.submission.MetadataUploadAuxTable -import org.loculus.backend.service.submission.SequenceUploadAuxTable import org.loculus.backend.service.submission.SubmissionIdFilesMappingPreconditionValidator import org.loculus.backend.service.submission.UploadDatabaseService import org.loculus.backend.utils.DateProvider @@ -363,15 +361,6 @@ class SubmitModel( } } - @Transactional(readOnly = true) - fun checkIfStillProcessingSubmittedData(): Boolean { - val metadataInAuxTable: Boolean = - MetadataUploadAuxTable.select(MetadataUploadAuxTable.submissionIdColumn).count() > 0 - val sequencesInAuxTable: Boolean = - SequenceUploadAuxTable.select(SequenceUploadAuxTable.sequenceSubmissionIdColumn).count() > 0 - return metadataInAuxTable || sequencesInAuxTable - } - private fun requiresConsensusSequenceFile(organism: Organism) = backendConfig.getInstanceConfig(organism) .schema .submissionDataTypes diff --git a/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt b/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt index 949a370ad5..9fedc89965 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt @@ -4,10 +4,8 @@ import org.jetbrains.exposed.sql.deleteAll import org.loculus.backend.config.BackendConfig import org.loculus.backend.service.datauseterms.DataUseTermsTable import org.loculus.backend.service.submission.CurrentProcessingPipelineTable -import org.loculus.backend.service.submission.MetadataUploadAuxTable import org.loculus.backend.service.submission.SequenceEntriesPreprocessedDataTable import org.loculus.backend.service.submission.SequenceEntriesTable -import org.loculus.backend.service.submission.SequenceUploadAuxTable import org.loculus.backend.utils.DateProvider import org.springframework.stereotype.Component import org.springframework.transaction.annotation.Transactional @@ -18,8 +16,6 @@ class DeleteSequenceDataService(private val dateProvider: DateProvider, private fun deleteAllSequenceData() { SequenceEntriesTable.deleteAll() SequenceEntriesPreprocessedDataTable.deleteAll() - MetadataUploadAuxTable.deleteAll() - SequenceUploadAuxTable.deleteAll() DataUseTermsTable.deleteAll() CurrentProcessingPipelineTable.deleteAll() CurrentProcessingPipelineTable.setV1ForOrganismsIfNotExist( From 03fe9b40d0c511f300ef49c94667c4183d03c2f3 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 21:13:32 +0200 Subject: [PATCH 06/11] ktlintformat --- .../controller/SubmissionController.kt | 2 +- .../org/loculus/backend/model/SubmitModel.kt | 20 ++++---- .../submission/UploadDatabaseService.kt | 46 ++++++++----------- 3 files changed, 28 insertions(+), 40 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt index 73d3e535e6..72eca8fa30 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt @@ -109,7 +109,7 @@ open class SubmissionController( val innerDataUseTermsType = if (backendConfig.dataUseTerms.enabled) { dataUseTermsType ?: throw BadRequestException( - "the 'dataUseTermsType' needs to be provided." + "the 'dataUseTermsType' needs to be provided.", ) } else { DataUseTermsType.OPEN diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index 789012bf9d..fa1c576488 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -32,7 +32,6 @@ import java.io.File import java.io.InputStream import java.util.UUID - const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES = "id" const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES_ALTERNATE_FOR_BACKCOMPAT = "submissionId" @@ -159,7 +158,7 @@ class SubmitModel( metadata, sequences, files, - submissionParams + submissionParams, ) } @@ -167,8 +166,8 @@ class SubmitModel( } } - private fun parseMetadataFile(submissionParams: SubmissionParams): Map { - return MaybeFile().use { metadataTempFileToDelete -> + private fun parseMetadataFile(submissionParams: SubmissionParams): Map = + MaybeFile().use { metadataTempFileToDelete -> val metadataStream = getStreamFromFile( submissionParams.metadataFile, metadataFileTypes, @@ -179,7 +178,6 @@ class SubmitModel( metadata } - } private fun parseSequenceFile(submissionParams: SubmissionParams): Map { val sequenceFile = submissionParams.sequenceFile @@ -220,7 +218,6 @@ class SubmitModel( return sequencesById } - class MaybeFile : Closeable { var file: File? = null fun delete() { @@ -261,7 +258,6 @@ class SubmitModel( ) } - private fun getFileType(file: MultipartFile, expectedFileType: ValidExtension): CompressionAlgorithm { val originalFilename = file.originalFilename ?: throw BadRequestException("${expectedFileType.displayName} file missing") @@ -277,8 +273,8 @@ class SubmitModel( .flatMap { it.value }.joinToString(", .") throw BadRequestException( "${expectedFileType.displayName} has wrong extension. Must be " + - ".${expectedFileType.validExtensions.joinToString(", .")} for uncompressed submissions or " + - ".$allowedCompressionFormats for compressed submissions", + ".${expectedFileType.validExtensions.joinToString(", .")} for uncompressed submissions or " + + ".$allowedCompressionFormats for compressed submissions", ) } @@ -292,13 +288,13 @@ class SubmitModel( if (metadataKeysNotInSequences.isNotEmpty() || sequenceKeysNotInMetadata.isNotEmpty()) { val metadataNotPresentErrorText = if (metadataKeysNotInSequences.isNotEmpty()) { "Metadata file contains ${metadataKeysNotInSequences.size} ids that are not present " + - "in the sequence file: " + metadataKeysNotInSequences.toList().joinToString(limit = 10) + "; " + "in the sequence file: " + metadataKeysNotInSequences.toList().joinToString(limit = 10) + "; " } else { "" } val sequenceNotPresentErrorText = if (sequenceKeysNotInMetadata.isNotEmpty()) { "Sequence file contains ${sequenceKeysNotInMetadata.size} ids that are not present " + - "in the metadata file: " + sequenceKeysNotInMetadata.toList().joinToString(limit = 10) + "in the metadata file: " + sequenceKeysNotInMetadata.toList().joinToString(limit = 10) } else { "" } @@ -311,7 +307,7 @@ class SubmitModel( if (filesKeysNotInMetadata.isNotEmpty()) { throw UnprocessableEntityException( "File upload contains ${filesKeysNotInMetadata.size} submissionIds that are not present in the " + - "metadata file: " + filesKeysNotInMetadata.toList().joinToString(limit = 10), + "metadata file: " + filesKeysNotInMetadata.toList().joinToString(limit = 10), ) } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index 13a4144b0c..815ec71098 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -15,17 +15,17 @@ import org.loculus.backend.model.SubmissionId import org.loculus.backend.model.SubmissionParams import org.loculus.backend.service.GenerateAccessionFromNumberService import org.loculus.backend.service.datauseterms.DataUseTermsDatabaseService -import org.loculus.backend.service.submission.SequenceEntriesTable.versionColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.versionCommentColumn import org.loculus.backend.service.submission.SequenceEntriesTable.accessionColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.organismColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.submissionIdColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.submitterColumn import org.loculus.backend.service.submission.SequenceEntriesTable.approverColumn import org.loculus.backend.service.submission.SequenceEntriesTable.groupIdColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.submittedAtTimestampColumn -import org.loculus.backend.service.submission.SequenceEntriesTable.releasedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.organismColumn import org.loculus.backend.service.submission.SequenceEntriesTable.originalDataColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.releasedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submissionIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submittedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submitterColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionCommentColumn import org.loculus.backend.utils.Accession import org.loculus.backend.utils.DatabaseConstants import org.loculus.backend.utils.DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT @@ -74,10 +74,7 @@ class UploadDatabaseService( val originalData: OriginalData, ) - data class SequenceInfo( - val latestVersion: Version, - val groupId: Int, - ) + data class SequenceInfo(val latestVersion: Version, val groupId: Int) fun createNewSequenceEntries( metadata: Map, @@ -85,7 +82,6 @@ class UploadDatabaseService( files: SubmissionIdFilesMap?, submissionParams: SubmissionParams.OriginalSubmissionParams, ): List { - log.debug { "Creating new sequence entries" } val now = dateProvider.getCurrentDateTime() @@ -127,7 +123,6 @@ class UploadDatabaseService( ) return submissionIdMapping - } fun createRevisionEntries( @@ -136,7 +131,6 @@ class UploadDatabaseService( files: SubmissionIdFilesMap?, submissionParams: SubmissionParams.RevisionSubmissionParams, ): List { - log.debug { "Creating revision entries" } val submissionIdToAccession = metadata.map { (submissionId, entry) -> @@ -240,9 +234,8 @@ class UploadDatabaseService( return submissionIds.zip(nextAccessions) } - fun accessionToSequenceInfo(accessions: Collection): Map { - return accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) - { chunk -> + fun accessionToSequenceInfo(accessions: Collection): Map = + accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> SequenceEntriesTable.select( accessionColumn, versionColumn.max(), @@ -252,21 +245,19 @@ class UploadDatabaseService( .groupBy(accessionColumn) .associate { it[accessionColumn] to - SequenceInfo( - latestVersion = it[versionColumn], - groupId = it[groupIdColumn], - ) + SequenceInfo( + latestVersion = it[versionColumn], + groupId = it[groupIdColumn], + ) } }.flatMap(Map::toList).toMap() - } fun submissionIdToGroup(metadata: Map): Map { val submissionIdToAccession = metadata.map { (submissionId, entry) -> (submissionId to entry.metadata["accession"]!!) } - val accessionToGroup = submissionIdToAccession.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) - { chunk -> + val accessionToGroup = submissionIdToAccession.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> SequenceEntriesTable.select( accessionColumn, groupIdColumn, @@ -278,9 +269,10 @@ class UploadDatabaseService( }.flatMap(Map::toList).toMap() return submissionIdToAccession.associate { (submissionId, accession) -> - submissionId to (accessionToGroup[accession] - ?: throw IllegalStateException("Could not find groupId for accession $accession")) + submissionId to ( + accessionToGroup[accession] + ?: throw IllegalStateException("Could not find groupId for accession $accession") + ) } } } - From 07d782ef47a2fa95c5f317367b3f61245a36773f Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 1 Sep 2025 21:55:18 +0200 Subject: [PATCH 07/11] use more canonical autodeleting temp file --- .../controller/SubmissionController.kt | 9 +-- .../backend/model/ReleasedDataModel.kt | 4 -- .../org/loculus/backend/model/SubmitModel.kt | 70 +++++++++---------- .../controller/EndpointTestExtension.kt | 4 -- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt index 72eca8fa30..d4200245e3 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt @@ -425,10 +425,11 @@ open class SubmissionController( @HiddenParam authenticatedUser: AuthenticatedUser, @RequestParam compression: CompressionFormat?, ): ResponseEntity { - val stillProcessing = submitModel.checkIfStillProcessingSubmittedData() - if (stillProcessing) { - return ResponseEntity.status(HttpStatus.LOCKED).build() - } + // No longer works since we've removed the aux tables + // val stillProcessing = submitModel.checkIfStillProcessingSubmittedData() + // if (stillProcessing) { + // return ResponseEntity.status(HttpStatus.LOCKED).build() + // } val headers = HttpHeaders() headers.contentType = MediaType.parseMediaType(MediaType.APPLICATION_NDJSON_VALUE) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt index f78e4c1521..8d6f3a28ad 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt @@ -22,11 +22,9 @@ import org.loculus.backend.service.files.S3Service import org.loculus.backend.service.groupmanagement.GROUPS_TABLE_NAME import org.loculus.backend.service.submission.CURRENT_PROCESSING_PIPELINE_TABLE_NAME import org.loculus.backend.service.submission.EXTERNAL_METADATA_TABLE_NAME -import org.loculus.backend.service.submission.METADATA_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.service.submission.RawProcessedData import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_TABLE_NAME -import org.loculus.backend.service.submission.SEQUENCE_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.service.submission.SubmissionDatabaseService import org.loculus.backend.service.submission.UpdateTrackerTable import org.loculus.backend.utils.Accession @@ -47,10 +45,8 @@ val RELEASED_DATA_RELATED_TABLES: List = CURRENT_PROCESSING_PIPELINE_TABLE_NAME, EXTERNAL_METADATA_TABLE_NAME, GROUPS_TABLE_NAME, - METADATA_UPLOAD_AUX_TABLE_NAME, SEQUENCE_ENTRIES_TABLE_NAME, SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME, - SEQUENCE_UPLOAD_AUX_TABLE_NAME, DATA_USE_TERMS_TABLE_NAME, ) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index fa1c576488..f53022ce55 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -24,7 +24,6 @@ import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader import org.loculus.backend.utils.metadataEntryStreamAsSequence import org.springframework.stereotype.Service -import org.springframework.transaction.annotation.Transactional import org.springframework.web.multipart.MultipartFile import java.io.BufferedInputStream import java.io.Closeable @@ -167,11 +166,12 @@ class SubmitModel( } private fun parseMetadataFile(submissionParams: SubmissionParams): Map = - MaybeFile().use { metadataTempFileToDelete -> + + AutoDeletingTempFile().use { tempFile -> val metadataStream = getStreamFromFile( submissionParams.metadataFile, metadataFileTypes, - metadataTempFileToDelete, + tempFile, ) val metadata: Map = metadataEntryStreamAsSequence(metadataStream) .associateBy { it.submissionId } @@ -195,11 +195,11 @@ class SubmitModel( ) } - return MaybeFile().use { sequenceTempFileToDelete -> + return AutoDeletingTempFile().use { tempFile -> val sequenceStream = getStreamFromFile( sequenceFile, sequenceFileTypes, - sequenceTempFileToDelete, + tempFile, ) FastaReader(sequenceStream).asSequence().associate { it.sampleName to it.sequence } } @@ -218,45 +218,43 @@ class SubmitModel( return sequencesById } - class MaybeFile : Closeable { - var file: File? = null - fun delete() { - file?.delete() + class AutoDeletingTempFile : + File( + System.getProperty("java.io.tmpdir"), + UUID.randomUUID().toString(), + ), + Closeable { + + init { + createNewFile() } override fun close() { - delete() + if (exists()) { + delete() + } } } - private fun getStreamFromFile( - file: MultipartFile, - dataType: ValidExtension, - maybeFileToDelete: MaybeFile, - ): InputStream = when (getFileType(file, dataType)) { - CompressionAlgorithm.ZIP -> { - val tempFile = File.createTempFile( - "upload_" + dataType.displayName.replace(" ", ""), - UUID.randomUUID().toString(), - ) - maybeFileToDelete.file = tempFile - - file.transferTo(tempFile) - val zipFile = ZipFile.builder() - .setFile(tempFile) - .setUseUnicodeExtraFields(true) - .get() - BufferedInputStream(zipFile.getInputStream(zipFile.entries.nextElement())) - } + private fun getStreamFromFile(file: MultipartFile, dataType: ValidExtension, tempFile: File): InputStream = + when (getFileType(file, dataType)) { + CompressionAlgorithm.ZIP -> { + file.transferTo(tempFile) + val zipFile = ZipFile.builder() + .setFile(tempFile) + .setUseUnicodeExtraFields(true) + .get() + BufferedInputStream(zipFile.getInputStream(zipFile.entries.nextElement())) + } - CompressionAlgorithm.NONE -> - BufferedInputStream(file.inputStream) + CompressionAlgorithm.NONE -> + BufferedInputStream(file.inputStream) - else -> - CompressorStreamFactory().createCompressorInputStream( - BufferedInputStream(file.inputStream), - ) - } + else -> + CompressorStreamFactory().createCompressorInputStream( + BufferedInputStream(file.inputStream), + ) + } private fun getFileType(file: MultipartFile, expectedFileType: ValidExtension): CompressionAlgorithm { val originalFilename = file.originalFilename diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt b/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt index 660e380047..a707129bbf 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt @@ -26,10 +26,8 @@ import org.loculus.backend.service.files.FILES_TABLE_NAME import org.loculus.backend.service.groupmanagement.GROUPS_TABLE_NAME import org.loculus.backend.service.groupmanagement.USER_GROUPS_TABLE_NAME import org.loculus.backend.service.submission.CURRENT_PROCESSING_PIPELINE_TABLE_NAME -import org.loculus.backend.service.submission.METADATA_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_TABLE_NAME -import org.loculus.backend.service.submission.SEQUENCE_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.testutil.TestEnvironment import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc import org.springframework.boot.test.context.SpringBootTest @@ -230,8 +228,6 @@ private fun clearDatabaseStatement(): String = """ $SEQUENCE_ENTRIES_TABLE_NAME, $SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME, $USER_GROUPS_TABLE_NAME, - $METADATA_UPLOAD_AUX_TABLE_NAME, - $SEQUENCE_UPLOAD_AUX_TABLE_NAME, $DATA_USE_TERMS_TABLE_NAME, $CURRENT_PROCESSING_PIPELINE_TABLE_NAME, $FILES_TABLE_NAME, From 54d9ba20810e8050679b363b92490b93dd81c43c Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Tue, 2 Sep 2025 16:33:54 +0200 Subject: [PATCH 08/11] Fix exposed statement --- .../service/submission/UploadDatabaseService.kt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index 815ec71098..2fa0b7f7ac 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -236,18 +236,21 @@ class UploadDatabaseService( fun accessionToSequenceInfo(accessions: Collection): Map = accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> + val maxVersion = versionColumn.max() + val maxGroupId = groupIdColumn.max() // All group IDs should be same, so max works + SequenceEntriesTable.select( accessionColumn, - versionColumn.max(), - groupIdColumn, + maxVersion, + maxGroupId, ) .where { accessionColumn inList chunk } .groupBy(accessionColumn) .associate { it[accessionColumn] to SequenceInfo( - latestVersion = it[versionColumn], - groupId = it[groupIdColumn], + latestVersion = it[maxVersion]!!, + groupId = it[maxGroupId]!!, ) } }.flatMap(Map::toList).toMap() From 724fb50dd8f7d458ea1d261fa37c5e511143f059 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Tue, 2 Sep 2025 17:34:35 +0200 Subject: [PATCH 09/11] Properly validate metadata and sequences files after parsing, adjust error messages to make tests pass with new more informative error messages --- .../org/loculus/backend/model/SubmitModel.kt | 28 +++++++++++-- .../submission/UploadDatabaseService.kt | 39 +++++++++++++++---- .../submission/ReviseEndpointTest.kt | 23 ++++++----- .../submission/SubmitEndpointTest.kt | 4 +- .../controller/submission/SubmitFiles.kt | 2 +- 5 files changed, 73 insertions(+), 23 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index f53022ce55..6c445e7d65 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -173,10 +173,20 @@ class SubmitModel( metadataFileTypes, tempFile, ) - val metadata: Map = metadataEntryStreamAsSequence(metadataStream) - .associateBy { it.submissionId } - metadata + val metadataEntries = metadataEntryStreamAsSequence(metadataStream).toList() + + val duplicateIds = metadataEntries.groupBy { it.submissionId } + .filter { it.value.size > 1 } + .keys + if (duplicateIds.isNotEmpty()) { + throw UnprocessableEntityException( + "Metadata file contains duplicated submissionIds: " + + duplicateIds.joinToString(", ", transform = { "`$it`" }), + ) + } + + metadataEntries.associateBy { it.submissionId } } private fun parseSequenceFile(submissionParams: SubmissionParams): Map { @@ -201,7 +211,17 @@ class SubmitModel( sequenceFileTypes, tempFile, ) - FastaReader(sequenceStream).asSequence().associate { it.sampleName to it.sequence } + val sequencesList = FastaReader(sequenceStream).asSequence().toList() + + val duplicateHeaders = sequencesList.groupBy { it.sampleName }.filter { it.value.size > 1 }.keys + if (duplicateHeaders.isNotEmpty()) { + throw UnprocessableEntityException( + "Sequence file contains duplicated FASTA ids: " + + duplicateHeaders.joinToString(", ", transform = { "`$it`" }), + ) + } + + sequencesList.associate { it.sampleName to it.sequence } } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt index 2fa0b7f7ac..fcebee3942 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt @@ -9,6 +9,7 @@ import org.loculus.backend.api.OriginalData import org.loculus.backend.api.Status import org.loculus.backend.api.SubmissionIdFilesMap import org.loculus.backend.api.SubmissionIdMapping +import org.loculus.backend.controller.UnprocessableEntityException import org.loculus.backend.log.AuditLogger import org.loculus.backend.model.SegmentName import org.loculus.backend.model.SubmissionId @@ -133,11 +134,7 @@ class UploadDatabaseService( ): List { log.debug { "Creating revision entries" } - val submissionIdToAccession = metadata.map { (submissionId, entry) -> - val accession = entry.metadata["accession"] - ?: throw IllegalStateException("Metadata for submissionId $submissionId does not contain an accession") - Pair(submissionId, accession) - }.toMap() + val submissionIdToAccession = submissionIdToAccession(metadata) submissionIdToAccession.values.processInDatabaseSafeChunks { chunk -> accessionPreconditionValidator.validate { @@ -238,7 +235,7 @@ class UploadDatabaseService( accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> val maxVersion = versionColumn.max() val maxGroupId = groupIdColumn.max() // All group IDs should be same, so max works - + SequenceEntriesTable.select( accessionColumn, maxVersion, @@ -255,11 +252,39 @@ class UploadDatabaseService( } }.flatMap(Map::toList).toMap() - fun submissionIdToGroup(metadata: Map): Map { + private fun submissionIdToAccession(metadata: Map): Map { + if (metadata.values.any { !it.metadata.containsKey("accession") }) { + throw UnprocessableEntityException("Metadata file is missing required column 'accession'") + } + val submissionIdToAccession = metadata.map { (submissionId, entry) -> (submissionId to entry.metadata["accession"]!!) } + val submissionIdsWithoutAccessions = submissionIdToAccession.filter { it.second.isBlank() } + if (submissionIdsWithoutAccessions.isNotEmpty()) { + throw UnprocessableEntityException( + "The rows with the following submissionIds are missing accessions in metadata file: " + + formatListWithBackticks(submissionIdsWithoutAccessions.map { it.first }), + ) + } + + val duplicateAccessions = submissionIdToAccession.groupBy { it.second } + .filter { it.value.size > 1 } + if (duplicateAccessions.isNotEmpty()) { + throw UnprocessableEntityException( + "Some accessions appear multiple times in metadata file: " + + formatListWithBackticks(duplicateAccessions.keys), + ) + } + + return submissionIdToAccession.toMap() + } + + private fun formatListWithBackticks(list: Collection): String = list.joinToString(", ") { "`$it`" } + + fun submissionIdToGroup(metadata: Map): Map { + val submissionIdToAccession = submissionIdToAccession(metadata).toList() val accessionToGroup = submissionIdToAccession.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> SequenceEntriesTable.select( accessionColumn, diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt index 3b334c27db..44e931d728 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt @@ -141,8 +141,8 @@ class ReviseEndpointTest( content = """ accession submissionId firstColumn - 123 someHeader_main someValue - ${accessions.first()} someHeader2_main someOtherValue + 123 someHeader someValue + ${accessions.first()} someHeader2 someOtherValue """.trimIndent(), ), SubmitFiles.sequenceFileWith(), @@ -520,25 +520,30 @@ class ReviseEndpointTest( 2 sameHeader someValue2 """.trimIndent(), ), - SubmitFiles.sequenceFileWith(), + SubmitFiles.sequenceFileWith( + content = """ + >sameHeader + AC + """.trimIndent(), + ), status().isUnprocessableEntity, "Unprocessable Entity", - "Metadata file contains at least one duplicate submissionId", + "Metadata file contains duplicated submissionIds: `sameHeader`", ), Arguments.of( "duplicate headers in sequence file", SubmitFiles.revisedMetadataFileWith(), SubmitFiles.sequenceFileWith( content = """ - >sameHeader_main + >sameHeader AC - >sameHeader_main + >sameHeader AC """.trimIndent(), ), status().isUnprocessableEntity, "Unprocessable Entity", - "Sequence file contains at least one duplicate submissionId", + "Sequence file contains duplicated FASTA ids: `sameHeader`", ), Arguments.of( "metadata file misses headers", @@ -591,7 +596,7 @@ class ReviseEndpointTest( SubmitFiles.sequenceFileWith(), status().isUnprocessableEntity, "Unprocessable Entity", - "The revised metadata file does not contain the header 'accession'", + "Metadata file is missing required column 'accession'", ), Arguments.of( "metadata file with one row with missing accession", @@ -605,7 +610,7 @@ class ReviseEndpointTest( SubmitFiles.sequenceFileWith(), status().isUnprocessableEntity, "Unprocessable Entity", - "A row in metadata file contains no accession", + "The rows with the following submissionIds are missing accessions in metadata file: `someHeader`", ), ) } diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt index a533c503e5..43550d48e8 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt @@ -472,7 +472,7 @@ class SubmitEndpointTest( DefaultFiles.sequencesFile, status().isUnprocessableEntity, "Unprocessable Entity", - "Metadata file contains at least one duplicate submissionId", + "Metadata file contains duplicated submissionIds: `sameHeader`", DEFAULT_ORGANISM, DataUseTerms.Open, ), @@ -489,7 +489,7 @@ class SubmitEndpointTest( ), status().isUnprocessableEntity, "Unprocessable Entity", - "Sequence file contains at least one duplicate submissionId", + "Sequence file contains duplicated FASTA ids: `sameHeader_main`", DEFAULT_ORGANISM, DataUseTerms.Open, ), diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt index 614cdd6a0f..9c7f9f34a1 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt @@ -112,7 +112,7 @@ object SubmitFiles { name: String = "sequenceFile", originalFilename: String = "sequences.fasta", mediaType: String = TEXT_PLAIN_VALUE, - content: String = ">someHeader_main\nAC\n>someHeader2_main\nAC", + content: String = ">someHeader\nAC\n>someHeader2\nAC", compression: CompressionAlgorithm = CompressionAlgorithm.NONE, ): MockMultipartFile { val contentStream = compressString(content, compression) From c96e9d657dee01dc4835a09246f5cbfd41651de8 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Tue, 2 Sep 2025 21:10:45 +0200 Subject: [PATCH 10/11] Make max metadata and sequence file sizes configurable, add tests --- .../backend/config/BackendSpringConfig.kt | 15 ++ .../org/loculus/backend/config/S3Config.kt | 6 + .../controller/SubmissionController.kt | 21 +++ .../SubmissionControllerDescriptions.kt | 4 + .../org/loculus/backend/model/SubmitModel.kt | 88 +++++++---- .../src/main/resources/application.properties | 4 + .../controller/ExceptionHandlerTest.kt | 2 +- .../submission/FileSizeLimitsTest.kt | 144 ++++++++++++++++++ 8 files changed, 253 insertions(+), 31 deletions(-) create mode 100644 backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt diff --git a/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt b/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt index 5cf9861bf3..ead8956776 100644 --- a/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt +++ b/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt @@ -36,6 +36,10 @@ object BackendSpringProperty { const val DEBUG_MODE = "loculus.debug-mode" const val ENABLE_SEQSETS = "loculus.enable-seqsets" + const val MAX_METADATA_FILE_SIZE = "loculus.submission.max-metadata-file-size-bytes" + const val MAX_SEQUENCE_FILE_SIZE = "loculus.submission.max-sequence-file-size-bytes" + const val MAX_UNCOMPRESSED_SEQUENCE_SIZE = "loculus.submission.max-uncompressed-sequence-size-bytes" + const val S3_ENABLED = "loculus.s3.enabled" const val S3_BUCKET_ENDPOINT = "loculus.s3.bucket.endpoint" const val S3_BUCKET_INTERNAL_ENDPOINT = "loculus.s3.bucket.internal-endpoint" @@ -109,6 +113,17 @@ class BackendSpringConfig { @Bean fun openApi(backendConfig: BackendConfig) = buildOpenApiSchema(backendConfig) + @Bean + fun fileSizeConfig( + @Value("\${${BackendSpringProperty.MAX_METADATA_FILE_SIZE}}") maxMetadataFileSize: Long, + @Value("\${${BackendSpringProperty.MAX_SEQUENCE_FILE_SIZE}}") maxSequenceFileSize: Long, + @Value("\${${BackendSpringProperty.MAX_UNCOMPRESSED_SEQUENCE_SIZE}}") maxUncompressedSequenceSize: Long, + ): FileSizeConfig = FileSizeConfig( + maxMetadataFileSize = maxMetadataFileSize, + maxSequenceFileSize = maxSequenceFileSize, + maxUncompressedSequenceSize = maxUncompressedSequenceSize, + ) + @Bean fun s3Config( @Value("\${${BackendSpringProperty.S3_ENABLED}}") enabled: Boolean = false, diff --git a/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt b/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt index 6b11c4d751..48fdbf4e33 100644 --- a/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt +++ b/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt @@ -10,3 +10,9 @@ data class S3BucketConfig( val accessKey: String, val secretKey: String, ) + +data class FileSizeConfig( + val maxMetadataFileSize: Long, + val maxSequenceFileSize: Long, + val maxUncompressedSequenceSize: Long, +) diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt index d4200245e3..9009e01a7e 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt @@ -37,6 +37,7 @@ import org.loculus.backend.api.UnprocessedData import org.loculus.backend.auth.AuthenticatedUser import org.loculus.backend.auth.HiddenParam import org.loculus.backend.config.BackendConfig +import org.loculus.backend.config.FileSizeConfig import org.loculus.backend.controller.LoculusCustomHeaders.X_TOTAL_RECORDS import org.loculus.backend.log.REQUEST_ID_MDC_KEY import org.loculus.backend.log.RequestIdContext @@ -52,6 +53,7 @@ import org.springframework.http.HttpHeaders import org.springframework.http.HttpStatus import org.springframework.http.MediaType import org.springframework.http.ResponseEntity +import org.springframework.util.unit.DataSize import org.springframework.validation.annotation.Validated import org.springframework.web.bind.annotation.DeleteMapping import org.springframework.web.bind.annotation.GetMapping @@ -65,6 +67,8 @@ import org.springframework.web.bind.annotation.RequestPart import org.springframework.web.bind.annotation.ResponseStatus import org.springframework.web.bind.annotation.RestController import org.springframework.web.multipart.MultipartFile +import org.springframework.web.server.PayloadTooLargeException +import org.springframework.web.server.ResponseStatusException import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody import java.util.UUID import io.swagger.v3.oas.annotations.parameters.RequestBody as SwaggerRequestBody @@ -82,11 +86,13 @@ open class SubmissionController( private val iteratorStreamer: IteratorStreamer, private val requestIdContext: RequestIdContext, private val backendConfig: BackendConfig, + private val fileSizeConfig: FileSizeConfig, private val objectMapper: ObjectMapper, ) { @Operation(description = SUBMIT_DESCRIPTION) @ApiResponse(responseCode = "200", description = SUBMIT_RESPONSE_DESCRIPTION) @ApiResponse(responseCode = "400", description = SUBMIT_ERROR_RESPONSE) + @ApiResponse(responseCode = "413", description = PAYLOAD_TOO_LARGE_ERROR_RESPONSE) @PostMapping("/submit", consumes = ["multipart/form-data"]) fun submit( @PathVariable @Valid organism: Organism, @@ -106,6 +112,8 @@ open class SubmissionController( ) @RequestParam restrictedUntil: String?, @Parameter(description = FILE_MAPPING_DESCRIPTION) @RequestPart(required = false) fileMapping: String?, ): List { + metadataFile.requireUnder(fileSizeConfig.maxMetadataFileSize, "metadata") + sequenceFile?.requireUnder(fileSizeConfig.maxSequenceFileSize, "sequence") val innerDataUseTermsType = if (backendConfig.dataUseTerms.enabled) { dataUseTermsType ?: throw BadRequestException( @@ -130,6 +138,7 @@ open class SubmissionController( @Operation(description = REVISE_DESCRIPTION) @ApiResponse(responseCode = "200", description = REVISE_RESPONSE_DESCRIPTION) + @ApiResponse(responseCode = "413", description = PAYLOAD_TOO_LARGE_ERROR_RESPONSE) @PostMapping("/revise", consumes = ["multipart/form-data"]) fun revise( @PathVariable @Valid organism: Organism, @@ -138,6 +147,8 @@ open class SubmissionController( @Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile?, @Parameter(description = FILE_MAPPING_DESCRIPTION) @RequestPart(required = false) fileMapping: String?, ): List { + metadataFile.requireUnder(fileSizeConfig.maxMetadataFileSize, "metadata") + sequenceFile?.requireUnder(fileSizeConfig.maxSequenceFileSize, "sequence") val fileMappingParsed = parseFileMapping(fileMapping, organism) val params = SubmissionParams.RevisionSubmissionParams( organism, @@ -545,3 +556,13 @@ open class SubmissionController( return fileMappingParsed } } + +private fun MultipartFile.requireUnder(limitBytes: Long, name: String) { + if (size > limitBytes) { + val maxHuman = DataSize.ofBytes(limitBytes).toString() + throw ResponseStatusException( + HttpStatus.PAYLOAD_TOO_LARGE, + "$name file is too large. Max $maxHuman.", + ) + } +} diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt index 36c01350ae..bd7883aa51 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt @@ -14,6 +14,10 @@ const val SUBMIT_ERROR_RESPONSE = """ The data use terms type have not been provided, even though they are enabled for this Loculus instance. """ +const val PAYLOAD_TOO_LARGE_ERROR_RESPONSE = """ +Payload too large. File size exceeds configured limits. +""" + const val METADATA_FILE_DESCRIPTION = """ A TSV (tab separated values) file containing the metadata of the submitted sequence entries. The file may be compressed with zstd, xz, zip, gzip, lzma, bzip2 (with common extensions). diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index 6c445e7d65..49f52695fe 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -10,6 +10,7 @@ import org.loculus.backend.api.SubmissionIdMapping import org.loculus.backend.api.getAllFileIds import org.loculus.backend.auth.AuthenticatedUser import org.loculus.backend.config.BackendConfig +import org.loculus.backend.config.FileSizeConfig import org.loculus.backend.controller.BadRequestException import org.loculus.backend.controller.UnprocessableEntityException import org.loculus.backend.service.datauseterms.DataUseTermsPreconditionValidator @@ -19,12 +20,16 @@ import org.loculus.backend.service.submission.CompressionAlgorithm import org.loculus.backend.service.submission.SubmissionIdFilesMappingPreconditionValidator import org.loculus.backend.service.submission.UploadDatabaseService import org.loculus.backend.utils.DateProvider +import org.loculus.backend.utils.FastaEntry import org.loculus.backend.utils.FastaReader import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader import org.loculus.backend.utils.metadataEntryStreamAsSequence +import org.springframework.http.HttpStatus import org.springframework.stereotype.Service +import org.springframework.util.unit.DataSize import org.springframework.web.multipart.MultipartFile +import org.springframework.web.server.ResponseStatusException import java.io.BufferedInputStream import java.io.Closeable import java.io.File @@ -87,6 +92,7 @@ class SubmitModel( private val submissionIdFilesMappingPreconditionValidator: SubmissionIdFilesMappingPreconditionValidator, private val dateProvider: DateProvider, private val backendConfig: BackendConfig, + private val fileSizeConfig: FileSizeConfig, private val parseFastaHeader: ParseFastaHeader, ) { @@ -104,11 +110,7 @@ class SubmitModel( } } - fun processSubmissions( - uploadId: String, - submissionParams: SubmissionParams, - batchSize: Int = 1000, - ): List { + fun processSubmissions(uploadId: String, submissionParams: SubmissionParams): List { log.info { "Processing submission (type: ${submissionParams.uploadType.name}) with uploadId $uploadId" } @@ -211,18 +213,44 @@ class SubmitModel( sequenceFileTypes, tempFile, ) - val sequencesList = FastaReader(sequenceStream).asSequence().toList() - val duplicateHeaders = sequencesList.groupBy { it.sampleName }.filter { it.value.size > 1 }.keys - if (duplicateHeaders.isNotEmpty()) { - throw UnprocessableEntityException( - "Sequence file contains duplicated FASTA ids: " + - duplicateHeaders.joinToString(", ", transform = { "`$it`" }), + parseSequencesWithSizeLimit(sequenceStream) + } + } + + private fun parseSequencesWithSizeLimit(sequenceStream: InputStream): Map { + val sequences = mutableMapOf() + val duplicateHeaders = mutableSetOf() + var totalSize = 0L + + FastaReader(sequenceStream).asSequence().forEach { fastaEntry -> + val entrySize = fastaEntry.sampleName.length + fastaEntry.sequence.length + totalSize += entrySize + + if (totalSize > fileSizeConfig.maxUncompressedSequenceSize) { + val maxHuman = DataSize.ofBytes(fileSizeConfig.maxUncompressedSequenceSize).toString() + throw ResponseStatusException( + HttpStatus.PAYLOAD_TOO_LARGE, + "Uncompressed sequence data exceeds maximum allowed size. Max $maxHuman. " + + "Consider splitting your submission into smaller batches.", ) } - sequencesList.associate { it.sampleName to it.sequence } + if (sequences.containsKey(fastaEntry.sampleName)) { + duplicateHeaders.add(fastaEntry.sampleName) + } + + sequences[fastaEntry.sampleName] = fastaEntry.sequence + } + + if (duplicateHeaders.isNotEmpty()) { + throw UnprocessableEntityException( + "Sequence file contains duplicated FASTA ids: " + + duplicateHeaders.joinToString(", ") { "`$it`" }, + ) } + + return sequences } private fun groupSequencesById( @@ -238,24 +266,6 @@ class SubmitModel( return sequencesById } - class AutoDeletingTempFile : - File( - System.getProperty("java.io.tmpdir"), - UUID.randomUUID().toString(), - ), - Closeable { - - init { - createNewFile() - } - - override fun close() { - if (exists()) { - delete() - } - } - } - private fun getStreamFromFile(file: MultipartFile, dataType: ValidExtension, tempFile: File): InputStream = when (getFileType(file, dataType)) { CompressionAlgorithm.ZIP -> { @@ -379,4 +389,22 @@ class SubmitModel( .schema .submissionDataTypes .consensusSequences + + private class AutoDeletingTempFile : + File( + System.getProperty("java.io.tmpdir"), + UUID.randomUUID().toString(), + ), + Closeable { + + init { + createNewFile() + } + + override fun close() { + if (exists()) { + delete() + } + } + } } diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties index 57113c07e4..530e2215e6 100644 --- a/backend/src/main/resources/application.properties +++ b/backend/src/main/resources/application.properties @@ -26,6 +26,10 @@ loculus.cleanup.task.run-every-seconds=60 loculus.stream.batch-size=1000 loculus.debug-mode=false +loculus.submission.max-metadata-file-size-bytes=52428800 +loculus.submission.max-sequence-file-size-bytes=209715200 +loculus.submission.max-uncompressed-sequence-size-bytes=524288000 + loculus.s3.enabled=false loculus.s3.bucket.endpoint= loculus.s3.bucket.bucket= diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt index d8b9a519e8..2170ae176c 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt @@ -121,7 +121,7 @@ class ExceptionHandlerWithMockedModelTest(@Autowired val mockMvc: MockMvc) { @Test fun `WHEN I submit a request with invalid organism THEN it should return a descriptive error message`() { - every { submitModel.processSubmissions(any(), any(), any()) } returns validResponse + every { submitModel.processSubmissions(any(), any()) } returns validResponse mockMvc.perform( multipart("/unknownOrganism/submit") diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt new file mode 100644 index 0000000000..15e4c274b0 --- /dev/null +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt @@ -0,0 +1,144 @@ +package org.loculus.backend.controller.submission + +import org.hamcrest.Matchers.containsString +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.loculus.backend.controller.EndpointTest +import org.loculus.backend.controller.groupmanagement.GroupManagementControllerClient +import org.loculus.backend.controller.groupmanagement.andGetGroupId +import org.loculus.backend.service.submission.CompressionAlgorithm +import org.springframework.beans.factory.annotation.Autowired +import org.springframework.http.MediaType.APPLICATION_PROBLEM_JSON +import org.springframework.test.context.TestPropertySource +import org.springframework.test.web.servlet.result.MockMvcResultMatchers.content +import org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath +import org.springframework.test.web.servlet.result.MockMvcResultMatchers.status + +@EndpointTest +@TestPropertySource( + properties = [ + "loculus.submission.max-metadata-file-size-bytes=100", + "loculus.submission.max-sequence-file-size-bytes=200", + "loculus.submission.max-uncompressed-sequence-size-bytes=300", + ], +) +class FileSizeLimitsTest( + @Autowired val submissionControllerClient: SubmissionControllerClient, + @Autowired val groupManagementClient: GroupManagementControllerClient, +) { + var groupId: Int = 0 + + @BeforeEach + fun prepareNewGroup() { + groupId = groupManagementClient.createNewGroup().andGetGroupId() + } + + @Test + fun `WHEN metadata file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeMetadataContent = "submissionId\tfirstColumn\n" + "x".repeat(200) // Exceeds 100 byte limit + val largeMetadataFile = SubmitFiles.metadataFileWith(content = largeMetadataContent) + + submissionControllerClient.submit( + metadataFile = largeMetadataFile, + sequencesFile = SubmitFiles.DefaultFiles.sequencesFile, + groupId = groupId, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("metadata file is too large. Max 100B"))) + } + + @Test + fun `WHEN sequence file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeSequenceContent = ">custom0\n" + "A".repeat(300) // Exceeds 200 byte limit + val largeSequenceFile = SubmitFiles.sequenceFileWith(content = largeSequenceContent) + + // Use small metadata file to avoid hitting metadata limit first + val smallMetadata = "submissionId\tfirstColumn\ncustom0\tval" // Under 100 bytes + val smallMetadataFile = SubmitFiles.metadataFileWith(content = smallMetadata) + + submissionControllerClient.submit( + metadataFile = smallMetadataFile, + sequencesFile = largeSequenceFile, + groupId = groupId, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("sequence file is too large. Max 200B"))) + } + + @Test + fun `WHEN uncompressed sequence data exceeds configured limit THEN returns PayloadTooLarge error`() { + // Create small compressed file that expands beyond 300 bytes when parsed + // ~308 bytes total when parsed + val largeUncompressedSequences = + ">c0\n" + "A".repeat(100) + "\n>c1\n" + "C".repeat(100) + "\n>c2\n" + "G".repeat(100) + + // Use compression to make the actual file size smaller than our limits + val compressedSequenceFile = SubmitFiles.sequenceFileWith( + content = largeUncompressedSequences, + compression = CompressionAlgorithm.GZIP, + ) + + // Matching metadata + val smallMetadata = "submissionId\tfirstColumn\nc0\tv0\nc1\tv1\nc2\tv2" // Under 100 bytes + val smallMetadataFile = SubmitFiles.metadataFileWith(content = smallMetadata) + + submissionControllerClient.submit( + metadataFile = smallMetadataFile, + sequencesFile = compressedSequenceFile, + groupId = groupId, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("Uncompressed sequence data exceeds maximum allowed size"))) + } + + @Test + fun `WHEN files are within configured limits THEN submission succeeds`() { + val smallMetadata = "submissionId\tfirstColumn\ncustom0\tvalue" // Under 100 bytes + val smallSequence = ">custom0\nACGT" // Under 200 bytes and under 300 bytes uncompressed + + val metadataFile = SubmitFiles.metadataFileWith(content = smallMetadata) + val sequenceFile = SubmitFiles.sequenceFileWith(content = smallSequence) + + submissionControllerClient.submit( + metadataFile = metadataFile, + sequencesFile = sequenceFile, + groupId = groupId, + ) + .andExpect(status().isOk) + .andExpect(jsonPath("$[0].submissionId").value("custom0")) + } + + @Test + fun `WHEN revise metadata file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeMetadataContent = "accession\tsubmissionId\tfirstColumn\n" + "acc1\tcustom0\t" + "x".repeat(200) + val largeMetadataFile = SubmitFiles.revisedMetadataFileWith(content = largeMetadataContent) + + submissionControllerClient.reviseSequenceEntries( + metadataFile = largeMetadataFile, + sequencesFile = SubmitFiles.DefaultFiles.sequencesFile, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("metadata file is too large. Max 100B"))) + } + + @Test + fun `WHEN revise sequence file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeSequenceContent = ">custom0\n" + "A".repeat(300) + val largeSequenceFile = SubmitFiles.sequenceFileWith(content = largeSequenceContent) + + val smallMetadata = "accession\tsubmissionId\tfirstColumn\nacc1\tcustom0\tval" + val smallMetadataFile = SubmitFiles.revisedMetadataFileWith(content = smallMetadata) + + submissionControllerClient.reviseSequenceEntries( + metadataFile = smallMetadataFile, + sequencesFile = largeSequenceFile, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("sequence file is too large. Max 200B"))) + } +} From caa670596e17d746d854a3c1a8fb1d5dbeddf8fc Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Tue, 2 Sep 2025 21:21:51 +0200 Subject: [PATCH 11/11] Format max sizes in more human friendly way consistently --- .../loculus/backend/controller/SubmissionController.kt | 3 ++- .../main/kotlin/org/loculus/backend/model/SubmitModel.kt | 4 ++-- .../main/kotlin/org/loculus/backend/utils/FormatUtils.kt | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt index 9009e01a7e..f8b82fc4aa 100644 --- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt +++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt @@ -48,6 +48,7 @@ import org.loculus.backend.model.SubmitModel import org.loculus.backend.service.submission.SubmissionDatabaseService import org.loculus.backend.utils.Accession import org.loculus.backend.utils.IteratorStreamer +import org.loculus.backend.utils.formatBytesHuman import org.slf4j.MDC import org.springframework.http.HttpHeaders import org.springframework.http.HttpStatus @@ -559,7 +560,7 @@ open class SubmissionController( private fun MultipartFile.requireUnder(limitBytes: Long, name: String) { if (size > limitBytes) { - val maxHuman = DataSize.ofBytes(limitBytes).toString() + val maxHuman = formatBytesHuman(limitBytes) throw ResponseStatusException( HttpStatus.PAYLOAD_TOO_LARGE, "$name file is too large. Max $maxHuman.", diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt index 49f52695fe..1691ebde8b 100644 --- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt +++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt @@ -24,10 +24,10 @@ import org.loculus.backend.utils.FastaEntry import org.loculus.backend.utils.FastaReader import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader +import org.loculus.backend.utils.formatBytesHuman import org.loculus.backend.utils.metadataEntryStreamAsSequence import org.springframework.http.HttpStatus import org.springframework.stereotype.Service -import org.springframework.util.unit.DataSize import org.springframework.web.multipart.MultipartFile import org.springframework.web.server.ResponseStatusException import java.io.BufferedInputStream @@ -228,7 +228,7 @@ class SubmitModel( totalSize += entrySize if (totalSize > fileSizeConfig.maxUncompressedSequenceSize) { - val maxHuman = DataSize.ofBytes(fileSizeConfig.maxUncompressedSequenceSize).toString() + val maxHuman = formatBytesHuman(fileSizeConfig.maxUncompressedSequenceSize) throw ResponseStatusException( HttpStatus.PAYLOAD_TOO_LARGE, "Uncompressed sequence data exceeds maximum allowed size. Max $maxHuman. " + diff --git a/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt b/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt new file mode 100644 index 0000000000..9eb452feed --- /dev/null +++ b/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt @@ -0,0 +1,8 @@ +package org.loculus.backend.utils + +fun formatBytesHuman(bytes: Long): String = when { + bytes >= 1024L * 1024L * 1024L -> "${bytes / (1024L * 1024L * 1024L)}GB" + bytes >= 1024L * 1024L -> "${bytes / (1024L * 1024L)}MB" + bytes >= 1024L -> "${bytes / 1024L}KB" + else -> "${bytes}B" +}