diff --git a/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt b/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt
index 5cf9861bf3..ead8956776 100644
--- a/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/config/BackendSpringConfig.kt
@@ -36,6 +36,10 @@ object BackendSpringProperty {
     const val DEBUG_MODE = "loculus.debug-mode"
     const val ENABLE_SEQSETS = "loculus.enable-seqsets"
 
+    const val MAX_METADATA_FILE_SIZE = "loculus.submission.max-metadata-file-size-bytes"
+    const val MAX_SEQUENCE_FILE_SIZE = "loculus.submission.max-sequence-file-size-bytes"
+    const val MAX_UNCOMPRESSED_SEQUENCE_SIZE = "loculus.submission.max-uncompressed-sequence-size-bytes"
+
     const val S3_ENABLED = "loculus.s3.enabled"
     const val S3_BUCKET_ENDPOINT = "loculus.s3.bucket.endpoint"
     const val S3_BUCKET_INTERNAL_ENDPOINT = "loculus.s3.bucket.internal-endpoint"
@@ -109,6 +113,17 @@ class BackendSpringConfig {
     @Bean
     fun openApi(backendConfig: BackendConfig) = buildOpenApiSchema(backendConfig)
 
+    @Bean
+    fun fileSizeConfig(
+        @Value("\${${BackendSpringProperty.MAX_METADATA_FILE_SIZE}}") maxMetadataFileSize: Long,
+        @Value("\${${BackendSpringProperty.MAX_SEQUENCE_FILE_SIZE}}") maxSequenceFileSize: Long,
+        @Value("\${${BackendSpringProperty.MAX_UNCOMPRESSED_SEQUENCE_SIZE}}") maxUncompressedSequenceSize: Long,
+    ): FileSizeConfig = FileSizeConfig(
+        maxMetadataFileSize = maxMetadataFileSize,
+        maxSequenceFileSize = maxSequenceFileSize,
+        maxUncompressedSequenceSize = maxUncompressedSequenceSize,
+    )
+
     @Bean
     fun s3Config(
         @Value("\${${BackendSpringProperty.S3_ENABLED}}") enabled: Boolean = false,
diff --git a/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt b/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt
index 6b11c4d751..48fdbf4e33 100644
--- a/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/config/S3Config.kt
@@ -10,3 +10,9 @@ data class S3BucketConfig(
     val accessKey: String,
     val secretKey: String,
 )
+
+data class FileSizeConfig(
+    val maxMetadataFileSize: Long,
+    val maxSequenceFileSize: Long,
+    val maxUncompressedSequenceSize: Long,
+)
diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt
index 80e27fc556..f8b82fc4aa 100644
--- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt
@@ -37,6 +37,7 @@ import org.loculus.backend.api.UnprocessedData
 import org.loculus.backend.auth.AuthenticatedUser
 import org.loculus.backend.auth.HiddenParam
 import org.loculus.backend.config.BackendConfig
+import org.loculus.backend.config.FileSizeConfig
 import org.loculus.backend.controller.LoculusCustomHeaders.X_TOTAL_RECORDS
 import org.loculus.backend.log.REQUEST_ID_MDC_KEY
 import org.loculus.backend.log.RequestIdContext
@@ -47,11 +48,12 @@ import org.loculus.backend.model.SubmitModel
 import org.loculus.backend.service.submission.SubmissionDatabaseService
 import org.loculus.backend.utils.Accession
 import org.loculus.backend.utils.IteratorStreamer
+import org.loculus.backend.utils.formatBytesHuman
 import org.slf4j.MDC
 import org.springframework.http.HttpHeaders
 import org.springframework.http.HttpStatus
 import org.springframework.http.MediaType
 import org.springframework.http.ResponseEntity
 import org.springframework.validation.annotation.Validated
 import org.springframework.web.bind.annotation.DeleteMapping
 import org.springframework.web.bind.annotation.GetMapping
@@ -65,6 +68,7 @@ import org.springframework.web.bind.annotation.RequestPart
 import org.springframework.web.bind.annotation.ResponseStatus
 import org.springframework.web.bind.annotation.RestController
 import org.springframework.web.multipart.MultipartFile
+import org.springframework.web.server.ResponseStatusException
 import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody
 import java.util.UUID
 import io.swagger.v3.oas.annotations.parameters.RequestBody as SwaggerRequestBody
@@ -82,11 +87,13 @@ open class SubmissionController(
     private val iteratorStreamer: IteratorStreamer,
     private val requestIdContext: RequestIdContext,
     private val backendConfig: BackendConfig,
+    private val fileSizeConfig: FileSizeConfig,
     private val objectMapper: ObjectMapper,
 ) {
     @Operation(description = SUBMIT_DESCRIPTION)
     @ApiResponse(responseCode = "200", description = SUBMIT_RESPONSE_DESCRIPTION)
     @ApiResponse(responseCode = "400", description = SUBMIT_ERROR_RESPONSE)
+    @ApiResponse(responseCode = "413", description = PAYLOAD_TOO_LARGE_ERROR_RESPONSE)
     @PostMapping("/submit", consumes = ["multipart/form-data"])
     fun submit(
         @PathVariable @Valid organism: Organism,
@@ -106,14 +113,16 @@ )
         @RequestParam restrictedUntil: String?,
         @Parameter(description = FILE_MAPPING_DESCRIPTION) @RequestPart(required = false) fileMapping: String?,
     ): List<SubmissionIdMapping> {
-        var innerDataUseTermsType = DataUseTermsType.OPEN
-        if (backendConfig.dataUseTerms.enabled) {
-            if (dataUseTermsType == null) {
-                throw BadRequestException("the 'dataUseTermsType' needs to be provided.")
+        metadataFile.requireUnder(fileSizeConfig.maxMetadataFileSize, "metadata")
+        sequenceFile?.requireUnder(fileSizeConfig.maxSequenceFileSize, "sequence")
+        val innerDataUseTermsType =
+            if (backendConfig.dataUseTerms.enabled) {
+                dataUseTermsType ?: throw BadRequestException(
+                    "the 'dataUseTermsType' needs to be provided.",
+                )
             } else {
-                innerDataUseTermsType = dataUseTermsType
+                DataUseTermsType.OPEN
             }
-        }
 
         val fileMappingParsed = parseFileMapping(fileMapping, organism)
         val params = SubmissionParams.OriginalSubmissionParams(
@@ -130,6 +139,7 @@
     @Operation(description = REVISE_DESCRIPTION)
     @ApiResponse(responseCode = "200", description = REVISE_RESPONSE_DESCRIPTION)
+    @ApiResponse(responseCode = "413", description = PAYLOAD_TOO_LARGE_ERROR_RESPONSE)
     @PostMapping("/revise", consumes = ["multipart/form-data"])
     fun revise(
         @PathVariable @Valid organism: Organism,
@@ -138,6 +148,8 @@
         @Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile?,
         @Parameter(description = FILE_MAPPING_DESCRIPTION) @RequestPart(required = false) fileMapping: String?,
     ): List<SubmissionIdMapping> {
+        metadataFile.requireUnder(fileSizeConfig.maxMetadataFileSize, "metadata")
+        sequenceFile?.requireUnder(fileSizeConfig.maxSequenceFileSize, "sequence")
         val fileMappingParsed = parseFileMapping(fileMapping, organism)
         val params = SubmissionParams.RevisionSubmissionParams(
             organism,
@@ -425,10 +437,11 @@
         @HiddenParam authenticatedUser: AuthenticatedUser,
         @RequestParam compression: CompressionFormat?,
     ): ResponseEntity<StreamingResponseBody> {
-        val stillProcessing = submitModel.checkIfStillProcessingSubmittedData()
-        if (stillProcessing) {
-            return ResponseEntity.status(HttpStatus.LOCKED).build()
-        }
+        // No longer works since we've removed the aux tables
+        // val stillProcessing = submitModel.checkIfStillProcessingSubmittedData()
+        // if (stillProcessing) {
+        //     return ResponseEntity.status(HttpStatus.LOCKED).build()
+        // }
 
         val headers = HttpHeaders()
         headers.contentType = MediaType.parseMediaType(MediaType.APPLICATION_NDJSON_VALUE)
@@ -544,3 +557,17 @@ open class SubmissionController(
         return fileMappingParsed
     }
 }
+
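+/**
+ * Throws 413 Payload Too Large when this uploaded multipart file exceeds [limitBytes];
+ * [name] identifies the file ("metadata" or "sequence") in the error detail.
+ */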
+private fun MultipartFile.requireUnder(limitBytes: Long, name: String) {
+    if (size > limitBytes) {
+        val maxHuman = formatBytesHuman(limitBytes)
+        throw ResponseStatusException(
+            HttpStatus.PAYLOAD_TOO_LARGE,
+            "$name file is too large. Max $maxHuman.",
+        )
+    }
+}
diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt
index 36c01350ae..bd7883aa51 100644
--- a/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/controller/SubmissionControllerDescriptions.kt
@@ -14,6 +14,10 @@ const val SUBMIT_ERROR_RESPONSE = """
 The data use terms type have not been provided, even though they are enabled for this Loculus instance.
 """
 
+const val PAYLOAD_TOO_LARGE_ERROR_RESPONSE = """
+Payload too large. File size exceeds configured limits.
+"""
+
 const val METADATA_FILE_DESCRIPTION = """
 A TSV (tab separated values) file containing the metadata of the submitted sequence entries.
 The file may be compressed with zstd, xz, zip, gzip, lzma, bzip2 (with common extensions).
diff --git a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt
index f78e4c1521..8d6f3a28ad 100644
--- a/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/model/ReleasedDataModel.kt
@@ -22,11 +22,9 @@ import org.loculus.backend.service.files.S3Service
 import org.loculus.backend.service.groupmanagement.GROUPS_TABLE_NAME
 import org.loculus.backend.service.submission.CURRENT_PROCESSING_PIPELINE_TABLE_NAME
 import org.loculus.backend.service.submission.EXTERNAL_METADATA_TABLE_NAME
-import org.loculus.backend.service.submission.METADATA_UPLOAD_AUX_TABLE_NAME
 import org.loculus.backend.service.submission.RawProcessedData
 import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME
 import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_TABLE_NAME
-import org.loculus.backend.service.submission.SEQUENCE_UPLOAD_AUX_TABLE_NAME
 import org.loculus.backend.service.submission.SubmissionDatabaseService
 import org.loculus.backend.service.submission.UpdateTrackerTable
 import org.loculus.backend.utils.Accession
@@ -47,10 +45,8 @@ val RELEASED_DATA_RELATED_TABLES: List<String> =
     CURRENT_PROCESSING_PIPELINE_TABLE_NAME,
     EXTERNAL_METADATA_TABLE_NAME,
     GROUPS_TABLE_NAME,
-    METADATA_UPLOAD_AUX_TABLE_NAME,
     SEQUENCE_ENTRIES_TABLE_NAME,
     SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME,
-    SEQUENCE_UPLOAD_AUX_TABLE_NAME,
     DATA_USE_TERMS_TABLE_NAME,
 )
diff --git a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
index cf447175b7..1691ebde8b 100644
--- a/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
@@ -3,7 +3,6 @@ package org.loculus.backend.model
 import mu.KotlinLogging
 import org.apache.commons.compress.archivers.zip.ZipFile
 import org.apache.commons.compress.compressors.CompressorStreamFactory
-import org.jetbrains.exposed.exceptions.ExposedSQLException
 import org.loculus.backend.api.DataUseTerms
 import org.loculus.backend.api.Organism
 import org.loculus.backend.api.SubmissionIdFilesMap
@@ -11,28 +10,31 @@ import org.loculus.backend.api.SubmissionIdMapping
 import org.loculus.backend.api.getAllFileIds
 import org.loculus.backend.auth.AuthenticatedUser
 import org.loculus.backend.config.BackendConfig
+import org.loculus.backend.config.FileSizeConfig
 import org.loculus.backend.controller.BadRequestException
-import org.loculus.backend.controller.DuplicateKeyException
 import org.loculus.backend.controller.UnprocessableEntityException
 import org.loculus.backend.service.datauseterms.DataUseTermsPreconditionValidator
 import org.loculus.backend.service.files.FilesDatabaseService
-import org.loculus.backend.service.files.S3Service
 import org.loculus.backend.service.groupmanagement.GroupManagementPreconditionValidator
 import org.loculus.backend.service.submission.CompressionAlgorithm
-import org.loculus.backend.service.submission.MetadataUploadAuxTable
-import org.loculus.backend.service.submission.SequenceUploadAuxTable
 import org.loculus.backend.service.submission.SubmissionIdFilesMappingPreconditionValidator
 import org.loculus.backend.service.submission.UploadDatabaseService
 import org.loculus.backend.utils.DateProvider
+import org.loculus.backend.utils.FastaEntry
 import org.loculus.backend.utils.FastaReader
+import org.loculus.backend.utils.MetadataEntry
+import org.loculus.backend.utils.ParseFastaHeader
+import org.loculus.backend.utils.formatBytesHuman
 import org.loculus.backend.utils.metadataEntryStreamAsSequence
-import org.loculus.backend.utils.revisionEntryStreamAsSequence
+import org.springframework.http.HttpStatus
 import org.springframework.stereotype.Service
-import org.springframework.transaction.annotation.Transactional
 import org.springframework.web.multipart.MultipartFile
+import org.springframework.web.server.ResponseStatusException
 import java.io.BufferedInputStream
+import java.io.Closeable
 import java.io.File
 import java.io.InputStream
+import java.util.UUID
 
 const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES = "id"
 const val HEADER_TO_CONNECT_METADATA_AND_SEQUENCES_ALTERNATE_FOR_BACKCOMPAT = "submissionId"
@@ -90,7 +92,8 @@ class SubmitModel(
     private val submissionIdFilesMappingPreconditionValidator: SubmissionIdFilesMappingPreconditionValidator,
     private val dateProvider: DateProvider,
     private val backendConfig: BackendConfig,
-    private val s3Service: S3Service,
+    private val fileSizeConfig: FileSizeConfig,
+    private val parseFastaHeader: ParseFastaHeader,
 ) {
 
     companion object AcceptedFileTypes {
@@ -107,11 +110,7 @@
         }
     }
 
-    fun processSubmissions(
-        uploadId: String,
-        submissionParams: SubmissionParams,
-        batchSize: Int = 1000,
-    ): List<SubmissionIdMapping> = try {
+    fun processSubmissions(uploadId: String, submissionParams: SubmissionParams): List<SubmissionIdMapping> {
         log.info {
             "Processing submission (type: ${submissionParams.uploadType.name}) with uploadId $uploadId"
         }
@@ -122,70 +121,77 @@
             .validateMultipartUploads(submissionParams.files)
             .validateFilesExist(submissionParams.files)
 
-        insertDataIntoAux(
-            uploadId,
-            submissionParams,
-            batchSize,
-        )
+        if (submissionParams is 
SubmissionParams.OriginalSubmissionParams) { + groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup( + submissionParams.groupId, + submissionParams.authenticatedUser, + ) + dataUseTermsPreconditionValidator.checkThatRestrictedUntilIsAllowed(submissionParams.dataUseTerms) + } + + val metadata = parseMetadataFile(submissionParams) + val rawSequences = parseSequenceFile(submissionParams) + val sequences = groupSequencesById(rawSequences, submissionParams.organism) + val files = submissionParams.files - val metadataSubmissionIds = uploadDatabaseService.getMetadataUploadSubmissionIds(uploadId).toSet() if (requiresConsensusSequenceFile(submissionParams.organism)) { log.debug { "Validating submission with uploadId $uploadId" } - val sequenceSubmissionIds = uploadDatabaseService.getSequenceUploadSubmissionIds(uploadId).toSet() - validateSubmissionIdSetsForConsensusSequences(metadataSubmissionIds, sequenceSubmissionIds) + validateSubmissionIdSetsForConsensusSequences(metadata.keys, sequences.keys) } - if (submissionParams is SubmissionParams.RevisionSubmissionParams) { - log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" } - uploadDatabaseService.associateRevisedDataWithExistingSequenceEntries( - uploadId, - submissionParams.organism, - submissionParams.authenticatedUser, - ) + files?.let { + validateSubmissionIdSetsForFiles(metadata.keys, it.keys) + validateFileExistenceAndGroupOwnership(it, submissionParams, metadata) } - submissionParams.files?.let { submittedFiles -> - val fileSubmissionIds = submittedFiles.keys - validateSubmissionIdSetsForFiles(metadataSubmissionIds, fileSubmissionIds) - validateFileExistenceAndGroupOwnership(submittedFiles, submissionParams, uploadId) - } + return when (submissionParams) { + is SubmissionParams.OriginalSubmissionParams -> { + uploadDatabaseService.createNewSequenceEntries( + metadata, + sequences, + files, + submissionParams, + ) + } - if (submissionParams is SubmissionParams.OriginalSubmissionParams) { - log.info { "Generating new accessions for uploaded sequence data with uploadId $uploadId" } - uploadDatabaseService.generateNewAccessionsForOriginalUpload(uploadId) - } + is SubmissionParams.RevisionSubmissionParams -> { + uploadDatabaseService.createRevisionEntries( + metadata, + sequences, + files, + submissionParams, + ) + } - log.debug { "Persisting submission with uploadId $uploadId" } - uploadDatabaseService.mapAndCopy(uploadId, submissionParams) - } finally { - uploadDatabaseService.deleteUploadData(uploadId) + else -> throw IllegalStateException("Unknown submission type: ${submissionParams.uploadType}") + } } - /** - * Inserts the uploaded metadata (and sequence data) into the 'aux' tables in the database. 
-     */
-    private fun insertDataIntoAux(uploadId: String, submissionParams: SubmissionParams, batchSize: Int) {
-        if (submissionParams is SubmissionParams.OriginalSubmissionParams) {
-            groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup(
-                submissionParams.groupId,
-                submissionParams.authenticatedUser,
+    private fun parseMetadataFile(submissionParams: SubmissionParams): Map<SubmissionId, MetadataEntry> =
+        AutoDeletingTempFile().use { tempFile ->
+            val metadataStream = getStreamFromFile(
+                submissionParams.metadataFile,
+                metadataFileTypes,
+                tempFile,
             )
-            dataUseTermsPreconditionValidator.checkThatRestrictedUntilIsAllowed(submissionParams.dataUseTerms)
-        }
-        val metadataTempFileToDelete = MaybeFile()
-        val metadataStream = getStreamFromFile(
-            submissionParams.metadataFile,
-            uploadId,
-            metadataFileTypes,
-            metadataTempFileToDelete,
-        )
-        try {
-            uploadMetadata(uploadId, submissionParams, metadataStream, batchSize)
-        } finally {
-            metadataTempFileToDelete.delete()
+            val metadataEntries = metadataEntryStreamAsSequence(metadataStream).toList()
+
+            val duplicateIds = metadataEntries.groupBy { it.submissionId }
+                .filter { it.value.size > 1 }
+                .keys
+            if (duplicateIds.isNotEmpty()) {
+                throw UnprocessableEntityException(
+                    "Metadata file contains duplicated submissionIds: " +
+                        duplicateIds.joinToString(", ", transform = { "`$it`" }),
+                )
+            }
+
+            metadataEntries.associateBy { it.submissionId }
         }
 
+    private fun parseSequenceFile(submissionParams: SubmissionParams): Map<String, String> {
         val sequenceFile = submissionParams.sequenceFile
         if (sequenceFile == null) {
             if (requiresConsensusSequenceFile(submissionParams.organism)) {
@@ -193,140 +199,92 @@
                 "Submissions for organism ${submissionParams.organism.name} require a sequence file.",
                 )
             }
-        } else {
-            if (!requiresConsensusSequenceFile(submissionParams.organism)) {
-                throw BadRequestException(
-                    "Sequence uploads are not allowed for organism ${submissionParams.organism.name}.",
+            return emptyMap()
+        }
+        if (!requiresConsensusSequenceFile(submissionParams.organism)) {
+            throw BadRequestException(
+                "Sequence uploads are not allowed for organism ${submissionParams.organism.name}.",
+            )
+        }
+
+        return AutoDeletingTempFile().use { tempFile ->
+            val sequenceStream = getStreamFromFile(
+                sequenceFile,
+                sequenceFileTypes,
+                tempFile,
+            )
+
+            parseSequencesWithSizeLimit(sequenceStream)
+        }
+    }
+
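+    /**
+     * Streams FASTA entries into a header-to-sequence map while tracking the cumulative
+     * uncompressed size: exceeding the configured limit aborts with 413 Payload Too Large,
+     * and duplicated FASTA ids are collected and rejected with 422 Unprocessable Entity.
+     */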
+    private fun parseSequencesWithSizeLimit(sequenceStream: InputStream): Map<String, String> {
+        val sequences = mutableMapOf<String, String>()
+        val duplicateHeaders = mutableSetOf<String>()
+        var totalSize = 0L
+
+        FastaReader(sequenceStream).asSequence().forEach { fastaEntry ->
+            val entrySize = fastaEntry.sampleName.length + fastaEntry.sequence.length
+            totalSize += entrySize
+
+            if (totalSize > fileSizeConfig.maxUncompressedSequenceSize) {
+                val maxHuman = formatBytesHuman(fileSizeConfig.maxUncompressedSequenceSize)
+                throw ResponseStatusException(
+                    HttpStatus.PAYLOAD_TOO_LARGE,
+                    "Uncompressed sequence data exceeds maximum allowed size. Max $maxHuman. " +
+                        "Consider splitting your submission into smaller batches.",
+                )
            }
-            val sequenceTempFileToDelete = MaybeFile()
-            try {
-                val sequenceStream = getStreamFromFile(
-                    sequenceFile,
-                    uploadId,
-                    sequenceFileTypes,
-                    sequenceTempFileToDelete,
-                )
-                uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
-            } finally {
-                sequenceTempFileToDelete.delete()
+            if (sequences.containsKey(fastaEntry.sampleName)) {
+                duplicateHeaders.add(fastaEntry.sampleName)
             }
-        }
-    }
 
-    class MaybeFile {
-        var file: File? = null
-        fun delete() {
-            file?.delete()
+            sequences[fastaEntry.sampleName] = fastaEntry.sequence
         }
-    }
 
-    private fun getStreamFromFile(
-        file: MultipartFile,
-        uploadId: String,
-        dataType: ValidExtension,
-        maybeFileToDelete: MaybeFile,
-    ): InputStream = when (getFileType(file, dataType)) {
-        CompressionAlgorithm.ZIP -> {
-            val tempFile = File.createTempFile(
-                "upload_" + dataType.displayName.replace(" ", ""),
-                uploadId,
+        if (duplicateHeaders.isNotEmpty()) {
+            throw UnprocessableEntityException(
+                "Sequence file contains duplicated FASTA ids: " +
+                    duplicateHeaders.joinToString(", ") { "`$it`" },
             )
-            maybeFileToDelete.file = tempFile
-
-            file.transferTo(tempFile)
-            val zipFile = ZipFile.builder()
-                .setFile(tempFile)
-                .setUseUnicodeExtraFields(true)
-                .get()
-            BufferedInputStream(zipFile.getInputStream(zipFile.entries.nextElement()))
         }
-        CompressionAlgorithm.NONE ->
-            BufferedInputStream(file.inputStream)
-
-        else ->
-            CompressorStreamFactory().createCompressorInputStream(
-                BufferedInputStream(file.inputStream),
-            )
+        return sequences
     }
 
-    private fun uploadMetadata(
-        uploadId: String,
-        submissionParams: SubmissionParams,
-        metadataStream: InputStream,
-        batchSize: Int,
-    ) {
-        log.debug {
-            "intermediate storing uploaded metadata of type ${submissionParams.uploadType.name} " +
-                "from $submissionParams.submitter with UploadId $uploadId"
-        }
-        val now = dateProvider.getCurrentDateTime()
-        try {
-            when (submissionParams) {
-                is SubmissionParams.OriginalSubmissionParams -> {
-                    metadataEntryStreamAsSequence(metadataStream)
-                        .chunked(batchSize)
-                        .forEach { batch ->
-                            uploadDatabaseService.batchInsertMetadataInAuxTable(
-                                uploadId = uploadId,
-                                authenticatedUser = submissionParams.authenticatedUser,
-                                groupId = submissionParams.groupId,
-                                submittedOrganism = submissionParams.organism,
-                                uploadedMetadataBatch = batch,
-                                uploadedAt = now,
-                                files = submissionParams.files,
-                            )
-                        }
-                }
+    private fun groupSequencesById(
+        sequences: Map<String, String>,
+        organism: Organism,
+    ): Map<SubmissionId, Map<SegmentName, String>> {
+        val sequencesById = mutableMapOf<SubmissionId, MutableMap<SegmentName, String>>()
+        sequences.forEach { (header, sequence) ->
+            val (submissionId, segmentName) = parseFastaHeader.parse(header, organism)
+            val segmentsForId = sequencesById.getOrPut(submissionId) { mutableMapOf() }
+            segmentsForId[segmentName] = sequence
        }
 
-                is SubmissionParams.RevisionSubmissionParams -> {
-                    revisionEntryStreamAsSequence(metadataStream)
-                        .chunked(batchSize)
-                        .forEach { batch ->
-                            uploadDatabaseService.batchInsertRevisedMetadataInAuxTable(
-                                uploadId = uploadId,
-                                authenticatedUser = submissionParams.authenticatedUser,
-                                submittedOrganism = submissionParams.organism,
-                                uploadedRevisedMetadataBatch = batch,
-                                uploadedAt = now,
-                                files = submissionParams.files,
-                            )
-                        }
-                }
-            }
+        return sequencesById
+    }
 
-        } catch (e: ExposedSQLException) {
-            if (e.sqlState == UNIQUE_CONSTRAINT_VIOLATION_SQL_STATE) {
-                throw DuplicateKeyException(
-                    "Metadata file contains at least one duplicate submissionId: ${e.cause?.cause}",
-                )
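+    /**
+     * Returns a decompressed stream for the uploaded [file]: ZIP archives are spooled to
+     * [tempFile] and the first entry is read; other supported compression formats are
+     * decompressed on the fly; uncompressed input is only buffered.
+     */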
+    private fun getStreamFromFile(file: MultipartFile, dataType: ValidExtension, tempFile: File): InputStream =
+        when (getFileType(file, dataType)) {
+            CompressionAlgorithm.ZIP -> {
+                file.transferTo(tempFile)
+                val zipFile = ZipFile.builder()
+                    .setFile(tempFile)
+                    .setUseUnicodeExtraFields(true)
+                    .get()
+                BufferedInputStream(zipFile.getInputStream(zipFile.entries.nextElement()))
             }
-            throw e
-        }
-    }
 
-    private fun uploadSequences(uploadId: String, sequenceStream: InputStream, batchSize: Int, organism: Organism) {
-        log.info {
-            "intermediate storing uploaded sequence data with UploadId $uploadId"
-        }
-        FastaReader(sequenceStream).asSequence().chunked(batchSize).forEach { batch ->
-            try {
-                uploadDatabaseService.batchInsertSequencesInAuxTable(
-                    uploadId,
-                    organism,
-                    batch,
+            CompressionAlgorithm.NONE ->
+                BufferedInputStream(file.inputStream)
+
+            else ->
+                CompressorStreamFactory().createCompressorInputStream(
+                    BufferedInputStream(file.inputStream),
                 )
-            } catch (e: ExposedSQLException) {
-                if (e.sqlState == UNIQUE_CONSTRAINT_VIOLATION_SQL_STATE) {
-                    throw DuplicateKeyException(
-                        "Sequence file contains at least one duplicate submissionId: ${e.cause?.cause}",
-                    )
-                }
-                throw e
-            }
         }
-    }
 
     private fun getFileType(file: MultipartFile, expectedFileType: ValidExtension): CompressionAlgorithm {
         val originalFilename = file.originalFilename
@@ -385,13 +343,13 @@
     private fun validateFileExistenceAndGroupOwnership(
         submittedFiles: SubmissionIdFilesMap,
         submissionParams: SubmissionParams,
-        uploadId: String,
+        metadata: Map<SubmissionId, MetadataEntry>,
     ) {
         val usedFileIds = submittedFiles.getAllFileIds()
-        val fileGroups = filesDatabaseService.getGroupIds(usedFileIds)
+        val fileToGroupId = filesDatabaseService.getGroupIds(usedFileIds)
 
         log.debug { "Validating that all submitted file IDs exist." }
-        val notExistingIds = usedFileIds.subtract(fileGroups.keys)
+        val notExistingIds = usedFileIds.subtract(fileToGroupId.keys)
         if (notExistingIds.isNotEmpty()) {
             throw BadRequestException("The File IDs $notExistingIds do not exist.")
         }
@@ -399,42 +357,54 @@
         log.debug {
             "Validating that submitted files belong to the group that their associated submission belongs to."
         }
-        if (submissionParams is SubmissionParams.OriginalSubmissionParams) {
-            fileGroups.forEach {
-                if (it.value != submissionParams.groupId) {
-                    throw BadRequestException(
-                        "The File ${it.key} does not belong to group ${submissionParams.groupId}.",
-                    )
-                }
-            }
-        } else if (submissionParams is SubmissionParams.RevisionSubmissionParams) {
-            val submissionIdGroups = uploadDatabaseService.getSubmissionIdToGroupMapping(uploadId)
-            submittedFiles.forEach {
-                val submissionGroup = submissionIdGroups[it.key]
-                val associatedFileIds = it.value.values.flatten().map { it.fileId }
-                associatedFileIds.forEach { fileId ->
-                    val fileGroup = fileGroups[fileId]
-                    if (fileGroup != submissionGroup) {
+        when (submissionParams) {
+            is SubmissionParams.OriginalSubmissionParams -> {
+                fileToGroupId.forEach {
+                    if (it.value != submissionParams.groupId) {
                         throw BadRequestException(
-                            "File $fileId does not belong to group $submissionGroup.",
+                            "The File ${it.key} does not belong to group ${submissionParams.groupId}.",
                         )
                     }
                 }
             }
-        }
-    }
 
-    @Transactional(readOnly = true)
-    fun checkIfStillProcessingSubmittedData(): Boolean {
-        val metadataInAuxTable: Boolean =
-            MetadataUploadAuxTable.select(MetadataUploadAuxTable.submissionIdColumn).count() > 0
-        val sequencesInAuxTable: Boolean =
-            SequenceUploadAuxTable.select(SequenceUploadAuxTable.sequenceSubmissionIdColumn).count() > 0
-        return metadataInAuxTable || sequencesInAuxTable
+            is SubmissionParams.RevisionSubmissionParams -> {
+                val submissionIdToGroup = uploadDatabaseService.submissionIdToGroup(metadata)
+                submittedFiles.forEach { (submissionId, filesMap) ->
+                    val submissionGroup = submissionIdToGroup[submissionId]
+                    filesMap.values.flatten().map { it.fileId }.forEach { fileId ->
+                        val fileGroup = fileToGroupId[fileId]
+                        if (fileGroup != submissionGroup) {
+                            throw BadRequestException(
+                                "File $fileId does not belong to group $submissionGroup.",
+                            )
+                        }
+                    }
+                }
+            }
+        }
     }
 
     private fun 
requiresConsensusSequenceFile(organism: Organism) = backendConfig.getInstanceConfig(organism) .schema .submissionDataTypes .consensusSequences + + private class AutoDeletingTempFile : + File( + System.getProperty("java.io.tmpdir"), + UUID.randomUUID().toString(), + ), + Closeable { + + init { + createNewFile() + } + + override fun close() { + if (exists()) { + delete() + } + } + } } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt b/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt index 949a370ad5..9fedc89965 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/debug/DeleteSequenceDataService.kt @@ -4,10 +4,8 @@ import org.jetbrains.exposed.sql.deleteAll import org.loculus.backend.config.BackendConfig import org.loculus.backend.service.datauseterms.DataUseTermsTable import org.loculus.backend.service.submission.CurrentProcessingPipelineTable -import org.loculus.backend.service.submission.MetadataUploadAuxTable import org.loculus.backend.service.submission.SequenceEntriesPreprocessedDataTable import org.loculus.backend.service.submission.SequenceEntriesTable -import org.loculus.backend.service.submission.SequenceUploadAuxTable import org.loculus.backend.utils.DateProvider import org.springframework.stereotype.Component import org.springframework.transaction.annotation.Transactional @@ -18,8 +16,6 @@ class DeleteSequenceDataService(private val dateProvider: DateProvider, private fun deleteAllSequenceData() { SequenceEntriesTable.deleteAll() SequenceEntriesPreprocessedDataTable.deleteAll() - MetadataUploadAuxTable.deleteAll() - SequenceUploadAuxTable.deleteAll() DataUseTermsTable.deleteAll() CurrentProcessingPipelineTable.deleteAll() CurrentProcessingPipelineTable.setV1ForOrganismsIfNotExist( diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt index f5ff4f02b7..7007c29fba 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt @@ -5,6 +5,7 @@ import org.loculus.backend.api.GeneticSequence import org.loculus.backend.api.Organism import org.loculus.backend.api.OriginalData import org.loculus.backend.api.ProcessedData +import org.loculus.backend.api.SegmentName import org.loculus.backend.config.BackendConfig import org.springframework.stereotype.Service import java.nio.charset.StandardCharsets @@ -27,7 +28,7 @@ class CompressionService(private val backendConfig: BackendConfig) { fun compressNucleotideSequence( uncompressedSequence: GeneticSequence, - segmentName: String, + segmentName: SegmentName, organism: Organism, ): CompressedSequence = compress( uncompressedSequence, diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt deleted file mode 100644 index 4293131fb8..0000000000 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/MetadataUploadAuxTable.kt +++ /dev/null @@ -1,24 +0,0 @@ -package org.loculus.backend.service.submission - -import org.jetbrains.exposed.sql.Table -import org.jetbrains.exposed.sql.kotlin.datetime.datetime -import org.loculus.backend.api.FileCategoryFilesMap -import 
org.loculus.backend.service.jacksonSerializableJsonb
-
-const val METADATA_UPLOAD_AUX_TABLE_NAME = "metadata_upload_aux_table"
-
-object MetadataUploadAuxTable : Table(METADATA_UPLOAD_AUX_TABLE_NAME) {
-    val accessionColumn = varchar("accession", 255).nullable()
-    val versionColumn = long("version").nullable()
-    val uploadIdColumn = varchar("upload_id", 255)
-    val organismColumn = varchar("organism", 255)
-    val submissionIdColumn = varchar("submission_id", 255)
-    val submitterColumn = varchar("submitter", 255)
-    val groupIdColumn = integer("group_id").nullable()
-    val uploadedAtColumn = datetime("uploaded_at")
-    val metadataColumn =
-        jacksonSerializableJsonb<Map<String, String>>("metadata").nullable()
-    val filesColumn =
-        jacksonSerializableJsonb<FileCategoryFilesMap>("files").nullable()
-    override val primaryKey = PrimaryKey(uploadIdColumn, submissionIdColumn)
-}
diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt
index 1643e16533..13c52b7d57 100644
--- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceEntriesTable.kt
@@ -25,7 +25,7 @@ object SequenceEntriesTable : Table(SEQUENCE_ENTRIES_TABLE_NAME) {
     val organismColumn = varchar("organism", 255)
     val submissionIdColumn = varchar("submission_id", 255)
     val submitterColumn = varchar("submitter", 255)
-    val approverColumn = varchar("approver", 255)
+    val approverColumn = varchar("approver", 255).nullable()
     val groupIdColumn = integer("group_id")
     val submittedAtTimestampColumn = datetime("submitted_at")
     val releasedAtTimestampColumn = datetime("released_at").nullable()
diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt
deleted file mode 100644
index 9f931ac91b..0000000000
--- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SequenceUploadAuxTable.kt
+++ /dev/null
@@ -1,15 +0,0 @@
-package org.loculus.backend.service.submission
-
-import org.jetbrains.exposed.sql.Table
-import org.loculus.backend.service.jacksonSerializableJsonb
-
-const val SEQUENCE_UPLOAD_AUX_TABLE_NAME = "sequence_upload_aux_table"
-
-object SequenceUploadAuxTable : Table(SEQUENCE_UPLOAD_AUX_TABLE_NAME) {
-    val sequenceUploadIdColumn = varchar("upload_id", 255)
-    val sequenceSubmissionIdColumn = varchar("submission_id", 255)
-    val segmentNameColumn = varchar("segment_name", 255)
-    val compressedSequenceDataColumn = jacksonSerializableJsonb<CompressedSequence>("compressed_sequence_data")
-
-    override val primaryKey = PrimaryKey(sequenceUploadIdColumn, sequenceSubmissionIdColumn, segmentNameColumn)
-}
diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt
index ca9cef8919..fcebee3942 100644
--- a/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/UploadDatabaseService.kt
@@ -2,42 +2,38 @@ package org.loculus.backend.service.submission
 
 import kotlinx.datetime.LocalDateTime
 import mu.KotlinLogging
-import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq
-import org.jetbrains.exposed.sql.VarCharColumnType
-import org.jetbrains.exposed.sql.and
 import org.jetbrains.exposed.sql.batchInsert
-import 
org.jetbrains.exposed.sql.deleteWhere -import org.jetbrains.exposed.sql.statements.StatementType -import org.jetbrains.exposed.sql.transactions.transaction -import org.jetbrains.exposed.sql.update +import org.jetbrains.exposed.sql.max import org.loculus.backend.api.Organism +import org.loculus.backend.api.OriginalData import org.loculus.backend.api.Status import org.loculus.backend.api.SubmissionIdFilesMap import org.loculus.backend.api.SubmissionIdMapping -import org.loculus.backend.auth.AuthenticatedUser +import org.loculus.backend.controller.UnprocessableEntityException import org.loculus.backend.log.AuditLogger +import org.loculus.backend.model.SegmentName import org.loculus.backend.model.SubmissionId import org.loculus.backend.model.SubmissionParams import org.loculus.backend.service.GenerateAccessionFromNumberService import org.loculus.backend.service.datauseterms.DataUseTermsDatabaseService -import org.loculus.backend.service.submission.MetadataUploadAuxTable.accessionColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.filesColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.groupIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.metadataColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.organismColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.submissionIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.submitterColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.uploadIdColumn -import org.loculus.backend.service.submission.MetadataUploadAuxTable.uploadedAtColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.compressedSequenceDataColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.segmentNameColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.sequenceSubmissionIdColumn -import org.loculus.backend.service.submission.SequenceUploadAuxTable.sequenceUploadIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.accessionColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.approverColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.groupIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.organismColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.originalDataColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.releasedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submissionIdColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submittedAtTimestampColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.submitterColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionColumn +import org.loculus.backend.service.submission.SequenceEntriesTable.versionCommentColumn +import org.loculus.backend.utils.Accession import org.loculus.backend.utils.DatabaseConstants -import org.loculus.backend.utils.FastaEntry +import org.loculus.backend.utils.DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT +import org.loculus.backend.utils.DateProvider import org.loculus.backend.utils.MetadataEntry import org.loculus.backend.utils.ParseFastaHeader -import org.loculus.backend.utils.RevisionEntry +import org.loculus.backend.utils.Version import org.loculus.backend.utils.chunkedForDatabase import 
org.loculus.backend.utils.getNextSequenceNumbers
 import org.loculus.backend.utils.processInDatabaseSafeChunks
@@ -48,8 +44,11 @@
 private val log = KotlinLogging.logger { }
 
 private const val SEQUENCE_INSERT_COLUMNS = 4
 private const val METADATA_INSERT_COLUMNS = 8
+private const val SEQUENCE_ENTRIES_INSERT_COLUMNS = 11
 private const val SEQUENCE_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_INSERT_COLUMNS
 private const val METADATA_BATCH_SIZE = DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / METADATA_INSERT_COLUMNS
+private const val SEQUENCE_ENTRIES_BATCH_SIZE =
+    DatabaseConstants.POSTGRESQL_PARAMETER_LIMIT / SEQUENCE_ENTRIES_INSERT_COLUMNS
 
 @Service
 @Transactional
@@ -60,241 +59,164 @@ class UploadDatabaseService(
     private val dataUseTermsDatabaseService: DataUseTermsDatabaseService,
     private val generateAccessionFromNumberService: GenerateAccessionFromNumberService,
     private val auditLogger: AuditLogger,
+    private val dateProvider: DateProvider,
 ) {
+    data class SequenceEntry(
+        val accession: Accession,
+        val version: Long,
+        val versionComment: String?,
+        val organism: Organism,
+        val submissionId: SubmissionId,
+        val submitter: String,
+        val approver: String?,
+        val groupId: Int,
+        val submittedAtTimestamp: LocalDateTime,
+        val releasedAtTimestamp: LocalDateTime?,
+        val originalData: OriginalData,
+    )
 
-    fun batchInsertMetadataInAuxTable(
-        uploadId: String,
-        authenticatedUser: AuthenticatedUser,
-        groupId: Int,
-        submittedOrganism: Organism,
-        uploadedMetadataBatch: List<MetadataEntry>,
-        uploadedAt: LocalDateTime,
-        files: SubmissionIdFilesMap?,
-    ) {
-        uploadedMetadataBatch.chunked(METADATA_BATCH_SIZE).forEach { batch ->
-            MetadataUploadAuxTable.batchInsert(batch) {
-                this[submitterColumn] = authenticatedUser.username
-                this[groupIdColumn] = groupId
-                this[uploadedAtColumn] = uploadedAt
-                this[submissionIdColumn] = it.submissionId
-                this[metadataColumn] = it.metadata
-                this[filesColumn] = files?.get(it.submissionId)
-                this[organismColumn] = submittedOrganism.name
-                this[uploadIdColumn] = uploadId
-            }
-        }
-    }
+    data class SequenceInfo(val latestVersion: Version, val groupId: Int)
 
-    fun batchInsertRevisedMetadataInAuxTable(
-        uploadId: String,
-        authenticatedUser: AuthenticatedUser,
-        submittedOrganism: Organism,
-        uploadedRevisedMetadataBatch: List<RevisionEntry>,
-        uploadedAt: LocalDateTime,
-        files: SubmissionIdFilesMap?,
-    ) {
-        uploadedRevisedMetadataBatch.chunked(METADATA_BATCH_SIZE).forEach { batch ->
-            MetadataUploadAuxTable.batchInsert(batch) {
-                this[accessionColumn] = it.accession
-                this[submitterColumn] = authenticatedUser.username
-                this[uploadedAtColumn] = uploadedAt
-                this[submissionIdColumn] = it.submissionId
-                this[metadataColumn] = it.metadata
-                this[filesColumn] = files?.get(it.submissionId)
-                this[organismColumn] = submittedOrganism.name
-                this[uploadIdColumn] = uploadId
-            }
-        }
-    }
-
-    fun batchInsertSequencesInAuxTable(
-        uploadId: String,
-        submittedOrganism: Organism,
-        uploadedSequencesBatch: List<FastaEntry>,
-    ) {
-        uploadedSequencesBatch.chunkedForDatabase({ batch ->
-            SequenceUploadAuxTable.batchInsert(batch) {
-                val (submissionId, segmentName) = parseFastaHeader.parse(it.sampleName, submittedOrganism)
-                this[sequenceSubmissionIdColumn] = submissionId
-                this[segmentNameColumn] = segmentName
-                this[sequenceUploadIdColumn] = uploadId
-                this[compressedSequenceDataColumn] = compressor.compressNucleotideSequence(
-                    it.sequence,
-                    segmentName,
-                    submittedOrganism,
-                )
-            }
-            emptyList()
-        }, SEQUENCE_INSERT_COLUMNS)
-    }
-
-    fun getMetadataUploadSubmissionIds(uploadId: String): List<SubmissionId> = MetadataUploadAuxTable
-        .select(
-            uploadIdColumn,
-            submissionIdColumn,
-        )
-        .where { uploadIdColumn eq uploadId }
-        .map { it[submissionIdColumn] }
-
-    fun getSequenceUploadSubmissionIds(uploadId: String): List<SubmissionId> = SequenceUploadAuxTable
-        .select(
-            sequenceUploadIdColumn,
-            sequenceSubmissionIdColumn,
-        )
-        .where { sequenceUploadIdColumn eq uploadId }
-        .map {
-            it[sequenceSubmissionIdColumn]
-        }
-
-    fun mapAndCopy(uploadId: String, submissionParams: SubmissionParams): List<SubmissionIdMapping> = transaction {
-        log.debug {
-            "mapping and copying sequences with UploadId $uploadId and uploadType: $submissionParams.uploadType"
-        }
-
-        val mapAndCopySql = """
-            INSERT INTO sequence_entries (
-                accession,
-                version,
-                organism,
-                submission_id,
-                submitter,
-                group_id,
-                submitted_at,
-                original_data
-            )
-            SELECT
-                metadata_upload_aux_table.accession,
-                metadata_upload_aux_table.version,
-                metadata_upload_aux_table.organism,
-                metadata_upload_aux_table.submission_id,
-                metadata_upload_aux_table.submitter,
-                metadata_upload_aux_table.group_id,
-                metadata_upload_aux_table.uploaded_at,
-                jsonb_build_object(
-                    'metadata', metadata_upload_aux_table.metadata,
-                    'files', metadata_upload_aux_table.files,
-                    'unalignedNucleotideSequences',
-                    COALESCE(
-                        jsonb_object_agg(
-                            sequence_upload_aux_table.segment_name,
-                            sequence_upload_aux_table.compressed_sequence_data::jsonb
-                        ) FILTER (WHERE sequence_upload_aux_table.segment_name IS NOT NULL),
-                        '{}'::jsonb
-                    )
-                )
-            FROM
-                metadata_upload_aux_table
-            LEFT JOIN
-                sequence_upload_aux_table
-                ON metadata_upload_aux_table.upload_id = sequence_upload_aux_table.upload_id
-                AND metadata_upload_aux_table.submission_id = sequence_upload_aux_table.submission_id
-            WHERE metadata_upload_aux_table.upload_id = ?
-            GROUP BY
-                metadata_upload_aux_table.upload_id,
-                metadata_upload_aux_table.organism,
-                metadata_upload_aux_table.submission_id,
-                metadata_upload_aux_table.submitter,
-                metadata_upload_aux_table.group_id,
-                metadata_upload_aux_table.uploaded_at
-            RETURNING accession, version, submission_id;
-        """.trimIndent()
-        val insertionResult = exec(
-            mapAndCopySql,
-            listOf(
-                Pair(VarCharColumnType(), uploadId),
-            ),
-            explicitStatementType = StatementType.SELECT,
-        ) { rs ->
-            val result = mutableListOf<SubmissionIdMapping>()
-            while (rs.next()) {
-                result += SubmissionIdMapping(
-                    rs.getString("accession"),
-                    rs.getLong("version"),
-                    rs.getString("submission_id"),
-                )
-            }
-            result.toList()
-        } ?: emptyList()
-
-        if (submissionParams is SubmissionParams.OriginalSubmissionParams) {
-            log.debug { "Setting data use terms for submission $uploadId to ${submissionParams.dataUseTerms}" }
-            val accessions = insertionResult.map { it.accession }
-            dataUseTermsDatabaseService.setNewDataUseTerms(
-                submissionParams.authenticatedUser,
-                accessions,
-                submissionParams.dataUseTerms,
-            )
-        }
-
-        auditLogger.log(
-            username = submissionParams.authenticatedUser.username,
-            description = "Submitted or revised ${insertionResult.size} sequences: " +
-                insertionResult.joinToString { it.displayAccessionVersion() },
-        )
-
-        return@transaction insertionResult
-    }
-
-    fun deleteUploadData(uploadId: String) {
-        log.debug { "deleting upload data with UploadId $uploadId" }
-
-        MetadataUploadAuxTable.deleteWhere { uploadIdColumn eq uploadId }
-        SequenceUploadAuxTable.deleteWhere { sequenceUploadIdColumn eq uploadId }
-    }
-
+    fun createNewSequenceEntries(
+        metadata: Map<SubmissionId, MetadataEntry>,
+        sequences: Map<SubmissionId, Map<SegmentName, String>>,
+        files: SubmissionIdFilesMap?,
+        submissionParams: SubmissionParams.OriginalSubmissionParams,
+    ): List<SubmissionIdMapping> {
+        log.debug { "Creating new sequence entries" }
+
+        val now = dateProvider.getCurrentDateTime()
+
+        val newAccessions = generateNewAccessions(metadata.keys)
+
+        val newEntries = newAccessions.map { (submissionId, accession) ->
+            SequenceEntry(
+                accession = accession,
+                version = 1,
+                versionComment = null,
+                organism = submissionParams.organism,
+                submissionId = submissionId,
+                submitter = submissionParams.authenticatedUser.username,
+                approver = null,
+                groupId = submissionParams.groupId,
+                submittedAtTimestamp = now,
+                releasedAtTimestamp = null,
+                originalData = OriginalData(
+                    metadata = metadata[submissionId]?.metadata ?: emptyMap(),
+                    files = files?.get(submissionId),
+                    unalignedNucleotideSequences = sequences[submissionId]?.mapValues { (segmentName, sequence) ->
+                        compressor.compressNucleotideSequence(
+                            sequence,
+                            segmentName,
+                            submissionParams.organism,
+                        )
+                    } ?: emptyMap(),
+                ),
+            )
+        }
+
+        val submissionIdMapping = insertEntries(newEntries)
+
+        dataUseTermsDatabaseService.setNewDataUseTerms(
+            submissionParams.authenticatedUser,
+            newAccessions.map { it.second },
+            submissionParams.dataUseTerms,
+        )
+
+        return submissionIdMapping
+    }
+
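+    /**
+     * Inserts a new version for each revised entry: the referenced accessions are validated
+     * (they must exist, be released, and be editable by the submitter), then rows with
+     * version = latestVersion + 1 are written alongside the uploaded original data.
+     */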
+    fun createRevisionEntries(
+        metadata: Map<SubmissionId, MetadataEntry>,
+        sequences: Map<SubmissionId, Map<SegmentName, String>>,
+        files: SubmissionIdFilesMap?,
+        submissionParams: SubmissionParams.RevisionSubmissionParams,
+    ): List<SubmissionIdMapping> {
+        log.debug { "Creating revision entries" }
+
+        val submissionIdToAccession = submissionIdToAccession(metadata)
+
-    fun associateRevisedDataWithExistingSequenceEntries(
-        uploadId: String,
-        organism: Organism,
-        authenticatedUser: AuthenticatedUser,
-    ) {
-        val accessions =
-            MetadataUploadAuxTable
-                .select(accessionColumn)
-                .where { uploadIdColumn eq uploadId }
-                .map { it[accessionColumn]!! }
-
-        accessions.processInDatabaseSafeChunks { chunk ->
+        submissionIdToAccession.values.processInDatabaseSafeChunks { chunk ->
             accessionPreconditionValidator.validate {
                 thatAccessionsExist(chunk)
-                    .andThatUserIsAllowedToEditSequenceEntries(authenticatedUser)
+                    .andThatUserIsAllowedToEditSequenceEntries(submissionParams.authenticatedUser)
                     .andThatSequenceEntriesAreInStates(listOf(Status.APPROVED_FOR_RELEASE))
-                    .andThatOrganismIs(organism)
+                    .andThatOrganismIs(submissionParams.organism)
             }
         }
 
-        val updateSql = """
-            UPDATE metadata_upload_aux_table m
-            SET
-                version = sequence_entries.version + 1,
-                group_id = sequence_entries.group_id
-            FROM sequence_entries
-            WHERE
-                m.upload_id = ?
-                AND m.accession = sequence_entries.accession
-                AND ${SequenceEntriesTable.isMaxVersion}
-        """.trimIndent()
-        transaction {
-            exec(
-                updateSql,
-                listOf(
-                    Pair(VarCharColumnType(), uploadId),
-                ),
-            )
-        }
-    }
+        val sequenceInfo = accessionToSequenceInfo(submissionIdToAccession.values)
+
+        val now = dateProvider.getCurrentDateTime()
+
+        val newEntries = metadata.values.map { entry ->
+            val accession = submissionIdToAccession[entry.submissionId] ?: throw IllegalStateException(
+                "Metadata for submissionId ${entry.submissionId} does not contain an accession",
+            )
+            val info = sequenceInfo[accession]
+                ?: throw IllegalStateException("Could not find latest version for accession $accession")
+            val newVersion = info.latestVersion + 1
+            SequenceEntry(
+                accession = accession,
+                version = newVersion,
+                versionComment = entry.metadata["versionComment"],
+                organism = submissionParams.organism,
+                submissionId = entry.submissionId,
+                submitter = submissionParams.authenticatedUser.username,
+                approver = null,
+                groupId = info.groupId,
+                submittedAtTimestamp = now,
+                releasedAtTimestamp = null,
+                originalData = OriginalData(
+                    metadata = entry.metadata,
+                    files = files?.get(entry.submissionId),
+                    unalignedNucleotideSequences = sequences[entry.submissionId]?.mapValues { (segmentName, sequence) ->
+                        compressor.compressNucleotideSequence(
+                            sequence,
+                            segmentName,
+                            submissionParams.organism,
+                        )
+                    } ?: emptyMap(),
+                ),
+            )
+        }
+
+        return insertEntries(newEntries)
+    }
 
-    fun getSubmissionIdToGroupMapping(uploadId: String): Map<SubmissionId, Int> = MetadataUploadAuxTable
-        .select(submissionIdColumn, groupIdColumn)
-        .where { uploadIdColumn eq uploadId }
-        .associate { Pair(it[submissionIdColumn], it[groupIdColumn]!!) }
 
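+    /**
+     * Batch-inserts the prepared rows into sequence_entries, chunked to stay below the
+     * PostgreSQL parameter limit, and returns the accession/version/submissionId mappings.
+     */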
+    private fun insertEntries(newEntries: List<SequenceEntry>): List<SubmissionIdMapping> {
+        newEntries.chunkedForDatabase(
+            { batch ->
+                SequenceEntriesTable.batchInsert(batch) {
+                    this[accessionColumn] = it.accession
+                    this[versionColumn] = it.version
+                    this[versionCommentColumn] = it.versionComment
+                    this[organismColumn] = it.organism.name
+                    this[submissionIdColumn] = it.submissionId
+                    this[submitterColumn] = it.submitter
+                    this[approverColumn] = it.approver
+                    this[groupIdColumn] = it.groupId
+                    this[submittedAtTimestampColumn] = it.submittedAtTimestamp
+                    this[releasedAtTimestampColumn] = it.releasedAtTimestamp
+                    this[originalDataColumn] = it.originalData
+                }
+                emptyList()
+            },
+            SEQUENCE_ENTRIES_INSERT_COLUMNS,
+        )
+
+        return newEntries.map { entry ->
+            SubmissionIdMapping(
+                accession = entry.accession,
+                version = entry.version,
+                submissionId = entry.submissionId,
+            )
+        }
+    }
 
-    fun generateNewAccessionsForOriginalUpload(uploadId: String) {
-        val submissionIds =
-            MetadataUploadAuxTable
-                .select(submissionIdColumn)
-                .where { uploadIdColumn eq uploadId }
-                .map { it[submissionIdColumn] }
-
+    // Returns a list of pairs (submissionId, accession)
+    fun generateNewAccessions(submissionIds: Collection<SubmissionId>): List<Pair<SubmissionId, Accession>> {
+        log.info { "Generating ${submissionIds.size} new accessions" }
         val nextAccessions = getNextSequenceNumbers("accession_sequence", submissionIds.size).map {
             generateAccessionFromNumberService.generateCustomId(it)
         }
@@ -305,21 +227,80 @@
             )
         }
 
-        val submissionIdToAccessionMap = submissionIds.zip(nextAccessions)
+        log.info { "Generated ${submissionIds.size} new accessions" }
+        return submissionIds.zip(nextAccessions)
+    }
 
-        log.info {
-            "Generated ${submissionIdToAccessionMap.size} new accessions for original upload with UploadId $uploadId:"
-        }
-        submissionIdToAccessionMap.forEach { (submissionId, accession) ->
-            MetadataUploadAuxTable.update(
-                where = {
-                    (submissionIdColumn eq submissionId) and (uploadIdColumn eq uploadId)
-                },
-            ) {
-                it[accessionColumn] = accession
-                it[versionColumn] = 1
-            }
-        }
-    }
+    fun accessionToSequenceInfo(accessions: Collection<Accession>): Map<Accession, SequenceInfo> =
+        accessions.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk ->
+            val maxVersion = versionColumn.max()
+            val maxGroupId = groupIdColumn.max() // All group IDs should be same, so max works
+
+            SequenceEntriesTable.select(
+                accessionColumn,
+                maxVersion,
+                maxGroupId,
+            )
+                .where { accessionColumn inList chunk }
+                .groupBy(accessionColumn)
+                .associate {
+                    it[accessionColumn] to
+                        SequenceInfo(
+                            latestVersion = it[maxVersion]!!,
+                            groupId = it[maxGroupId]!!,
+                        )
+                }
+        }.flatMap(Map::toList).toMap()
+
+    private fun submissionIdToAccession(metadata: Map<SubmissionId, MetadataEntry>): Map<SubmissionId, Accession> {
+        if (metadata.values.any { !it.metadata.containsKey("accession") }) {
+            throw UnprocessableEntityException("Metadata file is missing required column 'accession'")
+        }
+        val submissionIdToAccession = metadata.map { (submissionId, entry) ->
+            (submissionId to entry.metadata["accession"]!!)
+        }
+
+        val submissionIdsWithoutAccessions = submissionIdToAccession.filter { it.second.isBlank() }
+        if (submissionIdsWithoutAccessions.isNotEmpty()) {
+            throw UnprocessableEntityException(
+                "The rows with the following submissionIds are missing accessions in metadata file: " +
+                    formatListWithBackticks(submissionIdsWithoutAccessions.map { it.first }),
+            )
+        }
+
+        val duplicateAccessions = submissionIdToAccession.groupBy { it.second }
+            .filter { it.value.size > 1 }
+        if (duplicateAccessions.isNotEmpty()) {
+            throw UnprocessableEntityException(
+                "Some accessions appear multiple times in metadata file: " +
+                    formatListWithBackticks(duplicateAccessions.keys),
+            )
+        }
+
+        return submissionIdToAccession.toMap()
+    }
+
+    private fun formatListWithBackticks(list: Collection<String>): String = list.joinToString(", ") { "`$it`" }
+
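+    /**
+     * Resolves the owning group of each submissionId by looking up the accession referenced
+     * in the revision metadata against the existing sequence_entries rows.
+     */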
+ } + + val submissionIdsWithoutAccessions = submissionIdToAccession.filter { it.second.isBlank() } + if (submissionIdsWithoutAccessions.isNotEmpty()) { + throw UnprocessableEntityException( + "The rows with the following submissionIds are missing accessions in metadata file: " + + formatListWithBackticks(submissionIdsWithoutAccessions.map { it.first }), + ) + } + + val duplicateAccessions = submissionIdToAccession.groupBy { it.second } + .filter { it.value.size > 1 } + if (duplicateAccessions.isNotEmpty()) { + throw UnprocessableEntityException( + "Some accessions appear multiple times in metadata file: " + + formatListWithBackticks(duplicateAccessions.keys), + ) + } + + return submissionIdToAccession.toMap() + } + + private fun formatListWithBackticks(list: Collection): String = list.joinToString(", ") { "`$it`" } + + fun submissionIdToGroup(metadata: Map): Map { + val submissionIdToAccession = submissionIdToAccession(metadata).toList() + val accessionToGroup = submissionIdToAccession.chunked(POSTGRESQL_PARAMETER_LIMIT / 2) { chunk -> + SequenceEntriesTable.select( + accessionColumn, + groupIdColumn, + ) + .where { accessionColumn inList chunk.map { it.second } } + .associate { + it[accessionColumn] to it[groupIdColumn] + } + }.flatMap(Map::toList).toMap() + + return submissionIdToAccession.associate { (submissionId, accession) -> + submissionId to ( + accessionToGroup[accession] + ?: throw IllegalStateException("Could not find groupId for accession $accession") + ) } } } diff --git a/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt b/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt new file mode 100644 index 0000000000..9eb452feed --- /dev/null +++ b/backend/src/main/kotlin/org/loculus/backend/utils/FormatUtils.kt @@ -0,0 +1,8 @@ +package org.loculus.backend.utils + +fun formatBytesHuman(bytes: Long): String = when { + bytes >= 1024L * 1024L * 1024L -> "${bytes / (1024L * 1024L * 1024L)}GB" + bytes >= 1024L * 1024L -> "${bytes / (1024L * 1024L)}MB" + bytes >= 1024L -> "${bytes / 1024L}KB" + else -> "${bytes}B" +} diff --git a/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt b/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt index 4815e61e2b..1bcf8b60a4 100644 --- a/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt +++ b/backend/src/main/kotlin/org/loculus/backend/utils/ParseFastaHeader.kt @@ -26,7 +26,7 @@ class ParseFastaHeader(private val backendConfig: BackendConfig) { " segment name in the format <$HEADER_TO_CONNECT_METADATA_AND_SEQUENCES>_", ) } - val isolateId = submissionId.substring(0, lastDelimiter) + val isolateId = submissionId.take(lastDelimiter) val segmentId = submissionId.substring(lastDelimiter + 1) if (!validSegmentIds.contains(segmentId)) { throw BadRequestException( diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties index 57113c07e4..530e2215e6 100644 --- a/backend/src/main/resources/application.properties +++ b/backend/src/main/resources/application.properties @@ -26,6 +26,10 @@ loculus.cleanup.task.run-every-seconds=60 loculus.stream.batch-size=1000 loculus.debug-mode=false +loculus.submission.max-metadata-file-size-bytes=52428800 +loculus.submission.max-sequence-file-size-bytes=209715200 +loculus.submission.max-uncompressed-sequence-size-bytes=524288000 + loculus.s3.enabled=false loculus.s3.bucket.endpoint= loculus.s3.bucket.bucket= diff --git 
a/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt b/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt index 660e380047..a707129bbf 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/EndpointTestExtension.kt @@ -26,10 +26,8 @@ import org.loculus.backend.service.files.FILES_TABLE_NAME import org.loculus.backend.service.groupmanagement.GROUPS_TABLE_NAME import org.loculus.backend.service.groupmanagement.USER_GROUPS_TABLE_NAME import org.loculus.backend.service.submission.CURRENT_PROCESSING_PIPELINE_TABLE_NAME -import org.loculus.backend.service.submission.METADATA_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME import org.loculus.backend.service.submission.SEQUENCE_ENTRIES_TABLE_NAME -import org.loculus.backend.service.submission.SEQUENCE_UPLOAD_AUX_TABLE_NAME import org.loculus.backend.testutil.TestEnvironment import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc import org.springframework.boot.test.context.SpringBootTest @@ -230,8 +228,6 @@ private fun clearDatabaseStatement(): String = """ $SEQUENCE_ENTRIES_TABLE_NAME, $SEQUENCE_ENTRIES_PREPROCESSED_DATA_TABLE_NAME, $USER_GROUPS_TABLE_NAME, - $METADATA_UPLOAD_AUX_TABLE_NAME, - $SEQUENCE_UPLOAD_AUX_TABLE_NAME, $DATA_USE_TERMS_TABLE_NAME, $CURRENT_PROCESSING_PIPELINE_TABLE_NAME, $FILES_TABLE_NAME, diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt index d8b9a519e8..2170ae176c 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/ExceptionHandlerTest.kt @@ -121,7 +121,7 @@ class ExceptionHandlerWithMockedModelTest(@Autowired val mockMvc: MockMvc) { @Test fun `WHEN I submit a request with invalid organism THEN it should return a descriptive error message`() { - every { submitModel.processSubmissions(any(), any(), any()) } returns validResponse + every { submitModel.processSubmissions(any(), any()) } returns validResponse mockMvc.perform( multipart("/unknownOrganism/submit") diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt new file mode 100644 index 0000000000..15e4c274b0 --- /dev/null +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt @@ -0,0 +1,144 @@ +package org.loculus.backend.controller.submission + +import org.hamcrest.Matchers.containsString +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.loculus.backend.controller.EndpointTest +import org.loculus.backend.controller.groupmanagement.GroupManagementControllerClient +import org.loculus.backend.controller.groupmanagement.andGetGroupId +import org.loculus.backend.service.submission.CompressionAlgorithm +import org.springframework.beans.factory.annotation.Autowired +import org.springframework.http.MediaType.APPLICATION_PROBLEM_JSON +import org.springframework.test.context.TestPropertySource +import org.springframework.test.web.servlet.result.MockMvcResultMatchers.content +import org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath +import 
diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt
new file mode 100644
index 0000000000..15e4c274b0
--- /dev/null
+++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/FileSizeLimitsTest.kt
@@ -0,0 +1,144 @@
+package org.loculus.backend.controller.submission
+
+import org.hamcrest.Matchers.containsString
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import org.loculus.backend.controller.EndpointTest
+import org.loculus.backend.controller.groupmanagement.GroupManagementControllerClient
+import org.loculus.backend.controller.groupmanagement.andGetGroupId
+import org.loculus.backend.service.submission.CompressionAlgorithm
+import org.springframework.beans.factory.annotation.Autowired
+import org.springframework.http.MediaType.APPLICATION_PROBLEM_JSON
+import org.springframework.test.context.TestPropertySource
+import org.springframework.test.web.servlet.result.MockMvcResultMatchers.content
+import org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath
+import org.springframework.test.web.servlet.result.MockMvcResultMatchers.status
+
+@EndpointTest
+@TestPropertySource(
+    properties = [
+        "loculus.submission.max-metadata-file-size-bytes=100",
+        "loculus.submission.max-sequence-file-size-bytes=200",
+        "loculus.submission.max-uncompressed-sequence-size-bytes=300",
+    ],
+)
+class FileSizeLimitsTest(
+    @Autowired val submissionControllerClient: SubmissionControllerClient,
+    @Autowired val groupManagementClient: GroupManagementControllerClient,
+) {
+    var groupId: Int = 0
+
+    @BeforeEach
+    fun prepareNewGroup() {
+        groupId = groupManagementClient.createNewGroup().andGetGroupId()
+    }
+
+    @Test
+    fun `WHEN metadata file exceeds configured limit THEN returns PayloadTooLarge error`() {
+        val largeMetadataContent = "submissionId\tfirstColumn\n" + "x".repeat(200) // Exceeds the 100 byte limit
+        val largeMetadataFile = SubmitFiles.metadataFileWith(content = largeMetadataContent)
+
+        submissionControllerClient.submit(
+            metadataFile = largeMetadataFile,
+            sequencesFile = SubmitFiles.DefaultFiles.sequencesFile,
+            groupId = groupId,
+        )
+            .andExpect(status().isPayloadTooLarge)
+            .andExpect(content().contentType(APPLICATION_PROBLEM_JSON))
+            .andExpect(jsonPath("$.detail", containsString("metadata file is too large. Max 100B")))
+    }
+
+    @Test
+    fun `WHEN sequence file exceeds configured limit THEN returns PayloadTooLarge error`() {
+        val largeSequenceContent = ">custom0\n" + "A".repeat(300) // Exceeds the 200 byte limit
+        val largeSequenceFile = SubmitFiles.sequenceFileWith(content = largeSequenceContent)
+
+        // Use a small metadata file to avoid hitting the metadata limit first
+        val smallMetadata = "submissionId\tfirstColumn\ncustom0\tval" // Under 100 bytes
+        val smallMetadataFile = SubmitFiles.metadataFileWith(content = smallMetadata)
+
+        submissionControllerClient.submit(
+            metadataFile = smallMetadataFile,
+            sequencesFile = largeSequenceFile,
+            groupId = groupId,
+        )
+            .andExpect(status().isPayloadTooLarge)
+            .andExpect(content().contentType(APPLICATION_PROBLEM_JSON))
+            .andExpect(jsonPath("$.detail", containsString("sequence file is too large. Max 200B")))
+    }
Max 200B"))) + } + + @Test + fun `WHEN uncompressed sequence data exceeds configured limit THEN returns PayloadTooLarge error`() { + // Create small compressed file that expands beyond 300 bytes when parsed + // ~308 bytes total when parsed + val largeUncompressedSequences = + ">c0\n" + "A".repeat(100) + "\n>c1\n" + "C".repeat(100) + "\n>c2\n" + "G".repeat(100) + + // Use compression to make the actual file size smaller than our limits + val compressedSequenceFile = SubmitFiles.sequenceFileWith( + content = largeUncompressedSequences, + compression = CompressionAlgorithm.GZIP, + ) + + // Matching metadata + val smallMetadata = "submissionId\tfirstColumn\nc0\tv0\nc1\tv1\nc2\tv2" // Under 100 bytes + val smallMetadataFile = SubmitFiles.metadataFileWith(content = smallMetadata) + + submissionControllerClient.submit( + metadataFile = smallMetadataFile, + sequencesFile = compressedSequenceFile, + groupId = groupId, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("Uncompressed sequence data exceeds maximum allowed size"))) + } + + @Test + fun `WHEN files are within configured limits THEN submission succeeds`() { + val smallMetadata = "submissionId\tfirstColumn\ncustom0\tvalue" // Under 100 bytes + val smallSequence = ">custom0\nACGT" // Under 200 bytes and under 300 bytes uncompressed + + val metadataFile = SubmitFiles.metadataFileWith(content = smallMetadata) + val sequenceFile = SubmitFiles.sequenceFileWith(content = smallSequence) + + submissionControllerClient.submit( + metadataFile = metadataFile, + sequencesFile = sequenceFile, + groupId = groupId, + ) + .andExpect(status().isOk) + .andExpect(jsonPath("$[0].submissionId").value("custom0")) + } + + @Test + fun `WHEN revise metadata file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeMetadataContent = "accession\tsubmissionId\tfirstColumn\n" + "acc1\tcustom0\t" + "x".repeat(200) + val largeMetadataFile = SubmitFiles.revisedMetadataFileWith(content = largeMetadataContent) + + submissionControllerClient.reviseSequenceEntries( + metadataFile = largeMetadataFile, + sequencesFile = SubmitFiles.DefaultFiles.sequencesFile, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("metadata file is too large. Max 100B"))) + } + + @Test + fun `WHEN revise sequence file exceeds configured limit THEN returns PayloadTooLarge error`() { + val largeSequenceContent = ">custom0\n" + "A".repeat(300) + val largeSequenceFile = SubmitFiles.sequenceFileWith(content = largeSequenceContent) + + val smallMetadata = "accession\tsubmissionId\tfirstColumn\nacc1\tcustom0\tval" + val smallMetadataFile = SubmitFiles.revisedMetadataFileWith(content = smallMetadata) + + submissionControllerClient.reviseSequenceEntries( + metadataFile = smallMetadataFile, + sequencesFile = largeSequenceFile, + ) + .andExpect(status().isPayloadTooLarge) + .andExpect(content().contentType(APPLICATION_PROBLEM_JSON)) + .andExpect(jsonPath("$.detail", containsString("sequence file is too large. 
Max 200B"))) + } +} diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt index 82caf133bf..d66dc16f75 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/GetOriginalMetadataEndpointTest.kt @@ -165,30 +165,31 @@ class GetOriginalMetadataEndpointTest( assertThat(responseAccessionVersions, `is`(expectedAccessionVersions)) } - @Test - fun `GIVEN there are sequences currently being uploaded THEN returns locked`() { - val uploadId = "upload id" - val mockUser = mockk() - every { mockUser.username }.returns("username") - - uploadDatabaseService.batchInsertMetadataInAuxTable( - uploadId = uploadId, - authenticatedUser = mockUser, - groupId = 1, - submittedOrganism = Organism("organism"), - uploadedMetadataBatch = listOf(MetadataEntry("submission id", mapOf("key" to "value"))), - uploadedAt = LocalDateTime(2024, 1, 1, 1, 1, 1), - null, - ) - - submissionControllerClient.getOriginalMetadata() - .andExpect(status().isLocked) - - uploadDatabaseService.deleteUploadData(uploadId) - - submissionControllerClient.getOriginalMetadata() - .andExpect(status().isOk) - } + // This test no longer works because we don't use the Aux table anymore +// @Test +// fun `GIVEN there are sequences currently being uploaded THEN returns locked`() { +// val uploadId = "upload id" +// val mockUser = mockk() +// every { mockUser.username }.returns("username") +// +// uploadDatabaseService.batchInsertMetadataInAuxTable( +// uploadId = uploadId, +// authenticatedUser = mockUser, +// groupId = 1, +// submittedOrganism = Organism("organism"), +// uploadedMetadataBatch = listOf(MetadataEntry("submission id", mapOf("key" to "value"))), +// uploadedAt = LocalDateTime(2024, 1, 1, 1, 1, 1), +// null, +// ) +// +// submissionControllerClient.getOriginalMetadata() +// .andExpect(status().isLocked) +// +// uploadDatabaseService.deleteUploadData(uploadId) +// +// submissionControllerClient.getOriginalMetadata() +// .andExpect(status().isOk) +// } // Regression test for https://github.com/loculus-project/loculus/issues/4036 @Test diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt index 3b334c27db..44e931d728 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/ReviseEndpointTest.kt @@ -141,8 +141,8 @@ class ReviseEndpointTest( content = """ accession submissionId firstColumn - 123 someHeader_main someValue - ${accessions.first()} someHeader2_main someOtherValue + 123 someHeader someValue + ${accessions.first()} someHeader2 someOtherValue """.trimIndent(), ), SubmitFiles.sequenceFileWith(), @@ -520,25 +520,30 @@ class ReviseEndpointTest( 2 sameHeader someValue2 """.trimIndent(), ), - SubmitFiles.sequenceFileWith(), + SubmitFiles.sequenceFileWith( + content = """ + >sameHeader + AC + """.trimIndent(), + ), status().isUnprocessableEntity, "Unprocessable Entity", - "Metadata file contains at least one duplicate submissionId", + "Metadata file contains duplicated submissionIds: `sameHeader`", ), Arguments.of( "duplicate headers in sequence file", SubmitFiles.revisedMetadataFileWith(), 
                 SubmitFiles.sequenceFileWith(
                     content = """
-                        >sameHeader_main
+                        >sameHeader
                         AC
-                        >sameHeader_main
+                        >sameHeader
                         AC
                     """.trimIndent(),
                 ),
                 status().isUnprocessableEntity,
                 "Unprocessable Entity",
-                "Sequence file contains at least one duplicate submissionId",
+                "Sequence file contains duplicated FASTA ids: `sameHeader`",
             ),
             Arguments.of(
                 "metadata file misses headers",
@@ -591,7 +596,7 @@ class ReviseEndpointTest(
                 SubmitFiles.sequenceFileWith(),
                 status().isUnprocessableEntity,
                 "Unprocessable Entity",
-                "The revised metadata file does not contain the header 'accession'",
+                "Metadata file is missing required column 'accession'",
             ),
             Arguments.of(
                 "metadata file with one row with missing accession",
@@ -605,7 +610,7 @@ class ReviseEndpointTest(
                 SubmitFiles.sequenceFileWith(),
                 status().isUnprocessableEntity,
                 "Unprocessable Entity",
-                "A row in metadata file contains no accession",
+                "The rows with the following submissionIds are missing accessions in metadata file: `someHeader`",
             ),
         )
     }
 }
diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt
index a533c503e5..43550d48e8 100644
--- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt
+++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitEndpointTest.kt
@@ -472,7 +472,7 @@ class SubmitEndpointTest(
                 DefaultFiles.sequencesFile,
                 status().isUnprocessableEntity,
                 "Unprocessable Entity",
-                "Metadata file contains at least one duplicate submissionId",
+                "Metadata file contains duplicated submissionIds: `sameHeader`",
                 DEFAULT_ORGANISM,
                 DataUseTerms.Open,
             ),
@@ -489,7 +489,7 @@ class SubmitEndpointTest(
                 ),
                 status().isUnprocessableEntity,
                 "Unprocessable Entity",
-                "Sequence file contains at least one duplicate submissionId",
+                "Sequence file contains duplicated FASTA ids: `sameHeader_main`",
                 DEFAULT_ORGANISM,
                 DataUseTerms.Open,
             ),
diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt
index 614cdd6a0f..9c7f9f34a1 100644
--- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt
+++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitFiles.kt
@@ -112,7 +112,7 @@ object SubmitFiles {
         name: String = "sequenceFile",
         originalFilename: String = "sequences.fasta",
         mediaType: String = TEXT_PLAIN_VALUE,
-        content: String = ">someHeader_main\nAC\n>someHeader2_main\nAC",
+        content: String = ">someHeader\nAC\n>someHeader2\nAC",
         compression: CompressionAlgorithm = CompressionAlgorithm.NONE,
     ): MockMultipartFile {
         val contentStream = compressString(content, compression)
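Closing note, not part of the patch (the excerpt ends mid-hunk above): the sharpened duplicate messages asserted in these tests follow one pattern, namely collecting the offending ids and backtick-formatting them. A self-contained sketch with an illustrative helper name:

// duplicatesIn is an illustrative name, not the patch's actual code.
fun duplicatesIn(ids: List<String>): List<String> =
    ids.groupingBy { it }.eachCount().filterValues { it > 1 }.keys.toList()

fun main() {
    val fastaIds = listOf("sameHeader", "someHeader2", "sameHeader")
    val duplicates = duplicatesIn(fastaIds)
    if (duplicates.isNotEmpty()) {
        // Prints: Sequence file contains duplicated FASTA ids: `sameHeader`
        println("Sequence file contains duplicated FASTA ids: " + duplicates.joinToString(", ") { "`$it`" })
    }
}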