Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 65 additions & 37 deletions kubernetes/loculus/templates/_merged-reference-genomes.tpl
Original file line number Diff line number Diff line change
@@ -1,60 +1,88 @@
{{- define "loculus.mergeReferenceGenomes" -}}
{{- $referenceGenomes := . -}}
{{- $segmentFirstConfig := . -}}
{{- $lapisNucleotideSequences := list -}}
{{- $lapisGenes := list -}}

{{- if len $referenceGenomes | eq 1 }}
{{- include "loculus.generateReferenceGenome" (first (values $referenceGenomes)) -}}
{{- else }}
{{- range $suborganismName, $referenceGenomeRaw := $referenceGenomes -}}
{{- $referenceGenome := include "loculus.generateReferenceGenome" $referenceGenomeRaw | fromYaml -}}
{{/* Handle empty reference genomes */}}
{{- if or (not $segmentFirstConfig) (eq (len $segmentFirstConfig) 0) -}}
{{- $result := dict "nucleotideSequences" (list) "genes" (list) -}}
{{- $result | toYaml -}}
{{- else -}}

{{- $nucleotideSequences := $referenceGenome.nucleotideSequences -}}
{{- if $nucleotideSequences -}}
{{- if eq (len $nucleotideSequences) 1 -}}
{{- $lapisNucleotideSequences = append $lapisNucleotideSequences (dict
"name" $suborganismName
"sequence" (first $nucleotideSequences).sequence)
-}}
{{- else -}}
{{- range $sequence := $nucleotideSequences -}}
{{- $lapisNucleotideSequences = append $lapisNucleotideSequences (dict
"name" (printf "%s-%s" $suborganismName $sequence.name)
"sequence" $sequence.sequence
{{/* Extract all unique reference names from the first segment */}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh? I think this is also an error: we need to do this for every segment, not just the first one. Also, I don't think we should add the reference as a prefix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal of the current PR was just to support the current code/setup in a flipped form. There are a bunch of other changes (such as this one) that would be needed to fully complete the feature for some organisms.

{{- $referenceNames := list -}}
{{- $firstSegment := first (values $segmentFirstConfig) -}}
{{- $referenceNames = keys $firstSegment -}}

{{/* Check if this is single-reference mode (only one reference across all segments) */}}
{{- if eq (len $referenceNames) 1 -}}
{{/* Single reference mode - no prefixing */}}
{{- $singleRef := first $referenceNames -}}

{{/* Process each segment */}}
{{- range $segmentName, $refMap := $segmentFirstConfig -}}
{{- $refData := index $refMap $singleRef -}}
{{- if $refData -}}
{{/* Add nucleotide sequence */}}
{{- $lapisNucleotideSequences = append $lapisNucleotideSequences (dict
"name" $segmentName
"sequence" $refData.sequence
) -}}

{{/* Add genes if present */}}
{{- if $refData.genes -}}
{{- range $geneName, $geneData := $refData.genes -}}
{{- $lapisGenes = append $lapisGenes (dict
"name" $geneName
"sequence" $geneData.sequence
) -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{- else -}}
{{/* Multi-reference mode - prefix with reference name */}}

{{/* Process each reference */}}
{{- range $refName := $referenceNames -}}
{{/* Process each segment */}}
{{- range $segmentName, $refMap := $segmentFirstConfig -}}
{{- $refData := index $refMap $refName -}}
{{- if $refData -}}
{{/* Add nucleotide sequence with reference prefix */}}
{{- $lapisNucleotideSequences = append $lapisNucleotideSequences (dict
"name" (printf "%s-%s" $refName $segmentName)
"sequence" $refData.sequence
) -}}

{{- if $referenceGenome.genes -}}
{{- range $gene := $referenceGenome.genes -}}
{{- $lapisGenes = append $lapisGenes (dict
"name" (printf "%s-%s" $suborganismName $gene.name)
"sequence" $gene.sequence)
-}}
{{/* Add genes with reference prefix if present */}}
{{- if $refData.genes -}}
{{- range $geneName, $geneData := $refData.genes -}}
{{- $lapisGenes = append $lapisGenes (dict
"name" (printf "%s-%s" $refName $geneName)
"sequence" $geneData.sequence
) -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{- $result := dict "nucleotideSequences" $lapisNucleotideSequences "genes" $lapisGenes -}}
{{- $result | toYaml -}}
{{- end -}}

{{- $result := dict "nucleotideSequences" $lapisNucleotideSequences "genes" $lapisGenes -}}
{{- $result | toYaml -}}
{{- end -}}
{{- end -}}


{{- define "loculus.extractUniqueRawNucleotideSequenceNames" -}}
{{- $referenceGenomes := . -}}
{{- $segmentNames := list -}}
{{- $segmentFirstConfig := . -}}

{{- range $suborganismName, $referenceGenomeRaw := $referenceGenomes -}}
{{- $referenceGenome := include "loculus.generateReferenceGenome" $referenceGenomeRaw | fromYaml -}}

{{- range $sequence := $referenceGenome.nucleotideSequences -}}
{{- $segmentNames = append $segmentNames $sequence.name -}}
{{- end -}}
{{- end -}}
{{/* Extract segment names directly from top-level keys */}}
{{- $segmentNames := keys $segmentFirstConfig -}}

segments:
{{- $segmentNames | uniq | toYaml | nindent 2 -}}
{{- $segmentNames | sortAlpha | toYaml | nindent 2 -}}
{{- end -}}
104 changes: 47 additions & 57 deletions kubernetes/loculus/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -793,70 +793,60 @@
"groups": ["organism"],
"docsIncludePrefix": false,
"type": "object",
"description": "An object where the keys are the suborganism names and the values are a [Reference Genome](#reference-genome-type). If there is only one suborganism, then the key must be \"singleReference\".",
"description": "Segment-first reference genome structure. The top-level keys are segment names, and each segment maps to reference genomes keyed by reference name (e.g., CV-A16, CV-A10). Each reference contains a nucleotide sequence and optionally genes. All segments must define the same set of reference names.",
"additionalProperties": false,
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"type": "object",
"additionalProperties": false,
"properties": {
"nucleotideSequences": {
"groups": ["reference-genome"],
"docsIncludePrefix": false,
"type": "array",
"description": "Array of [Nucleotide sequence (type)](#nucleotidesequence-type)",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"groups": ["nucleotide-sequence"],
"docsIncludePrefix": false,
"type": "string",
"description": "Name of the sequence"
},
"sequence": {
"groups": ["nucleotide-sequence"],
"docsIncludePrefix": false,
"type": "string"
},
"insdcAccessionFull": {
"groups": ["nucleotide-sequence"],
"docsIncludePrefix": false,
"type": "string",
"description": "INSDC accession of the sequence"
}
"description": "Segment name (e.g., 'main', 'L', 'M', 'S')",
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"type": "object",
"description": "Reference name (e.g., 'CV-A16', 'CV-A10', or 'singleReference')",
"additionalProperties": false,
"properties": {
"sequence": {
"groups": ["nucleotide-sequence"],
"docsIncludePrefix": false,
"type": "string",
"description": "The nucleotide sequence for this segment/reference combination"
},
"required": ["name", "sequence"]
}
},
"genes": {
"groups": ["reference-genome"],
"docsIncludePrefix": false,
"type": "array",
"description": "Array of [Gene (type)](#gene-type)",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"groups": ["gene"],
"docsIncludePrefix": false,
"type": "string",
"description": "Name of the sequence."
},
"sequence": {
"groups": ["gene"],
"docsIncludePrefix": false,
"type": "string"
}
"insdcAccessionFull": {
"groups": ["nucleotide-sequence"],
"docsIncludePrefix": false,
"type": "string",
"description": "INSDC accession of the sequence"
},
"required": ["name", "sequence"]
}
"genes": {
"groups": ["gene"],
"docsIncludePrefix": false,
"type": "object",
"description": "Genes for this segment/reference combination",
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"type": "object",
"description": "Gene name (e.g., 'VP4', 'NS1')",
"additionalProperties": false,
"properties": {
"sequence": {
"groups": ["gene"],
"docsIncludePrefix": false,
"type": "string",
"description": "The amino acid or nucleotide sequence for this gene"
}
},
"required": ["sequence"]
}
},
"additionalProperties": false
}
},
"required": ["sequence"]
}
}
},
"additionalProperties": false
}
},
"additionalProperties": false
}
}
},
"required": ["schema"]
Expand Down
Loading
Loading