diff --git a/integration-tests/tests/pages/review.page.ts b/integration-tests/tests/pages/review.page.ts index 8a50b6d63e..1b26860d58 100644 --- a/integration-tests/tests/pages/review.page.ts +++ b/integration-tests/tests/pages/review.page.ts @@ -22,7 +22,7 @@ export class ReviewPage { async waitForZeroProcessing() { await expect(this.page.locator('[data-testid="review-page-control-panel"]')).toContainText( '0 awaiting processing', - { timeout: 60000 }, + { timeout: 200000 }, ); } diff --git a/integration-tests/tests/specs/features/search/lineage-field.spec.ts b/integration-tests/tests/specs/features/search/lineage-field.spec.ts index 6d31683df6..6cef8fab28 100644 --- a/integration-tests/tests/specs/features/search/lineage-field.spec.ts +++ b/integration-tests/tests/specs/features/search/lineage-field.spec.ts @@ -7,7 +7,7 @@ import { v4 as uuidv4 } from 'uuid'; const SEQUENCE = 'ATTGATCTCATCATTT'; test('Lineage field lineage counts', async ({ page, pageWithGroup }) => { - test.setTimeout(95_000); + test.setTimeout(900_000); const uuid = uuidv4(); await page.goto('/'); diff --git a/kubernetes/loculus/templates/_common-metadata.tpl b/kubernetes/loculus/templates/_common-metadata.tpl index 72968acb54..db2ec373e3 100644 --- a/kubernetes/loculus/templates/_common-metadata.tpl +++ b/kubernetes/loculus/templates/_common-metadata.tpl @@ -251,8 +251,8 @@ organisms: {{ end }} {{ .website | toYaml | nindent 6 }} {{- end }} - referenceGenomes: - {{ $instance.referenceGenomes | toYaml | nindent 6 }} + referenceGenome: + {{ include "loculus.mergeReferenceGenomes" $instance.referenceGenomes | nindent 6 }} {{- end }} {{- end }} diff --git a/kubernetes/loculus/templates/loculus-preprocessing-config.yaml b/kubernetes/loculus/templates/loculus-preprocessing-config.yaml index 63118bfc76..c51d881307 100644 --- a/kubernetes/loculus/templates/loculus-preprocessing-config.yaml +++ b/kubernetes/loculus/templates/loculus-preprocessing-config.yaml @@ -2,7 +2,12 @@ {{- $metadata := 
($organismConfig.schema | include "loculus.patchMetadataSchema" | fromYaml).metadata }} {{- $rawNucleotideSequences := (($organismConfig.schema | include "loculus.patchMetadataSchema" | fromYaml).nucleotideSequences) }} {{- $nucleotideSequences := ($rawNucleotideSequences | default "" ) }} -{{- $nucleotideSequencesList := (eq $rawNucleotideSequences nil | ternary (list "main") $rawNucleotideSequences) }} +{{- $referenceGenomes:= include "loculus.mergeReferenceGenomes" $organismConfig.referenceGenomes | fromYaml }} +{{- $genesList := (eq $referenceGenomes.genes nil | ternary (list) $referenceGenomes.genes) }} +{{- $genesDict := dict "genes" (list) -}} +{{- range $g := $genesList }} + {{- $_ := set $genesDict "genes" (append ($genesDict.genes) $g.name) -}} +{{- end }} {{- range $processingIndex, $processingConfig := $organismConfig.preprocessing }} {{- if $processingConfig.configFile }} {{- /* Use the enaDepositionConfig as the base config */}} @@ -17,7 +22,7 @@ data: preprocessing-config.yaml: | organism: {{ $organism }} {{- $preproAndEnaConfigFile | toYaml | nindent 4 }} - {{- (dict "nucleotideSequences" $nucleotideSequencesList) | toYaml | nindent 4 }} + {{- $genesDict | toYaml | nindent 4 }} processing_spec: {{- $args := dict "metadata" $metadata "nucleotideSequences" $nucleotideSequences }} {{- include "loculus.preprocessingSpecs" $args | nindent 6 }} diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index ec8e04163c..00ef3d6358 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -1312,7 +1312,6 @@ defaultOrganismConfig: &defaultOrganismConfig replicas: 2 configFile: &preprocessingConfigFile log_level: DEBUG - genes: [] batch_size: 100 create_embl_file: true ingest: &ingest @@ -1356,16 +1355,18 @@ defaultOrganisms: - <<: *preprocessing version: 1 configFile: - <<: *preprocessingConfigFile - genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L] - nextclade_dataset_name: nextstrain/ebola/sudan + <<: 
*preprocessingConfigFile + nucleotideSequences: + - name: "main" + nextclade_dataset_name: nextstrain/ebola/sudan nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output - <<: *preprocessing version: 2 configFile: <<: *preprocessingConfigFile - genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L] - nextclade_dataset_name: nextstrain/ebola/sudan + nucleotideSequences: + - name: "main" + nextclade_dataset_name: nextstrain/ebola/sudan nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output west-nile: <<: *defaultOrganismConfig @@ -1417,9 +1418,10 @@ defaultOrganisms: version: 1 configFile: <<: *preprocessingConfigFile - nextclade_dataset_name: nextstrain/wnv/all-lineages + nucleotideSequences: + - name: "main" + nextclade_dataset_name: nextstrain/wnv/all-lineages nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output - genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5] ingest: <<: *ingest configFile: @@ -1674,8 +1676,9 @@ defaultOrganisms: - "prepro" configFile: log_level: DEBUG - genes: [] batch_size: 100 + nucleotideSequences: + - name: "main" referenceGenomes: singleReference: nucleotideSequences: @@ -1734,9 +1737,14 @@ defaultOrganisms: configFile: <<: *preprocessingConfigFile log_level: DEBUG - nextclade_dataset_name: nextstrain/cchfv/linked + nucleotideSequences: + - name: L + nextclade_dataset_name: nextstrain/cchfv/linked/L + - name: M + nextclade_dataset_name: nextstrain/cchfv/linked/M + - name: S + nextclade_dataset_name: nextstrain/cchfv/linked/S nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/cornelius-cchfv/data_output - genes: [RdRp, GPC, NP] ingest: <<: *ingest configFile: @@ -1772,7 +1780,7 @@ defaultOrganisms: sequence: "[[URL:https://corneliusroemer.github.io/seqs/artefacts/cchf/NP.fasta]]" enteroviruses: <<: *defaultOrganismConfig - enabled: false + 
enabled: true schema: <<: *schema organismName: "Enterovirus" @@ -1848,9 +1856,23 @@ defaultOrganisms: configFile: <<: *preprocessingConfigFile log_level: INFO - nextclade_dataset_name: community/hodcroftlab/enterovirus/enterovirus/linked + classify_with_nextclade_sort: True + minimizer_index: "https://raw.githubusercontent.com/alejandra-gonzalezsanchez/loculus-evs/master/evs_minimizer-index.json" + nucleotideSequences: + - name: CV-A16 + nextclade_dataset_name: community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16 + accepted_sort_matches: ["community/hodcroftlab/enterovirus/cva16", "community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16"] + gene_prefix: "CV-A16-" + - name: CV-A10 + nextclade_dataset_name: community/hodcroftlab/enterovirus/enterovirus/linked/CV-A10 + gene_prefix: "CV-A10-" + - name: EV-A71 + nextclade_dataset_name: community/hodcroftlab/enterovirus/enterovirus/linked/EV-A71 + gene_prefix: "EV-A71-" + - name: EV-D68 + gene_prefix: "EV-D68-" + nextclade_dataset_name: community/hodcroftlab/enterovirus/enterovirus/linked/EV-D68 nextclade_dataset_server: https://raw.githubusercontent.com/alejandra-gonzalezsanchez/nextclade_data/multi-pathogen-evs/data_output - genes: ["CV-A16-VP4", "CV-A16-VP2", "CV-A16-VP3", "CV-A16-VP1", "CV-A16-2A", "CV-A16-2B", "CV-A16-2C", "CV-A16-3A", "CV-A16-3B", "CV-A16-3C", "CV-A16-3D", "CV-A10-VP4", "CV-A10-VP2", "CV-A10-VP3", "CV-A10-VP1", "CV-A10-2A", "CV-A10-2B", "CV-A10-2C", "CV-A10-3A", "CV-A10-3B", "CV-A10-3C", "CV-A10-3D", "EV-A71-VP4", "EV-A71-VP2", "EV-A71-VP3", "EV-A71-VP1", "EV-A71-2A", "EV-A71-2B", "EV-A71-2C", "EV-A71-3A", "EV-A71-3B", "EV-A71-3C", "EV-A71-3D", "EV-D68-VP4", "EV-D68-VP2", "EV-D68-VP3", "EV-D68-VP1", "EV-D68-2A", "EV-D68-2B", "EV-D68-2C", "EV-D68-3A", "EV-D68-3B", "EV-D68-3C", "EV-D68-3D"] ingest: <<: *ingest configFile: @@ -2070,7 +2092,7 @@ enforceHTTPS: true registrationTermsMessage: > You must agree to the terms of use. 
-enaDeposition: +enaDeposition: submitToEnaProduction: false enaDbName: Loculus enaUniqueSuffix: Loculus diff --git a/preprocessing/nextclade/src/loculus_preprocessing/config.py b/preprocessing/nextclade/src/loculus_preprocessing/config.py index d84d5403ca..142ca67e05 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/config.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/config.py @@ -32,8 +32,21 @@ class AlignmentRequirement(StrEnum): # Determines whether ALL or ANY segments that a user provides must align. # ANY: warn if some segments fail and some segments align # ALL: error if any segment fails even if some segments align + # NONE: do not align any segments, just process them as-is + # - set if no nextclade dataset is provided ANY = "ANY" ALL = "ALL" + NONE = "NONE" + + +@dataclass +class NextcladeSequenceAndDataset: + name: str = "main" + nextclade_dataset_name: str | None = None + nextclade_dataset_tag: str | None = None + nextclade_dataset_server: str | None = None + accepted_sort_matches: list[str] | None = None + gene_prefix: str | None = None @dataclass @@ -51,21 +64,20 @@ class Config: keycloak_token_path: str = "realms/loculus/protocol/openid-connect/token" # noqa: S105 organism: str = "mpox" + nucleotideSequences: list[NextcladeSequenceAndDataset] = dataclasses.field( # noqa: N815 + default_factory=list + ) genes: list[str] = dataclasses.field(default_factory=list) - nucleotideSequences: list[str] = dataclasses.field(default_factory=lambda: ["main"]) # noqa: N815 processing_spec: dict[str, dict[str, Any]] = dataclasses.field(default_factory=dict) multi_segment: bool = False alignment_requirement: AlignmentRequirement = AlignmentRequirement.ALL - nextclade_dataset_name: str | None = None - nextclade_dataset_name_map: dict[str, str] | None = None - nextclade_dataset_tag: str | None = None nextclade_dataset_server: str = "https://data.clades.nextstrain.org/v3" - nextclade_dataset_server_map: dict[str, str] | None = None 
require_nextclade_sort_match: bool = False minimizer_url: str | None = None - accepted_dataset_matches: list[str] = dataclasses.field(default_factory=list) + classify_with_nextclade_sort: bool = False + create_embl_file: bool = False scientific_name: str = "Orthonairovirus haemorrhagiae" molecule_type: MoleculeType = MoleculeType.GENOMIC_RNA @@ -77,6 +89,36 @@ class Config: ) +def assign_nextclade_sequence_and_dataset( + nuc_seq_values: list[dict[str, Any]], +) -> list[NextcladeSequenceAndDataset]: + if not isinstance(nuc_seq_values, list): + error_msg = f"nucleotideSequences should be a list of dicts, got: {type(nuc_seq_values)}" + logger.error(error_msg) + raise ValueError(error_msg) + nextclade_sequence_and_dataset_list: list[NextcladeSequenceAndDataset] = [] + for value in nuc_seq_values: + if value is None or not isinstance(value, dict): + continue + seq_and_dataset = NextcladeSequenceAndDataset() + for seq_key, seq_value in value.items(): + if hasattr(seq_and_dataset, seq_key) and seq_value is not None: + setattr(seq_and_dataset, seq_key, seq_value) + nextclade_sequence_and_dataset_list.append(seq_and_dataset) + return nextclade_sequence_and_dataset_list + + +def set_alignment_requirement( + config: Config) -> AlignmentRequirement: + need_nextclade_dataset: bool = False + for sequence in config.nucleotideSequences: + if sequence.nextclade_dataset_name: + need_nextclade_dataset = True + if not need_nextclade_dataset: + return AlignmentRequirement.NONE + return config.alignment_requirement + + def load_config_from_yaml(config_file: str, config: Config | None = None) -> Config: config = Config() if config is None else copy.deepcopy(config) with open(config_file, encoding="utf-8") as file: @@ -84,6 +126,9 @@ def load_config_from_yaml(config_file: str, config: Config | None = None) -> Con logger.debug(f"Loaded config from {config_file}: {yaml_config}") for key, value in yaml_config.items(): if value is not None and hasattr(config, key): + if key == 
"nucleotideSequences": + setattr(config, key, assign_nextclade_sequence_and_dataset(value)) + continue attr = getattr(config, key) if isinstance(attr, StrEnum): try: @@ -171,4 +216,6 @@ def get_config(config_file: str | None = None, ignore_args: bool = False) -> Con if len(config.nucleotideSequences) > 1: config.multi_segment = True + config.alignment_requirement = set_alignment_requirement(config) + return config diff --git a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py index 1148472cbe..c4adba2b4d 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/prepro.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/prepro.py @@ -24,7 +24,7 @@ submit_processed_sequences, upload_embl_file_to_presigned_url, ) -from .config import AlignmentRequirement, Config +from .config import AlignmentRequirement, Config, NextcladeSequenceAndDataset from .datatypes import ( AccessionVersion, Alerts, @@ -75,11 +75,12 @@ def parse_nextclade_tsv( ], result_dir: str, config: Config, - segment: SegmentName, + sequence_and_dataset: NextcladeSequenceAndDataset, ) -> tuple[ defaultdict[AccessionVersion, defaultdict[GeneName, list[AminoAcidInsertion]]], defaultdict[AccessionVersion, defaultdict[SegmentName, list[NucleotideInsertion]]], ]: + segment = sequence_and_dataset.name with Path(result_dir + "/nextclade.tsv").open(encoding="utf-8") as nextclade_tsv: reader = csv.DictReader(nextclade_tsv, delimiter="\t") for row in reader: @@ -94,7 +95,12 @@ def parse_nextclade_tsv( continue gene, val = ins.split(":", maxsplit=1) if gene in config.genes: - amino_acid_insertions[id][gene].append(val) + gene_name = ( + sequence_and_dataset.gene_prefix + gene + if sequence_and_dataset.gene_prefix + else gene + ) + amino_acid_insertions[id][gene_name].append(val) else: logger.debug( "Note: Nextclade found AA insertion in gene missing from config in gene " @@ -130,32 +136,20 @@ def parse_nextclade_json( def run_sort( - 
result_file_dir: str, + result_file: str, input_file: str, - alerts: Alerts, config: Config, - segment: SegmentName, + nextclade_dataset_server: str, dataset_dir: str, -) -> Alerts: +) -> pd.DataFrame: """ Run nextclade - use config.minimizer_url or default minimizer from nextclade server - - assert highest score is in config.accepted_dataset_matches - (default is nextclade_dataset_name) """ - nextclade_dataset_name = get_nextclade_dataset_name(config, segment) - if not config.accepted_dataset_matches and not nextclade_dataset_name: - logger.warning("No nextclade dataset name or accepted dataset match list found in config") - return alerts - nextclade_dataset_server = get_nextclade_dataset_server(config, segment) - if config.minimizer_url: minimizer_file = dataset_dir + "/minimizer/minimizer.json" - accepted_dataset_names = config.accepted_dataset_matches or [nextclade_dataset_name] # type: ignore - - result_file = result_file_dir + "/sort_output.tsv" - command = [ + subprocess_args_with_empty_strings = [ "nextclade3", "sort", input_file, @@ -174,14 +168,15 @@ f"{nextclade_dataset_server}", ] - logger.debug(f"Running nextclade sort: {command}") + subprocess_args = [arg for arg in subprocess_args_with_empty_strings if arg] + + logger.debug(f"Running nextclade sort: {subprocess_args}") - exit_code = subprocess.run(command, check=False).returncode # noqa: S603 + exit_code = subprocess.run(subprocess_args, check=False).returncode # noqa: S603 if exit_code != 0: msg = f"nextclade sort failed with exit code {exit_code}" raise Exception(msg) - - df = pd.read_csv( + return pd.read_csv( result_file, sep="\t", dtype={ @@ -192,6 +187,39 @@ }, ) + +def check_nextclade_sort_matches( # noqa: PLR0913, PLR0917 + result_file_dir: str, + input_file: str, + alerts: Alerts, + config: Config, + sequence_and_dataset: NextcladeSequenceAndDataset, + dataset_dir: str, +) -> Alerts: + """ + Run nextclade sort + - assert highest score is in 
sequence_and_dataset.accepted_sort_matches + (default is nextclade_dataset_name) + """ + nextclade_dataset_name = sequence_and_dataset.nextclade_dataset_name + if not sequence_and_dataset.accepted_sort_matches and not nextclade_dataset_name: + logger.warning("No nextclade dataset name or accepted dataset match list found in config") + return alerts + nextclade_dataset_server = ( + sequence_and_dataset.nextclade_dataset_server or config.nextclade_dataset_server + ) + + accepted_dataset_names = sequence_and_dataset.accepted_sort_matches or [nextclade_dataset_name] # type: ignore + + result_file = result_file_dir + "/sort_output.tsv" + df = run_sort( + result_file, + input_file, + config, + nextclade_dataset_server, + dataset_dir, + ) + hits = df.dropna(subset=["score"]).sort_values("score", ascending=False) best_hits = hits.groupby("seqName", as_index=False).first() @@ -208,7 +236,7 @@ def run_sort( "Sequence does not appear to match reference, per `nextclade sort`. " "Double check you are submitting to the correct organism." ), - ), + ) ) for _, row in best_hits.iterrows(): @@ -219,7 +247,7 @@ def run_sort( ProcessingAnnotationAlignment, AnnotationSourceType.NUCLEOTIDE_SEQUENCE, message=( - f"This sequence best matches {row['dataset']}, " + f"Sequence best matches {row['dataset']}, " "a different organism than the one you are submitting to: " f"{config.organism}. It is therefore not possible to release. " "Contact the administrator if you think this message is an error." 
@@ -230,12 +258,109 @@ def run_sort( return alerts +# TODO: running this for each sequence is inefficient, should be run once per batch +def classify_with_nextclade_sort( + input_unaligned_sequences: dict[str, NucleotideSequence | None], + unaligned_nucleotide_sequences: dict[SegmentName, NucleotideSequence | None], + errors: list[ProcessingAnnotation], + config: Config, + dataset_dir: str, +): + """ + Run nextclade sort + - assert highest score is in sequence_and_dataset.accepted_sort_matches + (default is nextclade_dataset_name) + """ + nextclade_dataset_server = config.nextclade_dataset_server + + with TemporaryDirectory(delete=not config.keep_tmp_dir) as result_dir: + input_file = result_dir + "/input.fasta" + os.makedirs(os.path.dirname(input_file), exist_ok=True) + with open(input_file, "w", encoding="utf-8") as f: + for id, seq in input_unaligned_sequences.items(): + f.write(f">{id}\n") + f.write(f"{seq}\n") + + result_file = result_dir + "/sort_output.tsv" + df = run_sort( + result_file, + input_file, + config, + nextclade_dataset_server, + dataset_dir, + ) + + no_hits = df[df["score"].isna()] + hits = df.dropna(subset=["score"]).sort_values("score", ascending=False) + for seq_name in no_hits["seqName"].unique(): + if seq_name not in hits["seqName"].unique(): + msg = ( + f"Sequence {seq_name} does not appear to match any reference for organism: " + f"{config.organism} per `nextclade sort`. " + f"Double check you are submitting to the correct organism." 
+ ) + # TODO: only error when config.alignment_requirement == "ALL", otherwise warn + errors.append( + ProcessingAnnotation.from_single( + ProcessingAnnotationAlignment, + AnnotationSourceType.NUCLEOTIDE_SEQUENCE, + message=msg, + ) + ) + + best_hits = hits.groupby("seqName", as_index=False).first() + logger.info(f"Found hits: {best_hits['seqName'].tolist()}") + + for _, row in best_hits.iterrows(): + not_found = True + for segment in config.nucleotideSequences: + default = [segment.nextclade_dataset_name] if segment.nextclade_dataset_name else [] + accepted_dataset_names = segment.accepted_sort_matches or default + if row["dataset"] in accepted_dataset_names: + unaligned_nucleotide_sequences[segment.name] = input_unaligned_sequences[ + row["seqName"] + ] + not_found = False + break + if not_found: + msg = ( + f"Sequence {row['seqName']} best matches {row['dataset']}, " + "which is currently not an accepted option for organism: " + f"{config.organism}. It is therefore not possible to release. " + "Contact the administrator if you think this message is an error." 
+ ) + errors.append( + ProcessingAnnotation.from_single( + ProcessingAnnotationAlignment, + AnnotationSourceType.NUCLEOTIDE_SEQUENCE, + message=msg, + ) + ) + + return (unaligned_nucleotide_sequences, errors) + + def assign_segment( input_unaligned_sequences: dict[str, NucleotideSequence | None], unaligned_nucleotide_sequences: dict[SegmentName, NucleotideSequence | None], errors: list[ProcessingAnnotation], config: Config, + dataset_dir: str | None = None, ): + if config.classify_with_nextclade_sort: + if not dataset_dir: + error_msg = ( + "Dataset directory must be provided when classify_with_nextclade_sort is True" + ) + logger.error(error_msg) + raise ValueError(error_msg) + return classify_with_nextclade_sort( + input_unaligned_sequences, + unaligned_nucleotide_sequences, + dataset_dir=dataset_dir, + errors=errors, + config=config, + ) valid_segments = set() duplicate_segments = set() if not config.nucleotideSequences: @@ -265,7 +390,8 @@ def assign_segment( unaligned_nucleotide_sequences, errors, ) - for segment in config.nucleotideSequences: + for sequence_and_dataset in config.nucleotideSequences: + segment = sequence_and_dataset.name unaligned_segment = [ data for data in input_unaligned_sequences @@ -303,7 +429,7 @@ def assign_segment( f"{', '.join(remaining_segments)}. " "Each metadata entry can have multiple corresponding fasta sequence " "entries with format _ valid segments are: " - f"{', '.join(config.nucleotideSequences)}." + f"{', '.join([sequence_and_dataset.name for sequence_and_dataset in config.nucleotideSequences])}." 
), ) ) @@ -341,8 +467,8 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 id = entry.accessionVersion input_metadata[id] = entry.data.metadata input_metadata[id]["submitter"] = entry.data.submitter - input_metadata[id]["group_id"] = entry.data.group_id input_metadata[id]["submittedAt"] = entry.data.submittedAt + input_metadata[id]["group_id"] = entry.data.group_id aligned_aminoacid_sequences[id] = {} unaligned_nucleotide_sequences[id] = {} aligned_nucleotide_sequences[id] = {} @@ -356,6 +482,7 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 unaligned_nucleotide_sequences=unaligned_nucleotide_sequences[id], errors=alerts.errors[id], config=config, + dataset_dir=dataset_dir, ) nextclade_metadata: defaultdict[ @@ -368,9 +495,12 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 AccessionVersion, defaultdict[GeneName, list[AminoAcidInsertion]] ] = defaultdict(lambda: defaultdict(list)) with TemporaryDirectory(delete=not config.keep_tmp_dir) as result_dir: # noqa: PLR1702 - for segment in config.nucleotideSequences: - result_dir_seg = result_dir if segment == "main" else result_dir + "/" + segment - dataset_dir_seg = dataset_dir if segment == "main" else dataset_dir + "/" + segment + for sequence_and_dataset in config.nucleotideSequences: + segment = sequence_and_dataset.name + result_dir_seg = result_dir if not config.multi_segment else result_dir + "/" + segment + dataset_dir_seg = ( + dataset_dir if not config.multi_segment else dataset_dir + "/" + segment + ) input_file = result_dir_seg + "/input.fasta" os.makedirs(os.path.dirname(input_file), exist_ok=True) is_empty: bool = True @@ -384,7 +514,9 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 continue if config.require_nextclade_sort_match: - alerts = run_sort(result_dir_seg, input_file, alerts, config, segment, dataset_dir) + alerts = check_nextclade_sort_matches( + result_dir_seg, input_file, alerts, config, sequence_and_dataset, dataset_dir + ) command = [ 
"nextclade3", @@ -422,7 +554,12 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 masked_sequence = mask_terminal_gaps( str(aligned_sequence.seq), mask_char="X" ) - aligned_aminoacid_sequences[sequence_id][gene] = masked_sequence + gene_name = ( + sequence_and_dataset.gene_prefix + gene + if sequence_and_dataset.gene_prefix + else gene + ) + aligned_aminoacid_sequences[sequence_id][gene_name] = masked_sequence except FileNotFoundError: # TODO: Add warning to each sequence logger.info( @@ -435,7 +572,11 @@ def enrich_with_nextclade( # noqa: C901, PLR0914, PLR0915 result_dir_seg, nextclade_metadata, segment, unaligned_nucleotide_sequences ) # this includes the "annotation" field amino_acid_insertions, nucleotide_insertions = parse_nextclade_tsv( - amino_acid_insertions, nucleotide_insertions, result_dir_seg, config, segment + amino_acid_insertions, + nucleotide_insertions, + result_dir_seg, + config, + sequence_and_dataset, ) return { @@ -555,7 +696,7 @@ def add_input_metadata( if not unprocessed.nextcladeMetadata[segment]: message = ( "Nucleotide sequence failed to align" - if segment == "main" + if not config.multi_segment else f"Nucleotide sequence for {segment} failed to align" ) annotation = ProcessingAnnotation.from_single( @@ -722,15 +863,16 @@ def process_single( # noqa: C901 config=config, ) - for segment in config.nucleotideSequences: + for sequence_and_dataset in config.nucleotideSequences: + segment = sequence_and_dataset.name sequence = unprocessed.unalignedNucleotideSequences.get(segment, None) - key = "length" if segment == "main" else "length_" + segment + key = "length" if not config.multi_segment else "length_" + segment if key in config.processing_spec: output_metadata[key] = len(sequence) if sequence else 0 for output_field, spec_dict in config.processing_spec.items(): length_fields = [ - "length" if segment == "main" else "length_" + segment + "length" if not config.multi_segment else "length_" + segment.name for segment in 
config.nucleotideSequences ] if output_field in length_fields: @@ -771,7 +913,8 @@ def process_single( # noqa: C901 return processed_entry_no_alignment(id, unprocessed, output_metadata, errors, warnings) aligned_segments = set() - for segment in config.nucleotideSequences: + for sequence_and_dataset in config.nucleotideSequences: + segment = sequence_and_dataset.name if unprocessed.alignedNucleotideSequences.get(segment, None): aligned_segments.add(segment) @@ -788,7 +931,8 @@ def process_single( # noqa: C901 if config.create_embl_file and unprocessed.nextcladeMetadata is not None: annotations = {} - for segment in config.nucleotideSequences: + for sequence_and_dataset in config.nucleotideSequences: + segment = sequence_and_dataset.name if segment in unprocessed.nextcladeMetadata: annotations[segment] = None if unprocessed.nextcladeMetadata[segment]: @@ -853,7 +997,7 @@ def process_all( ) -> Sequence[SubmissionData]: processed_results = [] logger.debug(f"Processing {len(unprocessed)} unprocessed sequences") - if config.nextclade_dataset_name: + if config.alignment_requirement != AlignmentRequirement.NONE: nextclade_results = enrich_with_nextclade(unprocessed, dataset_dir, config) for id, result in nextclade_results.items(): try: @@ -874,30 +1018,15 @@ def process_all( return processed_results -def get_nextclade_dataset_name(config: Config, segment: SegmentName) -> str | None: - if config.nextclade_dataset_name_map and segment in config.nextclade_dataset_name_map: - return config.nextclade_dataset_name_map[segment] - if not config.nextclade_dataset_name: - return None - return ( - config.nextclade_dataset_name - if segment == "main" - else config.nextclade_dataset_name + "/" + segment - ) - - -def get_nextclade_dataset_server(config: Config, segment: SegmentName) -> str: - if config.nextclade_dataset_server_map and segment in config.nextclade_dataset_server_map: - return config.nextclade_dataset_server_map[segment] - return config.nextclade_dataset_server - - def 
download_nextclade_dataset(dataset_dir: str, config: Config) -> None: - for segment in config.nucleotideSequences: - nextclade_dataset_name = get_nextclade_dataset_name(config, segment) - nextclade_dataset_server = get_nextclade_dataset_server(config, segment) + for sequence_and_dataset in config.nucleotideSequences: + name = sequence_and_dataset.name + nextclade_dataset_name = sequence_and_dataset.nextclade_dataset_name + nextclade_dataset_server = ( + sequence_and_dataset.nextclade_dataset_server or config.nextclade_dataset_server + ) - dataset_dir_seg = dataset_dir if segment == "main" else dataset_dir + "/" + segment + dataset_dir_seg = dataset_dir if not config.multi_segment else dataset_dir + "/" + name dataset_download_command = [ "nextclade3", "dataset", @@ -907,8 +1036,8 @@ def download_nextclade_dataset(dataset_dir: str, config: Config) -> None: f"--output-dir={dataset_dir_seg}", ] - if config.nextclade_dataset_tag is not None: - dataset_download_command.append(f"--tag={config.nextclade_dataset_tag}") + if sequence_and_dataset.nextclade_dataset_tag is not None: + dataset_download_command.append(f"--tag={sequence_and_dataset.nextclade_dataset_tag}") logger.info("Downloading Nextclade dataset: %s", dataset_download_command) if subprocess.run(dataset_download_command, check=False).returncode != 0: # noqa: S603 @@ -952,7 +1081,7 @@ def upload_flatfiles(processed: Sequence[SubmissionData], config: Config) -> Non def run(config: Config) -> None: with TemporaryDirectory(delete=not config.keep_tmp_dir) as dataset_dir: - if config.nextclade_dataset_name: + if config.alignment_requirement != AlignmentRequirement.NONE: download_nextclade_dataset(dataset_dir, config) if config.minimizer_url and config.require_nextclade_sort_match: download_minimizer(config.minimizer_url, dataset_dir + "/minimizer/minimizer.json") diff --git a/preprocessing/nextclade/tests/factory_methods.py b/preprocessing/nextclade/tests/factory_methods.py index 6911da68fe..23411c99e9 100644 --- 
a/preprocessing/nextclade/tests/factory_methods.py +++ b/preprocessing/nextclade/tests/factory_methods.py @@ -46,7 +46,7 @@ class ProcessingAnnotationHelper: @dataclass class ProcessedAlignment: unalignedNucleotideSequences: dict[str, str | None] = field( # noqa: N815 - default_factory=lambda: {"main": None} + default_factory=dict ) alignedNucleotideSequences: dict[str, str | None] = field( # noqa: N815 default_factory=dict diff --git a/preprocessing/nextclade/tests/multi_segment_config.yaml b/preprocessing/nextclade/tests/multi_segment_config.yaml index bfac8b94e7..bf81cb8121 100644 --- a/preprocessing/nextclade/tests/multi_segment_config.yaml +++ b/preprocessing/nextclade/tests/multi_segment_config.yaml @@ -6,11 +6,12 @@ genes: - VP24EbolaZaire - LEbolaZaire log_level: DEBUG -nextclade_dataset_name: ebola-dataset nextclade_dataset_server: fake_url nucleotideSequences: -- ebola-sudan -- ebola-zaire +- name: ebola-sudan + nextclade_dataset_name: ebola-dataset/ebola-sudan +- name: ebola-zaire + nextclade_dataset_name: ebola-dataset/ebola-zaire organism: multi-ebola-test processing_spec: totalInsertedNucs_ebola-zaire: diff --git a/preprocessing/nextclade/tests/single_segment_config.yaml b/preprocessing/nextclade/tests/single_segment_config.yaml index 670665961c..0ae57b7a3a 100644 --- a/preprocessing/nextclade/tests/single_segment_config.yaml +++ b/preprocessing/nextclade/tests/single_segment_config.yaml @@ -3,14 +3,14 @@ genes: - NPEbolaSudan - VP35EbolaSudan log_level: DEBUG -nextclade_dataset_name: ebola-sudan-test-dataset nextclade_dataset_server: fake_url scientific_name: "Test Ebola Sudan Virus" molecule_type: "genomic RNA" topology: "linear" db_name: "Loculus" nucleotideSequences: -- main +- name: main + nextclade_dataset_name: ebola-sudan-test-dataset organism: ebola-sudan-test processing_spec: completeness: diff --git a/website/src/config.ts b/website/src/config.ts index e91a4050da..cf2043db22 100644 --- a/website/src/config.ts +++ b/website/src/config.ts @@ 
-227,7 +227,7 @@ export function getLapisUrl(serviceConfig: ServiceUrls, organism: string): strin } export function getReferenceGenome(organism: string): ReferenceGenome { - return Object.values(getConfig(organism).referenceGenomes)[0]; + return getConfig(organism).referenceGenome; } const getAccession = (n: NamedSequence): ReferenceAccession => { diff --git a/website/src/types/config.ts b/website/src/types/config.ts index 68fb966a39..bba32812de 100644 --- a/website/src/types/config.ts +++ b/website/src/types/config.ts @@ -1,7 +1,7 @@ import z from 'zod'; import { mutationProportionCount, orderByType } from './lapis.ts'; -import { referenceGenomes } from './referencesGenomes.ts'; +import { referenceGenome } from './referencesGenomes.ts'; // These metadata types need to be kept in sync with the backend config class `MetadataType` in Config.kt export const metadataPossibleTypes = z.enum([ @@ -156,7 +156,7 @@ export type Schema = z.infer; export const instanceConfig = z.object({ schema, - referenceGenomes, + referenceGenome, }); export type InstanceConfig = z.infer;