From 58c71177573c3cae79f28e9503d0ead9b7f22c2f Mon Sep 17 00:00:00 2001 From: Thomas Walsh Date: Fri, 27 Jun 2025 09:13:49 +0100 Subject: [PATCH 1/5] Take the strain, disentangle clustersets --- .../Web/Component/Gene/ComparaOrthologs.pm | 123 +++++++++--------- .../Web/Component/Gene/ComparaParalogs.pm | 2 +- .../EnsEMBL/Web/Component/Gene/ComparaTree.pm | 27 +++- .../Web/Component/Gene/Compara_Portal.pm | 7 +- .../Web/Component/Gene/HomologAlignment.pm | 11 +- modules/EnsEMBL/Web/ConfigPacker.pm | 19 +++ modules/EnsEMBL/Web/Configuration/Gene.pm | 16 +-- modules/EnsEMBL/Web/Form/ViewConfigForm.pm | 45 ++++--- modules/EnsEMBL/Web/Object/Gene.pm | 1 - .../EnsEMBL/Web/Query/Availability/Gene.pm | 27 ++++ modules/EnsEMBL/Web/Utils/Compara.pm | 93 +++++++++++++ .../Web/ViewConfig/Gene/ComparaOrthologs.pm | 24 ++-- .../Web/ViewConfig/Gene/ComparaTree.pm | 2 +- modules/EnsEMBL/Web/ZMenu/ComparaOrthologs.pm | 2 +- 14 files changed, 285 insertions(+), 114 deletions(-) create mode 100644 modules/EnsEMBL/Web/Utils/Compara.pm diff --git a/modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm b/modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm index 40744527a1..891c894eb9 100644 --- a/modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm +++ b/modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm @@ -23,6 +23,7 @@ use strict; use HTML::Entities qw(encode_entities); +use EnsEMBL::Web::Utils::Compara qw(orthoset_prod_names); use EnsEMBL::Web::Utils::FormatText qw(glossary_helptip get_glossary_entry pluralise); use base qw(EnsEMBL::Web::Component::Gene); @@ -46,8 +47,9 @@ sub content { my $biotype = $object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name. my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ ); my $species_name = $species_defs->GROUP_DISPLAY_NAME; - my $strain_url = $hub->is_strain ? "Strain_" : ""; - my $strain_param = $hub->is_strain ? ";strain=1" : ""; # initialize variable even if is_strain is false, to avoid warnings + my $is_strain_view = $hub->action =~ /^Strain_/ ? 1 : 0; + my $strain_url = $is_strain_view ? "Strain_" : ""; + my $strain_param = $is_strain_view ? ";strain=1" : ""; # initialize variable even if is_strain_view is false, to avoid warnings my @orthologues = ( $object->get_homology_matches('ENSEMBL_ORTHOLOGUES', undef, undef, $cdb), @@ -78,11 +80,14 @@ sub content { delete $compara_species->{'ancestral_sequences'}; } + my $orthoset_prod_names = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view); + my $orthoset_prod_name_set = {map {$_ => 1} @$orthoset_prod_names}; + ## Work out which species we want to skip over, based on page type and user's configuration my $this_group = $species_defs->STRAIN_GROUP; my $species_not_shown = {}; - my $strains_not_shown = {}; - my $strain_refs = {}; + my $species_not_relevant = {}; + my $unshown_strain_types = {}; my $hidden = {}; foreach my $prod_name (keys %$compara_species) { @@ -92,30 +97,23 @@ sub content { next if $species eq $hub->species; ## Ignore current species my $label = $is_pan ? $pan_lookup->{$prod_name}{'display_name'} : $species_defs->species_label($species); - ## Should we be showing this orthologue on this pagpe by default? - my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP'); - my $related_taxon = $species_defs->get_config($species, 'RELATED_TAXON'); - if ($hub->action =~ /^Strain_/) { - unless (($strain_group && $strain_group eq $this_group) || ($related_taxon && $related_taxon eq $species_defs->RELATED_TAXON)) { - $species_not_shown->{$species} = $label; - next; - } - } - else { - if ($strain_group) { - if ($strain_group eq $prod_name) { - $strain_refs->{$species} = $label; - } - else { - ## Do not show any strain species on main species view - $strains_not_shown->{$species} = $label; - next; - } - } + # Should we be showing this orthologue on this page by default? + unless ($orthoset_prod_name_set->{$prod_name}) { + $species_not_relevant->{$species} = 1; + next; } ## Do we even have an orthologue for this species? unless ($orthologue_list{$species}) { + + my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP'); + my $strain_type = $strain_group && $prod_name ne $strain_group + ? $species_defs->get_config($species, 'STRAIN_TYPE') + : 'species' + ; + + $unshown_strain_types->{$strain_type} += 1; + $species_not_shown->{$species} = $label; next; } @@ -200,8 +198,8 @@ sub content { my $anc_node_ids = $self->fetch_anc_node_ids($cdb); foreach my $species (sort { ($a =~ /^<.*?>(.+)/ ? $1 : $a) cmp ($b =~ /^<.*?>(.+)/ ? $1 : $b) } keys %orthologue_list) { next unless $species; + next if $species_not_relevant->{$species}; next if $species_not_shown->{$species}; - next if $strains_not_shown->{$species}; next if $hidden->{$species}; my ($species_label, $prodname); @@ -366,29 +364,26 @@ sub content { ); } - if (($hub->action =~ /^Strain_/ && keys %$strains_not_shown) - || ($hub->action !~ /^Strain_/ && keys %$species_not_shown)) { - my ($total, $no_ortho_species, $strain_refs_html); - if ($hub->action =~ /^Strain_/) { - $total = scalar keys %$strains_not_shown; - $no_ortho_species = $self->get_no_ortho_species_html($strains_not_shown, $sets_by_species); - $strain_refs_html = ''; - } - else { - $total = scalar keys %$species_not_shown; - unless ($is_pan) { - $no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species); - $strain_refs_html = $self->get_strain_refs_html($strain_refs, $species_not_shown); - } + if (keys %$species_not_shown) { + my $no_ortho_species; + my $total = scalar keys %$species_not_shown; + unless ($is_pan) { + $no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species); } my $not_shown_list = $is_pan ? '' : sprintf('', $no_ortho_species); + my $strain_type_breakdown = $self->get_strain_type_breakdown($unshown_strain_types, $total); + + my $not_shown_desc = $total > 1 + ? "are not shown in the table above because they don't have any orthologue with" + : "is not shown in the table above because it doesn't have any orthologue with" + ; + $html .= '
' . $self->_info( 'Species without orthologues', sprintf( - qq(

%d species are not shown in the table above because they don't have any orthologue with %s.

+ qq(

%d %s %s %s.

%s -%s -

), $total, $self->object->Obj->stable_id, $not_shown_list, $strain_refs_html), +

), $total, $strain_type_breakdown, $not_shown_desc, $self->object->Obj->stable_id, $not_shown_list), undef, 'no_ortho_message_pad' ); @@ -408,7 +403,7 @@ sub create_gene_tree_links { my $orthologue = $params->{orthologue}; my $hub = $self->hub; - my $strain_url = $hub->is_strain ? "Strain_" : ""; + my $strain_url = $hub->action =~ /^Strain_/ ? "Strain_" : ""; my $tree_url = $hub->url({ type => 'Gene', @@ -429,32 +424,23 @@ sub species_sets { my $hub = $self->hub; my $species_defs = $hub->species_defs; my $is_pan = $cdb =~ /compara_pan_ensembl/; + my $is_strain_view = $self->hub->action =~ /^Strain_/; - return "" if $self->hub->action =~ /^Strain/; #No summary table needed for strains + return "" if $is_strain_view; #No summary table needed for strains my ($set_order, $species_sets, $set_mappings) = $self->species_set_config($cdb); #setting $cdb enables us to fetch Pan species sets return "" unless $set_order; - my $compara_spp = {}; + my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view); my $lookup = $species_defs->prodnames_to_urls_lookup($cdb); - my $pan_info = {}; - if ($is_pan) { - $pan_info = $species_defs->multi_val('PAN_COMPARA_LOOKUP'); - $compara_spp = {map { $_ => 1} keys %$pan_info}; - } - else { - $compara_spp = { %{$species_defs->multi_hash->{'DATABASE_COMPARA'}{'COMPARA_SPECIES'}} }; - delete $compara_spp->{'ancestral_sequences'}; - } + my $pan_info = $is_pan ? $species_defs->multi_val('PAN_COMPARA_LOOKUP') : {}; my %orthologue_map = qw(SEED BRH PIP RHS); my $sets_by_species = {}; my $ortho_type = {}; - foreach (keys %$compara_spp) { + foreach (@$compara_spp) { my $species = $lookup->{$_}; - next unless $species; #skip species absent from URL lookup (e.g. Human in Ensembl Plants) - next if $self->hub->is_strain($species); #skip strain species my $orthologues = $orthologue_list->{$species} || {}; my $no_ortho = 0; @@ -500,7 +486,7 @@ sub species_set_config {} # Stub, as it's clade-specific - implement in plugins sub fetch_anc_node_ids {} # Another stub, only for specific divisions (e.g. Metazoa) -sub get_strain_refs_html { +sub get_strain_refs_html { # not in use as of 2025-06 my ($self, $strain_refs, $species_not_shown) = @_; return '' unless keys %{$strain_refs||{}}; @@ -529,7 +515,8 @@ sub get_no_ortho_species_html { my $hub = $self->hub; my $html = ''; - foreach (sort {lc $a cmp lc $b} keys %$species_not_shown) { + # Species will be easier to find if we sort them by display name. + foreach (sort {lc $species_not_shown->{$a} cmp lc $species_not_shown->{$b}} keys %$species_not_shown) { my $class = $sets_by_species->{$_} ? sprintf(' class="%s"', join(' ', @{$sets_by_species->{$_}})) : ''; $html .= sprintf '%s', $class, $species_not_shown->{$_}; } @@ -537,6 +524,24 @@ sub get_no_ortho_species_html { return $html; } +sub get_strain_type_breakdown { +## Get text listing strain types in order of decreasing frequency. + my ($self, $strain_types, $num_genomes) = @_; + + my @ordered_strain_types = sort {$strain_types->{$b} <=> $strain_types->{$a} || $a cmp $b} keys %$strain_types; + + if ($num_genomes > 1) { + @ordered_strain_types = map { pluralise($_) } @ordered_strain_types; + } + + my $strain_type_text = scalar(@ordered_strain_types) > 1 + ? join(', ', @ordered_strain_types[0 .. ($#ordered_strain_types-1)]) . ' and ' . $ordered_strain_types[-1] + : $ordered_strain_types[0] + ; + + return $strain_type_text; +} + sub get_export_data { ## Get data for export my ($self, $flag) = @_; diff --git a/modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm b/modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm index 1600ee3775..2014edb335 100644 --- a/modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm +++ b/modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm @@ -42,7 +42,7 @@ sub content { my $cdb = shift || $hub->param('cdb') || 'compara'; my $biotype = $self->object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name. my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ ); - my $strain_url = $hub->is_strain ? 'Strain_' : ''; + my $strain_url = $hub->action =~ /^Strain_/ ? 'Strain_' : ''; my %paralogue_list = %{$self->object->get_homology_matches('ENSEMBL_PARALOGUES', 'paralog|gene_split', undef, $cdb)}; return '

No paralogues have been identified for this gene

' unless keys %paralogue_list; diff --git a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm index 96fbc454ed..f77f2161db 100644 --- a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm +++ b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm @@ -38,7 +38,8 @@ sub get_details { my $member = $object->get_compara_Member({'stable_id' => $object->stable_id, 'cdb' => $cdb}); return (undef, 'Gene is not in the compara database') unless $member; - my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->is_strain || $self->hub->param('strain') || $self->hub->action =~ /Strain_/); + my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->param('strain') || $self->hub->action =~ /^Strain_/); + my $tree = $object->get_GeneTree($cdb,"", $strain_tree); return (undef, 'Gene is not in a compara tree') unless $tree; @@ -94,7 +95,7 @@ sub content { my $hub = $self->hub; my $object = $self->object || $self->hub->core_object('gene'); my $is_genetree = $object && $object->isa('EnsEMBL::Web::Object::GeneTree') ? 1 : 0; - my $is_strain = $hub->is_strain || $hub->param('strain') || $hub->action =~ /Strain_/; + my $is_strain_view = $hub->param('strain') || $hub->action =~ /^Strain_/; my ($gene, $member, $tree, $node); my $type = $self->param('data_type') || $hub->type; @@ -147,7 +148,7 @@ sub content { if (defined $parent) { if ($vc->get('super_tree') eq 'on' || $self->param('super_tree') eq 'on') { - my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain }); + my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain_view }); $html .= qq(
); } else { $html .= $self->_info( @@ -162,10 +163,18 @@ sub content { } if ($hub->type eq 'Gene') { - if ($tree->tree->clusterset_id ne $clusterset_id && !$self->is_strain) { + my $obs_clusterset_id = $tree->tree->clusterset_id; + my $exp_clusterset_id = $is_strain_view && $clusterset_id eq 'default' + ? $hub->species_defs->get_config($hub->species, 'RELATED_TAXON') + : $clusterset_id + ; + + if ($obs_clusterset_id ne $exp_clusterset_id) { $html .= $self->_info('Phylogenetic model selection', sprintf( - 'The phylogenetic model %s is not available for this tree. Showing the default (consensus) tree instead.', $clusterset_id + 'The phylogenetic model %s is not available for this tree. Showing the %s tree instead.', + $exp_clusterset_id, + $obs_clusterset_id, ) ); } elsif ($tree->tree->ref_root_id) { @@ -275,7 +284,7 @@ sub content { image_width => $image_width, slice_number => '1|1', cdb => $cdb, - strain => $is_strain, + strain => $is_strain_view, }); # Keep track of collapsed nodes @@ -388,7 +397,11 @@ sub content { my $collapsed_to_rank = $self->collapsed_nodes($tree, $node, "rank_$rank", $highlight_genome_db_id, $highlight_gene); push @rank_options, sprintf qq{\n}, $hub->url({ collapse => $collapsed_to_rank, g1 => $highlight_gene, gtr => $rank }), $rank eq $selected_rank ? 'selected' : '', ucfirst $rank; } - push @view_links, sprintf qq{
  • Collapse all the nodes at the taxonomic rank
  • }, join("\n", @rank_options) if(!$self->is_strain); + # The ability to collapse by taxonomic rank was not seen as + # particularly useful in a strain gene-tree view ( ENSWEB-3037 ). + if(!$is_strain_view) { + push @view_links, sprintf qq{
  • Collapse all the nodes at the taxonomic rank
  • }, join("\n", @rank_options); + } } $html .= $image->render; diff --git a/modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm b/modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm index 59d4827134..bcc7affa36 100644 --- a/modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm +++ b/modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm @@ -35,8 +35,9 @@ sub content { my $hub = $self->hub; my $availability = $self->object->availability; my $location = $hub->url({ type => 'Location', action => 'Compara' }); - my $strain_url = ($self->is_strain || $hub->action =~ /^Strain_/) ? "Strain_" : ""; - my $strain_avail = ($self->is_strain || $hub->action =~ /^Strain_/) ? "strain_" : ""; + my $is_strain_view = $hub->action =~ /^Strain_/; + my $strain_url = $is_strain_view ? "Strain_" : ""; + my $strain_avail = $is_strain_view ? "strain_" : ""; my $ortho_image = $strain_avail ? 'strain_ortho.gif' : 'compara_ortho.gif'; my $para_image = $strain_avail ? 'strain_para.gif' : 'compara_para.gif'; @@ -49,7 +50,7 @@ sub content { { title => 'Families', img => '80/compara_fam.gif', url => $availability->{'family'} ? $hub->url({ action => 'Family' }) : '' }, ]; - @$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($self->is_strain); #remove the one we dont show for strains species + @$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($is_strain_view); #remove the one we dont show for strain views my $html = $self->button_portal($buttons, 'portal-small'); $html .= qq{

    More views of comparative genomics data, such as multiple alignments and synteny, are available on the Location page for this gene.

    }; diff --git a/modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm b/modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm index bfc5387c88..764b5c3033 100644 --- a/modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm +++ b/modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm @@ -85,10 +85,13 @@ sub content { my $label = $external_species ? $pan_lookup->{$prodname}{'display_name'} : $species_defs->species_label($member_species); my $location = sprintf '%s:%d-%d', $gene->dnafrag->name, $gene->dnafrag_start, $gene->dnafrag_end; - if (!$second_gene && $member_species ne $species && $hub->param('species_' .$prodname) eq 'off') { - $flag = 0; - $skipped{$label}++; - next; + if (!$second_gene && $member_species ne $species) { + my $species_toggle = $hub->param('species_' .$prodname); + if (!defined $species_toggle || $species_toggle eq 'off') { + $skipped{$label}++ if defined $species_toggle; + $flag = 0; + next; + } } if ($gene->stable_id eq $gene_id) { diff --git a/modules/EnsEMBL/Web/ConfigPacker.pm b/modules/EnsEMBL/Web/ConfigPacker.pm index e10e7ac79e..64b438395e 100644 --- a/modules/EnsEMBL/Web/ConfigPacker.pm +++ b/modules/EnsEMBL/Web/ConfigPacker.pm @@ -1387,11 +1387,30 @@ sub _summarise_compara_db { $sth->execute; while (my ($sp, $clusterset_id, $strain_type) = $sth->fetchrow_array) { + $self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{$clusterset_id}{$sp} = 1; next if exists $preferred_clusterset_id{$sp} && $clusterset_id ne $preferred_clusterset_id{$sp}; $self->db_tree->{$db_name}{'CLUSTERSETS'}{$sp} = $clusterset_id; $self->db_tree->{$db_name}{'STRAIN_TYPES'}{$sp} = $strain_type; } + if (exists $self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}) { + + my $default_oset_spp_aref = $dbh->selectcol_arrayref(' + select distinct gd.name + from method_link_species_set mlss + join method_link ml using(method_link_id) + join species_set ss using(species_set_id) + join species_set_header ssh using(species_set_id) + join genome_db gd using(genome_db_id) + where ml.type in ("PROTEIN_TREES", "NC_TREES") + and trim(leading "collection-" from ssh.name) = "default"; + '); + + foreach my $sp (@$default_oset_spp_aref) { + $self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{'default'}{$sp} = 1; + } + } + ################################################################### ## Cache MLSS for quick lookup in ImageConfig diff --git a/modules/EnsEMBL/Web/Configuration/Gene.pm b/modules/EnsEMBL/Web/Configuration/Gene.pm index 01514f1644..f1cf98b7b6 100644 --- a/modules/EnsEMBL/Web/Configuration/Gene.pm +++ b/modules/EnsEMBL/Web/Configuration/Gene.pm @@ -80,7 +80,7 @@ sub populate_tree { my $compara_menu = $self->create_node('Compara', 'Comparative Genomics', [qw(strain_button_panel EnsEMBL::Web::Component::Gene::Compara_Portal)], - {'availability' => 'gene database:compara core not_strain'} + {'availability' => 'gene database:compara core has_default_compara'} ); $compara_menu->append($self->create_node('Compara_Alignments', 'Genomic alignments', @@ -93,34 +93,34 @@ sub populate_tree { $compara_menu->append($self->create_node('Compara_Tree', 'Gene tree', [qw( image EnsEMBL::Web::Component::Gene::ComparaTree )], - { 'availability' => 'gene database:compara core has_gene_tree not_strain' } + { 'availability' => 'gene database:compara core has_gene_tree' } )); $compara_menu->append($self->create_node('SpeciesTree', 'Gene gain/loss tree', [qw( image EnsEMBL::Web::Component::Gene::SpeciesTree )], - { 'availability' => 'gene database:compara core has_species_tree not_strain' } + { 'availability' => 'gene database:compara core has_species_tree' } )); my $ol_node = $self->create_node('Compara_Ortholog', 'Orthologues', [qw( orthologues EnsEMBL::Web::Component::Gene::ComparaOrthologs )], - { 'availability' => 'gene database:compara core has_orthologs not_strain', 'concise' => 'Orthologues' } + { 'availability' => 'gene database:compara core has_orthologs', 'concise' => 'Orthologues' } ); $ol_node->append($self->create_subnode('Compara_Ortholog/Alignment', 'Orthologue alignment', [qw( alignment EnsEMBL::Web::Component::Gene::HomologAlignment )], - { 'availability' => 'gene database:compara core has_orthologs not_strain', 'no_menu_entry' => 1 } + { 'availability' => 'gene database:compara core has_orthologs', 'no_menu_entry' => 1 } )); $compara_menu->append($ol_node); my $pl_node = $self->create_node('Compara_Paralog', 'Paralogues', [qw(paralogues EnsEMBL::Web::Component::Gene::ComparaParalogs)], - { 'availability' => 'gene database:compara core has_paralogs not_strain', 'concise' => 'Paralogues' } + { 'availability' => 'gene database:compara core has_paralogs', 'concise' => 'Paralogues' } ); $pl_node->append($self->create_subnode('Compara_Paralog/Alignment', 'Paralogue alignment', [qw( alignment EnsEMBL::Web::Component::Gene::HomologAlignment )], - { 'availability' => 'gene database:compara core has_paralogs not_strain', 'no_menu_entry' => 1 } + { 'availability' => 'gene database:compara core has_paralogs', 'no_menu_entry' => 1 } )); $compara_menu->append($pl_node); @@ -134,7 +134,7 @@ sub populate_tree { my $strain_type_name = ucfirst $strain_type; my $strain_compara_menu = $self->create_node('Strain_Compara', $strain_type_name . 's', [qw(strain_button_panel EnsEMBL::Web::Component::Gene::Compara_Portal)], - {'availability' => 'gene database:compara core', 'closed' => $collapse } + {'availability' => 'gene database:compara core has_strain_compara', 'closed' => $collapse } ); $strain_compara_menu->append($self->create_node('Strain_Compara_Tree', 'Gene tree', diff --git a/modules/EnsEMBL/Web/Form/ViewConfigForm.pm b/modules/EnsEMBL/Web/Form/ViewConfigForm.pm index 8605df60be..cb257c4d4d 100644 --- a/modules/EnsEMBL/Web/Form/ViewConfigForm.pm +++ b/modules/EnsEMBL/Web/Form/ViewConfigForm.pm @@ -25,6 +25,7 @@ no warnings "uninitialized"; use HTML::Entities qw(encode_entities); use EnsEMBL::Web::Attributes; +use EnsEMBL::Web::Utils::Compara qw(orthoset_prod_names); use parent qw(EnsEMBL::Web::Form); @@ -546,30 +547,38 @@ sub add_species_fieldset { my $self = shift; my $hub = $self->view_config->hub; my $species_defs = $self->view_config->species_defs; - my $lookup = $species_defs->prodnames_to_urls_lookup; + + my $function = $hub->referer->{'ENSEMBL_FUNCTION'}; + my $cdb = $function =~ /pan_compara/ ? 'compara_pan_ensembl' : 'compara'; + + my $page_action = $hub->referer->{'ENSEMBL_ACTION'}; + my $strain = $hub->param('strain') || $hub->action =~ /^Strain_/ || $page_action =~ /^Strain_/; + + my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $strain); + + my $pan_lookup; + my $url_lookup; + if ($cdb eq 'compara_pan_ensembl') { + $pan_lookup = $species_defs->multi_val('PAN_COMPARA_LOOKUP'); + } else { + $url_lookup = $species_defs->prodnames_to_urls_lookup($cdb); + } + my $species; - foreach (keys %{$species_defs->multi_hash->{'DATABASE_COMPARA'}{'COMPARA_SPECIES'}}) { - my $url = $lookup->{$_}; - $species->{$_} = {'url' => $url, 'name' => $species_defs->species_label($url)}; + foreach (@{$compara_spp}) { + $species->{$_} = $cdb eq 'compara_pan_ensembl' + ? $pan_lookup->{$_}{'display_name'} + : $species_defs->species_label($url_lookup->{$_}) + ; } - foreach (sort { ($species->{$a}{'name'} =~ /^<.*?>(.+)/ ? $1 : $species->{$a}{'name'}) - cmp ($species->{$b}{'name'} =~ /^<.*?>(.+)/ ? $1 : $species->{$b}{'name'}) } keys %$species) { - ## If statement to show/hide strain or main species depending on the view you are on - ## When you are on a main species, do not show strain species - my $url = $species->{$_}{'url'}; - next if (!$hub->param('strain') && $hub->is_strain($url)); - ## When you are on a strain species or strain view from main species, show only strain species - next if (($hub->param('strain') || $hub->is_strain) && !$hub->species_defs->get_config($url, 'RELATED_TAXON')); - ## But only show strains from the same group as the current species! - next if ($hub->param('strain') && (lc $hub->species_defs->get_config($url, 'RELATED_TAXON') - ne lc $hub->species_defs->get_config($hub->species, 'RELATED_TAXON'))); - - + foreach (sort { ($species->{$a} =~ /^<.*?>(.+)/ ? $1 : $species->{$a}) + cmp ($species->{$b} =~ /^<.*?>(.+)/ ? $1 : $species->{$b}) } keys %$species) { + $self->add_form_element({ 'fieldset' => 'Selected species', 'type' => 'CheckBox', - 'label' => $species->{$_}{'name'}, + 'label' => $species->{$_}, 'name' => 'species_'.$_, 'value' => 'yes', }); diff --git a/modules/EnsEMBL/Web/Object/Gene.pm b/modules/EnsEMBL/Web/Object/Gene.pm index 1176d717f9..5db7927637 100644 --- a/modules/EnsEMBL/Web/Object/Gene.pm +++ b/modules/EnsEMBL/Web/Object/Gene.pm @@ -61,7 +61,6 @@ sub availability { } elsif ($obj->isa('Bio::EnsEMBL::Compara::Family')) { $availability->{'family'} = 1; } - $availability->{'not_strain'} = $self->hub->is_strain ? 0 : 1; #availability to check if species is a strain or not, it has to be this way round (used in Gene Configuration to disable main compara view on strain species) $availability->{"has_interactions"} = $self->interaction_check; # check if interactions data is available for a gene $self->{'_availability'} = $availability; diff --git a/modules/EnsEMBL/Web/Query/Availability/Gene.pm b/modules/EnsEMBL/Web/Query/Availability/Gene.pm index 18beb323ee..790097290d 100644 --- a/modules/EnsEMBL/Web/Query/Availability/Gene.pm +++ b/modules/EnsEMBL/Web/Query/Availability/Gene.pm @@ -307,6 +307,25 @@ sub get { if($self->variation_db_adaptor($args)) { $out->{'has_phenotypes'} = $self->_get_phenotype($args); } + + $out->{'has_default_compara'} = $out->{'database:compara'} && ( + $out->{'has_gene_tree'} + || $out->{'has_species_tree'} + || $out->{'has_orthologs'} + || $out->{'has_paralogs'} + || $out->{'has_homoeologs'} + || $out->{'has_homologs'} + || $out->{'family'} + || $out->{'has_alignments'} + ); + + $out->{'has_strain_compara'} = $out->{'database:compara'} && ( + $out->{'has_strain_gene_tree'} + || $out->{'has_strain_orthologs'} + || $out->{'has_strain_paralogs'} + || $out->{'has_strain_homoeologs'} + ); + if($out->{'database:compara_pan_ensembl'} && $self->pancompara_db_adaptor) { $out->{'family_pan_ensembl'} = !!$counts->{'families_pan'}; $out->{'has_gene_tree_pan'} = @@ -314,6 +333,14 @@ sub get { for (qw(alignments_pan paralogs_pan orthologs_pan)) { $out->{"has_$_"} = $counts->{$_}; } + + $out->{'has_pan_compara'} = $out->{'database:compara_pan_ensembl'} && ( + $out->{'has_gene_tree_pan'} + || $out->{'has_orthologs_pan'} + || $out->{'has_paralogs_pan'} + || $out->{'family_pan_ensembl'} + || $out->{'has_alignments_pan'} + ); } return [$out]; diff --git a/modules/EnsEMBL/Web/Utils/Compara.pm b/modules/EnsEMBL/Web/Utils/Compara.pm new file mode 100644 index 0000000000..d8dce027d3 --- /dev/null +++ b/modules/EnsEMBL/Web/Utils/Compara.pm @@ -0,0 +1,93 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2025] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package EnsEMBL::Web::Utils::Compara; + +use strict; + + +sub _get_non_strain_orthoset_prod_names { + my ($hub, $url_lookup) = @_; + + my $cdb_info = $hub->species_defs->multi_val('DATABASE_COMPARA'); + + my $prod_name_set; + if (exists $cdb_info->{'CLUSTERSET_PRODNAMES'} && exists $cdb_info->{'CLUSTERSET_PRODNAMES'}{'default'}) { + $prod_name_set = $cdb_info->{'CLUSTERSET_PRODNAMES'}{'default'}; + } else { + $prod_name_set = $cdb_info->{'COMPARA_SPECIES'}; + } + + # Skip species absent from URL lookup (e.g. Human in Ensembl Plants) + return [grep { $prod_name_set->{$_} && exists $url_lookup->{$_} } keys %{$prod_name_set}]; +} + + +sub _get_strain_orthoset_prod_names { + my ($hub, $url_lookup) = @_; + + my $species_defs = $hub->species_defs; + my $cdb_info = $species_defs->multi_val('DATABASE_COMPARA'); + my $species_url = $hub->species; + + my $orthoset_prod_names = []; + if ($species_url && $species_url ne 'Multi') { + my $strain_cset_id = $species_defs->get_config($species_url, 'RELATED_TAXON'); + if (exists $cdb_info->{'CLUSTERSET_PRODNAMES'} && exists $cdb_info->{'CLUSTERSET_PRODNAMES'}{$strain_cset_id}) { + $orthoset_prod_names = [keys %{$cdb_info->{'CLUSTERSET_PRODNAMES'}{$strain_cset_id}}]; + } + } + + unless (@{$orthoset_prod_names}) { + $orthoset_prod_names = _get_non_strain_orthoset_prod_names($hub, $url_lookup); + } + + return $orthoset_prod_names; +} + + +sub orthoset_prod_names { + ## Gets the appropriate set of Compara orthology production + ## names for the given hub, Compara and strain status. + my ($hub, $compara_db, $strain) = @_; + + $compara_db |= 'compara'; + $strain |= 0; + + my $species_defs = $hub->species_defs; + + my $url_lookup = $species_defs->prodnames_to_urls_lookup($compara_db); + delete $url_lookup->{'ancestral_sequences'}; + + my $orthoset_prod_names = []; + if ($compara_db eq 'compara_pan_ensembl') { + $orthoset_prod_names = [keys %{$url_lookup}]; + } else { + if ($strain) { + $orthoset_prod_names = _get_strain_orthoset_prod_names($hub, $url_lookup); + } else { + $orthoset_prod_names = _get_non_strain_orthoset_prod_names($hub, $url_lookup); + } + } + + return $orthoset_prod_names; +} + + +1; diff --git a/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaOrthologs.pm b/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaOrthologs.pm index 866b6e8c6b..d9b8c53678 100644 --- a/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaOrthologs.pm +++ b/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaOrthologs.pm @@ -22,6 +22,8 @@ package EnsEMBL::Web::ViewConfig::Gene::ComparaOrthologs; use strict; use warnings; +use EnsEMBL::Web::Utils::Compara qw(orthoset_prod_names); + use parent qw(EnsEMBL::Web::ViewConfig); sub _new { @@ -37,17 +39,17 @@ sub init_cacheable { ## Abstract method implementation my $self = shift; my $hub = $self->hub; - my $action = $hub->param('data_action') || $hub->action; - foreach (sort $hub->species_defs->valid_species) { - ## If statement to show/hide strain or main species depending on the view you are on - ## When you are on a main species, do not show strain species - next if ($action !~ /Strain_/ && $hub->is_strain($_)); - ## When you are on a strain species or strain view from main species, show only strain species - next if (($action =~ /Strain_/ || $hub->is_strain) && !$hub->species_defs->get_config($_, 'RELATED_TAXON')); - ## But only show strains from the same group as the current species! - next if ($action =~ /Strain_/ && (lc $hub->species_defs->get_config($_, 'RELATED_TAXON') - ne lc $hub->species_defs->get_config($hub->species, 'RELATED_TAXON'))); - $self->set_default_options({ 'species_' . $hub->species_defs->get_config($_, 'SPECIES_PRODUCTION_NAME') => 'yes' }); + + my $function = $hub->referer->{'ENSEMBL_FUNCTION'}; + my $cdb = $function =~ /pan_compara/ ? 'compara_pan_ensembl' : 'compara'; + + my $page_action = $hub->referer->{'ENSEMBL_ACTION'}; + my $strain = $hub->param('strain') || $page_action =~ /^Strain_/; + + my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $strain); + + foreach my $prod_name (sort @{$compara_spp}) { + $self->set_default_options({ "species_${prod_name}" => 'yes' }); } $self->title('Homologs'); diff --git a/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaTree.pm b/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaTree.pm index ddc0d3d316..0e5bb1f496 100644 --- a/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaTree.pm +++ b/modules/EnsEMBL/Web/ViewConfig/Gene/ComparaTree.pm @@ -112,7 +112,7 @@ sub form_fields { 'value' => 'on', }; - my @groups = ($self->hub->param('strain') || $self->hub->is_strain) ? () : $self->_groups; #hide these options for strain view or strain species + my @groups = $self->hub->param('strain') ? () : $self->_groups; #hide these options for strain view if (@groups) { my $taxon_labels = $self->hub->species_defs->TAXON_LABEL; diff --git a/modules/EnsEMBL/Web/ZMenu/ComparaOrthologs.pm b/modules/EnsEMBL/Web/ZMenu/ComparaOrthologs.pm index c44be061a7..9d3adc6032 100644 --- a/modules/EnsEMBL/Web/ZMenu/ComparaOrthologs.pm +++ b/modules/EnsEMBL/Web/ZMenu/ComparaOrthologs.pm @@ -28,7 +28,7 @@ use base qw(EnsEMBL::Web::ZMenu); sub content { my $self = shift; my $hub = $self->hub; - my $strain_url = $hub->is_strain || $hub->param('strain') ? "Strain_" : ""; + my $strain_url = $hub->param('strain') ? "Strain_" : ""; my $align_url = $hub->url({ type => 'Gene', From e9fee0cd7e7af7ca2081856d31ba8870d4206184 Mon Sep 17 00:00:00 2001 From: Thomas Walsh Date: Sat, 5 Jul 2025 10:16:48 +0100 Subject: [PATCH 2/5] Get view-config strain view status from param or page action These appear to be sufficient. --- modules/EnsEMBL/Web/Form/ViewConfigForm.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/EnsEMBL/Web/Form/ViewConfigForm.pm b/modules/EnsEMBL/Web/Form/ViewConfigForm.pm index cb257c4d4d..da1ba74283 100644 --- a/modules/EnsEMBL/Web/Form/ViewConfigForm.pm +++ b/modules/EnsEMBL/Web/Form/ViewConfigForm.pm @@ -552,7 +552,7 @@ sub add_species_fieldset { my $cdb = $function =~ /pan_compara/ ? 'compara_pan_ensembl' : 'compara'; my $page_action = $hub->referer->{'ENSEMBL_ACTION'}; - my $strain = $hub->param('strain') || $hub->action =~ /^Strain_/ || $page_action =~ /^Strain_/; + my $strain = $hub->param('strain') || $page_action =~ /^Strain_/; my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $strain); From 1a5378eb99f23fd0e56ae11bc3d525dc8f0da365 Mon Sep 17 00:00:00 2001 From: Thomas Walsh Date: Sun, 6 Jul 2025 21:04:59 +0100 Subject: [PATCH 3/5] Refrain from converting strain consensus clusterset_id This may be unnecessary. --- modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm index f77f2161db..6f36df56df 100644 --- a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm +++ b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm @@ -163,18 +163,10 @@ sub content { } if ($hub->type eq 'Gene') { - my $obs_clusterset_id = $tree->tree->clusterset_id; - my $exp_clusterset_id = $is_strain_view && $clusterset_id eq 'default' - ? $hub->species_defs->get_config($hub->species, 'RELATED_TAXON') - : $clusterset_id - ; - - if ($obs_clusterset_id ne $exp_clusterset_id) { + if ($tree->tree->clusterset_id ne $clusterset_id) { $html .= $self->_info('Phylogenetic model selection', sprintf( - 'The phylogenetic model %s is not available for this tree. Showing the %s tree instead.', - $exp_clusterset_id, - $obs_clusterset_id, + 'The phylogenetic model %s is not available for this tree. Showing the %s tree instead.', $clusterset_id, $tree->tree->clusterset_id, ) ); } elsif ($tree->tree->ref_root_id) { From 6c82e60010913c5841e36ff182077ffb6a21b833 Mon Sep 17 00:00:00 2001 From: Thomas Walsh Date: Fri, 11 Jul 2025 05:43:20 +0100 Subject: [PATCH 4/5] Keep not_strain availability tag --- modules/EnsEMBL/Web/Object/Gene.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/EnsEMBL/Web/Object/Gene.pm b/modules/EnsEMBL/Web/Object/Gene.pm index 5db7927637..1176d717f9 100644 --- a/modules/EnsEMBL/Web/Object/Gene.pm +++ b/modules/EnsEMBL/Web/Object/Gene.pm @@ -61,6 +61,7 @@ sub availability { } elsif ($obj->isa('Bio::EnsEMBL::Compara::Family')) { $availability->{'family'} = 1; } + $availability->{'not_strain'} = $self->hub->is_strain ? 0 : 1; #availability to check if species is a strain or not, it has to be this way round (used in Gene Configuration to disable main compara view on strain species) $availability->{"has_interactions"} = $self->interaction_check; # check if interactions data is available for a gene $self->{'_availability'} = $availability; From f8af22fef30b55f9423bbd0e35d7abaa326d5cf7 Mon Sep 17 00:00:00 2001 From: Thomas Walsh Date: Fri, 11 Jul 2025 05:48:06 +0100 Subject: [PATCH 5/5] Lowercase i tags --- modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm index 6f36df56df..dbd51f16f1 100644 --- a/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm +++ b/modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm @@ -166,7 +166,7 @@ sub content { if ($tree->tree->clusterset_id ne $clusterset_id) { $html .= $self->_info('Phylogenetic model selection', sprintf( - 'The phylogenetic model %s is not available for this tree. Showing the %s tree instead.', $clusterset_id, $tree->tree->clusterset_id, + 'The phylogenetic model %s is not available for this tree. Showing the %s tree instead.', $clusterset_id, $tree->tree->clusterset_id, ) ); } elsif ($tree->tree->ref_root_id) {