Skip to content

Take the strain, disentangle clustersets #1093

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 64 additions & 59 deletions modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use strict;

use HTML::Entities qw(encode_entities);

use EnsEMBL::Web::Utils::Compara qw(orthoset_prod_names);
use EnsEMBL::Web::Utils::FormatText qw(glossary_helptip get_glossary_entry pluralise);

use base qw(EnsEMBL::Web::Component::Gene);
Expand All @@ -46,8 +47,9 @@ sub content {
my $biotype = $object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name.
my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ );
my $species_name = $species_defs->GROUP_DISPLAY_NAME;
my $strain_url = $hub->is_strain ? "Strain_" : "";
my $strain_param = $hub->is_strain ? ";strain=1" : ""; # initialize variable even if is_strain is false, to avoid warnings
my $is_strain_view = $hub->action =~ /^Strain_/ ? 1 : 0;
my $strain_url = $is_strain_view ? "Strain_" : "";
my $strain_param = $is_strain_view ? ";strain=1" : ""; # initialize variable even if is_strain_view is false, to avoid warnings

my @orthologues = (
$object->get_homology_matches('ENSEMBL_ORTHOLOGUES', undef, undef, $cdb),
Expand Down Expand Up @@ -78,11 +80,14 @@ sub content {
delete $compara_species->{'ancestral_sequences'};
}

my $orthoset_prod_names = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view);
my $orthoset_prod_name_set = {map {$_ => 1} @$orthoset_prod_names};

## Work out which species we want to skip over, based on page type and user's configuration
my $this_group = $species_defs->STRAIN_GROUP;
my $species_not_shown = {};
my $strains_not_shown = {};
my $strain_refs = {};
my $species_not_relevant = {};
my $unshown_strain_types = {};
my $hidden = {};

foreach my $prod_name (keys %$compara_species) {
Expand All @@ -92,30 +97,23 @@ sub content {
next if $species eq $hub->species; ## Ignore current species
my $label = $is_pan ? $pan_lookup->{$prod_name}{'display_name'} : $species_defs->species_label($species);

## Should we be showing this orthologue on this page by default?
my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP');
my $related_taxon = $species_defs->get_config($species, 'RELATED_TAXON');
if ($hub->action =~ /^Strain_/) {
unless (($strain_group && $strain_group eq $this_group) || ($related_taxon && $related_taxon eq $species_defs->RELATED_TAXON)) {
Copy link
Contributor Author

@twalsh-ebi twalsh-ebi Jun 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue ENSCOMPARASW-8511 is related to this requirement for either the STRAIN_GROUP or RELATED_TAXON to match in strain views.

This could be circumvented by checking instead for whether the production name is in the orthoset_prod_names set for the given strain gene-tree collection (as below).

(See ENSCOMPARASW-8511 and ENSCOMPARASW-8516 for more information on the orthoset returned by orthoset_prod_names.)

$species_not_shown->{$species} = $label;
next;
}
}
else {
if ($strain_group) {
if ($strain_group eq $prod_name) {
$strain_refs->{$species} = $label;
}
else {
## Do not show any strain species on main species view
$strains_not_shown->{$species} = $label;
next;
}
}
# Should we be showing this orthologue on this page by default?
unless ($orthoset_prod_name_set->{$prod_name}) {
$species_not_relevant->{$species} = 1;
next;
}

## Do we even have an orthologue for this species?
unless ($orthologue_list{$species}) {

my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP');
my $strain_type = $strain_group && $prod_name ne $strain_group
? $species_defs->get_config($species, 'STRAIN_TYPE')
: 'species'
;

$unshown_strain_types->{$strain_type} += 1;

$species_not_shown->{$species} = $label;
next;
}
Expand Down Expand Up @@ -200,8 +198,8 @@ sub content {
my $anc_node_ids = $self->fetch_anc_node_ids($cdb);
foreach my $species (sort { ($a =~ /^<.*?>(.+)/ ? $1 : $a) cmp ($b =~ /^<.*?>(.+)/ ? $1 : $b) } keys %orthologue_list) {
next unless $species;
next if $species_not_relevant->{$species};
next if $species_not_shown->{$species};
next if $strains_not_shown->{$species};
next if $hidden->{$species};

my ($species_label, $prodname);
Expand Down Expand Up @@ -366,29 +364,26 @@ sub content {
);
}

if (($hub->action =~ /^Strain_/ && keys %$strains_not_shown)
|| ($hub->action !~ /^Strain_/ && keys %$species_not_shown)) {
my ($total, $no_ortho_species, $strain_refs_html);
if ($hub->action =~ /^Strain_/) {
$total = scalar keys %$strains_not_shown;
$no_ortho_species = $self->get_no_ortho_species_html($strains_not_shown, $sets_by_species);
$strain_refs_html = '';
}
else {
$total = scalar keys %$species_not_shown;
unless ($is_pan) {
$no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species);
$strain_refs_html = $self->get_strain_refs_html($strain_refs, $species_not_shown);
}
if (keys %$species_not_shown) {
my $no_ortho_species;
my $total = scalar keys %$species_not_shown;
unless ($is_pan) {
$no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species);
}
my $not_shown_list = $is_pan ? '' : sprintf('<ul id="no_ortho_species">%s</ul>', $no_ortho_species);
my $strain_type_breakdown = $self->get_strain_type_breakdown($unshown_strain_types, $total);

my $not_shown_desc = $total > 1
? "are not shown in the table above because they don't have any orthologue with"
: "is not shown in the table above because it doesn't have any orthologue with"
;

$html .= '<br /><a name="list_no_ortho"/>' . $self->_info(
'Species without orthologues',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What this looks like if 1 breed lacks orthologues:
ENSSSCG00110020516_no_ortho_sandbox


What this looks like if many breeds, isolates and species lack orthologues:
ENSSSCG00000048809_no_ortho_sandbox

sprintf(
qq(<p><span class="no_ortho_count">%d</span> species are not shown in the table above because they don't have any orthologue with %s.</p>
qq(<p><span class="no_ortho_count">%d</span> %s %s %s.</p>
%s
%s
</p> <input type="hidden" class="panel_type" value="ComparaOrtholog" />), $total, $self->object->Obj->stable_id, $not_shown_list, $strain_refs_html),
</p> <input type="hidden" class="panel_type" value="ComparaOrtholog" />), $total, $strain_type_breakdown, $not_shown_desc, $self->object->Obj->stable_id, $not_shown_list),
undef,
'no_ortho_message_pad'
);
Expand All @@ -408,7 +403,7 @@ sub create_gene_tree_links {
my $orthologue = $params->{orthologue};

my $hub = $self->hub;
my $strain_url = $hub->is_strain ? "Strain_" : "";
my $strain_url = $hub->action =~ /^Strain_/ ? "Strain_" : "";

my $tree_url = $hub->url({
type => 'Gene',
Expand All @@ -429,32 +424,23 @@ sub species_sets {
my $hub = $self->hub;
my $species_defs = $hub->species_defs;
my $is_pan = $cdb =~ /compara_pan_ensembl/;
my $is_strain_view = $self->hub->action =~ /^Strain_/;

return "" if $self->hub->action =~ /^Strain/; #No summary table needed for strains
return "" if $is_strain_view; #No summary table needed for strains

my ($set_order, $species_sets, $set_mappings) = $self->species_set_config($cdb); #setting $cdb enables us to fetch Pan species sets

return "" unless $set_order;

my $compara_spp = {};
my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view);
my $lookup = $species_defs->prodnames_to_urls_lookup($cdb);
my $pan_info = {};
if ($is_pan) {
$pan_info = $species_defs->multi_val('PAN_COMPARA_LOOKUP');
$compara_spp = {map { $_ => 1} keys %$pan_info};
}
else {
$compara_spp = { %{$species_defs->multi_hash->{'DATABASE_COMPARA'}{'COMPARA_SPECIES'}} };
delete $compara_spp->{'ancestral_sequences'};
}
my $pan_info = $is_pan ? $species_defs->multi_val('PAN_COMPARA_LOOKUP') : {};
my %orthologue_map = qw(SEED BRH PIP RHS);
my $sets_by_species = {};
my $ortho_type = {};

foreach (keys %$compara_spp) {
foreach (@$compara_spp) {
my $species = $lookup->{$_};
next unless $species; #skip species absent from URL lookup (e.g. Human in Ensembl Plants)
next if $self->hub->is_strain($species); #skip strain species

my $orthologues = $orthologue_list->{$species} || {};
my $no_ortho = 0;
Expand Down Expand Up @@ -500,7 +486,7 @@ sub species_set_config {} # Stub, as it's clade-specific - implement in plugins

# Default no-op hook: the empty body means this returns nothing.
# content() calls it as $self->fetch_anc_node_ids($cdb) and stores the result.
# NOTE(review): presumably overridden by division-specific plugins to supply
# ancestral node IDs - confirm against the plugin code.
sub fetch_anc_node_ids {} # Another stub, only for specific divisions (e.g. Metazoa)

sub get_strain_refs_html {
sub get_strain_refs_html { # not in use as of 2025-06
my ($self, $strain_refs, $species_not_shown) = @_;
return '' unless keys %{$strain_refs||{}};

Expand Down Expand Up @@ -529,14 +515,33 @@ sub get_no_ortho_species_html {
my $hub = $self->hub;
my $html = '';

foreach (sort {lc $a cmp lc $b} keys %$species_not_shown) {
# Species will be easier to find if we sort them by display name.
foreach (sort {lc $species_not_shown->{$a} cmp lc $species_not_shown->{$b}} keys %$species_not_shown) {
my $class = $sets_by_species->{$_} ? sprintf(' class="%s"', join(' ', @{$sets_by_species->{$_}})) : '';
$html .= sprintf '<li%s>%s</li>', $class, $species_not_shown->{$_};
}

return $html;
}

sub get_strain_type_breakdown {
  ## Get text listing strain types in order of decreasing frequency.
  ## @param $strain_types Hashref mapping a strain type name to the number of genomes of that type
  ## @param $num_genomes  Total number of genomes described; type names are pluralised when it exceeds 1
  ## @return String of the form "A", "A and B" or "A, B and C"; empty string if there are no strain types
  my ($self, $strain_types, $num_genomes) = @_;

  $strain_types ||= {};

  # Most frequent type first; ties are broken alphabetically so the output is deterministic.
  my @ordered_strain_types = sort { $strain_types->{$b} <=> $strain_types->{$a} || $a cmp $b } keys %$strain_types;

  # Nothing to list: return an empty string rather than undef, so callers can
  # pass the result to sprintf without an "uninitialized value" warning.
  return '' unless @ordered_strain_types;

  # Treat a missing genome count as zero to avoid a numeric comparison on undef.
  if (($num_genomes || 0) > 1) {
    @ordered_strain_types = map { pluralise($_) } @ordered_strain_types;
  }

  return $ordered_strain_types[0] if @ordered_strain_types == 1;

  # Comma-separate all but the final entry, which is joined with "and".
  my $last_strain_type = pop @ordered_strain_types;
  return join(', ', @ordered_strain_types) . ' and ' . $last_strain_type;
}

sub get_export_data {
## Get data for export
my ($self, $flag) = @_;
Expand Down
2 changes: 1 addition & 1 deletion modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ sub content {
my $cdb = shift || $hub->param('cdb') || 'compara';
my $biotype = $self->object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name.
my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ );
my $strain_url = $hub->is_strain ? 'Strain_' : '';
my $strain_url = $hub->action =~ /^Strain_/ ? 'Strain_' : '';
my %paralogue_list = %{$self->object->get_homology_matches('ENSEMBL_PARALOGUES', 'paralog|gene_split', undef, $cdb)};

return '<p>No paralogues have been identified for this gene</p>' unless keys %paralogue_list;
Expand Down
19 changes: 12 additions & 7 deletions modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ sub get_details {
my $member = $object->get_compara_Member({'stable_id' => $object->stable_id, 'cdb' => $cdb});
return (undef, '<strong>Gene is not in the compara database</strong>') unless $member;

my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->is_strain || $self->hub->param('strain') || $self->hub->action =~ /Strain_/);
my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->param('strain') || $self->hub->action =~ /^Strain_/);

my $tree = $object->get_GeneTree($cdb,"", $strain_tree);
return (undef, '<strong>Gene is not in a compara tree</strong>') unless $tree;

Expand Down Expand Up @@ -94,7 +95,7 @@ sub content {
my $hub = $self->hub;
my $object = $self->object || $self->hub->core_object('gene');
my $is_genetree = $object && $object->isa('EnsEMBL::Web::Object::GeneTree') ? 1 : 0;
my $is_strain = $hub->is_strain || $hub->param('strain') || $hub->action =~ /Strain_/;
my $is_strain_view = $hub->param('strain') || $hub->action =~ /^Strain_/;
my ($gene, $member, $tree, $node);

my $type = $self->param('data_type') || $hub->type;
Expand Down Expand Up @@ -147,7 +148,7 @@ sub content {
if (defined $parent) {

if ($vc->get('super_tree') eq 'on' || $self->param('super_tree') eq 'on') {
my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain });
my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain_view });
$html .= qq(<div class="ajax"><input type="hidden" class="ajax_load" value="$super_url" /></div>);
} else {
$html .= $self->_info(
Expand All @@ -162,10 +163,10 @@ sub content {
}

if ($hub->type eq 'Gene') {
if ($tree->tree->clusterset_id ne $clusterset_id && !$self->is_strain) {
if ($tree->tree->clusterset_id ne $clusterset_id) {
$html .= $self->_info('Phylogenetic model selection',
sprintf(
'The phylogenetic model <I>%s</I> is not available for this tree. Showing the default (consensus) tree instead.', $clusterset_id
'The phylogenetic model <i>%s</i> is not available for this tree. Showing the <i>%s</i> tree instead.', $clusterset_id, $tree->tree->clusterset_id,
)
);
} elsif ($tree->tree->ref_root_id) {
Expand Down Expand Up @@ -275,7 +276,7 @@ sub content {
image_width => $image_width,
slice_number => '1|1',
cdb => $cdb,
strain => $is_strain,
strain => $is_strain_view,
});

# Keep track of collapsed nodes
Expand Down Expand Up @@ -388,7 +389,11 @@ sub content {
my $collapsed_to_rank = $self->collapsed_nodes($tree, $node, "rank_$rank", $highlight_genome_db_id, $highlight_gene);
push @rank_options, sprintf qq{<option value="%s" %s>%s</option>\n}, $hub->url({ collapse => $collapsed_to_rank, g1 => $highlight_gene, gtr => $rank }), $rank eq $selected_rank ? 'selected' : '', ucfirst $rank;
}
push @view_links, sprintf qq{<li>Collapse all the nodes at the taxonomic rank <select onchange="Ensembl.redirect(this.value)">%s</select></li>}, join("\n", @rank_options) if(!$self->is_strain);
# The ability to collapse by taxonomic rank was not seen as
# particularly useful in a strain gene-tree view ( ENSWEB-3037 ).
if(!$is_strain_view) {
push @view_links, sprintf qq{<li>Collapse all the nodes at the taxonomic rank <select onchange="Ensembl.redirect(this.value)">%s</select></li>}, join("\n", @rank_options);
}
}

$html .= $image->render;
Expand Down
7 changes: 4 additions & 3 deletions modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ sub content {
my $hub = $self->hub;
my $availability = $self->object->availability;
my $location = $hub->url({ type => 'Location', action => 'Compara' });
my $strain_url = ($self->is_strain || $hub->action =~ /^Strain_/) ? "Strain_" : "";
my $strain_avail = ($self->is_strain || $hub->action =~ /^Strain_/) ? "strain_" : "";
my $is_strain_view = $hub->action =~ /^Strain_/;
my $strain_url = $is_strain_view ? "Strain_" : "";
my $strain_avail = $is_strain_view ? "strain_" : "";

my $ortho_image = $strain_avail ? 'strain_ortho.gif' : 'compara_ortho.gif';
my $para_image = $strain_avail ? 'strain_para.gif' : 'compara_para.gif';
Expand All @@ -49,7 +50,7 @@ sub content {
{ title => 'Families', img => '80/compara_fam.gif', url => $availability->{'family'} ? $hub->url({ action => 'Family' }) : '' },
];

@$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($self->is_strain); #remove the one we dont show for strains species
@$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($is_strain_view); #remove the one we dont show for strain views
my $html = $self->button_portal($buttons, 'portal-small');
$html .= qq{<p>More views of comparative genomics data, such as multiple alignments and synteny, are available on the <a href="$location">Location</a> page for this gene.</p>};

Expand Down
11 changes: 7 additions & 4 deletions modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,13 @@ sub content {
my $label = $external_species ? $pan_lookup->{$prodname}{'display_name'} : $species_defs->species_label($member_species);
my $location = sprintf '%s:%d-%d', $gene->dnafrag->name, $gene->dnafrag_start, $gene->dnafrag_end;

if (!$second_gene && $member_species ne $species && $hub->param('species_' .$prodname) eq 'off') {
$flag = 0;
$skipped{$label}++;
next;
if (!$second_gene && $member_species ne $species) {
my $species_toggle = $hub->param('species_' .$prodname);
if (!defined $species_toggle || $species_toggle eq 'off') {
$skipped{$label}++ if defined $species_toggle;
$flag = 0;
next;
}
}

if ($gene->stable_id eq $gene_id) {
Expand Down
19 changes: 19 additions & 0 deletions modules/EnsEMBL/Web/ConfigPacker.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1387,11 +1387,30 @@ sub _summarise_compara_db {
$sth->execute;

while (my ($sp, $clusterset_id, $strain_type) = $sth->fetchrow_array) {
$self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{$clusterset_id}{$sp} = 1;
next if exists $preferred_clusterset_id{$sp} && $clusterset_id ne $preferred_clusterset_id{$sp};
$self->db_tree->{$db_name}{'CLUSTERSETS'}{$sp} = $clusterset_id;
$self->db_tree->{$db_name}{'STRAIN_TYPES'}{$sp} = $strain_type;
}

if (exists $self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}) {

my $default_oset_spp_aref = $dbh->selectcol_arrayref('
select distinct gd.name
from method_link_species_set mlss
join method_link ml using(method_link_id)
join species_set ss using(species_set_id)
join species_set_header ssh using(species_set_id)
join genome_db gd using(genome_db_id)
where ml.type in ("PROTEIN_TREES", "NC_TREES")
and trim(leading "collection-" from ssh.name) = "default";
');

foreach my $sp (@$default_oset_spp_aref) {
$self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{'default'}{$sp} = 1;
}
}

###################################################################
## Cache MLSS for quick lookup in ImageConfig

Expand Down
Loading