|
1 | 1 | from functools import cached_property
|
2 | 2 |
|
3 |
| -import tskit |
4 |
| -import numpy as np |
5 | 3 | import numba
|
| 4 | +import numpy as np |
6 | 5 | import pandas as pd
|
| 6 | +import tskit |
7 | 7 |
|
8 | 8 | spec = [
|
9 | 9 | ("num_edges", numba.int64),
|
@@ -43,7 +43,7 @@ def __init__(
|
43 | 43 | self.in_range = np.zeros(2, dtype=np.int64)
|
44 | 44 | self.out_range = np.zeros(2, dtype=np.int64)
|
45 | 45 |
|
46 |
| - def next(self): |
| 46 | + def next(self): # noqa |
47 | 47 | left = self.interval[1]
|
48 | 48 | j = self.in_range[1]
|
49 | 49 | k = self.out_range[1]
|
@@ -220,7 +220,7 @@ def mutations_df(self):
|
220 | 220 | unknown = tskit.is_unknown_time(mutations_time)
|
221 | 221 | mutations_time[unknown] = self.ts.nodes_time[mutations_node[unknown]]
|
222 | 222 |
|
223 |
| - node_flag = ts.nodes_flags[mutations_node] |
| 223 | + # node_flag = ts.nodes_flags[mutations_node] |
224 | 224 | position = ts.sites_position[ts.mutations_site]
|
225 | 225 |
|
226 | 226 | tables = self.ts.tables
|
@@ -440,90 +440,6 @@ def make_sliding_windows(self, iterable, size, overlap=0):
|
440 | 440 | end += step
|
441 | 441 | yield iterable[start:]
|
442 | 442 |
|
443 |
| - def plot_polytomy_fractions( |
444 |
| - self, region_start=None, region_end=None, window_size=100_000, overlap=0 |
445 |
| - ): |
446 |
| - """ |
447 |
| - Plots the fraction of polytomies in windows actoss the genomic sequence |
448 |
| - """ |
449 |
| - if region_start is None: |
450 |
| - region_start = max(0, self.ts.tables.sites.position[0] - 50_000) |
451 |
| - if region_end is None: |
452 |
| - region_end = self.ts.tables.sites.position[-1] + 50_000 |
453 |
| - fig, ax = plt.subplots(figsize=(20, 5)) |
454 |
| - polytomy_fractions = self.calc_polytomy_fractions() |
455 |
| - poly_fracs_by_pos = self.map_stats_to_genome(polytomy_fractions) |
456 |
| - poly_fracs_means = [] |
457 |
| - poly_fracs_sd = [] |
458 |
| - genomic_positions = [] |
459 |
| - for poly_win in self.make_sliding_windows( |
460 |
| - poly_fracs_by_pos, window_size, overlap |
461 |
| - ): |
462 |
| - poly_fracs_means.append(np.mean(poly_win)) |
463 |
| - poly_fracs_sd.append(np.std(poly_win)) |
464 |
| - for gen_win in self.make_sliding_windows( |
465 |
| - np.arange(1, self.ts.sequence_length), window_size, overlap |
466 |
| - ): |
467 |
| - genomic_positions.append(gen_win[0] / 1_000_000) |
468 |
| - ax.plot( |
469 |
| - genomic_positions, |
470 |
| - poly_fracs_means, |
471 |
| - label="mean", |
472 |
| - linewidth=0.5, |
473 |
| - ) |
474 |
| - ax.fill_between( |
475 |
| - genomic_positions, |
476 |
| - np.array(poly_fracs_means) - np.array(poly_fracs_sd), |
477 |
| - np.array(poly_fracs_means) + np.array(poly_fracs_sd), |
478 |
| - alpha=0.3, |
479 |
| - label="mean +/- std", |
480 |
| - ) |
481 |
| - missing_vals = np.take(genomic_positions, np.where(np.isnan(poly_fracs_means))) |
482 |
| - ax.plot( |
483 |
| - missing_vals, |
484 |
| - np.zeros(len(missing_vals)), |
485 |
| - color="red", |
486 |
| - marker="o", |
487 |
| - label="missing data", |
488 |
| - ) |
489 |
| - ax.set_xlabel(f"Position on chr {self.chr}(Mb)", fontsize=10) |
490 |
| - ax.set_ylabel("Window mean", fontsize=10) |
491 |
| - ax.set_title("Polytomy score", fontsize=10) |
492 |
| - ax.set_ylim(0, 1) |
493 |
| - ax.set_xlim(region_start / 1_000_000, region_end / 1_000_000) |
494 |
| - handles, labels = ax.get_legend_handles_labels() |
495 |
| - unique = [ |
496 |
| - (h, l) |
497 |
| - for i, (h, l) in enumerate(zip(handles, labels)) |
498 |
| - if l not in labels[:i] |
499 |
| - ] |
500 |
| - ax.legend(*zip(*unique)) |
501 |
| - plt.show() |
502 |
| - |
503 |
| - def plot_mutations_per_site_along_seq( |
504 |
| - self, region_start=None, region_end=None, hist_bins=1000 |
505 |
| - ): |
506 |
| - count = self.sites_num_mutations |
507 |
| - pos = self.ts.sites_position |
508 |
| - if region_start is None: |
509 |
| - region_start = pos[0] |
510 |
| - if region_end is None: |
511 |
| - region_end = pos[-1] |
512 |
| - grid = sns.jointplot( |
513 |
| - x=pos / 1_000_000, |
514 |
| - y=count, |
515 |
| - kind="scatter", |
516 |
| - marginal_ticks=True, |
517 |
| - alpha=0.5, |
518 |
| - marginal_kws=dict(bins=hist_bins), |
519 |
| - xlim=(region_start / 1_000_000, region_end / 1_000_000), |
520 |
| - ) |
521 |
| - grid.ax_marg_y.remove() |
522 |
| - grid.fig.set_figwidth(20) |
523 |
| - grid.fig.set_figheight(8) |
524 |
| - grid.ax_joint.set_xlabel("Position on genome (Mb)") |
525 |
| - grid.ax_joint.set_ylabel("Number of mutations") |
526 |
| - |
527 | 443 | def calc_mean_node_arity(self):
|
528 | 444 | span_sums = np.bincount(
|
529 | 445 | self.ts.edges_parent,
|
|
0 commit comments