Skip to content

Question of expected data #198

@seraphimangel77

Description

@seraphimangel77

Hi,

Thank you very much for your outstanding work! I used the following code with the GM12878 model to obtain the predictions for my fragments.
import numpy as np
import matplotlib.pyplot as plt
locations = [
'chr1:100000000-101048576', 'chr1:101048576-102097152',
'chr1:200000000-201048576', 'chr1:201048576-202097152',
'chr2:50000000-51048576', 'chr2:51048576-52097152',
'chr2:75000000-76048576', 'chr2:76048576-77097152',
'chr2:150000000-151048576', 'chr2:151048576-152097152',
'chr3:50000000-51048576', 'chr3:51048576-52097152',
'chr3:125000000-126048576', 'chr3:126048576-127097152',
'chr4:100000000-101048576', 'chr4:101048576-102097152',
'chr4:150000000-151048576', 'chr4:151048576-152097152',
'chr5:60000000-61048576', 'chr5:61048576-62097152',
'chr5:100000000-101048576', 'chr5:101048576-102097152',
'chr6:25000000-26048576', 'chr6:26048576-27097152',
'chr6:125000000-126048576', 'chr6:126048576-127097152',
'chr7:35000000-36048576', 'chr7:36048576-37097152',
'chr7:135000000-136048576', 'chr7:136048576-137097152',
'chr8:65000000-66048576', 'chr8:66048576-67097152',
'chr8:95000000-96048576', 'chr8:96048576-97097152',
'chr9:25000000-26048576', 'chr9:26048576-27097152',
'chr9:95000000-96048576', 'chr9:96048576-97097152',
'chr11:35000000-36048576', 'chr11:36048576-37097152',
'chr11:75000000-76048576', 'chr11:76048576-77097152',
'chr13:35000000-36048576', 'chr13:36048576-37097152',
'chr13:60000000-61048576', 'chr13:61048576-62097152'
]

seq_length = 2**20

for loc in locations:
chrm, pos = loc.split(':')
seq_start, seq_end = map(int, pos.split('-'))

seq = fasta_open.fetch(chrm, seq_start, seq_end).upper()
actual_seq_length = len(seq)
if actual_seq_length != seq_length:
    raise ValueError(f'len(seq) != seq_length: {actual_seq_length} != {seq_length}')

seq_1hot = dna_io.dna_1hot(seq)

test_pred_from_seq = seqnn_model.model.predict(np.expand_dims(seq_1hot, 0))

myseq_str = f"{chrm}:{seq_start}-{seq_end}"

plt.figure(figsize=(8, 4))
target_index = 2
vmin = -2
vmax = 2

mat_pred = from_upper_triu(test_pred_from_seq[:, :, target_index], target_length1_cropped, hic_diags)

plt.subplot(121)
im = plt.matshow(mat_pred, fignum=False, cmap='RdBu_r', vmax=vmax, vmin=vmin)
plt.colorbar(im, fraction=.04, pad=0.05, ticks=[-2, -1, 0, 1, 2])
plt.title('pred-' + str(hic_num_to_name_dict[target_index]), y=1.15)
plt.ylabel(myseq_str)

mat_target = from_upper_triu(test_target[:, :, target_index], target_length1_cropped, hic_diags)
plt.subplot(122)
im = plt.matshow(mat_target, fignum=False, cmap='RdBu_r', vmax=vmax, vmin=vmin)
plt.colorbar(im, fraction=.04, pad=0.05, ticks=[-2, -1, 0, 1, 2])
plt.title('target-' + str(hic_num_to_name_dict[target_index]), y=1.15)

plt.tight_layout()

output_pdf = f'/project/pathology/Mani_lab/s231294/Data/basenji/manuscripts/akita/data/interaction_matrix_GM12878/{chrm}_{seq_start}-{seq_end}.pdf'
plt.savefig(output_pdf)
plt.close()

output_csv = f'/project/pathology/Mani_lab/s231294/Data/basenji/manuscripts/akita/data/interaction_matrix_GM12878/{chrm}_{seq_start}-{seq_end}.csv'
np.savetxt(output_csv, mat_pred, delimiter=',')

print(f"has saved {output_pdf}")
print(f"has saved {output_csv}")

I saved the predictions as mat_pred and exported the matrix. According to your article, each value represents log(observed/expected), with a range of -2 to 2.

I found that using seqnn_model.model.predict(), I obtained the predicted results, and then using mat_pred = from_upper_triu(test_pred_from_seq[:, :, target_index], target_length1_cropped, hic_diags), I got mat_pred, which represents log(observed/expected), is it correct?

I would like to obtain the expected values (the original expected values before the log transformation). Could you do me a huge favor that advise me on how to obtain the expected values?

Thank you very much for your help. I look forward to your reply.

Best,
Cecelia

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions