-
Notifications
You must be signed in to change notification settings - Fork 136
Description
Hi,
Thank you very much for your outstanding work! I used the following code with the GM12878 model to obtain the predictions for my fragments.
import numpy as np
import matplotlib.pyplot as plt
locations = [
'chr1:100000000-101048576', 'chr1:101048576-102097152',
'chr1:200000000-201048576', 'chr1:201048576-202097152',
'chr2:50000000-51048576', 'chr2:51048576-52097152',
'chr2:75000000-76048576', 'chr2:76048576-77097152',
'chr2:150000000-151048576', 'chr2:151048576-152097152',
'chr3:50000000-51048576', 'chr3:51048576-52097152',
'chr3:125000000-126048576', 'chr3:126048576-127097152',
'chr4:100000000-101048576', 'chr4:101048576-102097152',
'chr4:150000000-151048576', 'chr4:151048576-152097152',
'chr5:60000000-61048576', 'chr5:61048576-62097152',
'chr5:100000000-101048576', 'chr5:101048576-102097152',
'chr6:25000000-26048576', 'chr6:26048576-27097152',
'chr6:125000000-126048576', 'chr6:126048576-127097152',
'chr7:35000000-36048576', 'chr7:36048576-37097152',
'chr7:135000000-136048576', 'chr7:136048576-137097152',
'chr8:65000000-66048576', 'chr8:66048576-67097152',
'chr8:95000000-96048576', 'chr8:96048576-97097152',
'chr9:25000000-26048576', 'chr9:26048576-27097152',
'chr9:95000000-96048576', 'chr9:96048576-97097152',
'chr11:35000000-36048576', 'chr11:36048576-37097152',
'chr11:75000000-76048576', 'chr11:76048576-77097152',
'chr13:35000000-36048576', 'chr13:36048576-37097152',
'chr13:60000000-61048576', 'chr13:61048576-62097152'
]
seq_length = 2**20
for loc in locations:
chrm, pos = loc.split(':')
seq_start, seq_end = map(int, pos.split('-'))
seq = fasta_open.fetch(chrm, seq_start, seq_end).upper()
actual_seq_length = len(seq)
if actual_seq_length != seq_length:
raise ValueError(f'len(seq) != seq_length: {actual_seq_length} != {seq_length}')
seq_1hot = dna_io.dna_1hot(seq)
test_pred_from_seq = seqnn_model.model.predict(np.expand_dims(seq_1hot, 0))
myseq_str = f"{chrm}:{seq_start}-{seq_end}"
plt.figure(figsize=(8, 4))
target_index = 2
vmin = -2
vmax = 2
mat_pred = from_upper_triu(test_pred_from_seq[:, :, target_index], target_length1_cropped, hic_diags)
plt.subplot(121)
im = plt.matshow(mat_pred, fignum=False, cmap='RdBu_r', vmax=vmax, vmin=vmin)
plt.colorbar(im, fraction=.04, pad=0.05, ticks=[-2, -1, 0, 1, 2])
plt.title('pred-' + str(hic_num_to_name_dict[target_index]), y=1.15)
plt.ylabel(myseq_str)
mat_target = from_upper_triu(test_target[:, :, target_index], target_length1_cropped, hic_diags)
plt.subplot(122)
im = plt.matshow(mat_target, fignum=False, cmap='RdBu_r', vmax=vmax, vmin=vmin)
plt.colorbar(im, fraction=.04, pad=0.05, ticks=[-2, -1, 0, 1, 2])
plt.title('target-' + str(hic_num_to_name_dict[target_index]), y=1.15)
plt.tight_layout()
output_pdf = f'/project/pathology/Mani_lab/s231294/Data/basenji/manuscripts/akita/data/interaction_matrix_GM12878/{chrm}_{seq_start}-{seq_end}.pdf'
plt.savefig(output_pdf)
plt.close()
output_csv = f'/project/pathology/Mani_lab/s231294/Data/basenji/manuscripts/akita/data/interaction_matrix_GM12878/{chrm}_{seq_start}-{seq_end}.csv'
np.savetxt(output_csv, mat_pred, delimiter=',')
print(f"has saved {output_pdf}")
print(f"has saved {output_csv}")
I saved the predictions as mat_pred and exported the matrix. According to your article, each value represents log(observed/expected), with a range of -2 to 2.
I found that using seqnn_model.model.predict(), I obtained the predicted results, and then using mat_pred = from_upper_triu(test_pred_from_seq[:, :, target_index], target_length1_cropped, hic_diags), I got mat_pred, which represents log(observed/expected), is it correct?
I would like to obtain the expected values (the original expected values before the log transformation). Could you do me a huge favor that advise me on how to obtain the expected values?
Thank you very much for your help. I look forward to your reply.
Best,
Cecelia