diff --git a/care/1_convert-tpm-hugo.ipynb b/care/1_convert-tpm-hugo.ipynb index c8570bc..dcf83f2 100644 --- a/care/1_convert-tpm-hugo.ipynb +++ b/care/1_convert-tpm-hugo.ipynb @@ -14,7 +14,7 @@ "\n", "Changelog:\n", "2018-05-02 : Sum the TMP values, rather than take the mean, of combined duplicate genes See issue #130.\n", - "2017-12-05 : Average genes together in TPM space, not log2 TPM space; the latter is incorrect.\n", + "2017-12-05 : Average genes together in TPM space, not log2 TPM space; the latter is incorrect. (later changed to summing genes; see the 2018-05-02 entry)\n", "\n", "Input : \n", " - conf.json\n", @@ -40,7 +40,7 @@ "- TPM column extracted\n", "- ensembl genes translated to hugo\n", " - File written as rsem.genes.tpm.hugo.tab\n", - "- hugo genes averaged and uniqued in TPM space\n", + "- hugo genes summed and uniqued in TPM space\n", "- log2(n+1) normalization applied\n", " - File written as rsem.genes.tpm.hugo.log2plus1.dedupe.tab\n" ] @@ -140,7 +140,7 @@ "# Takes : dataframe of TPM columns\n", "# returns : dataframe. \n", " # finds all row labels that are duplicate\n", - " # averages their values together to create an unique row label\n", + " # sums their values together to create a unique row label\n", "def uniquify_genes(exp):\n", " return exp.groupby(exp.index).sum()" ]