{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "sns.set()\n", "import itertools\n", "from matplotlib import rcParams\n", "from collections import defaultdict\n", "rcParams['font.family'] = 'sans-serif'\n", "rcParams['font.sans-serif'] = ['Arial']\n", "rcParams['pdf.fonttype'] = 42\n", "rcParams['axes.formatter.useoffset'] = False" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Loading datasets:" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
"def read_posteriors(path):\n",
"    \"\"\"Read one diffBUM-HMM output table into a DataFrame.\n",
"\n",
"    The file is parsed as tab-separated text; its first row supplies the\n",
"    column names and its first column becomes the index.\n",
"    \"\"\"\n",
"    return pd.read_csv(path, sep=\"\\t\", index_col=0, header=0)\n",
"\n",
"\n",
"# One table per rRNA; `eighteennew` is the 18S table read from the Fun12 analysis file.\n",
"fiveeight = read_posteriors(\"../../Analysis/diffBUM-HMM/mature_rRNA_5.8S_control_identical_conditions_diff_BUM_HMM.txt\")\n",
"five = read_posteriors(\"../../Analysis/diffBUM-HMM/mature_rRNA_5S_control_identical_conditions_diff_BUM_HMM.txt\")\n",
"twentyfive = read_posteriors(\"../../Analysis/diffBUM-HMM/mature_rRNA_25S_control_identical_conditions_diff_BUM_HMM.txt\")\n",
"eighteen = read_posteriors(\"../../Analysis/diffBUM-HMM/mature_rRNA_18S_control_identical_conditions_diff_BUM_HMM.txt\")\n",
"eighteennew = read_posteriors(\"../../Analysis/diffBUM-HMM/mature_rRNA_18S_control_identical_conditions_Fun12analysis_diff_BUM_HMM.txt\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Dropping positions with insufficient coverage:" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
"# Remove, in place, every row in which all columns hold -999. The boolean\n",
"# mask turns every other value into NaN, so dropna() leaves only the rows\n",
"# where every column was -999; those index labels are then dropped.\n",
"for table in (fiveeight, five, eighteen, eighteennew, twentyfive):\n",
"    position = table[table == -999].dropna().index\n",
"    table.drop(position,inplace=True)\n",
"\n",
"### Only the last position had a -999 in the datasets." ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>UU</th>\n", " <th>UM</th>\n", " <th>MU</th>\n", " <th>MM</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>1.0</td>\n", " <td>1.694943e-63</td>\n", " <td>1.694943e-63</td>\n", " <td>2.872833e-126</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>1.0</td>\n", " <td>3.862743e-12</td>\n", " <td>3.862743e-12</td>\n", " <td>1.492079e-23</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>1.0</td>\n", " <td>5.676103e-24</td>\n", " <td>5.676103e-24</td>\n", " <td>3.221815e-47</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>1.0</td>\n", " <td>7.203460e-16</td>\n", " <td>7.203460e-16</td>\n", " <td>5.188983e-31</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>1.0</td>\n", " <td>5.886530e-29</td>\n", " <td>5.886530e-29</td>\n", " <td>3.465124e-57</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " UU UM MU MM\n", "1 1.0 1.694943e-63 1.694943e-63 2.872833e-126\n", "2 1.0 3.862743e-12 3.862743e-12 1.492079e-23\n", "3 1.0 5.676103e-24 5.676103e-24 3.221815e-47\n", "4 1.0 7.203460e-16 
7.203460e-16 5.188983e-31\n", "5 1.0 5.886530e-29 5.886530e-29 3.465124e-57" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eighteennew.head()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Making boxplots:" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Gathering all the data from the UM and MU columns that have the posteriors for being differentially modified:" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
"def flatten_um_mu(table):\n",
"    \"\"\"Return the UM and MU values of `table` as one flat list.\n",
"\n",
"    Values are emitted row by row: each row contributes its UM value\n",
"    followed by its MU value.\n",
"    \"\"\"\n",
"    return list(table[[\"UM\",\"MU\"]].values.ravel())\n",
"\n",
"\n",
"_fiveeight = flatten_um_mu(fiveeight)\n",
"_five = flatten_um_mu(five)\n",
"_twentyfive = flatten_um_mu(twentyfive)\n",
"_eighteen = flatten_um_mu(eighteen)\n",
"\n",
"_eighteennew = flatten_um_mu(eighteennew)" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>UU</th>\n", " <th>UM</th>\n", " <th>MU</th>\n", " <th>MM</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>1.000000</td>\n", " <td>2.699683e-11</td>\n", " <td>2.699683e-11</td>\n", " <td>7.288288e-22</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>1.000000</td>\n", " <td>1.647115e-22</td>\n", " <td>1.647115e-22</td>\n", " <td>2.712988e-44</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>1.000000</td>\n", " <td>1.879406e-32</td>\n", " <td>1.879406e-32</td>\n", " 
<td>3.532169e-64</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>1.000000</td>\n", " <td>4.654247e-24</td>\n", " <td>4.654247e-24</td>\n", " <td>2.166201e-47</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>1.000000</td>\n", " <td>2.401017e-14</td>\n", " <td>2.401017e-14</td>\n", " <td>5.764884e-28</td>\n", " </tr>\n", " <tr>\n", " <td>6</td>\n", " <td>0.999999</td>\n", " <td>3.115782e-07</td>\n", " <td>3.115782e-07</td>\n", " <td>9.708104e-14</td>\n", " </tr>\n", " <tr>\n", " <td>7</td>\n", " <td>1.000000</td>\n", " <td>3.736168e-12</td>\n", " <td>3.736168e-12</td>\n", " <td>1.395895e-23</td>\n", " </tr>\n", " <tr>\n", " <td>8</td>\n", " <td>1.000000</td>\n", " <td>4.472711e-14</td>\n", " <td>4.472711e-14</td>\n", " <td>2.000514e-27</td>\n", " </tr>\n", " <tr>\n", " <td>9</td>\n", " <td>1.000000</td>\n", " <td>1.333472e-22</td>\n", " <td>1.333472e-22</td>\n", " <td>1.778149e-44</td>\n", " </tr>\n", " <tr>\n", " <td>10</td>\n", " <td>1.000000</td>\n", " <td>1.276043e-30</td>\n", " <td>1.276043e-30</td>\n", " <td>1.628285e-60</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " UU UM MU MM\n", "1 1.000000 2.699683e-11 2.699683e-11 7.288288e-22\n", "2 1.000000 1.647115e-22 1.647115e-22 2.712988e-44\n", "3 1.000000 1.879406e-32 1.879406e-32 3.532169e-64\n", "4 1.000000 4.654247e-24 4.654247e-24 2.166201e-47\n", "5 1.000000 2.401017e-14 2.401017e-14 5.764884e-28\n", "6 0.999999 3.115782e-07 3.115782e-07 9.708104e-14\n", "7 1.000000 3.736168e-12 3.736168e-12 1.395895e-23\n", "8 1.000000 4.472711e-14 4.472711e-14 2.000514e-27\n", "9 1.000000 1.333472e-22 1.333472e-22 1.778149e-44\n", "10 1.000000 1.276043e-30 1.276043e-30 1.628285e-60" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fiveeight[:10]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " 
vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>UU</th>\n", " <th>UM</th>\n", " <th>MU</th>\n", " <th>MM</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>1.0</td>\n", " <td>1.694943e-63</td>\n", " <td>1.694943e-63</td>\n", " <td>2.872833e-126</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>1.0</td>\n", " <td>3.862743e-12</td>\n", " <td>3.862743e-12</td>\n", " <td>1.492079e-23</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>1.0</td>\n", " <td>5.676103e-24</td>\n", " <td>5.676103e-24</td>\n", " <td>3.221815e-47</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>1.0</td>\n", " <td>7.203460e-16</td>\n", " <td>7.203460e-16</td>\n", " <td>5.188983e-31</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>1.0</td>\n", " <td>5.886530e-29</td>\n", " <td>5.886530e-29</td>\n", " <td>3.465124e-57</td>\n", " </tr>\n", " <tr>\n", " <td>6</td>\n", " <td>1.0</td>\n", " <td>2.685610e-11</td>\n", " <td>2.685610e-11</td>\n", " <td>7.212499e-22</td>\n", " </tr>\n", " <tr>\n", " <td>7</td>\n", " <td>1.0</td>\n", " <td>2.340133e-22</td>\n", " <td>2.340133e-22</td>\n", " <td>5.476222e-44</td>\n", " </tr>\n", " <tr>\n", " <td>8</td>\n", " <td>1.0</td>\n", " <td>2.489041e-18</td>\n", " <td>2.489041e-18</td>\n", " <td>6.195327e-36</td>\n", " </tr>\n", " <tr>\n", " <td>9</td>\n", " <td>1.0</td>\n", " <td>2.491819e-08</td>\n", " <td>2.491819e-08</td>\n", " <td>6.209161e-16</td>\n", " </tr>\n", " <tr>\n", " <td>10</td>\n", " <td>1.0</td>\n", " <td>2.037583e-30</td>\n", " <td>2.037583e-30</td>\n", " <td>4.151746e-60</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " UU UM MU MM\n", "1 1.0 1.694943e-63 1.694943e-63 2.872833e-126\n", "2 1.0 3.862743e-12 
3.862743e-12 1.492079e-23\n", "3 1.0 5.676103e-24 5.676103e-24 3.221815e-47\n", "4 1.0 7.203460e-16 7.203460e-16 5.188983e-31\n", "5 1.0 5.886530e-29 5.886530e-29 3.465124e-57\n", "6 1.0 2.685610e-11 2.685610e-11 7.212499e-22\n", "7 1.0 2.340133e-22 2.340133e-22 5.476222e-44\n", "8 1.0 2.489041e-18 2.489041e-18 6.195327e-36\n", "9 1.0 2.491819e-08 2.491819e-08 6.209161e-16\n", "10 1.0 2.037583e-30 2.037583e-30 4.151746e-60" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eighteennew[:10]" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
"def flatten_mm(table):\n",
"    \"\"\"Return the MM column of `table` as a flat list, in row order.\"\"\"\n",
"    return list(table[[\"MM\"]].values.ravel())\n",
"\n",
"\n",
"_fiveeightMM = flatten_mm(fiveeight)\n",
"_fiveMM = flatten_mm(five)\n",
"_twentyfiveMM = flatten_mm(twentyfive)\n",
"_eighteenMM = flatten_mm(eighteen)\n",
"\n",
"_eighteennewMM = flatten_mm(eighteennew)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Putting it in a dataframe, 25S first:" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
"# All-NaN frame with one row per element of the flattened 25S UM/MU list\n",
"# and one column per list that is written into it in the next cell.\n",
"index = np.arange(0,len(_twentyfive))\n",
"values = pd.DataFrame(\n",
"    np.nan,\n",
"    index=index,\n",
"    columns=[\"5SMM\",\"5.8SMM\",\"18SMM\",\"18S-Fun12MM\",\"25SMM\",\"5S\",\"5.8S\",\"18S\",\"18S-Fun12\",\"25S\"],\n",
")" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
"# Column name -> flattened posterior list, listed in the order the columns are filled.\n",
"column_data = {\n",
"    \"5S\": _five,\n",
"    \"5.8S\": _fiveeight,\n",
"    \"18S\": _eighteen,\n",
"    \"18S-Fun12\": _eighteennew,\n",
"    \"25S\": _twentyfive,\n",
"    \"5SMM\": _fiveMM,\n",
"    \"5.8SMM\": _fiveeightMM,\n",
"    \"18SMM\": _eighteenMM,\n",
"    \"18S-Fun12MM\": _eighteennewMM,\n",
"    \"25SMM\": _twentyfiveMM,\n",
"}\n",
"\n",
"# .loc slices by label and includes both end points, so rows 0..len(data)-1\n",
"# receive the list; any remaining rows of that column keep their NaN.\n",
"for column, data in column_data.items():\n",
"    length = len(data)-1\n",
"    values.loc[0:length,column] = data" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>5SMM</th>\n", " <th>5.8SMM</th>\n", " <th>18SMM</th>\n", " <th>18S-Fun12MM</th>\n", " <th>25SMM</th>\n", " <th>5S</th>\n", " <th>5.8S</th>\n", " <th>18S</th>\n", " <th>18S-Fun12</th>\n", " <th>25S</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>0</td>\n", " <td>9.999888e-01</td>\n", " <td>7.288288e-22</td>\n", " <td>1.981656e-27</td>\n", " <td>2.872833e-126</td>\n", " <td>0.295833</td>\n", " <td>5.588118e-06</td>\n", " <td>2.699683e-11</td>\n", " <td>4.451579e-14</td>\n", " <td>1.694943e-63</td>\n", " <td>2.480723e-01</td>\n", " </tr>\n", " <tr>\n", " <td>1</td>\n", " <td>7.138806e-03</td>\n", " <td>2.712988e-44</td>\n", " <td>5.230114e-43</td>\n", " <td>1.492079e-23</td>\n", " <td>0.988544</td>\n", " <td>5.588118e-06</td>\n", " <td>2.699683e-11</td>\n", " <td>4.451579e-14</td>\n", " <td>1.694943e-63</td>\n", " <td>2.480723e-01</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>4.629931e-34</td>\n", " <td>3.532169e-64</td>\n", " <td>1.970948e-33</td>\n", " <td>3.221815e-47</td>\n", " <td>1.000000</td>\n", " <td>7.735265e-02</td>\n", " 
<td>1.647115e-22</td>\n", " <td>7.231953e-22</td>\n", " <td>3.862743e-12</td>\n", " <td>5.711572e-03</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>3.819163e-59</td>\n", " <td>2.166201e-47</td>\n", " <td>3.534559e-76</td>\n", " <td>5.188983e-31</td>\n", " <td>1.000000</td>\n", " <td>7.735265e-02</td>\n", " <td>1.647115e-22</td>\n", " <td>7.231953e-22</td>\n", " <td>3.862743e-12</td>\n", " <td>5.711572e-03</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>1.592421e-98</td>\n", " <td>5.764884e-28</td>\n", " <td>3.745262e-22</td>\n", " <td>3.465124e-57</td>\n", " <td>1.000000</td>\n", " <td>2.151727e-17</td>\n", " <td>1.879406e-32</td>\n", " <td>4.439537e-17</td>\n", " <td>5.676103e-24</td>\n", " <td>1.258842e-11</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 5SMM 5.8SMM 18SMM 18S-Fun12MM 25SMM \\\n", "0 9.999888e-01 7.288288e-22 1.981656e-27 2.872833e-126 0.295833 \n", "1 7.138806e-03 2.712988e-44 5.230114e-43 1.492079e-23 0.988544 \n", "2 4.629931e-34 3.532169e-64 1.970948e-33 3.221815e-47 1.000000 \n", "3 3.819163e-59 2.166201e-47 3.534559e-76 5.188983e-31 1.000000 \n", "4 1.592421e-98 5.764884e-28 3.745262e-22 3.465124e-57 1.000000 \n", "\n", " 5S 5.8S 18S 18S-Fun12 25S \n", "0 5.588118e-06 2.699683e-11 4.451579e-14 1.694943e-63 2.480723e-01 \n", "1 5.588118e-06 2.699683e-11 4.451579e-14 1.694943e-63 2.480723e-01 \n", "2 7.735265e-02 1.647115e-22 7.231953e-22 3.862743e-12 5.711572e-03 \n", "3 7.735265e-02 1.647115e-22 7.231953e-22 3.862743e-12 5.711572e-03 \n", "4 2.151727e-17 1.879406e-32 4.439537e-17 5.676103e-24 1.258842e-11 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Making plots:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0. 
, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.arange(0.0,1.0,0.1)" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1080x432 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
"# Box plot with one box per column of `values`, drawn on an explicitly\n",
"# created axes and written to a PDF.\n",
"fig,ax = plt.subplots(figsize=(15,6))\n",
"ax = sns.boxplot(data=values,ax=ax)\n",
"\n",
"# Use the same numbers for the tick positions and for their labels.\n",
"ylabels = [0,0.2,0.4,0.6,0.8,1.0]\n",
"ax.set_yticks(ylabels)\n",
"ax.set_yticklabels(ylabels)\n",
"\n",
"fig.savefig(\"diffBUM_HMM_output_control_samples.pdf\",dpi=400)\n",
"\n",
"# NOTE (15 January 2021): the notebook was run only up to this cell to regenerate the figure with 18S-Fun12;\n",
"# the remaining cells were not re-run because they are not affected by the latest results." ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Check if anything at all was modified in 5.8S and whether it makes sense with the structure!" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Making 5.8S and 18S MM output files:" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "fiveeight[\"MM\"].to_csv(\"5.8S_MM_values.txt\",sep=\"\\t\",header=False)\n", "eighteen[\"MM\"].to_csv(\"18S_MM_values.txt\",sep=\"\\t\",header=False)" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "totalmodnucs = len(eighteen[eighteen.MM >= 0.95])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 253 out of the 1799 nucleotides with sufficient coverage were called modified in all three datasets:" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "253" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "totalmodnucs" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "totalnucs = len(eighteen.index)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1799" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "totalnucs" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Plot nucleotides predicted to be modified by diffBUM_HMM on 18S structure (requires pyCRAC)" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "Bad key \"text.kerning_factor\" on line 4 in\n", "/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.\n", "You probably need to get an updated matplotlibrc file from\n", "http://github.com/matplotlib/matplotlib/blob/master/matplotlibrc.template\n", "or from the matplotlib source distribution\n" ] } ], "source": [ "%%bash\n", "\n", "DIR=../../Scripts\n", "\n", "python $DIR/colorStructureFile.py \\\n", "-f \"../../Analysis/rRNA_secondary_structure_diagrams/S_cerevisiae.svg\" \\\n", "-d \"18S_MM_values.txt\" \\\n", "-c BUM_HMM \\\n", "-o \"18S_MM_values.svg\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### The control dataset is a DMS Structure-Seq dataset. So the expectation would be that mostly A's and C's would be called modified by diffBUM_HMM. Check this!!!!:" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [
"# Read the 18S reference sequence into a single string: surrounding\n",
"# whitespace is stripped from every line and the lines are concatenated.\n",
"# The `with` block closes the file as soon as it has been read.\n",
"with open(\"../../Reference_sequences/18S_refseq.txt\",\"r\") as handle:\n",
"    refseq = \"\".join(line.strip() for line in handle)" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TATCTGGTTG'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "refseq[:10]" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1800" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(refseq)" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Int64Index([ 7, 8, 9, 10, 13, 18, 21, 22, 23, 24,\n", " ...\n", " 1705, 1706, 1719, 1720, 1721, 1722, 1727, 1729, 1730, 1748],\n", " dtype='int64', length=253)\n" ] } ], "source": [ "significant = eighteen[eighteen.MM >= 0.95].index\n", "print(significant)" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " UU UM MU MM\n", "7 5.843202e-09 7.643500e-05 7.643500e-05 0.999847\n", "8 3.881934e-19 6.230517e-10 6.230517e-10 1.000000\n", "9 1.030573e-13 3.210253e-07 3.210253e-07 0.999999\n", "10 4.675099e-06 2.157523e-03 2.157523e-03 0.995680\n", "13 9.115940e-08 3.018350e-04 3.018350e-04 0.999396\n", "... ... ... ... 
...\n", "1722 2.873284e-14 1.695076e-07 1.695076e-07 1.000000\n", "1727 5.923253e-21 7.696267e-11 7.696267e-11 1.000000\n", "1729 7.931163e-26 2.816232e-13 2.816232e-13 1.000000\n", "1730 1.820529e-33 4.266766e-17 4.266766e-17 1.000000\n", "1748 4.296382e-18 2.072772e-09 2.072772e-09 1.000000\n", "\n", "[253 rows x 4 columns]\n" ] } ], "source": [ "print(eighteen[eighteen.MM >= 0.95])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Count which nucleotide sits at each position in `significant`.\n",
"# The position labels of the diffBUM-HMM tables start at 1, whereas Python\n",
"# strings are indexed from 0, so position `pos` is refseq[pos - 1].\n",
"nucdict = defaultdict(int)\n",
"for pos in significant:\n",
"    nuc = refseq[pos - 1]\n",
"    nucdict[nuc] += 1" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nucdict" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Bars are drawn in a fixed nucleotide order so that every bar is paired with\n",
"# its own tick label, independent of the order in which keys were added to\n",
"# nucdict. .get() is used so that a nucleotide that was never counted is\n",
"# plotted as 0 without adding a key to the defaultdict.\n",
"# The counted letters include 'T'; it is shown as 'U' on the axis.\n",
"nucleotides = [\"T\",\"G\",\"A\",\"C\"]\n",
"fig,ax = plt.subplots()\n",
"x = np.arange(len(nucleotides))\n",
"y = [nucdict.get(nuc,0) for nuc in nucleotides]\n",
"ax.bar(x,y)\n",
"ax.set_xticks(x)\n",
"ax.set_xticklabels([nuc.replace(\"T\",\"U\") for nuc in nucleotides],fontsize=18)\n",
"ax.set_ylabel(\"Nucleotide Count\",fontsize=14)\n",
"ax.set_xlabel(\"Nucleotide\",fontsize=14)\n",
"fig.savefig(\"18S_rRNA_control_MM_nucleotide_counts.pdf\",dpi=400)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }