{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from IPython.display import display" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def merge_df(files):\n", " merged_df = pd.DataFrame()\n", " noms = [\"MESG1\",\"MESG2\",\"MESG4\",\"MEWB5\",\"MEWB6\",\"MEWB7\"]\n", " i = 0\n", " for f in files:\n", " df = pd.read_csv(f, sep='\\t')\n", " nom_colonne = noms[i]\n", " df = df.rename(columns={\n", " 'length': f'length_{nom_colonne}',\n", " 'eff_length': f'eff_length_{nom_colonne}',\n", " 'est_counts': f'est_counts_{nom_colonne}',\n", " 'tpm': f'tpm_{nom_colonne}'\n", " })\n", " if merged_df.empty:\n", " merged_df = df\n", " else:\n", " merged_df = pd.merge(merged_df, df, on='target_id')\n", " i += 1\n", " cols = ['target_id'] \n", " types = ['length', 'eff_length', 'est_counts', 'tpm']\n", " for t in types:\n", " cols.extend([col for col in merged_df.columns if col.startswith(t)])\n", "\n", " merged_df = merged_df[cols]\n", " new_col = []\n", " for i, v in enumerate(merged_df['target_id']):\n", " v = v.split('_')[-3]+'_'+v.split('_')[-2]\n", " new_col.append(v)\n", " merged_df['product_accession'] = new_col\n", " return merged_df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "35353\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_idlength_MESG1length_MESG2length_MESG4length_MEWB5length_MEWB6length_MEWB7eff_length_MESG1eff_length_MESG2eff_length_MESG4eff_length_MEWB5eff_length_MEWB6eff_length_MEWB7est_counts_MESG1est_counts_MESG2est_counts_MESG4est_counts_MEWB5est_counts_MEWB6est_counts_MEWB7tpm_MESG1tpm_MESG2tpm_MESG4tpm_MEWB5tpm_MEWB6tpm_MEWB7product_accession
0lcl|NC_004354.4_ncrna_NR_003723.2_121421421421421421497.0000097.0000097.0000097.0000097.0000097.0000085.8989118.36854.6776313.6800333.661000247.62700053.2475046.61890033.038700454.10200399.273000299.026000NR_003723.2
1lcl|NC_004354.4_mrna_NM_001103384.3_21894189418941894189418941777.000001777.000001777.000001777.000001777.000001777.000000.00002.0000.0000671.9990224.886000856.4510000.000000.0429970.00000053.1030014.68960056.454300NM_001103384.3
2lcl|NC_004354.4_mrna_NM_001258513.2_32028202820282028202820281911.000001911.000001911.000001911.000001911.000001911.000000.00000.0000.000056.127212.0458000.0259230.000000.0000000.0000004.124310.7316650.001589NM_001258513.2
3lcl|NC_004354.4_mrna_NM_001258512.2_42036203620362036203620361919.000001919.000001919.000001919.000001919.000001919.000000.00000.0000.0000100.87400.06835295.5231000.000000.0000000.0000007.381420.0041345.830630NM_001258512.2
4lcl|NC_004354.4_mrna_NM_001297796.1_55089508950895089508950894972.000004972.000004972.000004972.000004972.000004972.00000131.9400141.57561.5141194.5240107.724000159.3700001.595621.0878200.7251515.493882.5148703.754540NM_001297796.1
.................................................................................
35348lcl|NC_024511.2_trna_Dmel_CR34091_353496666666666665.791225.791225.791225.791225.791225.791220.00000.0000.00000.00000.0000000.0000000.000000.0000000.0000000.000000.0000000.000000Dmel_CR34091
35349lcl|NC_024511.2_trna_Dmel_CR34093_353506565656565655.717295.717295.717295.717295.717295.717290.00000.0000.00000.00001.0000000.0000000.000000.0000000.0000000.0000020.3023000.000000Dmel_CR34093
35350lcl|NC_024511.2_rrna_Dmel_CR34094_353511324132413241324132413241207.000001207.000001207.000001207.000001207.000001207.00000240513.0000226787.000160627.0000566164.0000560931.000000558865.00000011981.600007178.1200007800.03000065867.8000053943.40000054235.300000Dmel_CR34094
35351lcl|NC_024511.2_trna_Dmel_CR34095_353527373737373736.365876.365876.365876.365876.365876.365870.00000.0000.00000.00000.0000000.0000000.000000.0000000.0000000.000000.0000000.000000Dmel_CR34095
35352lcl|NC_024511.2_rrna_Dmel_CR34096_35353786786786786786786669.00000669.00000669.00000669.00000669.00000669.00000150.0000172.00063.0000183.0000268.000000219.00000013.481809.8220505.51950038.4117046.49910038.344300Dmel_CR34096
\n", "

35353 rows × 26 columns

\n", "
" ], "text/plain": [ " target_id length_MESG1 length_MESG2 \\\n", "0 lcl|NC_004354.4_ncrna_NR_003723.2_1 214 214 \n", "1 lcl|NC_004354.4_mrna_NM_001103384.3_2 1894 1894 \n", "2 lcl|NC_004354.4_mrna_NM_001258513.2_3 2028 2028 \n", "3 lcl|NC_004354.4_mrna_NM_001258512.2_4 2036 2036 \n", "4 lcl|NC_004354.4_mrna_NM_001297796.1_5 5089 5089 \n", "... ... ... ... \n", "35348 lcl|NC_024511.2_trna_Dmel_CR34091_35349 66 66 \n", "35349 lcl|NC_024511.2_trna_Dmel_CR34093_35350 65 65 \n", "35350 lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 1324 \n", "35351 lcl|NC_024511.2_trna_Dmel_CR34095_35352 73 73 \n", "35352 lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786 786 \n", "\n", " length_MESG4 length_MEWB5 length_MEWB6 length_MEWB7 \\\n", "0 214 214 214 214 \n", "1 1894 1894 1894 1894 \n", "2 2028 2028 2028 2028 \n", "3 2036 2036 2036 2036 \n", "4 5089 5089 5089 5089 \n", "... ... ... ... ... \n", "35348 66 66 66 66 \n", "35349 65 65 65 65 \n", "35350 1324 1324 1324 1324 \n", "35351 73 73 73 73 \n", "35352 786 786 786 786 \n", "\n", " eff_length_MESG1 eff_length_MESG2 eff_length_MESG4 eff_length_MEWB5 \\\n", "0 97.00000 97.00000 97.00000 97.00000 \n", "1 1777.00000 1777.00000 1777.00000 1777.00000 \n", "2 1911.00000 1911.00000 1911.00000 1911.00000 \n", "3 1919.00000 1919.00000 1919.00000 1919.00000 \n", "4 4972.00000 4972.00000 4972.00000 4972.00000 \n", "... ... ... ... ... \n", "35348 5.79122 5.79122 5.79122 5.79122 \n", "35349 5.71729 5.71729 5.71729 5.71729 \n", "35350 1207.00000 1207.00000 1207.00000 1207.00000 \n", "35351 6.36587 6.36587 6.36587 6.36587 \n", "35352 669.00000 669.00000 669.00000 669.00000 \n", "\n", " eff_length_MEWB6 eff_length_MEWB7 est_counts_MESG1 est_counts_MESG2 \\\n", "0 97.00000 97.00000 85.8989 118.368 \n", "1 1777.00000 1777.00000 0.0000 2.000 \n", "2 1911.00000 1911.00000 0.0000 0.000 \n", "3 1919.00000 1919.00000 0.0000 0.000 \n", "4 4972.00000 4972.00000 131.9400 141.575 \n", "... ... ... ... ... \n", "35348 5.79122 5.79122 0.0000 0.000 \n", "35349 5.71729 5.71729 0.0000 0.000 \n", "35350 1207.00000 1207.00000 240513.0000 226787.000 \n", "35351 6.36587 6.36587 0.0000 0.000 \n", "35352 669.00000 669.00000 150.0000 172.000 \n", "\n", " est_counts_MESG4 est_counts_MEWB5 est_counts_MEWB6 est_counts_MEWB7 \\\n", "0 54.6776 313.6800 333.661000 247.627000 \n", "1 0.0000 671.9990 224.886000 856.451000 \n", "2 0.0000 56.1272 12.045800 0.025923 \n", "3 0.0000 100.8740 0.068352 95.523100 \n", "4 61.5141 194.5240 107.724000 159.370000 \n", "... ... ... ... ... \n", "35348 0.0000 0.0000 0.000000 0.000000 \n", "35349 0.0000 0.0000 1.000000 0.000000 \n", "35350 160627.0000 566164.0000 560931.000000 558865.000000 \n", "35351 0.0000 0.0000 0.000000 0.000000 \n", "35352 63.0000 183.0000 268.000000 219.000000 \n", "\n", " tpm_MESG1 tpm_MESG2 tpm_MESG4 tpm_MEWB5 tpm_MEWB6 \\\n", "0 53.24750 46.618900 33.038700 454.10200 399.273000 \n", "1 0.00000 0.042997 0.000000 53.10300 14.689600 \n", "2 0.00000 0.000000 0.000000 4.12431 0.731665 \n", "3 0.00000 0.000000 0.000000 7.38142 0.004134 \n", "4 1.59562 1.087820 0.725151 5.49388 2.514870 \n", "... ... ... ... ... ... \n", "35348 0.00000 0.000000 0.000000 0.00000 0.000000 \n", "35349 0.00000 0.000000 0.000000 0.00000 20.302300 \n", "35350 11981.60000 7178.120000 7800.030000 65867.80000 53943.400000 \n", "35351 0.00000 0.000000 0.000000 0.00000 0.000000 \n", "35352 13.48180 9.822050 5.519500 38.41170 46.499100 \n", "\n", " tpm_MEWB7 product_accession \n", "0 299.026000 NR_003723.2 \n", "1 56.454300 NM_001103384.3 \n", "2 0.001589 NM_001258513.2 \n", "3 5.830630 NM_001258512.2 \n", "4 3.754540 NM_001297796.1 \n", "... ... ... \n", "35348 0.000000 Dmel_CR34091 \n", "35349 0.000000 Dmel_CR34093 \n", "35350 54235.300000 Dmel_CR34094 \n", "35351 0.000000 Dmel_CR34095 \n", "35352 38.344300 Dmel_CR34096 \n", "\n", "[35353 rows x 26 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "files = [\n", " \"MESG1_RF/abundance.tsv\",\n", " \"MESG2_RF/abundance.tsv\",\n", " \"MESG4_RF/abundance.tsv\",\n", " \"MEWB5_RF/abundance.tsv\",\n", " \"MEWB6_RF/abundance.tsv\",\n", " \"MEWB7_RF/abundance.tsv\"\n", "]\n", "\n", "a = merge_df(files)\n", "pd.set_option('display.max_columns', None)\n", "# pd.set_option('display.max_rows', None)\n", "print(len(a))\n", "display(a)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_idlength_MESG1length_MESG2length_MESG4length_MEWB5length_MEWB6length_MEWB7eff_length_MESG1eff_length_MESG2eff_length_MESG4eff_length_MEWB5eff_length_MEWB6eff_length_MEWB7est_counts_MESG1est_counts_MESG2est_counts_MESG4est_counts_MEWB5est_counts_MEWB6est_counts_MEWB7tpm_MESG1tpm_MESG2tpm_MESG4tpm_MEWB5tpm_MEWB6tpm_MEWB7product_accessionlength_SG_moyennelength_WB_moyenneeff_length_SG_moyenneeff_length_WB_moyenneest_counts_SG_moyenneest_counts_WB_moyennetpm_SG_moyennetpm_WB_moyenne
0lcl|NC_004354.4_ncrna_NR_003723.2_121421421421421421497.0000097.0000097.0000097.0000097.0000097.0000085.8989118.36854.6776313.6800333.661000247.62700053.2475046.61890033.038700454.10200399.273000299.026000NR_003723.2214.0214.097.0000097.0000086.314833298.32266744.301700384.133667
1lcl|NC_004354.4_mrna_NM_001103384.3_21894189418941894189418941777.000001777.000001777.000001777.000001777.000001777.000000.00002.0000.0000671.9990224.886000856.4510000.000000.0429970.00000053.1030014.68960056.454300NM_001103384.31894.01894.01777.000001777.000000.666667584.4453330.01433241.415633
2lcl|NC_004354.4_mrna_NM_001258513.2_32028202820282028202820281911.000001911.000001911.000001911.000001911.000001911.000000.00000.0000.000056.127212.0458000.0259230.000000.0000000.0000004.124310.7316650.001589NM_001258513.22028.02028.01911.000001911.000000.00000022.7329740.0000001.619188
3lcl|NC_004354.4_mrna_NM_001258512.2_42036203620362036203620361919.000001919.000001919.000001919.000001919.000001919.000000.00000.0000.0000100.87400.06835295.5231000.000000.0000000.0000007.381420.0041345.830630NM_001258512.22036.02036.01919.000001919.000000.00000065.4884840.0000004.405395
4lcl|NC_004354.4_mrna_NM_001297796.1_55089508950895089508950894972.000004972.000004972.000004972.000004972.000004972.00000131.9400141.57561.5141194.5240107.724000159.3700001.595621.0878200.7251515.493882.5148703.754540NM_001297796.15089.05089.04972.000004972.00000111.676367153.8726671.1361973.921097
.........................................................................................................
35348lcl|NC_024511.2_trna_Dmel_CR34091_353496666666666665.791225.791225.791225.791225.791225.791220.00000.0000.00000.00000.0000000.0000000.000000.0000000.0000000.000000.0000000.000000Dmel_CR3409166.066.05.791225.791220.0000000.0000000.0000000.000000
35349lcl|NC_024511.2_trna_Dmel_CR34093_353506565656565655.717295.717295.717295.717295.717295.717290.00000.0000.00000.00001.0000000.0000000.000000.0000000.0000000.0000020.3023000.000000Dmel_CR3409365.065.05.717295.717290.0000000.3333330.0000006.767433
35350lcl|NC_024511.2_rrna_Dmel_CR34094_353511324132413241324132413241207.000001207.000001207.000001207.000001207.000001207.00000240513.0000226787.000160627.0000566164.0000560931.000000558865.00000011981.600007178.1200007800.03000065867.8000053943.40000054235.300000Dmel_CR340941324.01324.01207.000001207.00000209309.000000561986.6666678986.58333358015.500000
35351lcl|NC_024511.2_trna_Dmel_CR34095_353527373737373736.365876.365876.365876.365876.365876.365870.00000.0000.00000.00000.0000000.0000000.000000.0000000.0000000.000000.0000000.000000Dmel_CR3409573.073.06.365876.365870.0000000.0000000.0000000.000000
35352lcl|NC_024511.2_rrna_Dmel_CR34096_35353786786786786786786669.00000669.00000669.00000669.00000669.00000669.00000150.0000172.00063.0000183.0000268.000000219.00000013.481809.8220505.51950038.4117046.49910038.344300Dmel_CR34096786.0786.0669.00000669.00000128.333333223.3333339.60778341.085033
\n", "

35353 rows × 34 columns

\n", "
" ], "text/plain": [ " target_id length_MESG1 length_MESG2 \\\n", "0 lcl|NC_004354.4_ncrna_NR_003723.2_1 214 214 \n", "1 lcl|NC_004354.4_mrna_NM_001103384.3_2 1894 1894 \n", "2 lcl|NC_004354.4_mrna_NM_001258513.2_3 2028 2028 \n", "3 lcl|NC_004354.4_mrna_NM_001258512.2_4 2036 2036 \n", "4 lcl|NC_004354.4_mrna_NM_001297796.1_5 5089 5089 \n", "... ... ... ... \n", "35348 lcl|NC_024511.2_trna_Dmel_CR34091_35349 66 66 \n", "35349 lcl|NC_024511.2_trna_Dmel_CR34093_35350 65 65 \n", "35350 lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 1324 \n", "35351 lcl|NC_024511.2_trna_Dmel_CR34095_35352 73 73 \n", "35352 lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786 786 \n", "\n", " length_MESG4 length_MEWB5 length_MEWB6 length_MEWB7 \\\n", "0 214 214 214 214 \n", "1 1894 1894 1894 1894 \n", "2 2028 2028 2028 2028 \n", "3 2036 2036 2036 2036 \n", "4 5089 5089 5089 5089 \n", "... ... ... ... ... \n", "35348 66 66 66 66 \n", "35349 65 65 65 65 \n", "35350 1324 1324 1324 1324 \n", "35351 73 73 73 73 \n", "35352 786 786 786 786 \n", "\n", " eff_length_MESG1 eff_length_MESG2 eff_length_MESG4 eff_length_MEWB5 \\\n", "0 97.00000 97.00000 97.00000 97.00000 \n", "1 1777.00000 1777.00000 1777.00000 1777.00000 \n", "2 1911.00000 1911.00000 1911.00000 1911.00000 \n", "3 1919.00000 1919.00000 1919.00000 1919.00000 \n", "4 4972.00000 4972.00000 4972.00000 4972.00000 \n", "... ... ... ... ... \n", "35348 5.79122 5.79122 5.79122 5.79122 \n", "35349 5.71729 5.71729 5.71729 5.71729 \n", "35350 1207.00000 1207.00000 1207.00000 1207.00000 \n", "35351 6.36587 6.36587 6.36587 6.36587 \n", "35352 669.00000 669.00000 669.00000 669.00000 \n", "\n", " eff_length_MEWB6 eff_length_MEWB7 est_counts_MESG1 est_counts_MESG2 \\\n", "0 97.00000 97.00000 85.8989 118.368 \n", "1 1777.00000 1777.00000 0.0000 2.000 \n", "2 1911.00000 1911.00000 0.0000 0.000 \n", "3 1919.00000 1919.00000 0.0000 0.000 \n", "4 4972.00000 4972.00000 131.9400 141.575 \n", "... ... ... ... ... \n", "35348 5.79122 5.79122 0.0000 0.000 \n", "35349 5.71729 5.71729 0.0000 0.000 \n", "35350 1207.00000 1207.00000 240513.0000 226787.000 \n", "35351 6.36587 6.36587 0.0000 0.000 \n", "35352 669.00000 669.00000 150.0000 172.000 \n", "\n", " est_counts_MESG4 est_counts_MEWB5 est_counts_MEWB6 est_counts_MEWB7 \\\n", "0 54.6776 313.6800 333.661000 247.627000 \n", "1 0.0000 671.9990 224.886000 856.451000 \n", "2 0.0000 56.1272 12.045800 0.025923 \n", "3 0.0000 100.8740 0.068352 95.523100 \n", "4 61.5141 194.5240 107.724000 159.370000 \n", "... ... ... ... ... \n", "35348 0.0000 0.0000 0.000000 0.000000 \n", "35349 0.0000 0.0000 1.000000 0.000000 \n", "35350 160627.0000 566164.0000 560931.000000 558865.000000 \n", "35351 0.0000 0.0000 0.000000 0.000000 \n", "35352 63.0000 183.0000 268.000000 219.000000 \n", "\n", " tpm_MESG1 tpm_MESG2 tpm_MESG4 tpm_MEWB5 tpm_MEWB6 \\\n", "0 53.24750 46.618900 33.038700 454.10200 399.273000 \n", "1 0.00000 0.042997 0.000000 53.10300 14.689600 \n", "2 0.00000 0.000000 0.000000 4.12431 0.731665 \n", "3 0.00000 0.000000 0.000000 7.38142 0.004134 \n", "4 1.59562 1.087820 0.725151 5.49388 2.514870 \n", "... ... ... ... ... ... \n", "35348 0.00000 0.000000 0.000000 0.00000 0.000000 \n", "35349 0.00000 0.000000 0.000000 0.00000 20.302300 \n", "35350 11981.60000 7178.120000 7800.030000 65867.80000 53943.400000 \n", "35351 0.00000 0.000000 0.000000 0.00000 0.000000 \n", "35352 13.48180 9.822050 5.519500 38.41170 46.499100 \n", "\n", " tpm_MEWB7 product_accession length_SG_moyenne length_WB_moyenne \\\n", "0 299.026000 NR_003723.2 214.0 214.0 \n", "1 56.454300 NM_001103384.3 1894.0 1894.0 \n", "2 0.001589 NM_001258513.2 2028.0 2028.0 \n", "3 5.830630 NM_001258512.2 2036.0 2036.0 \n", "4 3.754540 NM_001297796.1 5089.0 5089.0 \n", "... ... ... ... ... \n", "35348 0.000000 Dmel_CR34091 66.0 66.0 \n", "35349 0.000000 Dmel_CR34093 65.0 65.0 \n", "35350 54235.300000 Dmel_CR34094 1324.0 1324.0 \n", "35351 0.000000 Dmel_CR34095 73.0 73.0 \n", "35352 38.344300 Dmel_CR34096 786.0 786.0 \n", "\n", " eff_length_SG_moyenne eff_length_WB_moyenne est_counts_SG_moyenne \\\n", "0 97.00000 97.00000 86.314833 \n", "1 1777.00000 1777.00000 0.666667 \n", "2 1911.00000 1911.00000 0.000000 \n", "3 1919.00000 1919.00000 0.000000 \n", "4 4972.00000 4972.00000 111.676367 \n", "... ... ... ... \n", "35348 5.79122 5.79122 0.000000 \n", "35349 5.71729 5.71729 0.000000 \n", "35350 1207.00000 1207.00000 209309.000000 \n", "35351 6.36587 6.36587 0.000000 \n", "35352 669.00000 669.00000 128.333333 \n", "\n", " est_counts_WB_moyenne tpm_SG_moyenne tpm_WB_moyenne \n", "0 298.322667 44.301700 384.133667 \n", "1 584.445333 0.014332 41.415633 \n", "2 22.732974 0.000000 1.619188 \n", "3 65.488484 0.000000 4.405395 \n", "4 153.872667 1.136197 3.921097 \n", "... ... ... ... \n", "35348 0.000000 0.000000 0.000000 \n", "35349 0.333333 0.000000 6.767433 \n", "35350 561986.666667 8986.583333 58015.500000 \n", "35351 0.000000 0.000000 0.000000 \n", "35352 223.333333 9.607783 41.085033 \n", "\n", "[35353 rows x 34 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def moyenne(dataframe, col1, col2, col3):\n", " if len(col1.split('_')) == 2:\n", " nom = col1.split('_')[0]+'_'+col1.split('_')[1][2:4]\n", " else:\n", " nom = col1.split('_')[0]+'_'+col1.split('_')[1]+'_'+col1.split('_')[2][2:4]\n", " dataframe[f'{nom}_moyenne'] = dataframe[[col1, col2, col3]].mean(axis=1)\n", " return dataframe\n", "\n", "columns = a.columns\n", "for i in range(1, len(columns) - 2, 3):\n", " col1, col2, col3 = columns[i], columns[i+1], columns[i+2]\n", " a = moyenne(a, col1, col2, col3)\n", "\n", "display(a)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "tab_corr = pd.read_table('/Users/arthur/PROJECTS/Stage_IJM/data/genomes/melanogaster/GCF_000001215.4_Release_6_plus_ISO1_MT_feature_table.txt')\n", "tab_corr = tab_corr[tab_corr['product_accession'].isin(a['product_accession'])]\n", "tab_corr = tab_corr.reset_index()\n", "valeurs = tab_corr['product_accession'].values\n", "missing_values = []\n", "for i in range(len(a)):\n", " if a['product_accession'][i] not in tab_corr['product_accession'].values:\n", " missing_values.append(a['product_accession'][i])\n", "missing_df = pd.DataFrame({'product_accession': missing_values})\n", "tab_corr = pd.concat([tab_corr, missing_df])\n", "tab_corr.to_csv('table_meta.csv', index=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
length_MESG1length_MESG2length_MESG4length_MEWB5length_MEWB6length_MEWB7eff_length_MESG1eff_length_MESG2eff_length_MESG4eff_length_MEWB5...tpm_MEWB7product_accessionlength_SG_moyennelength_WB_moyenneeff_length_SG_moyenneeff_length_WB_moyenneest_counts_SG_moyenneest_counts_WB_moyennetpm_SG_moyennetpm_WB_moyenne
target_id
lcl|NC_004354.4_ncrna_NR_003723.2_121421421421421421497.0000097.0000097.0000097.00000...299.026000NR_003723.2214.0214.097.0000097.0000086.314833298.32266744.301700384.133667
lcl|NC_004354.4_mrna_NM_001103384.3_21894189418941894189418941777.000001777.000001777.000001777.00000...56.454300NM_001103384.31894.01894.01777.000001777.000000.666667584.4453330.01433241.415633
lcl|NC_004354.4_mrna_NM_001258513.2_32028202820282028202820281911.000001911.000001911.000001911.00000...0.001589NM_001258513.22028.02028.01911.000001911.000000.00000022.7329740.0000001.619188
lcl|NC_004354.4_mrna_NM_001258512.2_42036203620362036203620361919.000001919.000001919.000001919.00000...5.830630NM_001258512.22036.02036.01919.000001919.000000.00000065.4884840.0000004.405395
lcl|NC_004354.4_mrna_NM_001297796.1_55089508950895089508950894972.000004972.000004972.000004972.00000...3.754540NM_001297796.15089.05089.04972.000004972.00000111.676367153.8726671.1361973.921097
..................................................................
lcl|NC_024511.2_trna_Dmel_CR34091_353496666666666665.791225.791225.791225.79122...0.000000Dmel_CR3409166.066.05.791225.791220.0000000.0000000.0000000.000000
lcl|NC_024511.2_trna_Dmel_CR34093_353506565656565655.717295.717295.717295.71729...0.000000Dmel_CR3409365.065.05.717295.717290.0000000.3333330.0000006.767433
lcl|NC_024511.2_rrna_Dmel_CR34094_353511324132413241324132413241207.000001207.000001207.000001207.00000...54235.300000Dmel_CR340941324.01324.01207.000001207.00000209309.000000561986.6666678986.58333358015.500000
lcl|NC_024511.2_trna_Dmel_CR34095_353527373737373736.365876.365876.365876.36587...0.000000Dmel_CR3409573.073.06.365876.365870.0000000.0000000.0000000.000000
lcl|NC_024511.2_rrna_Dmel_CR34096_35353786786786786786786669.00000669.00000669.00000669.00000...38.344300Dmel_CR34096786.0786.0669.00000669.00000128.333333223.3333339.60778341.085033
\n", "

35353 rows × 33 columns

\n", "
" ], "text/plain": [ " length_MESG1 length_MESG2 \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 214 214 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1894 1894 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 2028 2028 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 2036 2036 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 5089 5089 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 66 66 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 65 65 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 1324 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 73 73 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786 786 \n", "\n", " length_MESG4 length_MEWB5 \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 214 214 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1894 1894 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 2028 2028 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 2036 2036 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 5089 5089 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 66 66 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 65 65 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 1324 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 73 73 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786 786 \n", "\n", " length_MEWB6 length_MEWB7 \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 214 214 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1894 1894 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 2028 2028 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 2036 2036 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 5089 5089 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 66 66 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 65 65 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 1324 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 73 73 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786 786 \n", "\n", " eff_length_MESG1 eff_length_MESG2 \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 97.00000 97.00000 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1777.00000 1777.00000 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 1911.00000 1911.00000 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 1919.00000 1919.00000 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 4972.00000 4972.00000 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 5.79122 5.79122 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 5.71729 5.71729 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1207.00000 1207.00000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 6.36587 6.36587 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 669.00000 669.00000 \n", "\n", " eff_length_MESG4 eff_length_MEWB5 \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 97.00000 97.00000 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1777.00000 1777.00000 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 1911.00000 1911.00000 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 1919.00000 1919.00000 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 4972.00000 4972.00000 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 5.79122 5.79122 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 5.71729 5.71729 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1207.00000 1207.00000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 6.36587 6.36587 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 669.00000 669.00000 \n", "\n", " ... tpm_MEWB7 product_accession \\\n", "target_id ... \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 ... 299.026000 NR_003723.2 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 ... 56.454300 NM_001103384.3 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 ... 0.001589 NM_001258513.2 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 ... 5.830630 NM_001258512.2 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 ... 3.754540 NM_001297796.1 \n", "... ... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 ... 0.000000 Dmel_CR34091 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 ... 0.000000 Dmel_CR34093 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 ... 54235.300000 Dmel_CR34094 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 ... 0.000000 Dmel_CR34095 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 ... 38.344300 Dmel_CR34096 \n", "\n", " length_SG_moyenne length_WB_moyenne \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 214.0 214.0 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1894.0 1894.0 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 2028.0 2028.0 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 2036.0 2036.0 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 5089.0 5089.0 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 66.0 66.0 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 65.0 65.0 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324.0 1324.0 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 73.0 73.0 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 786.0 786.0 \n", "\n", " eff_length_SG_moyenne \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 97.00000 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1777.00000 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 1911.00000 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 1919.00000 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 4972.00000 \n", "... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 5.79122 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 5.71729 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1207.00000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 6.36587 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 669.00000 \n", "\n", " eff_length_WB_moyenne \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 97.00000 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 1777.00000 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 1911.00000 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 1919.00000 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 4972.00000 \n", "... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 5.79122 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 5.71729 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1207.00000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 6.36587 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 669.00000 \n", "\n", " est_counts_SG_moyenne \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 86.314833 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 0.666667 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 0.000000 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 0.000000 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 111.676367 \n", "... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 0.000000 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 0.000000 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 209309.000000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 0.000000 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 128.333333 \n", "\n", " est_counts_WB_moyenne \\\n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 298.322667 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 584.445333 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 22.732974 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 65.488484 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 153.872667 \n", "... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 0.000000 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 0.333333 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 561986.666667 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 0.000000 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 223.333333 \n", "\n", " tpm_SG_moyenne tpm_WB_moyenne \n", "target_id \n", "lcl|NC_004354.4_ncrna_NR_003723.2_1 44.301700 384.133667 \n", "lcl|NC_004354.4_mrna_NM_001103384.3_2 0.014332 41.415633 \n", "lcl|NC_004354.4_mrna_NM_001258513.2_3 0.000000 1.619188 \n", "lcl|NC_004354.4_mrna_NM_001258512.2_4 0.000000 4.405395 \n", "lcl|NC_004354.4_mrna_NM_001297796.1_5 1.136197 3.921097 \n", "... ... ... \n", "lcl|NC_024511.2_trna_Dmel_CR34091_35349 0.000000 0.000000 \n", "lcl|NC_024511.2_trna_Dmel_CR34093_35350 0.000000 6.767433 \n", "lcl|NC_024511.2_rrna_Dmel_CR34094_35351 8986.583333 58015.500000 \n", "lcl|NC_024511.2_trna_Dmel_CR34095_35352 0.000000 0.000000 \n", "lcl|NC_024511.2_rrna_Dmel_CR34096_35353 9.607783 41.085033 \n", "\n", "[35353 rows x 33 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_idindexlcl|NC_004354.4_ncrna_NR_003723.2_1lcl|NC_004354.4_mrna_NM_001103384.3_2lcl|NC_004354.4_mrna_NM_001258513.2_3lcl|NC_004354.4_mrna_NM_001258512.2_4lcl|NC_004354.4_mrna_NM_001297796.1_5lcl|NC_004354.4_mrna_NM_001297795.1_6lcl|NC_004354.4_mrna_NM_001103385.2_7lcl|NC_004354.4_mrna_NM_001103386.2_8lcl|NC_004354.4_mrna_NM_001169155.1_9...lcl|NC_024511.2_trna_Dmel_CR34081_35344lcl|NC_024511.2_trna_Dmel_CR34082_35345lcl|NC_024511.2_trna_Dmel_CR34084_35346lcl|NC_024511.2_trna_Dmel_CR34087_35347lcl|NC_024511.2_trna_Dmel_CR34088_35348lcl|NC_024511.2_trna_Dmel_CR34091_35349lcl|NC_024511.2_trna_Dmel_CR34093_35350lcl|NC_024511.2_rrna_Dmel_CR34094_35351lcl|NC_024511.2_trna_Dmel_CR34095_35352lcl|NC_024511.2_rrna_Dmel_CR34096_35353
0length_MESG121418942028203650895083508250124254...67656666616665132473786
1length_MESG221418942028203650895083508250124254...67656666616665132473786
2length_MESG421418942028203650895083508250124254...67656666616665132473786
3length_MEWB521418942028203650895083508250124254...67656666616665132473786
4length_MEWB621418942028203650895083508250124254...67656666616665132473786
5length_MEWB721418942028203650895083508250124254...67656666616665132473786
6eff_length_MESG197.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
7eff_length_MESG297.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
8eff_length_MESG497.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
9eff_length_MEWB597.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
10eff_length_MEWB697.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
11eff_length_MEWB797.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
12est_counts_MESG185.89890.00.00.0131.940.00000287.2389547.7420.0...0.00.00.00.00.00.00.0240513.00.0150.0
13est_counts_MESG2118.3682.00.00.0141.5750.0000387.3975507.7210.0...0.00.00.00.00.00.00.0226787.00.0172.0
14est_counts_MESG454.67760.00.00.061.51410.00001660.2771186.8670.0...0.00.00.00.00.00.00.0160627.00.063.0
15est_counts_MEWB5313.68671.99956.1272100.874194.5240.000013276.511374.2315.66697...0.00.00.00.00.00.00.0566164.00.0183.0
16est_counts_MEWB6333.661224.88612.04580.068352107.7240.0870.855146.1390.0...0.00.00.00.00.00.01.0560931.00.0268.0
17est_counts_MEWB7247.627856.4510.02592395.5231159.370.0668.214260.31213.3881...0.00.00.00.00.00.00.0558865.00.0219.0
18tpm_MESG153.24750.00.00.01.595620.01.056516.728320.0...0.00.00.00.00.00.00.011981.60.013.4818
19tpm_MESG246.61890.0429970.00.01.087820.00.672483.962530.0...0.00.00.00.00.00.00.07178.120.09.82205
20tpm_MESG433.03870.00.00.00.7251510.00.7115712.237510.0...0.00.00.00.00.00.00.07800.030.05.5195
21tpm_MEWB5454.10253.1034.124317.381425.493880.07.8204310.73560.192355...0.00.00.00.00.00.00.065867.80.038.4117
22tpm_MEWB6399.27314.68960.7316650.0041342.514870.020.35933.465370.0...0.00.00.00.00.00.020.302353943.40.046.4991
23tpm_MEWB7299.02656.45430.0015895.830633.754540.015.76446.229070.379065...0.00.00.00.00.00.00.054235.30.038.3443
24product_accessionNR_003723.2NM_001103384.3NM_001258513.2NM_001258512.2NM_001297796.1NM_001297795.1NM_001103385.2NM_001103386.2NM_001169155.1...Dmel_CR34081Dmel_CR34082Dmel_CR34084Dmel_CR34087Dmel_CR34088Dmel_CR34091Dmel_CR34093Dmel_CR34094Dmel_CR34095Dmel_CR34096
25length_SG_moyenne214.01894.02028.02036.05089.05083.05082.05012.04254.0...67.065.066.066.061.066.065.01324.073.0786.0
26length_WB_moyenne214.01894.02028.02036.05089.05083.05082.05012.04254.0...67.065.066.066.061.066.065.01324.073.0786.0
27eff_length_SG_moyenne97.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
28eff_length_WB_moyenne97.01777.01911.01919.04972.04966.04965.04895.04137.0...5.867055.717295.791225.791225.439375.791225.717291207.06.36587669.0
29est_counts_SG_moyenne86.3148330.6666670.00.0111.6763670.00001678.3045414.110.0...0.00.00.00.00.00.00.0209309.00.0128.333333
30est_counts_WB_moyenne298.322667584.44533322.73297465.488484153.8726670.000005605.193333260.2273336.35169...0.00.00.00.00.00.00.333333561986.6666670.0223.333333
31tpm_SG_moyenne44.30170.0143320.00.01.1361970.00.813524.3094530.0...0.00.00.00.00.00.00.08986.5833330.09.607783
32tpm_WB_moyenne384.13366741.4156331.6191884.4053953.9210970.014.6480436.8100130.190473...0.00.00.00.00.00.06.76743358015.50.041.085033
\n", "

33 rows × 35354 columns

\n", "
" ], "text/plain": [ "target_id index lcl|NC_004354.4_ncrna_NR_003723.2_1 \\\n", "0 length_MESG1 214 \n", "1 length_MESG2 214 \n", "2 length_MESG4 214 \n", "3 length_MEWB5 214 \n", "4 length_MEWB6 214 \n", "5 length_MEWB7 214 \n", "6 eff_length_MESG1 97.0 \n", "7 eff_length_MESG2 97.0 \n", "8 eff_length_MESG4 97.0 \n", "9 eff_length_MEWB5 97.0 \n", "10 eff_length_MEWB6 97.0 \n", "11 eff_length_MEWB7 97.0 \n", "12 est_counts_MESG1 85.8989 \n", "13 est_counts_MESG2 118.368 \n", "14 est_counts_MESG4 54.6776 \n", "15 est_counts_MEWB5 313.68 \n", "16 est_counts_MEWB6 333.661 \n", "17 est_counts_MEWB7 247.627 \n", "18 tpm_MESG1 53.2475 \n", "19 tpm_MESG2 46.6189 \n", "20 tpm_MESG4 33.0387 \n", "21 tpm_MEWB5 454.102 \n", "22 tpm_MEWB6 399.273 \n", "23 tpm_MEWB7 299.026 \n", "24 product_accession NR_003723.2 \n", "25 length_SG_moyenne 214.0 \n", "26 length_WB_moyenne 214.0 \n", "27 eff_length_SG_moyenne 97.0 \n", "28 eff_length_WB_moyenne 97.0 \n", "29 est_counts_SG_moyenne 86.314833 \n", "30 est_counts_WB_moyenne 298.322667 \n", "31 tpm_SG_moyenne 44.3017 \n", "32 tpm_WB_moyenne 384.133667 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001103384.3_2 \\\n", "0 1894 \n", "1 1894 \n", "2 1894 \n", "3 1894 \n", "4 1894 \n", "5 1894 \n", "6 1777.0 \n", "7 1777.0 \n", "8 1777.0 \n", "9 1777.0 \n", "10 1777.0 \n", "11 1777.0 \n", "12 0.0 \n", "13 2.0 \n", "14 0.0 \n", "15 671.999 \n", "16 224.886 \n", "17 856.451 \n", "18 0.0 \n", "19 0.042997 \n", "20 0.0 \n", "21 53.103 \n", "22 14.6896 \n", "23 56.4543 \n", "24 NM_001103384.3 \n", "25 1894.0 \n", "26 1894.0 \n", "27 1777.0 \n", "28 1777.0 \n", "29 0.666667 \n", "30 584.445333 \n", "31 0.014332 \n", "32 41.415633 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001258513.2_3 \\\n", "0 2028 \n", "1 2028 \n", "2 2028 \n", "3 2028 \n", "4 2028 \n", "5 2028 \n", "6 1911.0 \n", "7 1911.0 \n", "8 1911.0 \n", "9 1911.0 \n", "10 1911.0 \n", "11 1911.0 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 56.1272 \n", "16 12.0458 \n", "17 0.025923 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 4.12431 \n", "22 0.731665 \n", "23 0.001589 \n", "24 NM_001258513.2 \n", "25 2028.0 \n", "26 2028.0 \n", "27 1911.0 \n", "28 1911.0 \n", "29 0.0 \n", "30 22.732974 \n", "31 0.0 \n", "32 1.619188 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001258512.2_4 \\\n", "0 2036 \n", "1 2036 \n", "2 2036 \n", "3 2036 \n", "4 2036 \n", "5 2036 \n", "6 1919.0 \n", "7 1919.0 \n", "8 1919.0 \n", "9 1919.0 \n", "10 1919.0 \n", "11 1919.0 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 100.874 \n", "16 0.068352 \n", "17 95.5231 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 7.38142 \n", "22 0.004134 \n", "23 5.83063 \n", "24 NM_001258512.2 \n", "25 2036.0 \n", "26 2036.0 \n", "27 1919.0 \n", "28 1919.0 \n", "29 0.0 \n", "30 65.488484 \n", "31 0.0 \n", "32 4.405395 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001297796.1_5 \\\n", "0 5089 \n", "1 5089 \n", "2 5089 \n", "3 5089 \n", "4 5089 \n", "5 5089 \n", "6 4972.0 \n", "7 4972.0 \n", "8 4972.0 \n", "9 4972.0 \n", "10 4972.0 \n", "11 4972.0 \n", "12 131.94 \n", "13 141.575 \n", "14 61.5141 \n", "15 194.524 \n", "16 107.724 \n", "17 159.37 \n", "18 1.59562 \n", "19 1.08782 \n", "20 0.725151 \n", "21 5.49388 \n", "22 2.51487 \n", "23 3.75454 \n", "24 NM_001297796.1 \n", "25 5089.0 \n", "26 5089.0 \n", "27 4972.0 \n", "28 4972.0 \n", "29 111.676367 \n", "30 153.872667 \n", "31 1.136197 \n", "32 3.921097 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001297795.1_6 \\\n", "0 5083 \n", "1 5083 \n", "2 5083 \n", "3 5083 \n", "4 5083 \n", "5 5083 \n", "6 4966.0 \n", "7 4966.0 \n", "8 4966.0 \n", "9 4966.0 \n", "10 4966.0 \n", "11 4966.0 \n", "12 0.000002 \n", "13 0.00003 \n", "14 0.000016 \n", "15 0.000013 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 NM_001297795.1 \n", "25 5083.0 \n", "26 5083.0 \n", "27 4966.0 \n", "28 4966.0 \n", "29 0.000016 \n", "30 0.000005 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001103385.2_7 \\\n", "0 5082 \n", "1 5082 \n", "2 5082 \n", "3 5082 \n", "4 5082 \n", "5 5082 \n", "6 4965.0 \n", "7 4965.0 \n", "8 4965.0 \n", "9 4965.0 \n", "10 4965.0 \n", "11 4965.0 \n", "12 87.2389 \n", "13 87.3975 \n", "14 60.2771 \n", "15 276.511 \n", "16 870.855 \n", "17 668.214 \n", "18 1.05651 \n", "19 0.67248 \n", "20 0.711571 \n", "21 7.82043 \n", "22 20.3593 \n", "23 15.7644 \n", "24 NM_001103385.2 \n", "25 5082.0 \n", "26 5082.0 \n", "27 4965.0 \n", "28 4965.0 \n", "29 78.3045 \n", "30 605.193333 \n", "31 0.81352 \n", "32 14.648043 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001103386.2_8 \\\n", "0 5012 \n", "1 5012 \n", "2 5012 \n", "3 5012 \n", "4 5012 \n", "5 5012 \n", "6 4895.0 \n", "7 4895.0 \n", "8 4895.0 \n", "9 4895.0 \n", "10 4895.0 \n", "11 4895.0 \n", "12 547.742 \n", "13 507.721 \n", "14 186.867 \n", "15 374.231 \n", "16 146.139 \n", "17 260.312 \n", "18 6.72832 \n", "19 3.96253 \n", "20 2.23751 \n", "21 10.7356 \n", "22 3.46537 \n", "23 6.22907 \n", "24 NM_001103386.2 \n", "25 5012.0 \n", "26 5012.0 \n", "27 4895.0 \n", "28 4895.0 \n", "29 414.11 \n", "30 260.227333 \n", "31 4.309453 \n", "32 6.810013 \n", "\n", "target_id lcl|NC_004354.4_mrna_NM_001169155.1_9 ... \\\n", "0 4254 ... \n", "1 4254 ... \n", "2 4254 ... \n", "3 4254 ... \n", "4 4254 ... \n", "5 4254 ... \n", "6 4137.0 ... \n", "7 4137.0 ... \n", "8 4137.0 ... \n", "9 4137.0 ... \n", "10 4137.0 ... \n", "11 4137.0 ... \n", "12 0.0 ... \n", "13 0.0 ... \n", "14 0.0 ... \n", "15 5.66697 ... \n", "16 0.0 ... \n", "17 13.3881 ... \n", "18 0.0 ... \n", "19 0.0 ... \n", "20 0.0 ... \n", "21 0.192355 ... \n", "22 0.0 ... \n", "23 0.379065 ... \n", "24 NM_001169155.1 ... \n", "25 4254.0 ... \n", "26 4254.0 ... \n", "27 4137.0 ... \n", "28 4137.0 ... \n", "29 0.0 ... \n", "30 6.35169 ... \n", "31 0.0 ... \n", "32 0.190473 ... \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34081_35344 \\\n", "0 67 \n", "1 67 \n", "2 67 \n", "3 67 \n", "4 67 \n", "5 67 \n", "6 5.86705 \n", "7 5.86705 \n", "8 5.86705 \n", "9 5.86705 \n", "10 5.86705 \n", "11 5.86705 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34081 \n", "25 67.0 \n", "26 67.0 \n", "27 5.86705 \n", "28 5.86705 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34082_35345 \\\n", "0 65 \n", "1 65 \n", "2 65 \n", "3 65 \n", "4 65 \n", "5 65 \n", "6 5.71729 \n", "7 5.71729 \n", "8 5.71729 \n", "9 5.71729 \n", "10 5.71729 \n", "11 5.71729 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34082 \n", "25 65.0 \n", "26 65.0 \n", "27 5.71729 \n", "28 5.71729 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34084_35346 \\\n", "0 66 \n", "1 66 \n", "2 66 \n", "3 66 \n", "4 66 \n", "5 66 \n", "6 5.79122 \n", "7 5.79122 \n", "8 5.79122 \n", "9 5.79122 \n", "10 5.79122 \n", "11 5.79122 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34084 \n", "25 66.0 \n", "26 66.0 \n", "27 5.79122 \n", "28 5.79122 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34087_35347 \\\n", "0 66 \n", "1 66 \n", "2 66 \n", "3 66 \n", "4 66 \n", "5 66 \n", "6 5.79122 \n", "7 5.79122 \n", "8 5.79122 \n", "9 5.79122 \n", "10 5.79122 \n", "11 5.79122 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34087 \n", "25 66.0 \n", "26 66.0 \n", "27 5.79122 \n", "28 5.79122 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34088_35348 \\\n", "0 61 \n", "1 61 \n", "2 61 \n", "3 61 \n", "4 61 \n", "5 61 \n", "6 5.43937 \n", "7 5.43937 \n", "8 5.43937 \n", "9 5.43937 \n", "10 5.43937 \n", "11 5.43937 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34088 \n", "25 61.0 \n", "26 61.0 \n", "27 5.43937 \n", "28 5.43937 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34091_35349 \\\n", "0 66 \n", "1 66 \n", "2 66 \n", "3 66 \n", "4 66 \n", "5 66 \n", "6 5.79122 \n", "7 5.79122 \n", "8 5.79122 \n", "9 5.79122 \n", "10 5.79122 \n", "11 5.79122 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34091 \n", "25 66.0 \n", "26 66.0 \n", "27 5.79122 \n", "28 5.79122 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34093_35350 \\\n", "0 65 \n", "1 65 \n", "2 65 \n", "3 65 \n", "4 65 \n", "5 65 \n", "6 5.71729 \n", "7 5.71729 \n", "8 5.71729 \n", "9 5.71729 \n", "10 5.71729 \n", "11 5.71729 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 1.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 20.3023 \n", "23 0.0 \n", "24 Dmel_CR34093 \n", "25 65.0 \n", "26 65.0 \n", "27 5.71729 \n", "28 5.71729 \n", "29 0.0 \n", "30 0.333333 \n", "31 0.0 \n", "32 6.767433 \n", "\n", "target_id lcl|NC_024511.2_rrna_Dmel_CR34094_35351 \\\n", "0 1324 \n", "1 1324 \n", "2 1324 \n", "3 1324 \n", "4 1324 \n", "5 1324 \n", "6 1207.0 \n", "7 1207.0 \n", "8 1207.0 \n", "9 1207.0 \n", "10 1207.0 \n", "11 1207.0 \n", "12 240513.0 \n", "13 226787.0 \n", "14 160627.0 \n", "15 566164.0 \n", "16 560931.0 \n", "17 558865.0 \n", "18 11981.6 \n", "19 7178.12 \n", "20 7800.03 \n", "21 65867.8 \n", "22 53943.4 \n", "23 54235.3 \n", "24 Dmel_CR34094 \n", "25 1324.0 \n", "26 1324.0 \n", "27 1207.0 \n", "28 1207.0 \n", "29 209309.0 \n", "30 561986.666667 \n", "31 8986.583333 \n", "32 58015.5 \n", "\n", "target_id lcl|NC_024511.2_trna_Dmel_CR34095_35352 \\\n", "0 73 \n", "1 73 \n", "2 73 \n", "3 73 \n", "4 73 \n", "5 73 \n", "6 6.36587 \n", "7 6.36587 \n", "8 6.36587 \n", "9 6.36587 \n", "10 6.36587 \n", "11 6.36587 \n", "12 0.0 \n", "13 0.0 \n", "14 0.0 \n", "15 0.0 \n", "16 0.0 \n", "17 0.0 \n", "18 0.0 \n", "19 0.0 \n", "20 0.0 \n", "21 0.0 \n", "22 0.0 \n", "23 0.0 \n", "24 Dmel_CR34095 \n", "25 73.0 \n", "26 73.0 \n", "27 6.36587 \n", "28 6.36587 \n", "29 0.0 \n", "30 0.0 \n", "31 0.0 \n", "32 0.0 \n", "\n", "target_id lcl|NC_024511.2_rrna_Dmel_CR34096_35353 \n", "0 786 \n", "1 786 \n", "2 786 \n", "3 786 \n", "4 786 \n", "5 786 \n", "6 669.0 \n", "7 669.0 \n", "8 669.0 \n", "9 669.0 \n", "10 669.0 \n", "11 669.0 \n", "12 150.0 \n", "13 172.0 \n", "14 63.0 \n", "15 183.0 \n", "16 268.0 \n", "17 219.0 \n", "18 13.4818 \n", "19 9.82205 \n", "20 5.5195 \n", "21 38.4117 \n", "22 46.4991 \n", "23 38.3443 \n", "24 Dmel_CR34096 \n", "25 786.0 \n", "26 786.0 \n", "27 669.0 \n", "28 669.0 \n", "29 128.333333 \n", "30 223.333333 \n", "31 9.607783 \n", "32 41.085033 \n", "\n", "[33 rows x 35354 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.reset_option('display.max_columns')\n", "a2 = a\n", "a2 = a2.set_index('target_id')\n", "display(a2)\n", "a2 = a2.transpose()\n", "a2 = a2.reset_index()\n", "display(a2)\n", "a2.to_csv('count_ME_RF.tsv', sep='\\t', index=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indextarget_idlength_MESG1length_MESG2length_MESG4length_MEWB5length_MEWB6length_MEWB7eff_length_MESG1eff_length_MESG2eff_length_MESG4eff_length_MEWB5eff_length_MEWB6eff_length_MEWB7est_counts_MESG1est_counts_MESG2est_counts_MESG4est_counts_MEWB5est_counts_MEWB6est_counts_MEWB7tpm_MESG1tpm_MESG2tpm_MESG4tpm_MEWB5tpm_MEWB6tpm_MEWB7product_accessionlength_SG_moyennelength_WB_moyenneeff_length_SG_moyenneeff_length_WB_moyenneest_counts_SG_moyenneest_counts_WB_moyennetpm_SG_moyennetpm_WB_moyenne# featureclassassemblyassembly_unitseq_typechromosomegenomic_accessionstartendstrandnon-redundant_refseqrelated_accessionnamesymbolGeneIDlocus_tagfeature_interval_lengthproduct_lengthattributes
00lcl|NT_037436.4_mrna_NM_057370.2_23336321321321321321321204.0204.0204.0204.0204.0204.0793608.001403050.00976193.0023622.000050137.00021288.0000233916.000262750.000280473.00016260.200028527.500012223.30000NM_057370.2321.0321.0204.0204.01.057617e+0631682.333333259046.33333319003.666667mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.411509861.011510247.0+NaNNP_476718.1salivary gland secretion 7Sgs747198.0Dmel_CG18087321.0321.0NaN
11lcl|NT_037436.4_mrna_NM_079300.3_23338110911091109110911091109992.0992.0992.0992.0992.0992.02990690.003463760.002744310.0054611.0000102641.00072618.0000181277.000133394.000162146.0007730.480012010.10008574.62000NM_079300.31109.01109.0992.0992.03.066253e+0676623.333333158939.0000009438.400000mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.411512221.011513402.0+NaNNP_524024.1salivary gland secretion 3Sgs339288.0Dmel_CG117201109.01109.0NaN
22lcl|NT_037436.4_mrna_NM_057371.3_23335359359359359359359242.0242.0242.0242.0242.0242.0233229.00424202.00269983.007851.000014900.0005896.000057949.60066966.40065389.2004555.62007146.72002853.81000NM_057371.3359.0359.0242.0242.03.091380e+059549.00000063435.0666674852.050000mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.411508967.011509394.0-NaNNP_476719.2salivary gland secretion 8Sgs839285.0Dmel_CG6132359.0359.0NaN
33lcl|NC_004354.4_mrna_NM_057369.4_932952952952952952952835.0835.0835.0835.0835.0835.0675111.001318340.00751776.0021167.000024247.00017947.000048615.20060317.10052770.0003559.68003370.60002517.61000NM_057369.4952.0952.0835.0835.09.150757e+0521120.33333353900.7666673149.296667mRNANaNGCF_000001215.4Primary AssemblychromosomeXNC_004354.43250089.03251040.0+NaNNP_476717.4salivary gland secretion 4Sgs431304.0Dmel_CG12181952.0952.0NaN
44lcl|NT_033777.3_mrna_NM_079664.2_30721636636636636636636519.0519.0519.0519.0519.0519.0482371.00514361.00410262.0010090.000031823.0008973.000055885.20037861.70046331.8002730.00007117.21002025.13000NM_079664.2636.0636.0519.0519.04.689980e+0516962.00000046692.9000003957.446667mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.317595362.017596112.0+NaNNP_524388.1salivary gland secretion 5Sgs542114.0Dmel_CG7596636.0636.0NaN
55lcl|NT_033777.3_mrna_NM_001170166.1_30723327327327327327327210.0210.0210.0210.0210.0210.0124351.00267635.00154320.006593.18006565.9504278.880035605.10048688.20043071.3004408.73003629.22002386.67000NM_001170166.1327.0327.0210.0210.01.821020e+055812.67000042454.8666673474.873333mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.317596536.017596917.0+NaNNP_001163637.1uncharacterized protein, transcript variant BCG76068673956.0Dmel_CG7606327.0327.0NaN
66lcl|NT_033777.3_mrna_NM_169764.3_30720550550550550550550433.0433.0433.0433.0433.0433.096862.00603219.00165003.004419.00004107.0001322.000013450.80053221.50022335.2001433.09001100.9600357.62300NM_169764.3550.0550.0433.0433.02.883613e+053282.66666729669.166667963.891000mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.317594386.017595057.0+NaNNP_732246.1Sgs5bisSgs5bis42113.0Dmel_CG7587550.0550.0NaN
77lcl|NT_037436.4_mrna_NM_140480.3_24230544544544544544544427.0427.0427.0427.0427.0427.0197529.00277231.00166652.0010451.000012853.0005443.000027815.50024803.50022875.3003436.91003493.92001493.11000NM_140480.3544.0544.0427.0427.02.138040e+059582.33333325164.7666672807.980000mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.415050263.015050806.0-NaNNP_648737.1uncharacterized proteinCG1231039634.0Dmel_CG12310544.0544.0NaN
88lcl|NT_037436.4_mrna_NM_079368.3_243971486148614861486148614861369.01369.01369.01369.01369.01369.0539135.00858612.00396808.0023105.000011017.00010375.000023679.80023960.40016988.8002369.9600934.1050887.70100NM_079368.31486.01486.01369.01369.05.981850e+0514832.33333321543.0000001397.255333mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.415654472.015656010.0-NaNNP_524092.2Ecdysone-induced gene 71EeEig71Ee39706.0Dmel_CG76041486.01486.0NaN
99lcl|NT_033779.5_mrna_NM_134912.3_7107406406406406406406289.0289.0289.0289.0289.0289.081599.50130697.00100868.003054.19003582.8204063.500016977.50017276.90020457.0001484.01001439.01001646.97000NM_134912.3406.0406.0289.0289.01.043882e+053566.83666718237.1333331523.330000mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.53173972.03174442.0+NaNNP_608756.1uncharacterized protein, transcript variant ACG1540433537.0Dmel_CG15404406.0406.0NaN
1010lcl|NT_033779.5_mrna_NM_164529.3_7105491491491491491491374.0374.0374.0374.0374.0374.0105962.00174190.00121620.004258.21004937.2105464.210017035.80017793.10019059.9001598.80001532.31001711.35000NM_164529.3491.0491.0374.0374.01.339240e+054886.54333317962.9333331614.153333mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.53173283.03173840.0-NaNNP_722883.1uncharacterized protein, transcript variant ACG31698318888.0Dmel_CG31698491.0491.0NaN
1111lcl|NT_033777.3_mrna_NM_142116.2_29831587587587587587587470.0470.0470.0470.0470.0470.094761.00145362.00102389.001967.00004579.0001714.000012123.10011815.50012768.500587.68501130.8600427.16500NM_142116.2587.0587.0470.0470.01.141707e+052753.33333312235.700000715.236667mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.314565927.014566513.0+NaNNP_650373.1uncharacterized proteinCG1485241765.0Dmel_CG14852587.0587.0NaN
1212lcl|NT_037436.4_mrna_NM_140484.3_24238677677677677677677560.0560.0560.0560.0560.0560.0109539.00122818.00147594.007049.00006036.0006716.000011761.5008378.65015447.8001767.57001251.11001404.77000NM_140484.3677.0677.0560.0560.01.266503e+056600.33333311862.6500001474.483333mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.415061029.015061705.0-NaNNP_648741.2uncharacterized proteinCG1346039638.0Dmel_CG13460677.0677.0NaN
1313lcl|NW_007931121.1_rrna_NR_133555.1_353191995199519951995199519951878.01878.01878.01878.01878.01878.0253278.00884542.00139805.0086902.900087011.10066275.90008109.33017993.8004363.2706497.96005377.93004133.73000NR_133555.11995.01995.01878.01878.04.258750e+0580063.30000010155.4666675336.540000rRNANaNGCF_000001215.4Primary Assemblyunplaced scaffoldNaNNW_007931121.155965.057959.0+NaNNaN18S ribosomal RNA18SrRNA:CR4583826067166.0Dmel_CR458381995.01995.0NaN
1414lcl|NC_024511.2_rrna_Dmel_CR34094_353511324132413241324132413241207.01207.01207.01207.01207.01207.0240513.00226787.00160627.00566164.0000560931.000558865.000011981.6007178.1207800.03065867.800053943.400054235.30000Dmel_CR340941324.01324.01207.01207.02.093090e+05561986.6666678986.58333358015.500000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1515lcl|NT_033778.4_mrna_NM_137981.2_19288581581581581581581464.0464.0464.0464.0464.0464.049752.0028685.0058202.003632.00002419.0003852.00006447.2702361.7707351.9901099.1700605.1370972.41300NM_137981.2581.0581.0464.0464.04.554633e+043301.0000005387.010000892.240000mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.423751915.023752495.0+NaNNP_611825.1uncharacterized proteinCG1130037760.0Dmel_CG11300581.0581.0NaN
1616lcl|NT_037436.4_mrna_NM_168767.2_25205470470470470470470353.0353.0353.0353.0353.0353.036491.0021240.0037650.005366.00003714.0006259.00006215.7602298.6906251.3702134.59001221.25002076.89000NM_168767.2470.0470.0353.0353.03.179367e+045113.0000004921.9400001810.910000mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.418526976.018527445.0+NaNNP_730339.1uncharacterized proteinCG32198317909.0Dmel_CG32198470.0470.0NaN
1717lcl|NW_007931121.1_rrna_NR_133562.1_353293970397039703970397039703853.03853.03853.03853.03853.03853.0276487.00840675.00127160.00116389.0000105628.00065519.40004314.7908335.4501934.3604241.80003182.11001991.83000NR_133562.13970.03970.03853.03853.04.147740e+0595845.4666674861.5333333138.580000rRNANaNGCF_000001215.4Primary Assemblyunplaced scaffoldNaNNW_007931121.170955.074924.0+NaNNaN28S ribosomal RNA28SrRNA:CR4584426067172.0Dmel_CR458443970.03970.0NaN
1818lcl|NT_033779.5_mrna_NM_001169386.2_7156411411411411411411294.0294.0294.0294.0294.0294.015551.9037030.1027859.50352.6620634.521235.08503180.6904811.8005554.070168.4420250.516093.66100NM_001169386.2411.0411.0294.0294.02.681383e+04407.4226674515.520000170.873000mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.53388024.03388496.0-NaNNP_001162857.1uncharacterized protein, transcript variant ACG424608674116.0Dmel_CG42460411.0411.0NaN
1919lcl|NT_033777.3_mrna_NM_169585.2_29830483483483483483483366.0366.0366.0366.0366.0366.020858.0023296.0021751.00549.00001266.000697.00003426.6902431.6503483.240210.6340401.5030223.06600NM_169585.2483.0483.0366.0366.02.196833e+04837.3333333113.860000278.401000mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.314564323.014564805.0-NaNNP_731917.1uncharacterized proteinCG808741764.0Dmel_CG8087483.0483.0NaN
2020lcl|NT_033779.5_mrna_NM_078751.4_75763950395039503950395039503833.03833.03833.03833.03833.03833.0349313.00115683.00166136.008542.000016082.00030207.00005479.7301153.0002540.450312.9380487.0090923.10500NM_078751.43950.03950.03833.03833.02.103773e+0518277.0000003057.726667574.350667mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.54937409.04941423.0+NaNNP_523475.3salivary gland secretion 1Sgs133701.0Dmel_CG30473950.03950.0NaN
2121lcl|NT_033778.4_mrna_NM_058027.5_151292029202920292029202920291912.01912.01912.01912.01912.01912.0100306.00137985.0097557.0049516.900066318.10077626.60003154.4502757.0502990.5803636.67004026.06004755.60000NM_058027.52029.02029.01912.01912.01.119493e+0564487.2000002967.3600004139.443333mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.411892122.011895520.0+NaNNP_477375.1eukaryotic translation elongation factor 1 alp...eEF1alpha136271.0Dmel_CG82802029.02029.0NaN
2222lcl|NT_033778.4_mrna_NM_137979.3_19285659659659659659659542.0542.0542.0542.0542.0542.033457.908970.7239056.904602.72003495.3505774.61003711.780632.3074223.6101192.4900748.56101247.97000NM_137979.3659.0659.0542.0542.02.716184e+044624.2266672855.8990001063.007000mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.423747560.023748218.0+NaNNP_611823.2uncharacterized protein, transcript variant ACG1356037758.0Dmel_CG13560659.0659.0NaN
2323lcl|NT_033778.4_mrna_NM_001014551.3_19777320320320320320320203.0203.0203.0203.0203.0203.09982.0011710.0011586.0011282.00008993.0009540.00002956.6802203.7403345.2007804.20005142.15005504.71000NM_001014551.3320.0320.0203.0203.01.109267e+049938.3333332835.2066676150.353333mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.424903416.024903935.0-NaNNP_001014551.1ribosomal protein L41RpL41251466.0Dmel_CG30425320.0320.0NaN
2424lcl|NT_033779.5_mrna_NM_135695.2_10042776776776776776776659.0659.0659.0659.0659.0659.028885.0060540.0023218.0012788.000015957.00017945.00002635.5403509.5902065.0202724.93002810.62003189.63000NM_135695.2776.0776.0659.0659.03.754767e+0415563.3333332736.7166672908.393333mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.512045343.012046118.0-NaNNP_609539.1uncharacterized proteinCG677034621.0Dmel_CG6770776.0776.0NaN
2525lcl|NC_004354.4_mrna_NM_130694.3_926557557557557557557440.0440.0440.0440.0440.0440.018614.0028317.0014750.001006.00001157.000949.00002543.7302458.6401964.830321.0580305.2230252.63600NM_130694.3557.0557.0440.0440.02.056033e+041037.3333332322.400000292.972333mRNANaNGCF_000001215.4Primary AssemblychromosomeXNC_004354.43236130.03236686.0-NaNNP_570050.1uncharacterized proteinCG1426531297.0Dmel_CG14265557.0557.0NaN
2626lcl|NT_033779.5_mrna_NM_001298648.1_7106468468468468468468351.0351.0351.0351.0351.0351.010722.5017997.2015817.60554.8060515.177526.50001836.8401958.8402641.310221.9590170.3670175.70100NM_001298648.1468.0468.0351.0351.01.484577e+04532.1610002145.663333189.342333mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.53173972.03174504.0+NaNNP_001285577.1uncharacterized protein, transcript variant BCG1540433537.0Dmel_CG15404468.0468.0NaN
2727lcl|NT_033777.3_mrna_NM_001300371.1_29214700700700700700700583.0583.0583.0583.0583.0583.017645.0025642.0029130.001986.00002582.0001476.00001819.8501680.2902928.580478.3530514.0720296.55100NM_001300371.1700.0700.0583.0583.02.413900e+042014.6666672142.906667429.658667mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.312341674.012342373.0+NaNNP_001287300.1salivary gland-derived secreted factorSgsf19835010.0Dmel_CG44956700.0700.0NaN
2828lcl|NT_033777.3_mrna_NM_142114.3_29828587587587587587587470.0470.0470.0470.0470.0470.02877.0021892.0031385.0018.0000285.0008.0000368.0661779.4603913.9005.377970.38551.99377NM_142114.3587.0587.0470.0470.01.871800e+04103.6666672020.47533325.919057mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.314560935.014561521.0+NaNNP_650371.1JigJig41762.0Dmel_CG14850587.0587.0NaN
2929lcl|NT_033777.3_mrna_NM_001202313.1_30680500500500500500500383.0383.0383.0383.0383.0383.02651.0035726.909607.0053.000094.00027.0000416.1933563.6601470.19019.431928.48828.25748NM_001202313.1500.0500.0383.0383.01.599497e+0458.0000001816.68100018.725860mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.317406984.017407534.0+NaNNP_001189242.1uncharacterized protein, transcript variant ACG4279810178954.0Dmel_CG42798500.0500.0NaN
3030lcl|NT_033777.3_mrna_NM_001104349.2_30674688688688688688688571.0571.0571.0571.0571.0571.015843.7029670.2016463.701184.66001010.790712.19601668.4101985.1101689.960291.3380205.4770146.09900NM_001104349.2688.0688.0571.0571.02.065920e+04969.2153331781.160000214.304667mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.317402362.017403288.0-NaNNP_001097819.1uncharacterized protein, transcript variant ACG342795740605.0Dmel_CG34279688.0688.0NaN
3131lcl|NC_004354.4_mrna_NM_134609.2_5861643643643643643643526.0526.0526.0526.0526.0526.013507.0028221.0015161.001394.00001439.0001149.00001544.0302049.6801689.380372.1470317.5490255.86800NM_134609.2643.0643.0526.0526.01.896300e+041327.3333331761.030000315.188000mRNANaNGCF_000001215.4Primary AssemblychromosomeXNC_004354.421428614.021429256.0-NaNNP_608453.1uncharacterized proteinCG1091833123.0Dmel_CG10918643.0643.0NaN
3232lcl|NT_037436.4_mrna_NM_001274927.2_24232375375375375375375258.0258.0258.0258.0258.0258.07141.4716733.204359.57324.4260475.383181.72301664.3702477.760990.397176.5770213.875082.50370NM_001274927.2375.0375.0258.0258.09.411413e+03327.1773331710.842333157.651900mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.415050856.015051230.0-NaNNP_001261856.1uncharacterized protein, transcript variant BCG4367914462861.0Dmel_CG43679375.0375.0NaN
3333lcl|NT_033777.3_mrna_NM_141689.3_28493502502502502502502385.0385.0385.0385.0385.0385.011180.6014904.3012324.708585.520010391.10010459.00001746.1701478.9401876.2903131.44003132.83003182.09000NM_141689.3502.0502.0385.0385.01.280320e+049811.8733331700.4666673148.786667mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.39778097.09779078.0+NaNNP_649946.1ribosomal protein S29, transcript variant ARpS2941200.0Dmel_CG8495502.0502.0NaN
3434lcl|NT_037436.4_mrna_NM_140419.3_24016565565565565565565448.0448.0448.0448.0448.0448.013693.0014518.0015432.00664.0000718.000510.00001837.8201238.0202018.970208.1270186.0300133.34400NM_140419.3565.0565.0448.0448.01.454767e+04630.6666671698.270000175.833667mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.413986620.013987184.0+NaNNP_648676.2uncharacterized proteinCG1736239545.0Dmel_CG17362565.0565.0NaN
3535lcl|NT_033779.5_mrna_NM_057282.4_6291601601601601601601484.0484.0484.0484.0484.0484.012646.6018374.0016172.7013652.300013921.10014980.10001571.1301450.3001958.5003960.95003338.61003625.37000NM_057282.4601.0601.0484.0484.01.573110e+0414184.5000001659.9766673641.643333mRNANaNGCF_000001215.4Primary Assemblychromosome2LNT_033779.5419952.0420697.0+NaNNP_476630.1ribosomal protein LP1, transcript variant ARpLP133214.0Dmel_CG4087601.0601.0NaN
3636lcl|NW_007931121.1_rrna_Dmel_CR45851_353091721172117211721172117211604.01604.01604.01604.01604.01604.030657.10122932.0017035.209148.18005077.20011101.00001149.2402927.920622.483800.8820367.4140810.66300Dmel_CR458511721.01721.01604.01604.05.687477e+048442.1266671566.547667659.653000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3737lcl|NT_037436.4_mrna_NM_080275.3_23244568568568568568568451.0451.0451.0451.0451.0451.06485.007790.0021129.00501.000089.000244.0000864.603659.8742745.920155.991022.906063.37170NM_080275.3568.0568.0451.0451.01.180133e+04278.0000001423.46566780.756233mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.411130835.011131402.0+NaNNP_525014.2no optic lobenol64867.0Dmel_CG32077568.0568.0NaN
3838lcl|NT_037436.4_mrna_NM_001274926.2_24231516516516516516516399.0399.0399.0399.0399.0399.04496.5312821.8015487.4073.5745172.61745.2766677.6231227.6502275.05025.893650.216513.29180NM_001274926.2516.0516.0399.0399.01.093524e+0497.1560331393.44100029.800633mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.415050856.015051425.0-NaNNP_001261855.1uncharacterized protein, transcript variant ACG4367914462861.0Dmel_CG43679516.0516.0NaN
3939lcl|NT_037436.4_mrna_NM_080077.2_235722304230423042304230423042187.02187.02187.02187.02187.02187.053241.8099726.0034796.90187872.0000301918.000306650.00001463.8201742.050932.56012062.900016024.200016423.90000NM_080077.22304.02304.02187.02187.06.258823e+04265480.0000001379.47666714837.000000mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.412129393.012131696.0+NaNNP_524816.1larval serum protein 2, transcript variant ALsp245326.0Dmel_CG68062304.02304.0NaN
\n", "
" ], "text/plain": [ " index target_id length_MESG1 \\\n", "0 0 lcl|NT_037436.4_mrna_NM_057370.2_23336 321 \n", "1 1 lcl|NT_037436.4_mrna_NM_079300.3_23338 1109 \n", "2 2 lcl|NT_037436.4_mrna_NM_057371.3_23335 359 \n", "3 3 lcl|NC_004354.4_mrna_NM_057369.4_932 952 \n", "4 4 lcl|NT_033777.3_mrna_NM_079664.2_30721 636 \n", "5 5 lcl|NT_033777.3_mrna_NM_001170166.1_30723 327 \n", "6 6 lcl|NT_033777.3_mrna_NM_169764.3_30720 550 \n", "7 7 lcl|NT_037436.4_mrna_NM_140480.3_24230 544 \n", "8 8 lcl|NT_037436.4_mrna_NM_079368.3_24397 1486 \n", "9 9 lcl|NT_033779.5_mrna_NM_134912.3_7107 406 \n", "10 10 lcl|NT_033779.5_mrna_NM_164529.3_7105 491 \n", "11 11 lcl|NT_033777.3_mrna_NM_142116.2_29831 587 \n", "12 12 lcl|NT_037436.4_mrna_NM_140484.3_24238 677 \n", "13 13 lcl|NW_007931121.1_rrna_NR_133555.1_35319 1995 \n", "14 14 lcl|NC_024511.2_rrna_Dmel_CR34094_35351 1324 \n", "15 15 lcl|NT_033778.4_mrna_NM_137981.2_19288 581 \n", "16 16 lcl|NT_037436.4_mrna_NM_168767.2_25205 470 \n", "17 17 lcl|NW_007931121.1_rrna_NR_133562.1_35329 3970 \n", "18 18 lcl|NT_033779.5_mrna_NM_001169386.2_7156 411 \n", "19 19 lcl|NT_033777.3_mrna_NM_169585.2_29830 483 \n", "20 20 lcl|NT_033779.5_mrna_NM_078751.4_7576 3950 \n", "21 21 lcl|NT_033778.4_mrna_NM_058027.5_15129 2029 \n", "22 22 lcl|NT_033778.4_mrna_NM_137979.3_19285 659 \n", "23 23 lcl|NT_033778.4_mrna_NM_001014551.3_19777 320 \n", "24 24 lcl|NT_033779.5_mrna_NM_135695.2_10042 776 \n", "25 25 lcl|NC_004354.4_mrna_NM_130694.3_926 557 \n", "26 26 lcl|NT_033779.5_mrna_NM_001298648.1_7106 468 \n", "27 27 lcl|NT_033777.3_mrna_NM_001300371.1_29214 700 \n", "28 28 lcl|NT_033777.3_mrna_NM_142114.3_29828 587 \n", "29 29 lcl|NT_033777.3_mrna_NM_001202313.1_30680 500 \n", "30 30 lcl|NT_033777.3_mrna_NM_001104349.2_30674 688 \n", "31 31 lcl|NC_004354.4_mrna_NM_134609.2_5861 643 \n", "32 32 lcl|NT_037436.4_mrna_NM_001274927.2_24232 375 \n", "33 33 lcl|NT_033777.3_mrna_NM_141689.3_28493 502 \n", "34 34 lcl|NT_037436.4_mrna_NM_140419.3_24016 565 \n", "35 35 lcl|NT_033779.5_mrna_NM_057282.4_6291 601 \n", "36 36 lcl|NW_007931121.1_rrna_Dmel_CR45851_35309 1721 \n", "37 37 lcl|NT_037436.4_mrna_NM_080275.3_23244 568 \n", "38 38 lcl|NT_037436.4_mrna_NM_001274926.2_24231 516 \n", "39 39 lcl|NT_037436.4_mrna_NM_080077.2_23572 2304 \n", "\n", " length_MESG2 length_MESG4 length_MEWB5 length_MEWB6 length_MEWB7 \\\n", "0 321 321 321 321 321 \n", "1 1109 1109 1109 1109 1109 \n", "2 359 359 359 359 359 \n", "3 952 952 952 952 952 \n", "4 636 636 636 636 636 \n", "5 327 327 327 327 327 \n", "6 550 550 550 550 550 \n", "7 544 544 544 544 544 \n", "8 1486 1486 1486 1486 1486 \n", "9 406 406 406 406 406 \n", "10 491 491 491 491 491 \n", "11 587 587 587 587 587 \n", "12 677 677 677 677 677 \n", "13 1995 1995 1995 1995 1995 \n", "14 1324 1324 1324 1324 1324 \n", "15 581 581 581 581 581 \n", "16 470 470 470 470 470 \n", "17 3970 3970 3970 3970 3970 \n", "18 411 411 411 411 411 \n", "19 483 483 483 483 483 \n", "20 3950 3950 3950 3950 3950 \n", "21 2029 2029 2029 2029 2029 \n", "22 659 659 659 659 659 \n", "23 320 320 320 320 320 \n", "24 776 776 776 776 776 \n", "25 557 557 557 557 557 \n", "26 468 468 468 468 468 \n", "27 700 700 700 700 700 \n", "28 587 587 587 587 587 \n", "29 500 500 500 500 500 \n", "30 688 688 688 688 688 \n", "31 643 643 643 643 643 \n", "32 375 375 375 375 375 \n", "33 502 502 502 502 502 \n", "34 565 565 565 565 565 \n", "35 601 601 601 601 601 \n", "36 1721 1721 1721 1721 1721 \n", "37 568 568 568 568 568 \n", "38 516 516 516 516 516 \n", "39 2304 2304 2304 2304 2304 \n", "\n", " eff_length_MESG1 eff_length_MESG2 eff_length_MESG4 eff_length_MEWB5 \\\n", "0 204.0 204.0 204.0 204.0 \n", "1 992.0 992.0 992.0 992.0 \n", "2 242.0 242.0 242.0 242.0 \n", "3 835.0 835.0 835.0 835.0 \n", "4 519.0 519.0 519.0 519.0 \n", "5 210.0 210.0 210.0 210.0 \n", "6 433.0 433.0 433.0 433.0 \n", "7 427.0 427.0 427.0 427.0 \n", "8 1369.0 1369.0 1369.0 1369.0 \n", "9 289.0 289.0 289.0 289.0 \n", "10 374.0 374.0 374.0 374.0 \n", "11 470.0 470.0 470.0 470.0 \n", "12 560.0 560.0 560.0 560.0 \n", "13 1878.0 1878.0 1878.0 1878.0 \n", "14 1207.0 1207.0 1207.0 1207.0 \n", "15 464.0 464.0 464.0 464.0 \n", "16 353.0 353.0 353.0 353.0 \n", "17 3853.0 3853.0 3853.0 3853.0 \n", "18 294.0 294.0 294.0 294.0 \n", "19 366.0 366.0 366.0 366.0 \n", "20 3833.0 3833.0 3833.0 3833.0 \n", "21 1912.0 1912.0 1912.0 1912.0 \n", "22 542.0 542.0 542.0 542.0 \n", "23 203.0 203.0 203.0 203.0 \n", "24 659.0 659.0 659.0 659.0 \n", "25 440.0 440.0 440.0 440.0 \n", "26 351.0 351.0 351.0 351.0 \n", "27 583.0 583.0 583.0 583.0 \n", "28 470.0 470.0 470.0 470.0 \n", "29 383.0 383.0 383.0 383.0 \n", "30 571.0 571.0 571.0 571.0 \n", "31 526.0 526.0 526.0 526.0 \n", "32 258.0 258.0 258.0 258.0 \n", "33 385.0 385.0 385.0 385.0 \n", "34 448.0 448.0 448.0 448.0 \n", "35 484.0 484.0 484.0 484.0 \n", "36 1604.0 1604.0 1604.0 1604.0 \n", "37 451.0 451.0 451.0 451.0 \n", "38 399.0 399.0 399.0 399.0 \n", "39 2187.0 2187.0 2187.0 2187.0 \n", "\n", " eff_length_MEWB6 eff_length_MEWB7 est_counts_MESG1 est_counts_MESG2 \\\n", "0 204.0 204.0 793608.00 1403050.00 \n", "1 992.0 992.0 2990690.00 3463760.00 \n", "2 242.0 242.0 233229.00 424202.00 \n", "3 835.0 835.0 675111.00 1318340.00 \n", "4 519.0 519.0 482371.00 514361.00 \n", "5 210.0 210.0 124351.00 267635.00 \n", "6 433.0 433.0 96862.00 603219.00 \n", "7 427.0 427.0 197529.00 277231.00 \n", "8 1369.0 1369.0 539135.00 858612.00 \n", "9 289.0 289.0 81599.50 130697.00 \n", "10 374.0 374.0 105962.00 174190.00 \n", "11 470.0 470.0 94761.00 145362.00 \n", "12 560.0 560.0 109539.00 122818.00 \n", "13 1878.0 1878.0 253278.00 884542.00 \n", "14 1207.0 1207.0 240513.00 226787.00 \n", "15 464.0 464.0 49752.00 28685.00 \n", "16 353.0 353.0 36491.00 21240.00 \n", "17 3853.0 3853.0 276487.00 840675.00 \n", "18 294.0 294.0 15551.90 37030.10 \n", "19 366.0 366.0 20858.00 23296.00 \n", "20 3833.0 3833.0 349313.00 115683.00 \n", "21 1912.0 1912.0 100306.00 137985.00 \n", "22 542.0 542.0 33457.90 8970.72 \n", "23 203.0 203.0 9982.00 11710.00 \n", "24 659.0 659.0 28885.00 60540.00 \n", "25 440.0 440.0 18614.00 28317.00 \n", "26 351.0 351.0 10722.50 17997.20 \n", "27 583.0 583.0 17645.00 25642.00 \n", "28 470.0 470.0 2877.00 21892.00 \n", "29 383.0 383.0 2651.00 35726.90 \n", "30 571.0 571.0 15843.70 29670.20 \n", "31 526.0 526.0 13507.00 28221.00 \n", "32 258.0 258.0 7141.47 16733.20 \n", "33 385.0 385.0 11180.60 14904.30 \n", "34 448.0 448.0 13693.00 14518.00 \n", "35 484.0 484.0 12646.60 18374.00 \n", "36 1604.0 1604.0 30657.10 122932.00 \n", "37 451.0 451.0 6485.00 7790.00 \n", "38 399.0 399.0 4496.53 12821.80 \n", "39 2187.0 2187.0 53241.80 99726.00 \n", "\n", " est_counts_MESG4 est_counts_MEWB5 est_counts_MEWB6 est_counts_MEWB7 \\\n", "0 976193.00 23622.0000 50137.000 21288.0000 \n", "1 2744310.00 54611.0000 102641.000 72618.0000 \n", "2 269983.00 7851.0000 14900.000 5896.0000 \n", "3 751776.00 21167.0000 24247.000 17947.0000 \n", "4 410262.00 10090.0000 31823.000 8973.0000 \n", "5 154320.00 6593.1800 6565.950 4278.8800 \n", "6 165003.00 4419.0000 4107.000 1322.0000 \n", "7 166652.00 10451.0000 12853.000 5443.0000 \n", "8 396808.00 23105.0000 11017.000 10375.0000 \n", "9 100868.00 3054.1900 3582.820 4063.5000 \n", "10 121620.00 4258.2100 4937.210 5464.2100 \n", "11 102389.00 1967.0000 4579.000 1714.0000 \n", "12 147594.00 7049.0000 6036.000 6716.0000 \n", "13 139805.00 86902.9000 87011.100 66275.9000 \n", "14 160627.00 566164.0000 560931.000 558865.0000 \n", "15 58202.00 3632.0000 2419.000 3852.0000 \n", "16 37650.00 5366.0000 3714.000 6259.0000 \n", "17 127160.00 116389.0000 105628.000 65519.4000 \n", "18 27859.50 352.6620 634.521 235.0850 \n", "19 21751.00 549.0000 1266.000 697.0000 \n", "20 166136.00 8542.0000 16082.000 30207.0000 \n", "21 97557.00 49516.9000 66318.100 77626.6000 \n", "22 39056.90 4602.7200 3495.350 5774.6100 \n", "23 11586.00 11282.0000 8993.000 9540.0000 \n", "24 23218.00 12788.0000 15957.000 17945.0000 \n", "25 14750.00 1006.0000 1157.000 949.0000 \n", "26 15817.60 554.8060 515.177 526.5000 \n", "27 29130.00 1986.0000 2582.000 1476.0000 \n", "28 31385.00 18.0000 285.000 8.0000 \n", "29 9607.00 53.0000 94.000 27.0000 \n", "30 16463.70 1184.6600 1010.790 712.1960 \n", "31 15161.00 1394.0000 1439.000 1149.0000 \n", "32 4359.57 324.4260 475.383 181.7230 \n", "33 12324.70 8585.5200 10391.100 10459.0000 \n", "34 15432.00 664.0000 718.000 510.0000 \n", "35 16172.70 13652.3000 13921.100 14980.1000 \n", "36 17035.20 9148.1800 5077.200 11101.0000 \n", "37 21129.00 501.0000 89.000 244.0000 \n", "38 15487.40 73.5745 172.617 45.2766 \n", "39 34796.90 187872.0000 301918.000 306650.0000 \n", "\n", " tpm_MESG1 tpm_MESG2 tpm_MESG4 tpm_MEWB5 tpm_MEWB6 tpm_MEWB7 \\\n", "0 233916.000 262750.000 280473.000 16260.2000 28527.5000 12223.30000 \n", "1 181277.000 133394.000 162146.000 7730.4800 12010.1000 8574.62000 \n", "2 57949.600 66966.400 65389.200 4555.6200 7146.7200 2853.81000 \n", "3 48615.200 60317.100 52770.000 3559.6800 3370.6000 2517.61000 \n", "4 55885.200 37861.700 46331.800 2730.0000 7117.2100 2025.13000 \n", "5 35605.100 48688.200 43071.300 4408.7300 3629.2200 2386.67000 \n", "6 13450.800 53221.500 22335.200 1433.0900 1100.9600 357.62300 \n", "7 27815.500 24803.500 22875.300 3436.9100 3493.9200 1493.11000 \n", "8 23679.800 23960.400 16988.800 2369.9600 934.1050 887.70100 \n", "9 16977.500 17276.900 20457.000 1484.0100 1439.0100 1646.97000 \n", "10 17035.800 17793.100 19059.900 1598.8000 1532.3100 1711.35000 \n", "11 12123.100 11815.500 12768.500 587.6850 1130.8600 427.16500 \n", "12 11761.500 8378.650 15447.800 1767.5700 1251.1100 1404.77000 \n", "13 8109.330 17993.800 4363.270 6497.9600 5377.9300 4133.73000 \n", "14 11981.600 7178.120 7800.030 65867.8000 53943.4000 54235.30000 \n", "15 6447.270 2361.770 7351.990 1099.1700 605.1370 972.41300 \n", "16 6215.760 2298.690 6251.370 2134.5900 1221.2500 2076.89000 \n", "17 4314.790 8335.450 1934.360 4241.8000 3182.1100 1991.83000 \n", "18 3180.690 4811.800 5554.070 168.4420 250.5160 93.66100 \n", "19 3426.690 2431.650 3483.240 210.6340 401.5030 223.06600 \n", "20 5479.730 1153.000 2540.450 312.9380 487.0090 923.10500 \n", "21 3154.450 2757.050 2990.580 3636.6700 4026.0600 4755.60000 \n", "22 3711.780 632.307 4223.610 1192.4900 748.5610 1247.97000 \n", "23 2956.680 2203.740 3345.200 7804.2000 5142.1500 5504.71000 \n", "24 2635.540 3509.590 2065.020 2724.9300 2810.6200 3189.63000 \n", "25 2543.730 2458.640 1964.830 321.0580 305.2230 252.63600 \n", "26 1836.840 1958.840 2641.310 221.9590 170.3670 175.70100 \n", "27 1819.850 1680.290 2928.580 478.3530 514.0720 296.55100 \n", "28 368.066 1779.460 3913.900 5.3779 70.3855 1.99377 \n", "29 416.193 3563.660 1470.190 19.4319 28.4882 8.25748 \n", "30 1668.410 1985.110 1689.960 291.3380 205.4770 146.09900 \n", "31 1544.030 2049.680 1689.380 372.1470 317.5490 255.86800 \n", "32 1664.370 2477.760 990.397 176.5770 213.8750 82.50370 \n", "33 1746.170 1478.940 1876.290 3131.4400 3132.8300 3182.09000 \n", "34 1837.820 1238.020 2018.970 208.1270 186.0300 133.34400 \n", "35 1571.130 1450.300 1958.500 3960.9500 3338.6100 3625.37000 \n", "36 1149.240 2927.920 622.483 800.8820 367.4140 810.66300 \n", "37 864.603 659.874 2745.920 155.9910 22.9060 63.37170 \n", "38 677.623 1227.650 2275.050 25.8936 50.2165 13.29180 \n", "39 1463.820 1742.050 932.560 12062.9000 16024.2000 16423.90000 \n", "\n", " product_accession length_SG_moyenne length_WB_moyenne \\\n", "0 NM_057370.2 321.0 321.0 \n", "1 NM_079300.3 1109.0 1109.0 \n", "2 NM_057371.3 359.0 359.0 \n", "3 NM_057369.4 952.0 952.0 \n", "4 NM_079664.2 636.0 636.0 \n", "5 NM_001170166.1 327.0 327.0 \n", "6 NM_169764.3 550.0 550.0 \n", "7 NM_140480.3 544.0 544.0 \n", "8 NM_079368.3 1486.0 1486.0 \n", "9 NM_134912.3 406.0 406.0 \n", "10 NM_164529.3 491.0 491.0 \n", "11 NM_142116.2 587.0 587.0 \n", "12 NM_140484.3 677.0 677.0 \n", "13 NR_133555.1 1995.0 1995.0 \n", "14 Dmel_CR34094 1324.0 1324.0 \n", "15 NM_137981.2 581.0 581.0 \n", "16 NM_168767.2 470.0 470.0 \n", "17 NR_133562.1 3970.0 3970.0 \n", "18 NM_001169386.2 411.0 411.0 \n", "19 NM_169585.2 483.0 483.0 \n", "20 NM_078751.4 3950.0 3950.0 \n", "21 NM_058027.5 2029.0 2029.0 \n", "22 NM_137979.3 659.0 659.0 \n", "23 NM_001014551.3 320.0 320.0 \n", "24 NM_135695.2 776.0 776.0 \n", "25 NM_130694.3 557.0 557.0 \n", "26 NM_001298648.1 468.0 468.0 \n", "27 NM_001300371.1 700.0 700.0 \n", "28 NM_142114.3 587.0 587.0 \n", "29 NM_001202313.1 500.0 500.0 \n", "30 NM_001104349.2 688.0 688.0 \n", "31 NM_134609.2 643.0 643.0 \n", "32 NM_001274927.2 375.0 375.0 \n", "33 NM_141689.3 502.0 502.0 \n", "34 NM_140419.3 565.0 565.0 \n", "35 NM_057282.4 601.0 601.0 \n", "36 Dmel_CR45851 1721.0 1721.0 \n", "37 NM_080275.3 568.0 568.0 \n", "38 NM_001274926.2 516.0 516.0 \n", "39 NM_080077.2 2304.0 2304.0 \n", "\n", " eff_length_SG_moyenne eff_length_WB_moyenne est_counts_SG_moyenne \\\n", "0 204.0 204.0 1.057617e+06 \n", "1 992.0 992.0 3.066253e+06 \n", "2 242.0 242.0 3.091380e+05 \n", "3 835.0 835.0 9.150757e+05 \n", "4 519.0 519.0 4.689980e+05 \n", "5 210.0 210.0 1.821020e+05 \n", "6 433.0 433.0 2.883613e+05 \n", "7 427.0 427.0 2.138040e+05 \n", "8 1369.0 1369.0 5.981850e+05 \n", "9 289.0 289.0 1.043882e+05 \n", "10 374.0 374.0 1.339240e+05 \n", "11 470.0 470.0 1.141707e+05 \n", "12 560.0 560.0 1.266503e+05 \n", "13 1878.0 1878.0 4.258750e+05 \n", "14 1207.0 1207.0 2.093090e+05 \n", "15 464.0 464.0 4.554633e+04 \n", "16 353.0 353.0 3.179367e+04 \n", "17 3853.0 3853.0 4.147740e+05 \n", "18 294.0 294.0 2.681383e+04 \n", "19 366.0 366.0 2.196833e+04 \n", "20 3833.0 3833.0 2.103773e+05 \n", "21 1912.0 1912.0 1.119493e+05 \n", "22 542.0 542.0 2.716184e+04 \n", "23 203.0 203.0 1.109267e+04 \n", "24 659.0 659.0 3.754767e+04 \n", "25 440.0 440.0 2.056033e+04 \n", "26 351.0 351.0 1.484577e+04 \n", "27 583.0 583.0 2.413900e+04 \n", "28 470.0 470.0 1.871800e+04 \n", "29 383.0 383.0 1.599497e+04 \n", "30 571.0 571.0 2.065920e+04 \n", "31 526.0 526.0 1.896300e+04 \n", "32 258.0 258.0 9.411413e+03 \n", "33 385.0 385.0 1.280320e+04 \n", "34 448.0 448.0 1.454767e+04 \n", "35 484.0 484.0 1.573110e+04 \n", "36 1604.0 1604.0 5.687477e+04 \n", "37 451.0 451.0 1.180133e+04 \n", "38 399.0 399.0 1.093524e+04 \n", "39 2187.0 2187.0 6.258823e+04 \n", "\n", " est_counts_WB_moyenne tpm_SG_moyenne tpm_WB_moyenne # feature class \\\n", "0 31682.333333 259046.333333 19003.666667 mRNA NaN \n", "1 76623.333333 158939.000000 9438.400000 mRNA NaN \n", "2 9549.000000 63435.066667 4852.050000 mRNA NaN \n", "3 21120.333333 53900.766667 3149.296667 mRNA NaN \n", "4 16962.000000 46692.900000 3957.446667 mRNA NaN \n", "5 5812.670000 42454.866667 3474.873333 mRNA NaN \n", "6 3282.666667 29669.166667 963.891000 mRNA NaN \n", "7 9582.333333 25164.766667 2807.980000 mRNA NaN \n", "8 14832.333333 21543.000000 1397.255333 mRNA NaN \n", "9 3566.836667 18237.133333 1523.330000 mRNA NaN \n", "10 4886.543333 17962.933333 1614.153333 mRNA NaN \n", "11 2753.333333 12235.700000 715.236667 mRNA NaN \n", "12 6600.333333 11862.650000 1474.483333 mRNA NaN \n", "13 80063.300000 10155.466667 5336.540000 rRNA NaN \n", "14 561986.666667 8986.583333 58015.500000 NaN NaN \n", "15 3301.000000 5387.010000 892.240000 mRNA NaN \n", "16 5113.000000 4921.940000 1810.910000 mRNA NaN \n", "17 95845.466667 4861.533333 3138.580000 rRNA NaN \n", "18 407.422667 4515.520000 170.873000 mRNA NaN \n", "19 837.333333 3113.860000 278.401000 mRNA NaN \n", "20 18277.000000 3057.726667 574.350667 mRNA NaN \n", "21 64487.200000 2967.360000 4139.443333 mRNA NaN \n", "22 4624.226667 2855.899000 1063.007000 mRNA NaN \n", "23 9938.333333 2835.206667 6150.353333 mRNA NaN \n", "24 15563.333333 2736.716667 2908.393333 mRNA NaN \n", "25 1037.333333 2322.400000 292.972333 mRNA NaN \n", "26 532.161000 2145.663333 189.342333 mRNA NaN \n", "27 2014.666667 2142.906667 429.658667 mRNA NaN \n", "28 103.666667 2020.475333 25.919057 mRNA NaN \n", "29 58.000000 1816.681000 18.725860 mRNA NaN \n", "30 969.215333 1781.160000 214.304667 mRNA NaN \n", "31 1327.333333 1761.030000 315.188000 mRNA NaN \n", "32 327.177333 1710.842333 157.651900 mRNA NaN \n", "33 9811.873333 1700.466667 3148.786667 mRNA NaN \n", "34 630.666667 1698.270000 175.833667 mRNA NaN \n", "35 14184.500000 1659.976667 3641.643333 mRNA NaN \n", "36 8442.126667 1566.547667 659.653000 NaN NaN \n", "37 278.000000 1423.465667 80.756233 mRNA NaN \n", "38 97.156033 1393.441000 29.800633 mRNA NaN \n", "39 265480.000000 1379.476667 14837.000000 mRNA NaN \n", "\n", " assembly assembly_unit seq_type chromosome \\\n", "0 GCF_000001215.4 Primary Assembly chromosome 3L \n", "1 GCF_000001215.4 Primary Assembly chromosome 3L \n", "2 GCF_000001215.4 Primary Assembly chromosome 3L \n", "3 GCF_000001215.4 Primary Assembly chromosome X \n", "4 GCF_000001215.4 Primary Assembly chromosome 3R \n", "5 GCF_000001215.4 Primary Assembly chromosome 3R \n", "6 GCF_000001215.4 Primary Assembly chromosome 3R \n", "7 GCF_000001215.4 Primary Assembly chromosome 3L \n", "8 GCF_000001215.4 Primary Assembly chromosome 3L \n", "9 GCF_000001215.4 Primary Assembly chromosome 2L \n", "10 GCF_000001215.4 Primary Assembly chromosome 2L \n", "11 GCF_000001215.4 Primary Assembly chromosome 3R \n", "12 GCF_000001215.4 Primary Assembly chromosome 3L \n", "13 GCF_000001215.4 Primary Assembly unplaced scaffold NaN \n", "14 NaN NaN NaN NaN \n", "15 GCF_000001215.4 Primary Assembly chromosome 2R \n", "16 GCF_000001215.4 Primary Assembly chromosome 3L \n", "17 GCF_000001215.4 Primary Assembly unplaced scaffold NaN \n", "18 GCF_000001215.4 Primary Assembly chromosome 2L \n", "19 GCF_000001215.4 Primary Assembly chromosome 3R \n", "20 GCF_000001215.4 Primary Assembly chromosome 2L \n", "21 GCF_000001215.4 Primary Assembly chromosome 2R \n", "22 GCF_000001215.4 Primary Assembly chromosome 2R \n", "23 GCF_000001215.4 Primary Assembly chromosome 2R \n", "24 GCF_000001215.4 Primary Assembly chromosome 2L \n", "25 GCF_000001215.4 Primary Assembly chromosome X \n", "26 GCF_000001215.4 Primary Assembly chromosome 2L \n", "27 GCF_000001215.4 Primary Assembly chromosome 3R \n", "28 GCF_000001215.4 Primary Assembly chromosome 3R \n", "29 GCF_000001215.4 Primary Assembly chromosome 3R \n", "30 GCF_000001215.4 Primary Assembly chromosome 3R \n", "31 GCF_000001215.4 Primary Assembly chromosome X \n", "32 GCF_000001215.4 Primary Assembly chromosome 3L \n", "33 GCF_000001215.4 Primary Assembly chromosome 3R \n", "34 GCF_000001215.4 Primary Assembly chromosome 3L \n", "35 GCF_000001215.4 Primary Assembly chromosome 2L \n", "36 NaN NaN NaN NaN \n", "37 GCF_000001215.4 Primary Assembly chromosome 3L \n", "38 GCF_000001215.4 Primary Assembly chromosome 3L \n", "39 GCF_000001215.4 Primary Assembly chromosome 3L \n", "\n", " genomic_accession start end strand non-redundant_refseq \\\n", "0 NT_037436.4 11509861.0 11510247.0 + NaN \n", "1 NT_037436.4 11512221.0 11513402.0 + NaN \n", "2 NT_037436.4 11508967.0 11509394.0 - NaN \n", "3 NC_004354.4 3250089.0 3251040.0 + NaN \n", "4 NT_033777.3 17595362.0 17596112.0 + NaN \n", "5 NT_033777.3 17596536.0 17596917.0 + NaN \n", "6 NT_033777.3 17594386.0 17595057.0 + NaN \n", "7 NT_037436.4 15050263.0 15050806.0 - NaN \n", "8 NT_037436.4 15654472.0 15656010.0 - NaN \n", "9 NT_033779.5 3173972.0 3174442.0 + NaN \n", "10 NT_033779.5 3173283.0 3173840.0 - NaN \n", "11 NT_033777.3 14565927.0 14566513.0 + NaN \n", "12 NT_037436.4 15061029.0 15061705.0 - NaN \n", "13 NW_007931121.1 55965.0 57959.0 + NaN \n", "14 NaN NaN NaN NaN NaN \n", "15 NT_033778.4 23751915.0 23752495.0 + NaN \n", "16 NT_037436.4 18526976.0 18527445.0 + NaN \n", "17 NW_007931121.1 70955.0 74924.0 + NaN \n", "18 NT_033779.5 3388024.0 3388496.0 - NaN \n", "19 NT_033777.3 14564323.0 14564805.0 - NaN \n", "20 NT_033779.5 4937409.0 4941423.0 + NaN \n", "21 NT_033778.4 11892122.0 11895520.0 + NaN \n", "22 NT_033778.4 23747560.0 23748218.0 + NaN \n", "23 NT_033778.4 24903416.0 24903935.0 - NaN \n", "24 NT_033779.5 12045343.0 12046118.0 - NaN \n", "25 NC_004354.4 3236130.0 3236686.0 - NaN \n", "26 NT_033779.5 3173972.0 3174504.0 + NaN \n", "27 NT_033777.3 12341674.0 12342373.0 + NaN \n", "28 NT_033777.3 14560935.0 14561521.0 + NaN \n", "29 NT_033777.3 17406984.0 17407534.0 + NaN \n", "30 NT_033777.3 17402362.0 17403288.0 - NaN \n", "31 NC_004354.4 21428614.0 21429256.0 - NaN \n", "32 NT_037436.4 15050856.0 15051230.0 - NaN \n", "33 NT_033777.3 9778097.0 9779078.0 + NaN \n", "34 NT_037436.4 13986620.0 13987184.0 + NaN \n", "35 NT_033779.5 419952.0 420697.0 + NaN \n", "36 NaN NaN NaN NaN NaN \n", "37 NT_037436.4 11130835.0 11131402.0 + NaN \n", "38 NT_037436.4 15050856.0 15051425.0 - NaN \n", "39 NT_037436.4 12129393.0 12131696.0 + NaN \n", "\n", " related_accession name \\\n", "0 NP_476718.1 salivary gland secretion 7 \n", "1 NP_524024.1 salivary gland secretion 3 \n", "2 NP_476719.2 salivary gland secretion 8 \n", "3 NP_476717.4 salivary gland secretion 4 \n", "4 NP_524388.1 salivary gland secretion 5 \n", "5 NP_001163637.1 uncharacterized protein, transcript variant B \n", "6 NP_732246.1 Sgs5bis \n", "7 NP_648737.1 uncharacterized protein \n", "8 NP_524092.2 Ecdysone-induced gene 71Ee \n", "9 NP_608756.1 uncharacterized protein, transcript variant A \n", "10 NP_722883.1 uncharacterized protein, transcript variant A \n", "11 NP_650373.1 uncharacterized protein \n", "12 NP_648741.2 uncharacterized protein \n", "13 NaN 18S ribosomal RNA \n", "14 NaN NaN \n", "15 NP_611825.1 uncharacterized protein \n", "16 NP_730339.1 uncharacterized protein \n", "17 NaN 28S ribosomal RNA \n", "18 NP_001162857.1 uncharacterized protein, transcript variant A \n", "19 NP_731917.1 uncharacterized protein \n", "20 NP_523475.3 salivary gland secretion 1 \n", "21 NP_477375.1 eukaryotic translation elongation factor 1 alp... \n", "22 NP_611823.2 uncharacterized protein, transcript variant A \n", "23 NP_001014551.1 ribosomal protein L41 \n", "24 NP_609539.1 uncharacterized protein \n", "25 NP_570050.1 uncharacterized protein \n", "26 NP_001285577.1 uncharacterized protein, transcript variant B \n", "27 NP_001287300.1 salivary gland-derived secreted factor \n", "28 NP_650371.1 Jig \n", "29 NP_001189242.1 uncharacterized protein, transcript variant A \n", "30 NP_001097819.1 uncharacterized protein, transcript variant A \n", "31 NP_608453.1 uncharacterized protein \n", "32 NP_001261856.1 uncharacterized protein, transcript variant B \n", "33 NP_649946.1 ribosomal protein S29, transcript variant A \n", "34 NP_648676.2 uncharacterized protein \n", "35 NP_476630.1 ribosomal protein LP1, transcript variant A \n", "36 NaN NaN \n", "37 NP_525014.2 no optic lobe \n", "38 NP_001261855.1 uncharacterized protein, transcript variant A \n", "39 NP_524816.1 larval serum protein 2, transcript variant A \n", "\n", " symbol GeneID locus_tag feature_interval_length \\\n", "0 Sgs7 47198.0 Dmel_CG18087 321.0 \n", "1 Sgs3 39288.0 Dmel_CG11720 1109.0 \n", "2 Sgs8 39285.0 Dmel_CG6132 359.0 \n", "3 Sgs4 31304.0 Dmel_CG12181 952.0 \n", "4 Sgs5 42114.0 Dmel_CG7596 636.0 \n", "5 CG7606 8673956.0 Dmel_CG7606 327.0 \n", "6 Sgs5bis 42113.0 Dmel_CG7587 550.0 \n", "7 CG12310 39634.0 Dmel_CG12310 544.0 \n", "8 Eig71Ee 39706.0 Dmel_CG7604 1486.0 \n", "9 CG15404 33537.0 Dmel_CG15404 406.0 \n", "10 CG31698 318888.0 Dmel_CG31698 491.0 \n", "11 CG14852 41765.0 Dmel_CG14852 587.0 \n", "12 CG13460 39638.0 Dmel_CG13460 677.0 \n", "13 18SrRNA:CR45838 26067166.0 Dmel_CR45838 1995.0 \n", "14 NaN NaN NaN NaN \n", "15 CG11300 37760.0 Dmel_CG11300 581.0 \n", "16 CG32198 317909.0 Dmel_CG32198 470.0 \n", "17 28SrRNA:CR45844 26067172.0 Dmel_CR45844 3970.0 \n", "18 CG42460 8674116.0 Dmel_CG42460 411.0 \n", "19 CG8087 41764.0 Dmel_CG8087 483.0 \n", "20 Sgs1 33701.0 Dmel_CG3047 3950.0 \n", "21 eEF1alpha1 36271.0 Dmel_CG8280 2029.0 \n", "22 CG13560 37758.0 Dmel_CG13560 659.0 \n", "23 RpL41 251466.0 Dmel_CG30425 320.0 \n", "24 CG6770 34621.0 Dmel_CG6770 776.0 \n", "25 CG14265 31297.0 Dmel_CG14265 557.0 \n", "26 CG15404 33537.0 Dmel_CG15404 468.0 \n", "27 Sgsf 19835010.0 Dmel_CG44956 700.0 \n", "28 Jig 41762.0 Dmel_CG14850 587.0 \n", "29 CG42798 10178954.0 Dmel_CG42798 500.0 \n", "30 CG34279 5740605.0 Dmel_CG34279 688.0 \n", "31 CG10918 33123.0 Dmel_CG10918 643.0 \n", "32 CG43679 14462861.0 Dmel_CG43679 375.0 \n", "33 RpS29 41200.0 Dmel_CG8495 502.0 \n", "34 CG17362 39545.0 Dmel_CG17362 565.0 \n", "35 RpLP1 33214.0 Dmel_CG4087 601.0 \n", "36 NaN NaN NaN NaN \n", "37 nol 64867.0 Dmel_CG32077 568.0 \n", "38 CG43679 14462861.0 Dmel_CG43679 516.0 \n", "39 Lsp2 45326.0 Dmel_CG6806 2304.0 \n", "\n", " product_length attributes \n", "0 321.0 NaN \n", "1 1109.0 NaN \n", "2 359.0 NaN \n", "3 952.0 NaN \n", "4 636.0 NaN \n", "5 327.0 NaN \n", "6 550.0 NaN \n", "7 544.0 NaN \n", "8 1486.0 NaN \n", "9 406.0 NaN \n", "10 491.0 NaN \n", "11 587.0 NaN \n", "12 677.0 NaN \n", "13 1995.0 NaN \n", "14 NaN NaN \n", "15 581.0 NaN \n", "16 470.0 NaN \n", "17 3970.0 NaN \n", "18 411.0 NaN \n", "19 483.0 NaN \n", "20 3950.0 NaN \n", "21 2029.0 NaN \n", "22 659.0 NaN \n", "23 320.0 NaN \n", "24 776.0 NaN \n", "25 557.0 NaN \n", "26 468.0 NaN \n", "27 700.0 NaN \n", "28 587.0 NaN \n", "29 500.0 NaN \n", "30 688.0 NaN \n", "31 643.0 NaN \n", "32 375.0 NaN \n", "33 502.0 NaN \n", "34 565.0 NaN \n", "35 601.0 NaN \n", "36 NaN NaN \n", "37 568.0 NaN \n", "38 516.0 NaN \n", "39 2304.0 NaN " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tab_cor = pd.read_table('/Users/arthur/PROJECTS/Stage_IJM/data/genomes/melanogaster/GCF_000001215.4_Release_6_plus_ISO1_MT_feature_table.txt')\n", "new_df = pd.merge(a, tab_cor, how='left', on='product_accession')\n", "new_df = new_df.sort_values(by='tpm_SG_moyenne', ascending=False)\n", "new_df = new_df.reset_index()\n", "new_df = new_df.drop(columns=['index'])\n", "new_df = new_df.reset_index()\n", "new_df.to_csv(\"merged_abundance_ME_RF.csv\", index=False)\n", "pd.set_option('display.max_columns', None)\n", "display(new_df.head(40))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_idpvalqvalbse_bmean_obsvar_obstech_varsigma_sqsmooth_sigma_sqfinal_sigma_sqproduct_accession# featureclassassemblyassembly_unitseq_typechromosomegenomic_accessionstartendstrandnon-redundant_refseqrelated_accessionnamesymbolGeneIDlocus_tagfeature_interval_lengthproduct_lengthattributes
0lcl|NT_033777.3_mrna_NM_079830.4_342860.000000e+000.000000e+00-10.2290930.2444484.42139931.3987450.0000530.0105000.0895790.089579NM_079830.4mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.329923754.029924615.0-NaNNP_524554.1jonah 99CiiJon99Cii43544.0Dmel_CG31034862.0862.0NaN
1lcl|NT_033777.3_mrna_NM_169182.2_277090.000000e+000.000000e+003.8935630.0999167.6245234.5571840.0015410.0100010.0134340.013434NM_169182.2mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.37345069.07346354.0+NaNNP_731155.2uncharacterized proteinCG31496326145.0Dmel_CG314961030.01030.0NaN
2lcl|NT_033778.4_mrna_NM_057273.4_139180.000000e+000.000000e+00-10.8717970.2825194.74275135.5545710.0000180.1197080.0539620.119708NM_057273.4mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.48435443.08436011.0+NaNNP_476621.1larval cuticle protein 3, transcript variant ALcp335819.0Dmel_CG2043513.0513.0NaN
3lcl|NT_033777.3_mrna_NM_001275633.1_297343.305172e-2701.506745e-266-9.8710070.2810634.24235629.2858380.0001360.0683680.1183590.118359NM_001275633.1mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.314152211.014152650.0-NaNNP_001262562.1uncharacterized protein, transcript variant BCG4250050032.0Dmel_CG42500440.0440.0NaN
4lcl|NT_033778.4_mrna_NM_078970.4_150102.992215e-2561.091261e-252-10.5542660.3086604.58398633.5320830.0000200.1428870.0692740.142887NM_078970.4mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.411355319.011356128.0+NaNNP_523694.2deltaTrypsindeltaTry48343.0Dmel_CG12351810.0810.0NaN
5lcl|NT_037436.4_mrna_NM_079220.2_217122.119157e-2536.440470e-250-9.8103420.2885274.21202428.9411050.0000390.0852900.1248320.124832NM_079220.2mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.46035216.06036175.0+NaNNP_523944.1yippee interacting protein 7yip738680.0Dmel_CG6457904.0904.0NaN
6lcl|NC_004354.4_mrna_NM_132983.3_47883.498568e-2509.113771e-247-9.7987510.2900494.20622828.8751810.0000430.0881100.1261500.126150NM_132983.3mRNANaNGCF_000001215.4Primary AssemblychromosomeXNC_004354.417281777.017282726.0-NaNNP_573211.1uncharacterized protein, transcript variant ACG866432719.0Dmel_CG8664712.0712.0NaN
7lcl|NT_033778.4_mrna_NM_136526.2_138837.833472e-2431.785542e-2393.4431120.1034636.9685853.5625300.0023610.0051680.0136960.013696NM_136526.2mRNANaNGCF_000001215.4Primary Assemblychromosome2RNT_033778.48267643.08269334.0-NaNNP_610370.1uncharacterized proteinCG3037135806.0Dmel_CG303711258.01258.0NaN
8lcl|NT_037436.4_mrna_NM_139756.3_217166.365772e-2391.289776e-235-10.1936260.3088294.40366631.2874560.0000350.1430270.0920910.143027NM_139756.3mRNANaNGCF_000001215.4Primary Assemblychromosome3LNT_037436.46050757.06051690.0+NaNNP_648013.1jonah 65AiiiJon65Aiii38683.0Dmel_CG6483875.0875.0NaN
9lcl|NT_033777.3_mrna_NM_001300669.1_342845.935906e-2351.082412e-231-9.7433640.2976934.17853528.5045840.0001220.0306820.1328100.132810NM_001300669.1mRNANaNGCF_000001215.4Primary Assemblychromosome3RNT_033777.329922216.029923455.0+NaNNP_001287598.1jonah 99Ciii, transcript variant BJon99Ciii43543.0Dmel_CG313621240.01240.0NaN
\n", "
" ], "text/plain": [ " target_id pval qval \\\n", "0 lcl|NT_033777.3_mrna_NM_079830.4_34286 0.000000e+00 0.000000e+00 \n", "1 lcl|NT_033777.3_mrna_NM_169182.2_27709 0.000000e+00 0.000000e+00 \n", "2 lcl|NT_033778.4_mrna_NM_057273.4_13918 0.000000e+00 0.000000e+00 \n", "3 lcl|NT_033777.3_mrna_NM_001275633.1_29734 3.305172e-270 1.506745e-266 \n", "4 lcl|NT_033778.4_mrna_NM_078970.4_15010 2.992215e-256 1.091261e-252 \n", "5 lcl|NT_037436.4_mrna_NM_079220.2_21712 2.119157e-253 6.440470e-250 \n", "6 lcl|NC_004354.4_mrna_NM_132983.3_4788 3.498568e-250 9.113771e-247 \n", "7 lcl|NT_033778.4_mrna_NM_136526.2_13883 7.833472e-243 1.785542e-239 \n", "8 lcl|NT_037436.4_mrna_NM_139756.3_21716 6.365772e-239 1.289776e-235 \n", "9 lcl|NT_033777.3_mrna_NM_001300669.1_34284 5.935906e-235 1.082412e-231 \n", "\n", " b se_b mean_obs var_obs tech_var sigma_sq \\\n", "0 -10.229093 0.244448 4.421399 31.398745 0.000053 0.010500 \n", "1 3.893563 0.099916 7.624523 4.557184 0.001541 0.010001 \n", "2 -10.871797 0.282519 4.742751 35.554571 0.000018 0.119708 \n", "3 -9.871007 0.281063 4.242356 29.285838 0.000136 0.068368 \n", "4 -10.554266 0.308660 4.583986 33.532083 0.000020 0.142887 \n", "5 -9.810342 0.288527 4.212024 28.941105 0.000039 0.085290 \n", "6 -9.798751 0.290049 4.206228 28.875181 0.000043 0.088110 \n", "7 3.443112 0.103463 6.968585 3.562530 0.002361 0.005168 \n", "8 -10.193626 0.308829 4.403666 31.287456 0.000035 0.143027 \n", "9 -9.743364 0.297693 4.178535 28.504584 0.000122 0.030682 \n", "\n", " smooth_sigma_sq final_sigma_sq product_accession # feature class \\\n", "0 0.089579 0.089579 NM_079830.4 mRNA NaN \n", "1 0.013434 0.013434 NM_169182.2 mRNA NaN \n", "2 0.053962 0.119708 NM_057273.4 mRNA NaN \n", "3 0.118359 0.118359 NM_001275633.1 mRNA NaN \n", "4 0.069274 0.142887 NM_078970.4 mRNA NaN \n", "5 0.124832 0.124832 NM_079220.2 mRNA NaN \n", "6 0.126150 0.126150 NM_132983.3 mRNA NaN \n", "7 0.013696 0.013696 NM_136526.2 mRNA NaN \n", "8 0.092091 0.143027 NM_139756.3 mRNA NaN \n", "9 0.132810 0.132810 NM_001300669.1 mRNA NaN \n", "\n", " assembly assembly_unit seq_type chromosome genomic_accession \\\n", "0 GCF_000001215.4 Primary Assembly chromosome 3R NT_033777.3 \n", "1 GCF_000001215.4 Primary Assembly chromosome 3R NT_033777.3 \n", "2 GCF_000001215.4 Primary Assembly chromosome 2R NT_033778.4 \n", "3 GCF_000001215.4 Primary Assembly chromosome 3R NT_033777.3 \n", "4 GCF_000001215.4 Primary Assembly chromosome 2R NT_033778.4 \n", "5 GCF_000001215.4 Primary Assembly chromosome 3L NT_037436.4 \n", "6 GCF_000001215.4 Primary Assembly chromosome X NC_004354.4 \n", "7 GCF_000001215.4 Primary Assembly chromosome 2R NT_033778.4 \n", "8 GCF_000001215.4 Primary Assembly chromosome 3L NT_037436.4 \n", "9 GCF_000001215.4 Primary Assembly chromosome 3R NT_033777.3 \n", "\n", " start end strand non-redundant_refseq related_accession \\\n", "0 29923754.0 29924615.0 - NaN NP_524554.1 \n", "1 7345069.0 7346354.0 + NaN NP_731155.2 \n", "2 8435443.0 8436011.0 + NaN NP_476621.1 \n", "3 14152211.0 14152650.0 - NaN NP_001262562.1 \n", "4 11355319.0 11356128.0 + NaN NP_523694.2 \n", "5 6035216.0 6036175.0 + NaN NP_523944.1 \n", "6 17281777.0 17282726.0 - NaN NP_573211.1 \n", "7 8267643.0 8269334.0 - NaN NP_610370.1 \n", "8 6050757.0 6051690.0 + NaN NP_648013.1 \n", "9 29922216.0 29923455.0 + NaN NP_001287598.1 \n", "\n", " name symbol GeneID \\\n", "0 jonah 99Cii Jon99Cii 43544.0 \n", "1 uncharacterized protein CG31496 326145.0 \n", "2 larval cuticle protein 3, transcript variant A Lcp3 35819.0 \n", "3 uncharacterized protein, transcript variant B CG42500 50032.0 \n", "4 deltaTrypsin deltaTry 48343.0 \n", "5 yippee interacting protein 7 yip7 38680.0 \n", "6 uncharacterized protein, transcript variant A CG8664 32719.0 \n", "7 uncharacterized protein CG30371 35806.0 \n", "8 jonah 65Aiii Jon65Aiii 38683.0 \n", "9 jonah 99Ciii, transcript variant B Jon99Ciii 43543.0 \n", "\n", " locus_tag feature_interval_length product_length attributes \n", "0 Dmel_CG31034 862.0 862.0 NaN \n", "1 Dmel_CG31496 1030.0 1030.0 NaN \n", "2 Dmel_CG2043 513.0 513.0 NaN \n", "3 Dmel_CG42500 440.0 440.0 NaN \n", "4 Dmel_CG12351 810.0 810.0 NaN \n", "5 Dmel_CG6457 904.0 904.0 NaN \n", "6 Dmel_CG8664 712.0 712.0 NaN \n", "7 Dmel_CG30371 1258.0 1258.0 NaN \n", "8 Dmel_CG6483 875.0 875.0 NaN \n", "9 Dmel_CG31362 1240.0 1240.0 NaN " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Index([267], dtype='int64')\n" ] } ], "source": [ "res_sleuth = pd.read_csv('sleuth_results_ME_RF.csv')\n", "\n", "new_col = []\n", "for i, v in enumerate(res_sleuth['target_id']):\n", " v = v.split('_')[-3]+'_'+v.split('_')[-2]\n", " new_col.append(v)\n", "res_sleuth['product_accession'] = new_col\n", "\n", "tab_cor = pd.read_table('/Users/arthur/PROJECTS/Stage_IJM/data/genomes/melanogaster/GCF_000001215.4_Release_6_plus_ISO1_MT_feature_table.txt')\n", "res_sleuth = pd.merge(res_sleuth, tab_cor, how='left', on='product_accession')\n", "res_sleuth.to_csv('sleuth_results_trie_ME_RF.csv')\n", "display(res_sleuth.head(10))\n", "index = res_sleuth[res_sleuth['symbol'] == 'Eig71Ee'].index\n", "print(index)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Gene ID': 'NM_001170166.1',\n", " 'Nom': 'CG7606',\n", " 'Length': 327,\n", " 'Definition': 'Drosophila melanogaster uncharacterized protein, transcript variant B (CG7606), mRNA',\n", " 'Organism': 'Drosophila melanogaster',\n", " 'Protein Translation': 'MFNIKLIILVALTISMVQSCSVEEPEQVECGCGCGKPQCLSCGSRSCGCGCNPCRCPSSSGCGCKD',\n", " 'Signal Peptide': 'Not Available',\n", " 'pT': 1.5151515151515151,\n", " 'pP': 6.0606060606060606,\n", " 'pS': 13.636363636363637,\n", " 'pC': 19.696969696969695}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from Bio import Entrez, SeqIO, Seq\n", "\n", "def get_gene_info(gene):\n", " Entrez.email = \"akdgljs@gmail.com\"\n", " stream = Entrez.efetch(db=\"nucleotide\", id=gene, retmode=\"text\", rettype=\"gb\")\n", " record = SeqIO.read(stream, \"genbank\")\n", " stream.close()\n", " \n", " gene_info = {\n", " \"Gene ID\": gene,\n", " \"Nom\": \"\",\n", " \"Length\": len(record),\n", " \"Definition\": record.description,\n", " \"Organism\": record.annotations.get(\"organism\", \"Unknown\"),\n", " \"Protein Translation\": \"\",\n", " \"Signal Peptide\": \"Not Available\",\n", " \"pT\": 0,\n", " \"pP\": 0,\n", " \"pS\": 0,\n", " \"pC\": 0\n", " }\n", " \n", " for feature in record.features:\n", " if feature.type == \"CDS\":\n", " gene_info[\"Protein Translation\"] = feature.qualifiers.get(\"translation\", [\"\"])[0]\n", " gene_info[\"Nom\"] = feature.qualifiers.get(\"gene\", [\"\"])[0]\n", " # Check for signal peptide feature\n", " if \"signal_peptide\" in feature.qualifiers:\n", " gene_info[\"Signal Peptide\"] = feature.qualifiers[\"signal_peptide\"][0]\n", "\n", " seq = gene_info[\"Protein Translation\"]\n", " if seq.find(\"T\") != -1:\n", " gene_info[\"pT\"] = 100*seq.count(\"T\")/len(seq)\n", " else:\n", " gene_info[\"pT\"] = 0\n", " if seq.find(\"P\") != -1:\n", " gene_info[\"pP\"] = 100*seq.count(\"P\")/len(seq)\n", " else:\n", " gene_info[\"pP\"] = 0\n", " if seq.find(\"S\") != -1:\n", " gene_info[\"pS\"] = 100*seq.count(\"S\")/len(seq)\n", " else:\n", " gene_info[\"pS\"] = 0\n", " if seq.find(\"C\") != -1:\n", " gene_info[\"pC\"] = 100*seq.count(\"C\")/len(seq)\n", " else:\n", " gene_info[\"pC\"] = 0\n", " \n", " return gene_info\n", "\n", "gene = 'NM_001170166.1'\n", "get_gene_info(gene)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('NNHHHHHHHHHHHHHHHCCCCCC', 23)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_signal(gene):\n", " g1 = gene.split('.')[0]\n", " g2 = gene.split('.')[1]\n", " df = pd.read_table(f'res_signalp_RF/output_{g1}_{g2}_plot.txt',header = 1)\n", " sig = \"\"\n", " for l in df['label']:\n", " if l != \"O\":\n", " sig+=l\n", " else:\n", " break\n", " return sig, len(sig)\n", "\n", "gene = 'NM_057370.2'\n", "get_signal(gene)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gene IDNomDefinitionTPM SG moyenTPM WB moyenTaille% Threonine% Proline% Serine% CysteineSequencePeptide Signal
0NM_057370.2Sgs7Drosophila melanogaster salivary gland secreti...259046.33333319003.666667742.7027036.7567572.70270312.162162MKLIAVTIIACILLIGFSDLALGGACECQPCGPGGKACTGCPEKPQ...(NNHHHHHHHHHHHHHHHCCCCCC, 23)
1NM_079300.3Sgs3Drosophila melanogaster salivary gland secreti...158939.0000009438.40000030742.34527714.657981.6286646.188925MKLTIATALASILLIGSANVANCCDCGCPTTTTTCAPRTTQPPCTT...(NNHHHHHHHHHHHHHHHHCCCCC, 23)
2NM_057371.3Sgs8Drosophila melanogaster salivary gland secreti...63435.0666674852.050000750.06.6666674.013.333333MKLLVVAVIACIMLIGFADPASGCKDCSCVICGPGGEPCPGCSARV...(NNHHHHHHHHHHHHHHHCCCCCC, 23)
3NM_057369.4Sgs4Drosophila melanogaster salivary gland secreti...53900.7666673149.29666728716.37630718.1184675.92334510.452962MRLELLVVLLVGLAALAPSGSTCCKTEPPRCETEPPRCETEPPRCE...(NNHHHHHHHHHHHHHHHCCCC, 21)
4NM_079664.2Sgs5Drosophila melanogaster salivary gland secreti...46692.9000003957.4466671633.0674857.3619639.2024547.361963MFNIKLLLLLLAVSWFHHGQAVQETKIEEKPVSEPEIESEIKNSTS...(NNNHHHHHHHHHHHHHCCCCC, 21)
5NM_001170166.1CG7606Drosophila melanogaster uncharacterized protei...42454.8666673474.873333661.5151526.06060613.63636419.69697MFNIKLIILVALTISMVQSCSVEEPEQVECGCGCGKPQCLSCGSRS...(NNNHHHHHHHHHHHHHCCC, 19)
6NM_169764.3Sgs5bisDrosophila melanogaster Sgs5bis (Sgs5bis), mRNA29669.166667963.8910001424.2253526.3380284.9295777.746479MFNIILLATILVSVAQATIIIKPENPVEETTKCQIYWREHAWALED...(NNNHHHHHHHHHHHCCC, 17)
7NM_140480.3CG12310Drosophila melanogaster uncharacterized protei...25164.7666672807.9800001147.0175444.38596511.4035090.0MRSLILVALLAFLAVGFVAARPAEDEESSAAVVENADEDSTSNDAE...(NNHHHHHHHHHHHHHHHCCC, 20)
8NM_079368.3Eig71EeDrosophila melanogaster Ecdysone-induced gene ...21543.0000001397.25533344519.55056216.6292137.6404497.41573MKLTVVCLVVSFFLLHYAEHSDACLEVIEKALGLQPCNEGGRNEHR...(NNHHHHHHHHHHHHHCCCCCCCC, 23)
9NM_134912.3CG15404Drosophila melanogaster uncharacterized protei...18237.1333331523.3300001224.9180334.9180334.91803310.655738MVAKILLSLLLLAVVTDLVSAQCSQNLCPVVTNSNPRCKGKLQYQC...(NNNNHHHHHHHHHHHHHHCCC, 21)
10NM_164529.3CG31698Drosophila melanogaster uncharacterized protei...17962.9333331614.1533331510.05.9602655.29801313.245033MRAIRVLLIFQLLACLMAVISGCNQGSCHPFIGLNKCNGNGYKEPK...(NNHHHHHHHHHHHHHHHHHCCC, 22)
11NM_142116.2CG14852Drosophila melanogaster uncharacterized protei...12235.700000715.23666717421.2643688.6206916.6666670.574713MRTTTLLLSLGLLVLCFSSYSFAEDDPTDGSTTPTDGSTTPTDGST...(NNHHHHHHHHHHHHHHHCCCCCC, 23)
12NM_140484.3CG13460Drosophila melanogaster uncharacterized protei...11862.6500001474.4833332028.9108919.40594111.8811880.49505MRLFVALVCVSLVAVSSAQLSLRGRLGRSSKVDLAVETPTLLAKTA...(NNHHHHHHHHHHHHHCCC, 18)
13NR_133555.1Drosophila melanogaster 18S ribosomal RNA (18S...10155.4666675336.54000000.00.00.00.0(None, None)
14Dmel_CR34094NoneNone8986.58333358015.500000NoneNoneNoneNoneNoneNoneNone
15NM_137981.2CG11300Drosophila melanogaster uncharacterized protei...5387.010000892.24000015710.19108317.19745214.6496820.636943MRCQFVIAFGLLALIATAYADSPPAAGSPPASSPPAGTPTSPPPAT...(NNHHHHHHHHHHHHHHHCCC, 20)
16NM_168767.2CG32198Drosophila melanogaster uncharacterized protei...4921.9400001810.91000013616.91176511.02941211.7647060.735294MCSKLTLFLGLVALIAAVFALDDPTSPTSPTSPTSPTSPTSPTSPT...(NNNNHHHHHHHHHHHHHCCC, 20)
17NR_133562.1Drosophila melanogaster 28S ribosomal RNA (28S...4861.5333333138.58000000000(None, None)
18NM_001169386.2CG42460Drosophila melanogaster uncharacterized protei...4515.520000170.873000795.0632912.5316467.5949378.860759MKLFSIVFFIFSILGCVSALKNPVCGVKYRGVGLCKMLITKIVYIP...(NNHHHHHHHHHHHHHHCCC, 19)
19NM_169585.2CG8087Drosophila melanogaster uncharacterized protei...3113.860000278.40100014221.8309861.4084519.154933.521127MKATTILAVVSVLTACLLRSSEAVTCTADATVTGCIDCTTNPTDSE...(NNHHHHHHHHHHHHHHHHCCCCC, 23)
20NM_078751.4Sgs1Drosophila melanogaster salivary gland secreti...3057.726667574.350667128646.65629914.0746513.1415243.18818MKVALIFLTVSILLIQVKNVKANYDWDSMQDGPSEEIIPGCGGDTI...(NNHHHHHHHHHHHHHHHCCCCC, 22)
21NM_058027.5eEF1alpha1Drosophila melanogaster eukaryotic translation...2967.3600004139.4433334636.0475165.1835854.1036721.079914MGKEKIHINIVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEA...(, 0)
22NM_137979.3CG13560Drosophila melanogaster uncharacterized protei...2855.8990001063.00700018123.204423.31491723.7569060MRFQFVLAFGLIAILATAYAGGDTTGTGSTGTDTTGTGSTGTGSTG...(NNHHHHHHHHHHHHHHHCCC, 20)
23NM_001014551.3RpL41Drosophila melanogaster ribosomal protein L41 ...2835.2066676150.35333325004.00MRAKWRKKRMRRLKRKRRKMRARSK(, 0)
24NM_135695.2CG6770Drosophila melanogaster uncharacterized protei...2736.7166672908.393333694.3478261.4492757.2463771.449275MSEAHFDEYEHYNFDHDKHIFSGHSGKQRNKREANEHTNHFDPSGH...(, 0)
25NM_130694.3CG14265Drosophila melanogaster uncharacterized protei...2322.400000292.97233311914.2857140.84033629.4117650.840336MKLHWLLLAVVLICALYSATGTSPTTETSTSTESTTATGSSTSTTS...(NNHHHHHHHHHHHHHHHCCCC, 21)
26NM_001298648.1CG15404Drosophila melanogaster uncharacterized protei...2145.663333189.3423331224.9180334.9180334.91803310.655738MVAKILLSLLLLAVVTDLVSAQCSQNLCPVVTNSNPRCKGKLQYQC...(NNNNHHHHHHHHHHHHHHCCC, 21)
27NM_001300371.1SgsfDrosophila melanogaster salivary gland-derived...2142.906667429.6586671622.4691365.5555564.9382721.851852MSARRHSGIILVLCCINLSYSYRIIESNEVPKTCPALNKDIIFEEP...(NNNNNHHHHHHHHHHHHHCCC, 21)
28NM_142114.3JigDrosophila melanogaster Jig (Jig), mRNA2020.47533325.91905715816.4556968.22784812.0253163.164557MRATSIILSGVLVLVACLLRSSEAVTCTADPNVTGCIDCTTSPSDP...(NNHHHHHHHHHHHHHHHHHCCCCC, 24)
29NM_001202313.1CG42798Drosophila melanogaster uncharacterized protei...1816.68100018.725860975.1546393.09278412.3711348.247423MNSLIVIFGFLFISTQIVATTESECPEICLAIYSPVCEEAMINGKL...(NNHHHHHHHHHHHHHHHCCCCCC, 23)
30NM_001104349.2CG34279Drosophila melanogaster uncharacterized protei...1781.160000214.304667993.0303032.0202025.0505059.090909MQIVCTLGNKILLICVLLGIFTIVGGQLFTLSIPKKCHDVCPMGYR...(NNNNNNNNNNHHHHHHHHHHHHHCCC, 26)
31NM_134609.2CG10918Drosophila melanogaster uncharacterized protei...1761.030000315.18800018310.3825146.55737719.1256830MRAYIAITLLALVAVVVAQGGGGRRGGRGGGGGGGRSLGGFGGRGG...(NNHHHHHHHHHHHHHCCC, 18)
32NM_001274927.2CG43679Drosophila melanogaster uncharacterized protei...1710.842333157.651900637.93650812.69841311.1111110MRYLGIIALVAFLAISTVMAHPYANSSTEEGSGSSNRSTIRPVPRW...(NNHHHHHHHHHHHHHCCCCC, 20)
33NM_141689.3RpS29Drosophila melanogaster ribosomal protein S29,...1700.4666673148.786667561.7857141.7857145.3571438.928571MGFATLWYSHPRKYGQGSRCCRACSNRHGLIRKYGLNICRQCFREY...(, 0)
34NM_140419.3CG17362Drosophila melanogaster uncharacterized protei...1698.270000175.8336671535.8823532.61437910.4575160MKLLLVLALAVFVAHVAVAQTTDSSDDGDYSYDYADDNDTAGSSED...(NNHHHHHHHHHHHHHHCCC, 19)
35NM_057282.4RpLP1Drosophila melanogaster ribosomal protein LP1,...1659.9766673641.6433331123.5714294.4642865.3571430.892857MSTKAELACVYASLILVDDDVAVTGEKINTILKAANVEVEPYWPGL...(, 0)
36Dmel_CR45851NoneNone1566.547667659.653000NoneNoneNoneNoneNoneNoneNone
37NM_080275.3nolDrosophila melanogaster no optic lobe (nol), mRNA1423.46566780.75623313020.76923113.07692313.0769230MRSQIIILFAIVAFVSSAWAVTDPATPPATDPTTPPATDPTTPPAT...(NNHHHHHHHHHHHHHCCCCC, 20)
38NM_001274926.2CG43679Drosophila melanogaster uncharacterized protei...1393.44100029.800633637.93650812.69841311.1111110MRYLGIIALVAFLAISTVMAHPYANSSTEEGSGSSNRSTIRPVPRW...(NNHHHHHHHHHHHHHCCCCC, 20)
39NM_080077.2Lsp2Drosophila melanogaster larval serum protein 2...1379.47666714837.0000007013.7089873.8516414.5649070.285307MKSFTVIALAAVALLATLGQAKHLDSKVADKDFLMKQKFMYQILQH...(NNHHHHHHHHHHHHHHHHCCC, 21)
40NM_080321.1ng3Drosophila melanogaster new glue 3 (ng3), mRNA1296.626333594.98566714620.5479451.36986322.602741.369863MRYSCVLLLLATVACLLIPQTGGSTATTTSTSASATTTTSASATTT...(NNHHHHHHHHHHHHHHHHCCCCC, 23)
41NM_079879.4RpS3ADrosophila melanogaster ribosomal protein S3A,...1186.0066672098.2066672684.1044782.9850755.5970151.492537MAVGKNKGLSKGGKKGGKKKVVDPFSRKDWYDVKAPNMFQTRQIGK...(, 0)
42NM_057329.4Pig1Drosophila melanogaster Pre-intermoult gene 1 ...1150.050000224.3173331728.1395351.16279117.441860.581395MKLTKLWLLFVCLGLFVTLVVSADTDSDADSDSSADSDSSADSDEN...(NNNNNHHHHHHHHHHHHHHCCC, 22)
43NM_057966.5RpL39Drosophila melanogaster ribosomal protein L39 ...1130.6820002799.083333515.8823531.9607843.9215690MAAHKSFRIKQKLAKKLKQNRSVPQWVRLRTGNTIRYNAKRRHWRR...(, 0)
44NR_037753.2Drosophila melanogaster signal recognition par...1061.853000255.29966700000(None, None)
45NR_001992.2Drosophila melanogaster signal recognition par...1061.853000255.29966700000(None, None)
46NM_001299481.1RpS23Drosophila melanogaster ribosomal protein S23,...1006.7953332419.1433331432.0979024.1958043.4965030.699301MGKPRGLRTARKHVNHRRDQRWADKDYKKAHLGTRWKANPFGGASH...(, 0)
47NM_001272632.2RpL37-1Drosophila melanogaster ribosomal protein L37-...1001.3176672396.933333939.6774192.1505388.6021514.301075MTKGTSSFGKRHNKTHTLCRRCGRSSYHIQKSTCAQCGYPAAKLRS...(, 0)
48NM_137292.4RpS15Drosophila melanogaster ribosomal protein S15,...981.6073332364.1933331484.0540546.7567573.3783780MADQVDENLKKKRTFKKFTYRGVDLDQLLDMPNNQLVELMHSRARR...(, 0)
49NM_166383.4RpS18Drosophila melanogaster ribosomal protein S18,...966.7950001909.8266671526.5789471.9736845.2631580.657895MSLVIPEKFQHILRIMNTNIDGKRKVGIAMTAIKGVGRRYSNIVLK...(, 0)
\n", "
" ], "text/plain": [ " Gene ID Nom \\\n", "0 NM_057370.2 Sgs7 \n", "1 NM_079300.3 Sgs3 \n", "2 NM_057371.3 Sgs8 \n", "3 NM_057369.4 Sgs4 \n", "4 NM_079664.2 Sgs5 \n", "5 NM_001170166.1 CG7606 \n", "6 NM_169764.3 Sgs5bis \n", "7 NM_140480.3 CG12310 \n", "8 NM_079368.3 Eig71Ee \n", "9 NM_134912.3 CG15404 \n", "10 NM_164529.3 CG31698 \n", "11 NM_142116.2 CG14852 \n", "12 NM_140484.3 CG13460 \n", "13 NR_133555.1 \n", "14 Dmel_CR34094 None \n", "15 NM_137981.2 CG11300 \n", "16 NM_168767.2 CG32198 \n", "17 NR_133562.1 \n", "18 NM_001169386.2 CG42460 \n", "19 NM_169585.2 CG8087 \n", "20 NM_078751.4 Sgs1 \n", "21 NM_058027.5 eEF1alpha1 \n", "22 NM_137979.3 CG13560 \n", "23 NM_001014551.3 RpL41 \n", "24 NM_135695.2 CG6770 \n", "25 NM_130694.3 CG14265 \n", "26 NM_001298648.1 CG15404 \n", "27 NM_001300371.1 Sgsf \n", "28 NM_142114.3 Jig \n", "29 NM_001202313.1 CG42798 \n", "30 NM_001104349.2 CG34279 \n", "31 NM_134609.2 CG10918 \n", "32 NM_001274927.2 CG43679 \n", "33 NM_141689.3 RpS29 \n", "34 NM_140419.3 CG17362 \n", "35 NM_057282.4 RpLP1 \n", "36 Dmel_CR45851 None \n", "37 NM_080275.3 nol \n", "38 NM_001274926.2 CG43679 \n", "39 NM_080077.2 Lsp2 \n", "40 NM_080321.1 ng3 \n", "41 NM_079879.4 RpS3A \n", "42 NM_057329.4 Pig1 \n", "43 NM_057966.5 RpL39 \n", "44 NR_037753.2 \n", "45 NR_001992.2 \n", "46 NM_001299481.1 RpS23 \n", "47 NM_001272632.2 RpL37-1 \n", "48 NM_137292.4 RpS15 \n", "49 NM_166383.4 RpS18 \n", "\n", " Definition TPM SG moyen \\\n", "0 Drosophila melanogaster salivary gland secreti... 259046.333333 \n", "1 Drosophila melanogaster salivary gland secreti... 158939.000000 \n", "2 Drosophila melanogaster salivary gland secreti... 63435.066667 \n", "3 Drosophila melanogaster salivary gland secreti... 53900.766667 \n", "4 Drosophila melanogaster salivary gland secreti... 46692.900000 \n", "5 Drosophila melanogaster uncharacterized protei... 42454.866667 \n", "6 Drosophila melanogaster Sgs5bis (Sgs5bis), mRNA 29669.166667 \n", "7 Drosophila melanogaster uncharacterized protei... 25164.766667 \n", "8 Drosophila melanogaster Ecdysone-induced gene ... 21543.000000 \n", "9 Drosophila melanogaster uncharacterized protei... 18237.133333 \n", "10 Drosophila melanogaster uncharacterized protei... 17962.933333 \n", "11 Drosophila melanogaster uncharacterized protei... 12235.700000 \n", "12 Drosophila melanogaster uncharacterized protei... 11862.650000 \n", "13 Drosophila melanogaster 18S ribosomal RNA (18S... 10155.466667 \n", "14 None 8986.583333 \n", "15 Drosophila melanogaster uncharacterized protei... 5387.010000 \n", "16 Drosophila melanogaster uncharacterized protei... 4921.940000 \n", "17 Drosophila melanogaster 28S ribosomal RNA (28S... 4861.533333 \n", "18 Drosophila melanogaster uncharacterized protei... 4515.520000 \n", "19 Drosophila melanogaster uncharacterized protei... 3113.860000 \n", "20 Drosophila melanogaster salivary gland secreti... 3057.726667 \n", "21 Drosophila melanogaster eukaryotic translation... 2967.360000 \n", "22 Drosophila melanogaster uncharacterized protei... 2855.899000 \n", "23 Drosophila melanogaster ribosomal protein L41 ... 2835.206667 \n", "24 Drosophila melanogaster uncharacterized protei... 2736.716667 \n", "25 Drosophila melanogaster uncharacterized protei... 2322.400000 \n", "26 Drosophila melanogaster uncharacterized protei... 2145.663333 \n", "27 Drosophila melanogaster salivary gland-derived... 2142.906667 \n", "28 Drosophila melanogaster Jig (Jig), mRNA 2020.475333 \n", "29 Drosophila melanogaster uncharacterized protei... 1816.681000 \n", "30 Drosophila melanogaster uncharacterized protei... 1781.160000 \n", "31 Drosophila melanogaster uncharacterized protei... 1761.030000 \n", "32 Drosophila melanogaster uncharacterized protei... 1710.842333 \n", "33 Drosophila melanogaster ribosomal protein S29,... 1700.466667 \n", "34 Drosophila melanogaster uncharacterized protei... 1698.270000 \n", "35 Drosophila melanogaster ribosomal protein LP1,... 1659.976667 \n", "36 None 1566.547667 \n", "37 Drosophila melanogaster no optic lobe (nol), mRNA 1423.465667 \n", "38 Drosophila melanogaster uncharacterized protei... 1393.441000 \n", "39 Drosophila melanogaster larval serum protein 2... 1379.476667 \n", "40 Drosophila melanogaster new glue 3 (ng3), mRNA 1296.626333 \n", "41 Drosophila melanogaster ribosomal protein S3A,... 1186.006667 \n", "42 Drosophila melanogaster Pre-intermoult gene 1 ... 1150.050000 \n", "43 Drosophila melanogaster ribosomal protein L39 ... 1130.682000 \n", "44 Drosophila melanogaster signal recognition par... 1061.853000 \n", "45 Drosophila melanogaster signal recognition par... 1061.853000 \n", "46 Drosophila melanogaster ribosomal protein S23,... 1006.795333 \n", "47 Drosophila melanogaster ribosomal protein L37-... 1001.317667 \n", "48 Drosophila melanogaster ribosomal protein S15,... 981.607333 \n", "49 Drosophila melanogaster ribosomal protein S18,... 966.795000 \n", "\n", " TPM WB moyen Taille % Threonine % Proline % Serine % Cysteine \\\n", "0 19003.666667 74 2.702703 6.756757 2.702703 12.162162 \n", "1 9438.400000 307 42.345277 14.65798 1.628664 6.188925 \n", "2 4852.050000 75 0.0 6.666667 4.0 13.333333 \n", "3 3149.296667 287 16.376307 18.118467 5.923345 10.452962 \n", "4 3957.446667 163 3.067485 7.361963 9.202454 7.361963 \n", "5 3474.873333 66 1.515152 6.060606 13.636364 19.69697 \n", "6 963.891000 142 4.225352 6.338028 4.929577 7.746479 \n", "7 2807.980000 114 7.017544 4.385965 11.403509 0.0 \n", "8 1397.255333 445 19.550562 16.629213 7.640449 7.41573 \n", "9 1523.330000 122 4.918033 4.918033 4.918033 10.655738 \n", "10 1614.153333 151 0.0 5.960265 5.298013 13.245033 \n", "11 715.236667 174 21.264368 8.62069 16.666667 0.574713 \n", "12 1474.483333 202 8.910891 9.405941 11.881188 0.49505 \n", "13 5336.540000 0 0.0 0.0 0.0 0.0 \n", "14 58015.500000 None None None None None \n", "15 892.240000 157 10.191083 17.197452 14.649682 0.636943 \n", "16 1810.910000 136 16.911765 11.029412 11.764706 0.735294 \n", "17 3138.580000 0 0 0 0 0 \n", "18 170.873000 79 5.063291 2.531646 7.594937 8.860759 \n", "19 278.401000 142 21.830986 1.408451 9.15493 3.521127 \n", "20 574.350667 1286 46.656299 14.07465 13.141524 3.18818 \n", "21 4139.443333 463 6.047516 5.183585 4.103672 1.079914 \n", "22 1063.007000 181 23.20442 3.314917 23.756906 0 \n", "23 6150.353333 25 0 0 4.0 0 \n", "24 2908.393333 69 4.347826 1.449275 7.246377 1.449275 \n", "25 292.972333 119 14.285714 0.840336 29.411765 0.840336 \n", "26 189.342333 122 4.918033 4.918033 4.918033 10.655738 \n", "27 429.658667 162 2.469136 5.555556 4.938272 1.851852 \n", "28 25.919057 158 16.455696 8.227848 12.025316 3.164557 \n", "29 18.725860 97 5.154639 3.092784 12.371134 8.247423 \n", "30 214.304667 99 3.030303 2.020202 5.050505 9.090909 \n", "31 315.188000 183 10.382514 6.557377 19.125683 0 \n", "32 157.651900 63 7.936508 12.698413 11.111111 0 \n", "33 3148.786667 56 1.785714 1.785714 5.357143 8.928571 \n", "34 175.833667 153 5.882353 2.614379 10.457516 0 \n", "35 3641.643333 112 3.571429 4.464286 5.357143 0.892857 \n", "36 659.653000 None None None None None \n", "37 80.756233 130 20.769231 13.076923 13.076923 0 \n", "38 29.800633 63 7.936508 12.698413 11.111111 0 \n", "39 14837.000000 701 3.708987 3.851641 4.564907 0.285307 \n", "40 594.985667 146 20.547945 1.369863 22.60274 1.369863 \n", "41 2098.206667 268 4.104478 2.985075 5.597015 1.492537 \n", "42 224.317333 172 8.139535 1.162791 17.44186 0.581395 \n", "43 2799.083333 51 5.882353 1.960784 3.921569 0 \n", "44 255.299667 0 0 0 0 0 \n", "45 255.299667 0 0 0 0 0 \n", "46 2419.143333 143 2.097902 4.195804 3.496503 0.699301 \n", "47 2396.933333 93 9.677419 2.150538 8.602151 4.301075 \n", "48 2364.193333 148 4.054054 6.756757 3.378378 0 \n", "49 1909.826667 152 6.578947 1.973684 5.263158 0.657895 \n", "\n", " Sequence \\\n", "0 MKLIAVTIIACILLIGFSDLALGGACECQPCGPGGKACTGCPEKPQ... \n", "1 MKLTIATALASILLIGSANVANCCDCGCPTTTTTCAPRTTQPPCTT... \n", "2 MKLLVVAVIACIMLIGFADPASGCKDCSCVICGPGGEPCPGCSARV... \n", "3 MRLELLVVLLVGLAALAPSGSTCCKTEPPRCETEPPRCETEPPRCE... \n", "4 MFNIKLLLLLLAVSWFHHGQAVQETKIEEKPVSEPEIESEIKNSTS... \n", "5 MFNIKLIILVALTISMVQSCSVEEPEQVECGCGCGKPQCLSCGSRS... \n", "6 MFNIILLATILVSVAQATIIIKPENPVEETTKCQIYWREHAWALED... \n", "7 MRSLILVALLAFLAVGFVAARPAEDEESSAAVVENADEDSTSNDAE... \n", "8 MKLTVVCLVVSFFLLHYAEHSDACLEVIEKALGLQPCNEGGRNEHR... \n", "9 MVAKILLSLLLLAVVTDLVSAQCSQNLCPVVTNSNPRCKGKLQYQC... \n", "10 MRAIRVLLIFQLLACLMAVISGCNQGSCHPFIGLNKCNGNGYKEPK... \n", "11 MRTTTLLLSLGLLVLCFSSYSFAEDDPTDGSTTPTDGSTTPTDGST... \n", "12 MRLFVALVCVSLVAVSSAQLSLRGRLGRSSKVDLAVETPTLLAKTA... \n", "13 \n", "14 None \n", "15 MRCQFVIAFGLLALIATAYADSPPAAGSPPASSPPAGTPTSPPPAT... \n", "16 MCSKLTLFLGLVALIAAVFALDDPTSPTSPTSPTSPTSPTSPTSPT... \n", "17 \n", "18 MKLFSIVFFIFSILGCVSALKNPVCGVKYRGVGLCKMLITKIVYIP... \n", "19 MKATTILAVVSVLTACLLRSSEAVTCTADATVTGCIDCTTNPTDSE... \n", "20 MKVALIFLTVSILLIQVKNVKANYDWDSMQDGPSEEIIPGCGGDTI... \n", "21 MGKEKIHINIVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEA... \n", "22 MRFQFVLAFGLIAILATAYAGGDTTGTGSTGTDTTGTGSTGTGSTG... \n", "23 MRAKWRKKRMRRLKRKRRKMRARSK \n", "24 MSEAHFDEYEHYNFDHDKHIFSGHSGKQRNKREANEHTNHFDPSGH... \n", "25 MKLHWLLLAVVLICALYSATGTSPTTETSTSTESTTATGSSTSTTS... \n", "26 MVAKILLSLLLLAVVTDLVSAQCSQNLCPVVTNSNPRCKGKLQYQC... \n", "27 MSARRHSGIILVLCCINLSYSYRIIESNEVPKTCPALNKDIIFEEP... \n", "28 MRATSIILSGVLVLVACLLRSSEAVTCTADPNVTGCIDCTTSPSDP... \n", "29 MNSLIVIFGFLFISTQIVATTESECPEICLAIYSPVCEEAMINGKL... \n", "30 MQIVCTLGNKILLICVLLGIFTIVGGQLFTLSIPKKCHDVCPMGYR... \n", "31 MRAYIAITLLALVAVVVAQGGGGRRGGRGGGGGGGRSLGGFGGRGG... \n", "32 MRYLGIIALVAFLAISTVMAHPYANSSTEEGSGSSNRSTIRPVPRW... \n", "33 MGFATLWYSHPRKYGQGSRCCRACSNRHGLIRKYGLNICRQCFREY... \n", "34 MKLLLVLALAVFVAHVAVAQTTDSSDDGDYSYDYADDNDTAGSSED... \n", "35 MSTKAELACVYASLILVDDDVAVTGEKINTILKAANVEVEPYWPGL... \n", "36 None \n", "37 MRSQIIILFAIVAFVSSAWAVTDPATPPATDPTTPPATDPTTPPAT... \n", "38 MRYLGIIALVAFLAISTVMAHPYANSSTEEGSGSSNRSTIRPVPRW... \n", "39 MKSFTVIALAAVALLATLGQAKHLDSKVADKDFLMKQKFMYQILQH... \n", "40 MRYSCVLLLLATVACLLIPQTGGSTATTTSTSASATTTTSASATTT... \n", "41 MAVGKNKGLSKGGKKGGKKKVVDPFSRKDWYDVKAPNMFQTRQIGK... \n", "42 MKLTKLWLLFVCLGLFVTLVVSADTDSDADSDSSADSDSSADSDEN... \n", "43 MAAHKSFRIKQKLAKKLKQNRSVPQWVRLRTGNTIRYNAKRRHWRR... \n", "44 \n", "45 \n", "46 MGKPRGLRTARKHVNHRRDQRWADKDYKKAHLGTRWKANPFGGASH... \n", "47 MTKGTSSFGKRHNKTHTLCRRCGRSSYHIQKSTCAQCGYPAAKLRS... \n", "48 MADQVDENLKKKRTFKKFTYRGVDLDQLLDMPNNQLVELMHSRARR... \n", "49 MSLVIPEKFQHILRIMNTNIDGKRKVGIAMTAIKGVGRRYSNIVLK... \n", "\n", " Peptide Signal \n", "0 (NNHHHHHHHHHHHHHHHCCCCCC, 23) \n", "1 (NNHHHHHHHHHHHHHHHHCCCCC, 23) \n", "2 (NNHHHHHHHHHHHHHHHCCCCCC, 23) \n", "3 (NNHHHHHHHHHHHHHHHCCCC, 21) \n", "4 (NNNHHHHHHHHHHHHHCCCCC, 21) \n", "5 (NNNHHHHHHHHHHHHHCCC, 19) \n", "6 (NNNHHHHHHHHHHHCCC, 17) \n", "7 (NNHHHHHHHHHHHHHHHCCC, 20) \n", "8 (NNHHHHHHHHHHHHHCCCCCCCC, 23) \n", "9 (NNNNHHHHHHHHHHHHHHCCC, 21) \n", "10 (NNHHHHHHHHHHHHHHHHHCCC, 22) \n", "11 (NNHHHHHHHHHHHHHHHCCCCCC, 23) \n", "12 (NNHHHHHHHHHHHHHCCC, 18) \n", "13 (None, None) \n", "14 None \n", "15 (NNHHHHHHHHHHHHHHHCCC, 20) \n", "16 (NNNNHHHHHHHHHHHHHCCC, 20) \n", "17 (None, None) \n", "18 (NNHHHHHHHHHHHHHHCCC, 19) \n", "19 (NNHHHHHHHHHHHHHHHHCCCCC, 23) \n", "20 (NNHHHHHHHHHHHHHHHCCCCC, 22) \n", "21 (, 0) \n", "22 (NNHHHHHHHHHHHHHHHCCC, 20) \n", "23 (, 0) \n", "24 (, 0) \n", "25 (NNHHHHHHHHHHHHHHHCCCC, 21) \n", "26 (NNNNHHHHHHHHHHHHHHCCC, 21) \n", "27 (NNNNNHHHHHHHHHHHHHCCC, 21) \n", "28 (NNHHHHHHHHHHHHHHHHHCCCCC, 24) \n", "29 (NNHHHHHHHHHHHHHHHCCCCCC, 23) \n", "30 (NNNNNNNNNNHHHHHHHHHHHHHCCC, 26) \n", "31 (NNHHHHHHHHHHHHHCCC, 18) \n", "32 (NNHHHHHHHHHHHHHCCCCC, 20) \n", "33 (, 0) \n", "34 (NNHHHHHHHHHHHHHHCCC, 19) \n", "35 (, 0) \n", "36 None \n", "37 (NNHHHHHHHHHHHHHCCCCC, 20) \n", "38 (NNHHHHHHHHHHHHHCCCCC, 20) \n", "39 (NNHHHHHHHHHHHHHHHHCCC, 21) \n", "40 (NNHHHHHHHHHHHHHHHHCCCCC, 23) \n", "41 (, 0) \n", "42 (NNNNNHHHHHHHHHHHHHHCCC, 22) \n", "43 (, 0) \n", "44 (None, None) \n", "45 (None, None) \n", "46 (, 0) \n", "47 (, 0) \n", "48 (, 0) \n", "49 (, 0) " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_final = pd.DataFrame()\n", "for gene_id in new_df.head(50)['product_accession']:\n", " if gene_id.startswith('Dmel'):\n", " new_row ={\n", " \"Gene ID\": gene_id,\n", " \"Nom\": \"None\",\n", " \"Definition\": \"None\",\n", " \"TPM SG moyen\": new_df[new_df['product_accession'] == gene_id]['tpm_SG_moyenne'].values[0],\n", " \"TPM WB moyen\": new_df[new_df['product_accession'] == gene_id]['tpm_WB_moyenne'].values[0],\n", " \"Taille\": \"None\",\n", " \"% Threonine\": \"None\",\n", " \"% Proline\": \"None\",\n", " \"% Serine\": \"None\",\n", " \"% Cysteine\": \"None\",\n", " \"Sequence\": \"None\",\n", " \"Peptide Signal\": \"None\"\n", " }\n", " else:\n", " gene_info = get_gene_info(gene_id)\n", " sig, taille = \"None\", \"None\"\n", " if gene_id.startswith('NM'):\n", " sig, taille = get_signal(gene_id)\n", " new_row = {\n", " \"Gene ID\": gene_info[\"Gene ID\"],\n", " \"Nom\": gene_info[\"Nom\"],\n", " \"Definition\": gene_info[\"Definition\"],\n", " \"TPM SG moyen\": new_df[new_df['product_accession'] == gene_id]['tpm_SG_moyenne'].values[0],\n", " \"TPM WB moyen\": new_df[new_df['product_accession'] == gene_id]['tpm_WB_moyenne'].values[0],\n", " \"Taille\": len(gene_info[\"Protein Translation\"]),\n", " \"% Threonine\": gene_info[\"pT\"],\n", " \"% Proline\": gene_info[\"pP\"],\n", " \"% Serine\": gene_info[\"pS\"],\n", " \"% Cysteine\": gene_info[\"pC\"],\n", " \"Sequence\": gene_info[\"Protein Translation\"],\n", " \"Peptide Signal\": (sig, taille)\n", " }\n", " df_final = pd.concat([df_final, pd.DataFrame([new_row])], ignore_index=True)\n", "\n", "df_final.to_csv('final_table_RF.csv', index=False)\n", "display(df_final)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "with open('prot50_FR.fasta','a') as f:\n", " for gene_id in new_df.head(50)['product_accession']:\n", " if gene_id.startswith('Dmel'):\n", " continue\n", " gene_info = get_gene_info(gene_id)\n", " f.write(f'>{gene_info[\"Gene ID\"]}\\n{gene_info[\"Protein Translation\"]}\\n')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }