Skip to content

Commit

Permalink
getting phenopackets
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Nov 26, 2023
1 parent 4e2bd54 commit d597ab9
Showing 1 changed file with 136 additions and 94 deletions.
230 changes: 136 additions & 94 deletions GetPhenopackets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,59 +70,59 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>WWOX</td>\n",
" <td>notebooks/WWOX/phenopackets</td>\n",
" <td>notebooks/WWOX/phenopackets/PMID_17470496_2.json</td>\n",
" <td>PMID_17470496_2</td>\n",
" <td>Spinocerebellar ataxia, autosomal recessive 12...</td>\n",
" <td>9</td>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>notebooks/SMARCB1/phenopackets/PMID_25168959_Y...</td>\n",
" <td>PMID_25168959_Y22</td>\n",
" <td>Coffin-Siris syndrome 3 (OMIM:614608)</td>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>WWOX</td>\n",
" <td>notebooks/WWOX/phenopackets</td>\n",
" <td>notebooks/WWOX/phenopackets/PMID_17470496_3.json</td>\n",
" <td>PMID_17470496_3</td>\n",
" <td>Spinocerebellar ataxia, autosomal recessive 12...</td>\n",
" <td>9</td>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>notebooks/SMARCB1/phenopackets/PMID_34101994_I...</td>\n",
" <td>PMID_34101994__II.2</td>\n",
" <td>Rhabdoid tumor predisposition syndrome-1 (OMIM...</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WWOX</td>\n",
" <td>notebooks/WWOX/phenopackets</td>\n",
" <td>notebooks/WWOX/phenopackets/PMID_17470496_0.json</td>\n",
" <td>PMID_17470496_0</td>\n",
" <td>Spinocerebellar ataxia, autosomal recessive 12...</td>\n",
" <td>9</td>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>notebooks/SMARCB1/phenopackets/PMID_25168959_Y...</td>\n",
" <td>PMID_25168959_Y4</td>\n",
" <td>Coffin-Siris syndrome 3 (OMIM:614608)</td>\n",
" <td>28</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>WWOX</td>\n",
" <td>notebooks/WWOX/phenopackets</td>\n",
" <td>notebooks/WWOX/phenopackets/PMID_17470496_1.json</td>\n",
" <td>PMID_17470496_1</td>\n",
" <td>Spinocerebellar ataxia, autosomal recessive 12...</td>\n",
" <td>9</td>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>notebooks/SMARCB1/phenopackets/PMID_25168959_K...</td>\n",
" <td>PMID_25168959_K2588</td>\n",
" <td>Coffin-Siris syndrome 3 (OMIM:614608)</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ANKRD11</td>\n",
" <td>notebooks/ANKRD11/phenopackets</td>\n",
" <td>notebooks/ANKRD11/phenopackets/PMID_36446582_N...</td>\n",
" <td>PMID_36446582_Novara,_2017_P2</td>\n",
" <td>KBG syndrome (OMIM:148050)</td>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>notebooks/SMARCB1/phenopackets/PMID_34101994_I...</td>\n",
" <td>PMID_34101994__III.1</td>\n",
" <td>Rhabdoid tumor predisposition syndrome-1 (OMIM...</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
Expand All @@ -134,32 +134,25 @@
],
"text/plain": [
" cohort directory \\\n",
"0 WWOX notebooks/WWOX/phenopackets \n",
"1 WWOX notebooks/WWOX/phenopackets \n",
"2 WWOX notebooks/WWOX/phenopackets \n",
"3 WWOX notebooks/WWOX/phenopackets \n",
"4 ANKRD11 notebooks/ANKRD11/phenopackets \n",
"\n",
" filename \\\n",
"0 notebooks/WWOX/phenopackets/PMID_17470496_2.json \n",
"1 notebooks/WWOX/phenopackets/PMID_17470496_3.json \n",
"2 notebooks/WWOX/phenopackets/PMID_17470496_0.json \n",
"3 notebooks/WWOX/phenopackets/PMID_17470496_1.json \n",
"4 notebooks/ANKRD11/phenopackets/PMID_36446582_N... \n",
"0 SMARCB1 notebooks/SMARCB1/phenopackets \n",
"1 SMARCB1 notebooks/SMARCB1/phenopackets \n",
"2 SMARCB1 notebooks/SMARCB1/phenopackets \n",
"3 SMARCB1 notebooks/SMARCB1/phenopackets \n",
"4 SMARCB1 notebooks/SMARCB1/phenopackets \n",
"\n",
" phenopacket.id \\\n",
"0 PMID_17470496_2 \n",
"1 PMID_17470496_3 \n",
"2 PMID_17470496_0 \n",
"3 PMID_17470496_1 \n",
"4 PMID_36446582_Novara,_2017_P2 \n",
" filename phenopacket.id \\\n",
"0 notebooks/SMARCB1/phenopackets/PMID_25168959_Y... PMID_25168959_Y22 \n",
"1 notebooks/SMARCB1/phenopackets/PMID_34101994_I... PMID_34101994__II.2 \n",
"2 notebooks/SMARCB1/phenopackets/PMID_25168959_Y... PMID_25168959_Y4 \n",
"3 notebooks/SMARCB1/phenopackets/PMID_25168959_K... PMID_25168959_K2588 \n",
"4 notebooks/SMARCB1/phenopackets/PMID_34101994_I... PMID_34101994__III.1 \n",
"\n",
" disease n_hpo n_var n_alleles \\\n",
"0 Spinocerebellar ataxia, autosomal recessive 12... 9 1 2 \n",
"1 Spinocerebellar ataxia, autosomal recessive 12... 9 1 2 \n",
"2 Spinocerebellar ataxia, autosomal recessive 12... 9 1 2 \n",
"3 Spinocerebellar ataxia, autosomal recessive 12... 9 1 2 \n",
"4 KBG syndrome (OMIM:148050) 5 1 1 \n",
"0 Coffin-Siris syndrome 3 (OMIM:614608) 29 1 1 \n",
"1 Rhabdoid tumor predisposition syndrome-1 (OMIM... 4 1 1 \n",
"2 Coffin-Siris syndrome 3 (OMIM:614608) 28 1 1 \n",
"3 Coffin-Siris syndrome 3 (OMIM:614608) 25 1 1 \n",
"4 Rhabdoid tumor predisposition syndrome-1 (OMIM... 5 1 1 \n",
"\n",
" n_encounters \n",
"0 1 \n",
Expand Down Expand Up @@ -215,13 +208,13 @@
" <th>0</th>\n",
" <td>SMARCB1</td>\n",
" <td>notebooks/SMARCB1/phenopackets</td>\n",
" <td>0</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>WWOX</td>\n",
" <td>notebooks/WWOX/phenopackets</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
Expand All @@ -231,111 +224,160 @@
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>OFD1</td>\n",
" <td>notebooks/OFD1/phenopackets</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>PPP2R1A</td>\n",
" <td>notebooks/PPP2R1A/phenopackets</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>TRAF7</td>\n",
" <td>notebooks/TRAF7/phenopackets</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>GLI3</td>\n",
" <td>notebooks/GLI3/phenopackets</td>\n",
" <td>77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>7</th>\n",
" <td>SETD2</td>\n",
" <td>notebooks/SETD2/phenopackets</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>8</th>\n",
" <td>ZSWIM6</td>\n",
" <td>notebooks/ZSWIM6/phenopackets</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <th>9</th>\n",
" <td>ANKH</td>\n",
" <td>notebooks/ANKH/phenopackets</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <th>10</th>\n",
" <td>KDM6B</td>\n",
" <td>notebooks/KDM6B/phenopackets</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <th>11</th>\n",
" <td>SMARCC2</td>\n",
" <td>notebooks/SMARCC2/phenopackets</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <th>12</th>\n",
" <td>MAPK8IP3</td>\n",
" <td>notebooks/MAPK8IP3/phenopackets</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <th>13</th>\n",
" <td>FBN1</td>\n",
" <td>notebooks/FBN1/phenopackets</td>\n",
" <td>103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <th>14</th>\n",
" <td>WFS1</td>\n",
" <td>notebooks/WFS1/phenopackets</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>SLC45A2</td>\n",
" <td>notebooks/SLC45A2/phenopackets</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>ERI1</td>\n",
" <td>notebooks/ERI1/phenopackets</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <th>17</th>\n",
" <td>SUOX</td>\n",
" <td>notebooks/SUOX/phenopackets</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <th>18</th>\n",
" <td>SON</td>\n",
" <td>notebooks/SON/phenopackets</td>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <th>19</th>\n",
" <td>EZH1</td>\n",
" <td>notebooks/EZH1/phenopackets</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <th>20</th>\n",
" <td>SCN2A</td>\n",
" <td>notebooks/SCN2A/phenopackets</td>\n",
" <td>396</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>LIRICAL</td>\n",
" <td>notebooks/LIRICAL/v2phenopackets</td>\n",
" <td>384</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <th>22</th>\n",
" <td>COL3A1</td>\n",
" <td>notebooks/COL3A1/phenopackets</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>STXBP1</td>\n",
" <td>notebooks/STXBP1/phenopackets</td>\n",
" <td>463</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Cohort Directory Count\n",
"0 SMARCB1 notebooks/SMARCB1/phenopackets 0\n",
"1 WWOX notebooks/WWOX/phenopackets 4\n",
"0 SMARCB1 notebooks/SMARCB1/phenopackets 17\n",
"1 WWOX notebooks/WWOX/phenopackets 9\n",
"2 ANKRD11 notebooks/ANKRD11/phenopackets 328\n",
"3 GLI3 notebooks/GLI3/phenopackets 77\n",
"4 SETD2 notebooks/SETD2/phenopackets 14\n",
"5 ZSWIM6 notebooks/ZSWIM6/phenopackets 0\n",
"6 ANKH notebooks/ANKH/phenopackets 7\n",
"7 KDM6B notebooks/KDM6B/phenopackets 73\n",
"8 SMARCC2 notebooks/SMARCC2/phenopackets 0\n",
"9 MAPK8IP3 notebooks/MAPK8IP3/phenopackets 20\n",
"10 FBN1 notebooks/FBN1/phenopackets 103\n",
"11 ERI1 notebooks/ERI1/phenopackets 10\n",
"12 SUOX notebooks/SUOX/phenopackets 0\n",
"13 SON notebooks/SON/phenopackets 0\n",
"14 EZH1 notebooks/EZH1/phenopackets 19\n",
"15 LIRICAL notebooks/LIRICAL/v2phenopackets 384\n",
"16 COL3A1 notebooks/COL3A1/phenopackets 39"
"3 OFD1 notebooks/OFD1/phenopackets 25\n",
"4 PPP2R1A notebooks/PPP2R1A/phenopackets 60\n",
"5 TRAF7 notebooks/TRAF7/phenopackets 45\n",
"6 GLI3 notebooks/GLI3/phenopackets 77\n",
"7 SETD2 notebooks/SETD2/phenopackets 29\n",
"8 ZSWIM6 notebooks/ZSWIM6/phenopackets 16\n",
"9 ANKH notebooks/ANKH/phenopackets 7\n",
"10 KDM6B notebooks/KDM6B/phenopackets 73\n",
"11 SMARCC2 notebooks/SMARCC2/phenopackets 60\n",
"12 MAPK8IP3 notebooks/MAPK8IP3/phenopackets 20\n",
"13 FBN1 notebooks/FBN1/phenopackets 103\n",
"14 WFS1 notebooks/WFS1/phenopackets 16\n",
"15 SLC45A2 notebooks/SLC45A2/phenopackets 30\n",
"16 ERI1 notebooks/ERI1/phenopackets 10\n",
"17 SUOX notebooks/SUOX/phenopackets 35\n",
"18 SON notebooks/SON/phenopackets 52\n",
"19 EZH1 notebooks/EZH1/phenopackets 19\n",
"20 SCN2A notebooks/SCN2A/phenopackets 396\n",
"21 LIRICAL notebooks/LIRICAL/v2phenopackets 384\n",
"22 COL3A1 notebooks/COL3A1/phenopackets 39\n",
"23 STXBP1 notebooks/STXBP1/phenopackets 463"
]
},
"execution_count": 4,
Expand Down Expand Up @@ -365,7 +407,7 @@
"output_type": "stream",
"text": [
"Adding archive suffix to outfilename\n",
"Added 177 files to tar archive at /Users/robinp/GIT/phenopacket-store/all_phenopackets.tgz\n"
"Added 2313 files to tar archive at /Users/robinp/GIT/phenopacket-store/all_phenopackets.tgz\n"
]
}
],
Expand All @@ -389,7 +431,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Added 177 files to zip archive at /Users/robinp/GIT/phenopacket-store/all_phenopackets\n"
"Added 2313 files to zip archive at /Users/robinp/GIT/phenopacket-store/all_phenopackets\n"
]
}
],
Expand Down

0 comments on commit d597ab9

Please sign in to comment.