-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathimport_omim.php
executable file
·247 lines (203 loc) · 6.73 KB
/
import_omim.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#!/usr/bin/php
<?php
;
// Copyright: see COPYING
// Authors: see git-blame(1)
if ($_SERVER["argc"] != 3)
{
die ("Usage: ".$_SERVER["argv"][0]." OmimVarLocusIdSNP.bcp morbidmap\n");
}
chdir ('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
ini_set ("output_buffering", FALSE);
ini_set ("memory_limit", 67108864);
genomes_create_tables ();
openid_login_as_robot ("OMIM Importing Robot");
print "Creating omim locus/id/snp table...";
theDb()->query ("CREATE TEMPORARY TABLE omim_a (
`omim_id` BIGINT UNSIGNED,
`x1` INTEGER,
`x2` INTEGER,
`gene` VARCHAR(16),
`aa_from_short` VARCHAR(32),
`aa_pos` INTEGER,
`aa_to_short` VARCHAR(32),
`x3` VARCHAR(32),
`rsid` BIGINT UNSIGNED,
INDEX(omim_id)
)");
print "\n";
print "Importing OmimVarLocusIdSNP...";
$e = theDb()->query ("LOAD DATA LOCAL INFILE ? INTO TABLE omim_a FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'",
array ($_SERVER["argv"][1]));
print theDb()->affectedRows();
print "\n";
print "Creating omim id -> annotation table...";
theDb()->query ("CREATE TEMPORARY TABLE omim_annotation (
`annotation_text` VARCHAR(128),
`x1` VARCHAR(32),
`omim_id` BIGINT UNSIGNED,
`x2` VARCHAR(32),
INDEX(omim_id)
)");
print "\n";
print "Importing morbidmap...";
theDb()->query ("LOAD DATA LOCAL INFILE ? INTO TABLE omim_annotation FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'",
array ($_SERVER["argv"][2]));
print theDb()->affectedRows();
print "\n";
print "Creating aa_short -> aa_long table...";
$q = theDb()->query ("CREATE TEMPORARY TABLE aa13 (
`short` CHAR(1) PRIMARY KEY,
`long` VARCHAR(4)
)");
if (theDb()->isError($q)) { die ($q->getMessage()."\n"); }
foreach ($aa_13 as $short => $long) {
theDb()->query ("INSERT INTO aa13 VALUES (?, ?)", array ($short, $long));
}
print "\n";
print "Adding columns...";
theDb()->query ("ALTER TABLE omim_a ADD variant_id BIGINT UNSIGNED, ADD INDEX(variant_id)");
theDb()->query ("ALTER TABLE omim_a ADD aa_from VARCHAR(255)");
theDb()->query ("ALTER TABLE omim_a ADD aa_to VARCHAR(255)");
theDb()->query ("ALTER TABLE omim_a ADD url VARCHAR(255)");
theDb()->query ("ALTER TABLE omim_a ADD INDEX(gene,aa_from,aa_pos,aa_to)");
print "\n";
print "Filling in url field...";
theDb()->query ("
UPDATE omim_a
SET url = CONCAT('http://www.ncbi.nlm.nih.gov/omim/',omim_id)
");
print theDb()->affectedRows();
print "\n";
print "Converting short to long AA (from)...";
$q = theDb()->query ("
UPDATE omim_a
LEFT JOIN aa13 ON aa13.short = omim_a.aa_from_short
SET gene = UPPER(TRIM(gene)),
aa_from = aa13.long
");
if (theDb()->isError($q)) { die ($q->getMessage()."\n"); }
print theDb()->affectedRows();
print "\n";
print "Converting short to long AA (to)...";
$q = theDb()->query ("
UPDATE omim_a
LEFT JOIN aa13 ON aa13.short = omim_a.aa_to_short
SET
aa_to = aa13.long
");
if (theDb()->isError($q)) { die ($q->getMessage()."\n"); }
print theDb()->affectedRows();
print "\n";
print "Converting gene names to canonical gene names...";
$q = theDb()->query ("
UPDATE omim_a
LEFT JOIN gene_canonical_name ON aka = gene
SET gene = official
WHERE official IS NOT NULL
");
if (theDb()->isError($q)) { die ($q->getMessage()."\n"); }
print theDb()->affectedRows();
print "\n";
print "Looking up variant ids...";
theDb()->query ($update_variant_id_sql = "
UPDATE omim_a, variants
SET omim_a.variant_id=variants.variant_id
WHERE variants.variant_gene = omim_a.gene
AND variants.variant_aa_from = omim_a.aa_from
AND variants.variant_aa_to = omim_a.aa_to
AND variants.variant_aa_pos = omim_a.aa_pos
");
print theDb()->affectedRows();
print "\n";
print "Looking up variant ids by rsid...";
theDb()->query ("
UPDATE omim_a, variants
SET omim_a.variant_id=variants.variant_id
WHERE omim_a.variant_id IS NULL
AND variants.variant_rsid IS NOT NULL
AND omim_a.rsid = variants.variant_rsid
");
print theDb()->affectedRows();
print "\n";
print "Deleting rows with no/invalid AA change...";
theDb()->query ("DELETE FROM omim_a WHERE variant_id IS NULL AND (aa_from IS NULL OR aa_to IS NULL OR aa_pos IS NULL OR aa_pos<=0)");
print theDb()->affectedRows();
print "\n";
print "Adding variants...";
$q = theDb()->query ("
SELECT concat(gene,' ',aa_from,aa_pos,aa_to) AS gene_aa_change
FROM omim_a
WHERE variant_id IS NULL
GROUP BY gene,aa_from,aa_pos,aa_to");
$n=0;
$did = array();
while ($row =& $q->fetchRow())
{
if (isset ($did[$row["gene_aa_change"]]))
continue;
// create the variant, and an "initial edit/add" row in edits table
$variant_id = evidence_get_variant_id ($row["gene_aa_change"],
false, false, false,
true);
$edit_id = evidence_get_latest_edit ($variant_id,
0, 0, 0,
true);
$did[$row["gene_aa_change"]] = 1;
print "\n".$row["gene_aa_change"]." -> $variant_id $edit_id ...";
++$n;
if ($n % 100 == 0)
print "$n...";
}
print "$n\n";
print "Looking up variant ids again...";
theDb()->query ($update_variant_id_sql);
print theDb()->affectedRows();
print "\n";
print "Editing \"unknown\" variants to \"pathogenic\"...";
$timestamp = theDb()->getOne("SELECT NOW()");
$q = theDb()->query ("INSERT INTO edits
(variant_id, genome_id, article_pmid, disease_id, is_draft,
previous_edit_id, variant_dominance, variant_quality, variant_quality_text,
summary_short, summary_long,
variant_impact, edit_oid, edit_timestamp)
SELECT DISTINCT s.variant_id, 0, 0, 0, 0,
edit_id, variant_dominance, variant_quality, variant_quality_text,
summary_short, summary_long,
?, ?, ?
FROM omim_a
LEFT JOIN snap_latest s ON omim_a.variant_id=s.variant_id AND s.article_pmid=0 AND s.genome_id=0 AND s.disease_id=0
WHERE s.variant_impact='none'",
array ('pathogenic',
getCurrentUser("oid"), $timestamp));
if (theDb()->isError($q)) { print $q->getMessage(); print "..."; }
print theDb()->affectedRows();
print "\n";
print "Copying edits to snap_latest...";
theDb()->query ("REPLACE INTO snap_latest SELECT * FROM edits WHERE edit_oid=? AND edit_timestamp=?",
array (getCurrentUser("oid"), $timestamp));
print theDb()->affectedRows();
print "\n";
print "Updating variant_external...";
theDb()->query ("LOCK TABLES variant_external WRITE");
theDb()->query ("DELETE FROM variant_external WHERE tag='OMIM'");
theDb()->query ("INSERT INTO variant_external
(variant_id, tag, content, url, updated)
SELECT variant_id, 'OMIM', annotation_text, url, NOW()
FROM omim_a
LEFT JOIN omim_annotation ON omim_a.omim_id = omim_annotation.omim_id
WHERE variant_id > 0");
print theDb()->affectedRows();
print "\n";
theDb()->query ("UNLOCK TABLES");
if (getenv("DEBUG"))
{
theDb()->query ("DROP TABLE IF EXISTS omim_last");
theDb()->query ("CREATE TABLE IF NOT EXISTS omim_last LIKE omim_a");
theDb()->query ("INSERT INTO omim_last SELECT * FROM omim_a");
}
?>