-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
608d8e9
commit f1f0c31
Showing
4 changed files
with
303 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
// NOTE(review): this module-level array is never read or written in the
// visible code; the `users` variables used inside the functions of this file
// are separate local bindings.
const users = [];
|
||
import localforage from 'https://cdn.skypack.dev/localforage'; | ||
|
||
// Global localForage configuration: the driver list is given in order of
// preference (IndexedDB, then localStorage, then WebSQL).
localforage.config({
  driver: [localforage.INDEXEDDB, localforage.LOCALSTORAGE, localforage.WEBSQL],
  name: 'localforage',
});

// Store used by getUsers() to cache the openSNP user list.
let openSnpDbUrls = localforage.createInstance({
  name: 'openSnpDbUrls',
  storeName: 'userUrls',
});

// Store passed around as `table` to cache parsed genotype files by download URL.
let userTxts = localforage.createInstance({
  name: 'userTxts',
  storeName: 'userTxts',
});

// Store used by getUserPhenotypes() to cache the openSNP phenotype list.
let userPhenotypes = localforage.createInstance({
  name: 'userPhenotypes',
  storeName: 'userPhenotypes',
});
// get all openSNP users (their genotype entries may be 23andMe, Illumina, Ancestry, etc.) -------------
/**
 * Returns the full openSNP user list, sorted by ascending `id`.
 *
 * The list is read from the `openSnpDbUrls` store under the key 'usersFull'.
 * On a cache miss it is fetched through the CORS proxy, sorted, cached and
 * returned.
 *
 * @returns {Promise<Array<object>>} The user records as delivered by users.json.
 * @throws {Error} If the HTTP request does not succeed.
 */
const getUsers = async function () {
  const cacheKey = 'usersFull';

  const cachedUsers = await openSnpDbUrls.getItem(cacheKey);
  if (cachedUsers != null) {
    return cachedUsers;
  }

  const url = 'https://corsproxy.io/?https://opensnp.org/users.json';
  const response = await fetch(url);
  if (!response.ok) {
    // Do not try to parse (or cache) an error page as the user list.
    throw new Error(`Failed to fetch openSNP users: HTTP ${response.status}`);
  }

  const fetchedUsers = await response.json();
  fetchedUsers.sort((a, b) => a.id - b.id);

  try {
    await openSnpDbUrls.setItem(cacheKey, fetchedUsers);
  } catch (err) {
    // Caching is best effort: the freshly fetched list is still usable.
    console.warn('Could not cache openSNP users:', err);
  }
  return fetchedUsers;
};
// userTxts.removeItem("https://opensnp.org/data/1.23andme.2995") | ||
//userTxts.setItem("hi", "lala"); | ||
/**
 * Loads the parsed genotype file of every entry in `usersData`, using `table`
 * as a cache keyed by download URL.
 *
 * Before loading, the table is pruned: `clearTable` and `clearStorage` are
 * both given `keysLen` as their threshold, and `clearStorage` additionally
 * receives the cached keys that are not among the requested URLs.
 *
 * @param {object} table - localForage instance used as the cache.
 * @param {Array<object>} usersData - Entries carrying a 'genotype.download_url' field.
 * @param {number} keysLen - Threshold forwarded to clearTable / clearStorage.
 * @returns {Promise<Array<object>>} One parsed object per entry, in input order.
 * @throws {Error} If a genotype file cannot be downloaded.
 */
const getTxts = async function (table, usersData, keysLen) {
  // Wait for pruning to finish so it cannot race with the reads/writes below.
  await clearTable(table, keysLen);

  const urls = usersData.map((user) => user['genotype.download_url']);

  // Cached keys that are not part of this request are candidates for eviction.
  const requestedUrls = new Set(urls);
  const storedKeys = await table.keys();
  const deleteKeyList = storedKeys.filter((key) => !requestedUrls.has(key));
  await clearStorage(table, keysLen, deleteKeyList);

  const parsedUsers = [];
  for (let i = 0; i < urls.length; i++) {
    const cached = await table.getItem(urls[i]);
    console.log("processing user #", i);

    if (cached != null) {
      parsedUsers.push(cached);
      continue;
    }

    const response = await fetch(`https://corsproxy.io/?${urls[i]}`);
    if (!response.ok) {
      // Never parse or cache an error page as if it were genotype data.
      throw new Error(`Failed to fetch ${urls[i]}: HTTP ${response.status}`);
    }
    const parsedUser = await parseTxts(await response.text(), usersData[i]);
    parsedUsers.push(parsedUser);

    try {
      await table.setItem(urls[i], parsedUser);
    } catch (err) {
      // Caching is best effort; the parsed data is still returned.
      console.warn('Could not cache', urls[i], err);
    }
  }
  return parsedUsers;
};
|
||
/**
 * Evicts the first half of `deleteKeyList` from `table` when the table holds
 * more than `keysLen` entries; otherwise does nothing.
 *
 * @param {object} table - localForage instance to prune.
 * @param {number} keysLen - Entry count above which pruning happens.
 * @param {Array<string>} deleteKeyList - Keys that may be evicted.
 * @returns {Promise<void>} Resolves once the removals have completed.
 */
const clearStorage = async function (table, keysLen, deleteKeyList) {
  // length() is asynchronous; comparing the un-awaited Promise with a number
  // is always false, which would disable pruning entirely.
  const storedCount = await table.length();
  if (storedCount <= keysLen) {
    return;
  }

  const halfLength = Math.ceil(deleteKeyList.length / 2);
  console.log('halfLength:', halfLength);
  const firstHalf = deleteKeyList.slice(0, halfLength);
  // Remove from the table that was passed in, not from a module-level store.
  await Promise.all(firstHalf.map((key) => table.removeItem(key)));
};
|
||
|
||
|
||
// create 23andme obj and data --------------------------
/**
 * Parses the text of a genotype file into a structured object.
 *
 * Lines starting with '#' are counted as header lines. The last header line
 * (minus its first two characters) is split on tabs to give the column names;
 * the header lines before it are kept as `meta`. Every remaining line is
 * split on tabs, its third field is converted to an integer and its row index
 * is stored in the fifth slot.
 *
 * @param {string} txt - Raw file content.
 * @param {object} usersData - Record stored unchanged under `openSnp`.
 * @returns {Promise<{txt: string, openSnp: object, meta: string, year: string,
 *   qc: boolean, cols: string[], dt: Array<Array>}>} The parsed file.
 */
const parseTxts = async function (txt, usersData) {
  // Dropping empty strings keeps a leading/trailing newline from producing a
  // bogus first line or a bogus data row.
  const rows = txt.split(/[\r\n]+/).filter((row) => row.length > 0);
  const headerCount = rows.filter((row) => row[0] === '#').length;
  const firstRow = rows[0] ?? '';

  const obj = {};
  obj.txt = txt;
  obj.openSnp = usersData;
  // Without any header line there is no metadata and no column row to read.
  obj.meta = headerCount > 0 ? rows.slice(0, headerCount - 1).join('\r\n') : '';
  // Last four characters of the first line.
  obj.year = firstRow.slice(-4);
  obj.qc = firstRow.startsWith('# This data file generated by 23andMe');
  obj.cols = headerCount > 0 ? rows[headerCount - 1].slice(2).split(/\t/) : [];
  obj.dt = rows.slice(headerCount).map((row, index) => {
    const fields = row.split('\t');
    // position in the chr
    fields[2] = Number.parseInt(fields[2], 10);
    fields[4] = index;
    return fields;
  });
  return obj;
};
// keep only genotype files of the requested type (e.g. type = "23andme") ------------------------------
/**
 * Flattens the users' genotype lists into one record per genotype file whose
 * `filetype` equals `type`. Users without a matching file contribute nothing;
 * users with several matching files contribute several records.
 *
 * @param {string} type - Genotype file type to keep, e.g. "23andme".
 * @param {Array<object>} users - User records with `name`, `id` and `genotypes`.
 * @returns {Promise<Array<object>>} Records with the keys name, id,
 *   'genotype.id', 'genotype.filetype' and 'genotype.download_url'.
 */
const filterUsers = async function (type, users) {
  return users.flatMap((user) =>
    (user.genotypes ?? [])
      .filter((genotype) => genotype.filetype === type)
      .map((genotype) => ({
        name: user.name,
        id: user.id,
        'genotype.id': genotype.id,
        'genotype.filetype': genotype.filetype,
        // Upgrade only a leading "http://"; a URL that is already https must
        // not be turned into "httpss://".
        'genotype.download_url': genotype.download_url.replace(/^http:\/\//, 'https://'),
      })),
  );
};
|
||
// Estimate how much data a localForage table holds
/**
 * Walks every entry of the given table, serialises each value to JSON and
 * counts two bytes per character of that JSON text. The total is converted
 * with bytesToGB, logged, and returned.
 *
 * @param {object} tableName - The localForage instance to measure (an
 *   instance, despite the parameter name).
 * @returns {Promise<number>} Approximate size in GB.
 */
async function getLocalForageTableSize(tableName) {
  console.log("---------------------------");
  console.log("running... getLocalForageTableSize function");

  let byteCount = 0;
  let entryIndex = 0;
  await tableName.iterate((value, key) => {
    entryIndex += 1;
    console.log(entryIndex, "key", key);
    const json = JSON.stringify(value);
    byteCount += json.length * 2; // Approximate size in bytes
  });

  const sizeInGb = bytesToGB(byteCount);
  console.log("localforage ", tableName._config.name, "table size:", sizeInGb.toFixed(3), "GBs");
  return sizeInGb;
}
|
||
|
||
/**
 * Converts a byte count to gigabytes, using 1024^3 bytes per gigabyte.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {number} Size in GB.
 */
function bytesToGB(bytes) {
  const bytesPerGb = 1024 ** 3;
  return bytes / bytesPerGb;
}
|
||
// get the parsed 23andme files of the users who reported a given phenotype -----------------------------
/**
 * Fetches the users listed for one phenotype, keeps their 23andme genotype
 * files, caps the list at `maxFiles`, and loads the parsed files via getTxts.
 *
 * @param {number|string} phenoId - openSNP phenotype id.
 * @param {object} table - localForage instance forwarded to getTxts as cache.
 * @param {number} storageSize - Forwarded unchanged to getTxts as its
 *   threshold argument.
 * @param {number} [maxFiles=8] - Maximum number of genotype files to load.
 * @returns {Promise<Array<object>>} The parsed genotype files.
 * @throws {Error} If the phenotype request does not succeed.
 */
const getUsersByPhenotypeId = async function (phenoId, table, storageSize, maxFiles = 8) {
  console.log("---------------------------");
  console.log("running... getUsersByPhenotypeId function");
  console.log("phenotype id:", phenoId);

  const allUsers = await getUsers();
  const cors = `https://corsproxy.io/?`;
  const onePhenotypeUrl = `https://opensnp.org/phenotypes/json/variations/${phenoId}.json`;
  const response = await fetch(cors + onePhenotypeUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch phenotype ${phenoId}: HTTP ${response.status}`);
  }
  const phenotype = await response.json();

  // get users with phenotype data (even those without genotype data)
  const phenotypeUserIds = new Set(phenotype.users.map((entry) => entry.user_id));
  const matchedUsers = allUsers.filter(({ id }) => phenotypeUserIds.has(id));

  // Filter first, then cap: the cap applies to genotype files (one user may
  // have several), and it really takes the first `maxFiles` of them.
  let cleanUsers = await filterUsers("23andme", matchedUsers);
  if (cleanUsers.length > maxFiles) {
    cleanUsers = cleanUsers.slice(0, maxFiles);
    console.log(
      "Warning: user txts for phenotypeID",
      phenoId,
      `> ${maxFiles}. First ${maxFiles} files used.`,
    );
  }

  // get 23 and me texts from urls using getTxts function
  const snpTxts = await getTxts(table, cleanUsers, storageSize);

  console.log("User txts for phenotypeID", phenoId, ": ", snpTxts);
  return snpTxts;
};
|
||
/**
 * Returns the openSNP phenotype list, sorted by descending `number_of_users`.
 *
 * The list is cached in the `userPhenotypes` store under the phenotypes URL.
 * On a cache miss it is fetched through the CORS proxy, sorted, cached and
 * returned.
 *
 * @returns {Promise<Array<object>>} The phenotype records.
 * @throws {Error} If the HTTP request does not succeed.
 */
const getUserPhenotypes = async function () {
  const allPhenotypesUrl = 'https://opensnp.org/phenotypes.json';

  const cachedPhenotypes = await userPhenotypes.getItem(allPhenotypesUrl);
  if (cachedPhenotypes != null) {
    return cachedPhenotypes;
  }

  const cors = `https://corsproxy.io/?`;
  const response = await fetch(cors + allPhenotypesUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch openSNP phenotypes: HTTP ${response.status}`);
  }
  const allPhenotypes = await response.json();
  allPhenotypes.sort((a, b) => b.number_of_users - a.number_of_users);

  try {
    await userPhenotypes.setItem(allPhenotypesUrl, allPhenotypes);
  } catch (err) {
    // Caching is best effort: the freshly fetched list is still usable.
    console.warn('Could not cache openSNP phenotypes:', err);
  }
  // Return the fetched list itself; on a cache miss the cached value is null.
  return allPhenotypes;
};
|
||
|
||
/**
 * Looks up the `characteristic` (name) of a phenotype by its id in the list
 * returned by getUserPhenotypes.
 *
 * @param {number|string} id - Phenotype id; compared as a string so that 50
 *   and "50" both match.
 * @returns {Promise<string>} The phenotype's characteristic.
 * @throws {Error} If no phenotype with that id exists.
 */
const getPhenotypeNameFromId = async function (id) {
  console.log("---------------------------");
  console.log("running... getPhenotypeNameFromId function");

  const phenotypes = await getUserPhenotypes();
  const match = phenotypes.find((phenotype) => String(phenotype.id) === String(id));
  if (match === undefined) {
    throw new Error(`No phenotype found for id ${id}`);
  }

  const name = match.characteristic;
  console.log("Phenotype id", id, "corresponds to:", name);
  return name;
};
|
||
/**
 * Empties `table` completely when it holds more than `keysLen` entries.
 * Failures are logged, not thrown.
 *
 * @param {object} table - localForage instance to clear.
 * @param {number} keysLen - Entry count above which the table is cleared.
 * @returns {Promise<void>} Resolves once the check (and the clear, if any) is done.
 */
const clearTable = async function (table, keysLen) {
  try {
    // length() is asynchronous; comparing the un-awaited Promise with a
    // number is always false, so the table would never be cleared.
    const storedCount = await table.length();
    if (storedCount <= keysLen) {
      return;
    }
    await table.clear();
    console.log('LocalForage cleared!');
  } catch (err) {
    console.error('Error clearing localForage:', err);
  }
};
|
||
// Phenotype to load.
const id = 50;
// Cache limit expressed as a number of stored entries: the third argument of
// getUsersByPhenotypeId is forwarded to getTxts, whose pruning helpers compare
// it with table.length() (an entry count).
const keysLen = 12;
// NOTE(review): presumably a size limit in GB (cf. getLocalForageTableSize);
// nothing in this file compares against a size, so it is no longer passed
// where an entry count is expected. Confirm the intended use.
const storageSize = 1.3;
const td2Users = await getUsersByPhenotypeId(id, userTxts, keysLen);
const name = await getPhenotypeNameFromId(id);
const phenotypes = await getUserPhenotypes();
const userTableSize = await getLocalForageTableSize(userTxts);
|
||
// export |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.