|
|
@@ -5,6 +5,7 @@ import os |
|
|
|
import pandas as pd
|
|
|
|
from bisect import bisect_left
|
|
|
|
import torch
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
def index(a, x):
|
|
|
@@ -14,7 +15,7 @@ def index(a, x): |
|
|
|
raise ValueError
|
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
def load_data():
|
|
|
|
path = '/pstore/data/data_science/ref/decagon'
|
|
|
|
df_combo = pd.read_csv(os.path.join(path, 'bio-decagon-combo.csv'))
|
|
|
|
df_effcat = pd.read_csv(os.path.join(path, 'bio-decagon-effectcategories.csv'))
|
|
|
@@ -43,6 +44,7 @@ def main(): |
|
|
|
data.add_node_type('Gene', len(genes))
|
|
|
|
data.add_node_type('Drug', len(drugs))
|
|
|
|
|
|
|
|
print('Preparing PPI...')
|
|
|
|
print('Indexing rows...')
|
|
|
|
rows = [index(genes, g) for g in df_ppi['Gene 1']]
|
|
|
|
print('Indexing cols...')
|
|
|
@@ -56,6 +58,39 @@ def main(): |
|
|
|
print('adj_mat created')
|
|
|
|
fam = data.add_relation_family('PPI', 0, 0, True)
|
|
|
|
rel = fam.add_relation_type('PPI', adj_mat)
|
|
|
|
print('OK')
|
|
|
|
|
|
|
|
# Drug -> Gene target edges: one drug x gene sparse adjacency matrix,
# registered as the single relation type of its own relation family.
print('Preparing Drug-Gene (Target) edges...')
# index() results are used as row (drug) and column (gene) coordinates.
rows = [index(drugs, stitch_id) for stitch_id in df_tgtall['STITCH']]
cols = [index(genes, gene_id) for gene_id in df_tgtall['Gene']]
# Pair the coordinates up, then flip (n_edges, 2) into (2, n_edges).
indices = list(zip(rows, cols))
indices = torch.tensor(indices).transpose(0, 1)
values = torch.ones(len(rows))
adj_mat = torch.sparse_coo_tensor(
    indices,
    values,
    size=(len(drugs), len(genes)),
)
fam = data.add_relation_family('Drug-Gene (Target)', 1, 0, True)
rel = fam.add_relation_type('Drug-Gene (Target)', adj_mat)
print('OK')
|
|
|
|
|
|
|
|
# Drug <-> Drug side-effect edges: one relation family holding one relation
# type (with its own drug x drug adjacency matrix) per distinct side effect.
print('Preparing Drug-Drug (Side Effect) edges...')
fam = data.add_relation_family('Drug-Drug (Side Effect)', 1, 1, True)
print('# of side effects:', len(df_combo), 'unique:', len(df_combo['Polypharmacy Side Effect'].unique()))
# groupby yields (side-effect key, rows of df_combo carrying that key).
for eff, df in df_combo.groupby('Polypharmacy Side Effect'):
    # One progress dot per side effect, flushed so it shows immediately.
    sys.stdout.write('.') # print(eff, '...')
    sys.stdout.flush()
    # index() results are used as row/column coordinates into `drugs`.
    rows = [index(drugs, d) for d in df['STITCH 1']]
    cols = [index(drugs, d) for d in df['STITCH 2']]
    indices = list(zip(rows, cols))
    # Flip (n_pairs, 2) into the (2, n_pairs) layout used for COO indices.
    indices = torch.tensor(indices).transpose(0, 1)
    values = torch.ones(len(rows))
    adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(drugs), len(drugs)))
    # Symmetrize the matrix. A pair listed in one direction only ends up
    # with weight 0.5 in both directions after the division.
    adj_mat = (adj_mat + adj_mat.transpose(0, 1)) / 2
    # Name the relation type after the scalar group key, matching the plain
    # string names given to the other relation types, instead of passing
    # the group's whole 'Polypharmacy Side Effect' column.
    rel = fam.add_relation_type(eff, adj_mat)
# Terminate the line of progress dots.
print()
print('OK')
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Script entry point: call load_data() and bind its result to `data`."""
    # NOTE(review): confirm load_data() returns the populated object;
    # if it has no return statement, `data` is None here.
    data = load_data()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|