diff --git a/experiments/decagon_run/decagon_run.py b/experiments/decagon_run/decagon_run.py
index 239250e..d21cce0 100644
--- a/experiments/decagon_run/decagon_run.py
+++ b/experiments/decagon_run/decagon_run.py
@@ -5,6 +5,7 @@ import os
 import pandas as pd
 from bisect import bisect_left
 import torch
+import sys
 
 
 def index(a, x):
@@ -14,7 +15,7 @@ def index(a, x):
     raise ValueError
 
 
-def main():
+def load_data():
     path = '/pstore/data/data_science/ref/decagon'
     df_combo = pd.read_csv(os.path.join(path, 'bio-decagon-combo.csv'))
     df_effcat = pd.read_csv(os.path.join(path, 'bio-decagon-effectcategories.csv'))
@@ -43,6 +44,7 @@ def main():
     data.add_node_type('Gene', len(genes))
     data.add_node_type('Drug', len(drugs))
 
+    print('Preparing PPI...')
     print('Indexing rows...')
     rows = [index(genes, g) for g in df_ppi['Gene 1']]
     print('Indexing cols...')
@@ -56,6 +58,47 @@ def main():
     print('adj_mat created')
     fam = data.add_relation_family('PPI', 0, 0, True)
     rel = fam.add_relation_type('PPI', adj_mat)
+    print('OK')
+
+    print('Preparing Drug-Gene (Target) edges...')
+    # One entry per row of df_tgtall: drug index as row, gene index as column.
+    rows = [index(drugs, d) for d in df_tgtall['STITCH']]
+    cols = [index(genes, g) for g in df_tgtall['Gene']]
+    indices = list(zip(rows, cols))
+    indices = torch.tensor(indices).transpose(0, 1)
+    values = torch.ones(len(rows))
+    adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(drugs), len(genes)))
+    fam = data.add_relation_family('Drug-Gene (Target)', 1, 0, True)
+    rel = fam.add_relation_type('Drug-Gene (Target)', adj_mat)
+    print('OK')
+
+    print('Preparing Drug-Drug (Side Effect) edges...')
+    fam = data.add_relation_family('Drug-Drug (Side Effect)', 1, 1, True)
+    print('# of side effects:', len(df_combo), 'unique:', len(df_combo['Polypharmacy Side Effect'].unique()))
+    # One relation type per side effect; eff is the groupby key for the group df.
+    for eff, df in df_combo.groupby('Polypharmacy Side Effect'):
+        sys.stdout.write('.')  # print(eff, '...')
+        sys.stdout.flush()
+        rows = [index(drugs, d) for d in df['STITCH 1']]
+        cols = [index(drugs, d) for d in df['STITCH 2']]
+        indices = list(zip(rows, cols))
+        indices = torch.tensor(indices).transpose(0, 1)
+        values = torch.ones(len(rows))
+        adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(drugs), len(drugs)))
+        # Average with the transpose so the drug-drug matrix is symmetric.
+        adj_mat = (adj_mat + adj_mat.transpose(0, 1)) / 2
+        # Name the relation by the group key, not by the group's whole column Series.
+        rel = fam.add_relation_type(eff, adj_mat)
+    print()
+    print('OK')
+
+    # Hand the populated data object back; without this main() receives None.
+    return data
+
+
+def main():
+    """Load the data set via load_data()."""
+    data = load_data()
 
 
 if __name__ == '__main__':