| @@ -5,6 +5,7 @@ import os | |||||
| import pandas as pd | import pandas as pd | ||||
| from bisect import bisect_left | from bisect import bisect_left | ||||
| import torch | import torch | ||||
| import sys | |||||
| def index(a, x): | def index(a, x): | ||||
| @@ -14,7 +15,7 @@ def index(a, x): | |||||
| raise ValueError | raise ValueError | ||||
| def main(): | |||||
| def load_data(): | |||||
| path = '/pstore/data/data_science/ref/decagon' | path = '/pstore/data/data_science/ref/decagon' | ||||
| df_combo = pd.read_csv(os.path.join(path, 'bio-decagon-combo.csv')) | df_combo = pd.read_csv(os.path.join(path, 'bio-decagon-combo.csv')) | ||||
| df_effcat = pd.read_csv(os.path.join(path, 'bio-decagon-effectcategories.csv')) | df_effcat = pd.read_csv(os.path.join(path, 'bio-decagon-effectcategories.csv')) | ||||
| @@ -43,6 +44,7 @@ def main(): | |||||
| data.add_node_type('Gene', len(genes)) | data.add_node_type('Gene', len(genes)) | ||||
| data.add_node_type('Drug', len(drugs)) | data.add_node_type('Drug', len(drugs)) | ||||
| print('Preparing PPI...') | |||||
| print('Indexing rows...') | print('Indexing rows...') | ||||
| rows = [index(genes, g) for g in df_ppi['Gene 1']] | rows = [index(genes, g) for g in df_ppi['Gene 1']] | ||||
| print('Indexing cols...') | print('Indexing cols...') | ||||
| @@ -56,6 +58,39 @@ def main(): | |||||
| print('adj_mat created') | print('adj_mat created') | ||||
| fam = data.add_relation_family('PPI', 0, 0, True) | fam = data.add_relation_family('PPI', 0, 0, True) | ||||
| rel = fam.add_relation_type('PPI', adj_mat) | rel = fam.add_relation_type('PPI', adj_mat) | ||||
| print('OK') | |||||
| print('Preparing Drug-Gene (Target) edges...') | |||||
| rows = [index(drugs, d) for d in df_tgtall['STITCH']] | |||||
| cols = [index(genes, g) for g in df_tgtall['Gene']] | |||||
| indices = list(zip(rows, cols)) | |||||
| indices = torch.tensor(indices).transpose(0, 1) | |||||
| values = torch.ones(len(rows)) | |||||
| adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(drugs), len(genes))) | |||||
| fam = data.add_relation_family('Drug-Gene (Target)', 1, 0, True) | |||||
| rel = fam.add_relation_type('Drug-Gene (Target)', adj_mat) | |||||
| print('OK') | |||||
| print('Preparing Drug-Drug (Side Effect) edges...') | |||||
| fam = data.add_relation_family('Drug-Drug (Side Effect)', 1, 1, True) | |||||
| print('# of side effects:', len(df_combo), 'unique:', len(df_combo['Polypharmacy Side Effect'].unique())) | |||||
| for eff, df in df_combo.groupby('Polypharmacy Side Effect'): | |||||
| sys.stdout.write('.') # print(eff, '...') | |||||
| sys.stdout.flush() | |||||
| rows = [index(drugs, d) for d in df['STITCH 1']] | |||||
| cols = [index(drugs, d) for d in df['STITCH 2']] | |||||
| indices = list(zip(rows, cols)) | |||||
| indices = torch.tensor(indices).transpose(0, 1) | |||||
| values = torch.ones(len(rows)) | |||||
| adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(drugs), len(drugs))) | |||||
| adj_mat = (adj_mat + adj_mat.transpose(0, 1)) / 2 | |||||
| rel = fam.add_relation_type(df['Polypharmacy Side Effect'], adj_mat) | |||||
| print() | |||||
| print('OK') | |||||
def main():
    """Script entry point: run the data-loading step via ``load_data``.

    The value returned by ``load_data`` is bound to a local name and is not
    used any further inside this function.
    """
    # NOTE(review): no return statement is visible in the portion of
    # load_data shown here -- confirm it actually returns the populated
    # data object, otherwise this local will be None.
    dataset = load_data()
| if __name__ == '__main__': | if __name__ == '__main__': | ||||