IF YOU WOULD LIKE TO GET AN ACCOUNT, please send an email to s dot adaszewski at gmail dot com. User accounts are intended only for reporting issues and/or submitting pull requests. This is a purpose-specific Git hosting service for ADARED projects. Thank you for your understanding!
Browse Source

Start working on experiments/decagon_run.

master
Stanislaw Adaszewski 4 years ago
parent
commit
373a12f51a
3 changed files with 70 additions and 0 deletions
  1. +3
    -0
      docker/postgresql/Dockerfile
  2. +5
    -0
      docker/postgresql/docker-init.sh
  3. +62
    -0
      experiments/decagon_run/decagon_run.py

+ 3
- 0
docker/postgresql/Dockerfile View File

@@ -1,3 +1,6 @@
# PostgreSQL on Alpine with a /data directory owned by the postgres user.
# NOTE(review): alpine:latest is a moving tag; consider pinning a release
# for reproducible builds.
FROM alpine:latest
# --no-cache avoids leaving the apk package index cache in the image layer.
RUN apk add --no-cache postgresql
# Create /data and hand it to the postgres user so the server can write to it.
RUN mkdir /data && \
    chown postgres:postgres /data

+ 5
- 0
docker/postgresql/docker-init.sh View File

@@ -0,0 +1,5 @@
#!/bin/sh
# Initialize the PostgreSQL cluster in /data on first start only.
# The presence of /data/PG_VERSION is used as the "already initialized" marker
# (presumably written by initdb itself -- confirm against the PostgreSQL docs).
if [ ! -f /data/PG_VERSION ]; then
# -D selects /data as the target directory; --pwfile points initdb at
# /superuser_password (expected to exist in the container -- not visible here).
initdb -D /data --pwfile=/superuser_password
fi

+ 62
- 0
experiments/decagon_run/decagon_run.py View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
from icosagon.data import Data
import os
import pandas as pd
from bisect import bisect_left
import torch
def index(a, x):
    """Return the position of ``x`` in the sorted sequence ``a``.

    The lookup is a binary search via ``bisect_left``, so ``a`` must already
    be sorted in ascending order; on unsorted input the result is undefined.

    Args:
        a: Sorted sequence to search.
        x: Value to locate.

    Returns:
        The index of the leftmost element of ``a`` equal to ``x``.

    Raises:
        ValueError: If ``x`` does not occur in ``a``.
    """
    i = bisect_left(a, x)
    # bisect_left returns the insertion point; it is a hit only when that
    # slot exists and actually holds x.
    if i != len(a) and a[i] == x:
        return i
    # Name the missing value so a failure inside a bulk indexing loop can be
    # traced back to the offending key.
    raise ValueError(f'{x!r} is not in the sorted sequence')
def main(path='/pstore/data/data_science/ref/decagon'):
    """Load the Decagon CSV tables and start populating an icosagon ``Data``.

    Reads the six ``bio-decagon-*.csv`` files from ``path``, prints the row
    count and columns of each, collects the sorted sets of gene and drug
    identifiers, registers them as the 'Gene' and 'Drug' node types and adds
    the protein-protein interaction (PPI) adjacency matrix as a relation.

    Only the PPI relation is added here; the effect-category table is loaded
    and summarized but not otherwise used, and no drug relations are created.

    Args:
        path: Directory containing the Decagon CSV files.
    """
    # Keep the printed names next to the files they come from, in the same
    # order the tables are read and reported.
    filenames = {
        'df_combo': 'bio-decagon-combo.csv',
        'df_effcat': 'bio-decagon-effectcategories.csv',
        'df_mono': 'bio-decagon-mono.csv',
        'df_ppi': 'bio-decagon-ppi.csv',
        'df_tgtall': 'bio-decagon-targets-all.csv',
        'df_tgt': 'bio-decagon-targets.csv',
    }
    frames = {
        nam: pd.read_csv(os.path.join(path, fname))
        for nam, fname in filenames.items()
    }

    for nam, df in frames.items():
        print(f'len({nam}): {len(df)}')
        print(f'{nam}.columns: {df.columns}')

    df_combo = frames['df_combo']
    df_mono = frames['df_mono']
    df_ppi = frames['df_ppi']
    df_tgtall = frames['df_tgtall']
    df_tgt = frames['df_tgt']

    # Sorted so that index() can binary-search identifiers into node indices.
    genes = sorted(set().union(
        df_ppi['Gene 1'], df_ppi['Gene 2'],
        df_tgtall['Gene'], df_tgt['Gene']))
    print('len(genes):', len(genes))

    drugs = sorted(set().union(
        df_combo['STITCH 1'], df_combo['STITCH 2'],
        df_mono['STITCH'], df_tgtall['STITCH'], df_tgt['STITCH']))
    print('len(drugs):', len(drugs))

    data = Data()
    data.add_node_type('Gene', len(genes))
    data.add_node_type('Drug', len(drugs))

    print('Indexing rows...')
    rows = [index(genes, g) for g in df_ppi['Gene 1']]
    print('Indexing cols...')
    cols = [index(genes, g) for g in df_ppi['Gene 2']]

    # Build the (2, nnz) index tensor directly; the explicit dtype keeps it a
    # valid integer index tensor even when the edge list is empty.
    indices = torch.tensor([rows, cols], dtype=torch.long)
    values = torch.ones(len(rows))
    print('indices.shape:', indices.shape, 'values.shape:', values.shape)

    adj_mat = torch.sparse_coo_tensor(indices, values, size=(len(genes),) * 2)
    # Average with the transpose so the matrix is symmetric regardless of
    # which direction each interaction was listed in.
    adj_mat = (adj_mat + adj_mat.transpose(0, 1)) / 2
    print('adj_mat created')

    # NOTE(review): the arguments (0, 0, True) presumably mean Gene -> Gene
    # and "symmetric" -- confirm against icosagon.data.Data.
    fam = data.add_relation_family('PPI', 0, 0, True)
    fam.add_relation_type('PPI', adj_mat)
# Run the experiment only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Loading…
Cancel
Save