From 9ff10387282e9303bbbde75cb3f7f41be1341e4d Mon Sep 17 00:00:00 2001 From: Stanislaw Adaszewski Date: Thu, 20 Aug 2020 18:30:12 +0200 Subject: [PATCH] Add extra checks in Data. --- src/triacontagon/data.py | 22 +++++++++++++++++++--- src/triacontagon/sampling.py | 12 ++++++++++-- src/triacontagon/util.py | 23 +++++++++++++++++++++++ tests/triacontagon/test_batch.py | 20 ++++++++++---------- tests/triacontagon/test_model.py | 32 +++++++++++++++++--------------- 5 files changed, 79 insertions(+), 30 deletions(-) diff --git a/src/triacontagon/data.py b/src/triacontagon/data.py index 3c13a50..48b0a58 100644 --- a/src/triacontagon/data.py +++ b/src/triacontagon/data.py @@ -9,7 +9,8 @@ from typing import Callable, \ Tuple, \ List import types -from .util import _nonzero_sum +from .util import _nonzero_sum, \ + _diag import torch @@ -61,13 +62,28 @@ class Data(object): name = str(name) vertex_type_row = int(vertex_type_row) vertex_type_column = int(vertex_type_column) + if not isinstance(adjacency_matrices, list): raise TypeError('adjacency_matrices must be a list of tensors') - if not isinstance(decoder_factory, types.FunctionType): - raise TypeError('decoder_factory must be a function') + + if not callable(decoder_factory): + raise TypeError('decoder_factory must be callable') + if (vertex_type_row, vertex_type_column) in self.edge_types: raise KeyError('Edge type for given combination of row and column already exists') + + if vertex_type_row == vertex_type_column and \ + any(torch.any(_diag(adj_mat).to(torch.bool)) \ + for adj_mat in adjacency_matrices): + raise ValueError('Adjacency matrices for same row/column vertex types must have empty diagonals') + + if any(adj_mat.shape[0] != self.vertex_types[vertex_type_row].count \ + or adj_mat.shape[1] != self.vertex_types[vertex_type_column].count \ + for adj_mat in adjacency_matrices): + raise ValueError('Adjacency matrices must have as many rows as row vertex type count and as many columns as column vertex type count') + total_connectivity = _nonzero_sum(adjacency_matrices) + self.edge_types[vertex_type_row, vertex_type_column] = \ EdgeType(name, vertex_type_row, vertex_type_column, adjacency_matrices, decoder_factory, total_connectivity) diff --git a/src/triacontagon/sampling.py b/src/triacontagon/sampling.py index 13ef5cf..29ac224 100644 --- a/src/triacontagon/sampling.py +++ b/src/triacontagon/sampling.py @@ -120,13 +120,18 @@ def get_true_classes(adj_mat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor] return true_classes, row_count -def negative_sample_adj_mat(adj_mat: torch.Tensor) -> torch.Tensor: +def negative_sample_adj_mat(adj_mat: torch.Tensor, + remove_diagonal: bool=False) -> torch.Tensor: + if not isinstance(adj_mat, torch.Tensor): raise ValueError('adj_mat must be a torch.Tensor, got: %s' % adj_mat.__class__.__name__) edges_pos, degrees = get_edges_and_degrees(adj_mat) true_classes, row_count = get_true_classes(adj_mat) + if remove_diagonal: + true_classes = torch.cat([ torch.arange(len(adj_mat)).view(-1, 1), + true_classes ], dim=1) # true_classes = edges_pos[:, 1].view(-1, 1) # print('true_classes:', true_classes) @@ -164,7 +169,10 @@ def negative_sample_data(data: Data) -> Data: for key, et in data.edge_types.items(): adjacency_matrices_neg = [] for adj_mat in et.adjacency_matrices: - adj_mat_neg = negative_sample_adj_mat(adj_mat) + remove_diagonal = True \ + if et.vertex_type_row == et.vertex_type_column \ + else False + adj_mat_neg = negative_sample_adj_mat(adj_mat, remove_diagonal) adjacency_matrices_neg.append(adj_mat_neg) res.add_edge_type(et.name, et.vertex_type_row, et.vertex_type_column, diff --git a/src/triacontagon/util.py b/src/triacontagon/util.py index 70067f8..27e8524 100644 --- a/src/triacontagon/util.py +++ b/src/triacontagon/util.py @@ -4,6 +4,29 @@ from typing import List, \ import time +def _diag(x: torch.Tensor, make_sparse: bool=False): + if len(x.shape) < 1 or len(x.shape) > 2: + raise ValueError('Matrix or vector expected') + + if not x.is_sparse and not make_sparse: + return torch.diag(x) + + if len(x.shape) == 1: + indices = torch.arange(len(x)).view(1, -1) + indices = torch.cat([ indices, indices ]) + return _sparse_coo_tensor(indices, x.to_dense(), (len(x),) * 2) + + values = x.values() + indices = x.indices() + mask = torch.nonzero(indices[0] == indices[1], as_tuple=True)[0] + indices = torch.flatten(indices[0, mask]) + order = torch.argsort(indices) + values = values[mask][order] + res = torch.zeros(min(x.shape[0], x.shape[1]), dtype=values.dtype) + res[indices] = values + return res + + def _equal(x: torch.Tensor, y: torch.Tensor): if x.is_sparse ^ y.is_sparse: raise ValueError('Cannot mix sparse and dense tensors') diff --git a/tests/triacontagon/test_batch.py b/tests/triacontagon/test_batch.py index 46ce6a3..9591c45 100644 --- a/tests/triacontagon/test_batch.py +++ b/tests/triacontagon/test_batch.py @@ -33,7 +33,7 @@ def test_same_data_org_02(): torch.tensor([ [0, 0, 0, 1], [1, 0, 0, 0], - [0, 1, 1, 0], + [0, 1, 0, 1], [1, 0, 1, 0] ]).to_sparse() ], dedicom_decoder) @@ -46,7 +46,7 @@ def test_same_data_org_02(): torch.tensor([ [0, 0, 0, 1], [1, 0, 0, 0], - [0, 1, 1, 0], + [0, 1, 0, 1], [1, 0, 0, 0] ]).to_sparse() ], dedicom_decoder) @@ -94,7 +94,7 @@ def test_batcher_02(): ]).to_sparse(), torch.tensor([ - [1, 0, 1, 0, 0], + [0, 0, 1, 0, 1], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1], [0, 1, 0, 0, 0], @@ -113,7 +113,7 @@ def test_batcher_02(): assert visited == { (0, 0, 1), (0, 0, 3), (0, 1, 4), (0, 2, 0), (0, 3, 2), (0, 4, 3), - (1, 0, 0), (1, 0, 2), (1, 1, 3), (1, 2, 4), + (1, 0, 2), (1, 0, 4), (1, 1, 3), (1, 2, 4), (1, 3, 1), (1, 4, 2) } @@ -132,7 +132,7 @@ def test_batcher_03(): ]).to_sparse(), torch.tensor([ - [1, 0, 1, 0, 0], + [0, 0, 1, 0, 1], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1], [0, 1, 0, 0, 0], @@ -162,7 +162,7 @@ def test_batcher_03(): assert visited == { (0, 0, 0, 0, 1), (0, 0, 0, 0, 3), (0, 0, 0, 1, 4), (0, 0, 0, 2, 0), (0, 0, 0, 3, 2), (0, 0, 0, 4, 3), - (0, 0, 1, 0, 0), (0, 0, 1, 0, 2), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), + (0, 0, 1, 0, 2), (0, 0, 1, 0, 4), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), (0, 0, 1, 3, 1), (0, 0, 1, 4, 2), (0, 1, 0, 0, 1), (0, 1, 0, 1, 0), (0, 1, 0, 1, 3), (0, 1, 0, 2, 1), (0, 1, 0, 3, 2), (0, 1, 0, 4, 1), @@ -211,7 +211,7 @@ def test_batcher_05(): ]).to_sparse(), torch.tensor([ - [1, 0, 1, 0, 0], + [0, 0, 1, 0, 1], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1], [0, 1, 0, 0, 0], @@ -242,7 +242,7 @@ def test_batcher_05(): assert visited == { (0, 0, 0, 0, 1), (0, 0, 0, 0, 3), (0, 0, 0, 1, 4), (0, 0, 0, 2, 0), (0, 0, 0, 3, 2), (0, 0, 0, 4, 3), - (0, 0, 1, 0, 0), (0, 0, 1, 0, 2), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), + (0, 0, 1, 0, 2), (0, 0, 1, 0, 4), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), (0, 0, 1, 3, 1), (0, 0, 1, 4, 2), (0, 1, 0, 0, 1), (0, 1, 0, 1, 0), (0, 1, 0, 1, 3), (0, 1, 0, 2, 1), (0, 1, 0, 3, 2), (0, 1, 0, 4, 1), @@ -264,7 +264,7 @@ def test_dual_batcher_01(): ]).to_sparse(), torch.tensor([ - [1, 0, 1, 0, 0], + [0, 0, 1, 0, 1], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1], [0, 1, 0, 0, 0], @@ -306,7 +306,7 @@ def test_dual_batcher_01(): expected = { (0, 0, 0, 0, 1), (0, 0, 0, 0, 3), (0, 0, 0, 1, 4), (0, 0, 0, 2, 0), (0, 0, 0, 3, 2), (0, 0, 0, 4, 3), - (0, 0, 1, 0, 0), (0, 0, 1, 0, 2), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), + (0, 0, 1, 0, 2), (0, 0, 1, 0, 4), (0, 0, 1, 1, 3), (0, 0, 1, 2, 4), (0, 0, 1, 3, 1), (0, 0, 1, 4, 2), (0, 1, 0, 0, 1), (0, 1, 0, 1, 0), (0, 1, 0, 1, 3), (0, 1, 0, 2, 1), (0, 1, 0, 3, 2), (0, 1, 0, 4, 1), diff --git a/tests/triacontagon/test_model.py b/tests/triacontagon/test_model.py index 0f57578..06c0ece 100644 --- a/tests/triacontagon/test_model.py +++ b/tests/triacontagon/test_model.py @@ -13,10 +13,10 @@ def test_per_layer_required_vertices_01(): d.add_vertex_type('Drug', 5) d.add_edge_type('Gene-Gene', 0, 0, [ torch.tensor([ - [1, 0, 0, 1], - [0, 1, 1, 0], + [0, 0, 0, 1], [0, 0, 1, 0], - [0, 1, 0, 1] + [1, 0, 0, 0], + [0, 1, 0, 0] ]).to_sparse() ], dedicom_decoder) d.add_edge_type('Gene-Drug', 0, 1, [ torch.tensor([ @@ -27,11 +27,11 @@ def test_per_layer_required_vertices_01(): ]).to_sparse() ], dedicom_decoder) d.add_edge_type('Drug-Drug', 1, 1, [ torch.tensor([ + [0, 0, 1, 0, 1], + [0, 0, 0, 1, 1], [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1] + [0, 1, 0, 0, 1], + [1, 1, 0, 1, 0] ]).to_sparse() ], dedicom_decoder) batch = TrainingBatch(0, 1, 0, torch.tensor([ @@ -48,10 +48,10 @@ def test_model_convolve_01(): d.add_vertex_type('Drug', 5) d.add_edge_type('Gene-Gene', 0, 0, [ torch.tensor([ - [1, 0, 0, 1], - [0, 1, 1, 0], + [0, 0, 0, 1], [0, 0, 1, 0], - [0, 1, 0, 1] + [1, 0, 0, 0], + [0, 1, 0, 0] ], dtype=torch.float).to_sparse() ], dedicom_decoder) d.add_edge_type('Gene-Drug', 0, 1, [ torch.tensor([ @@ -62,11 +62,11 @@ def test_model_convolve_01(): ], dtype=torch.float).to_sparse() ], dedicom_decoder) d.add_edge_type('Drug-Drug', 1, 1, [ torch.tensor([ - [1, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], - [0, 0, 0, 0, 1] + [0, 0, 0, 0, 1], + [0, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + [0, 1, 0, 1, 0] ], dtype=torch.float).to_sparse() ], dedicom_decoder) model = Model(d, [9, 32, 64], keep_prob=1.0, @@ -90,8 +90,10 @@ def test_model_decode_01(): d = Data() d.add_vertex_type('Gene', 100) + gene_gene = torch.rand(100, 100).round() + gene_gene = gene_gene - torch.diag(torch.diag(gene_gene)) d.add_edge_type('Gene-Gene', 0, 0, [ - torch.rand(100, 100).round().to_sparse() + gene_gene.to_sparse() ], dedicom_decoder) b = TrainingBatch(0, 0, 0, torch.tensor([