From 05b1ecf47a2b3186906b634492588fcb0df733f1 Mon Sep 17 00:00:00 2001 From: Stanislaw Adaszewski Date: Sat, 6 Jun 2020 17:19:46 +0200 Subject: [PATCH] Start rework normalize. --- src/icosagon/data.py | 2 +- src/icosagon/normalize.py | 91 +++++++++++++++++++++++++++--- src/icosagon/trainprep.py | 28 +++++++--- tests/icosagon/test_normalize.py | 95 ++++++++++++++++++++++++++++++++ tests/icosagon/test_trainprep.py | 34 +++++++----- 5 files changed, 220 insertions(+), 30 deletions(-) create mode 100644 tests/icosagon/test_normalize.py diff --git a/src/icosagon/data.py b/src/icosagon/data.py index dab3852..4166d40 100644 --- a/src/icosagon/data.py +++ b/src/icosagon/data.py @@ -21,7 +21,7 @@ class RelationType(object): node_type_row: int node_type_column: int adjacency_matrix: torch.Tensor - is_autogenerated: bool + is_autogenerated: bool = False class Data(object): diff --git a/src/icosagon/normalize.py b/src/icosagon/normalize.py index 82e1072..4d91864 100644 --- a/src/icosagon/normalize.py +++ b/src/icosagon/normalize.py @@ -6,17 +6,90 @@ import numpy as np import scipy.sparse as sp +import torch -def norm_adj_mat_one_node_type(adj): - adj = sp.coo_matrix(adj) - assert adj.shape[0] == adj.shape[1] - adj_ = adj + sp.eye(adj.shape[0]) - rowsum = np.array(adj_.sum(1)) - degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten() - degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt) - adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt) - return adj_normalized +def add_eye_sparse(adj_mat: torch.Tensor) -> torch.Tensor: + if not isinstance(adj_mat, torch.Tensor): + raise ValueError('adj_mat must be a torch.Tensor') + + if not adj_mat.is_sparse: + raise ValueError('adj_mat must be sparse') + + if len(adj_mat.shape) != 2 or \ + adj_mat.shape[0] != adj_mat.shape[1]: + raise ValueError('adj_mat must be a square matrix') + + adj_mat = adj_mat.coalesce() + indices = adj_mat.indices() + values = adj_mat.values() + + eye_indices = torch.arange(adj_mat.shape[0], dtype=indices.dtype).view(1, -1) + eye_indices = torch.cat((eye_indices, eye_indices), 0) + eye_values = torch.ones(adj_mat.shape[0], dtype=values.dtype) + + indices = torch.cat((indices, eye_indices), 1) + values = torch.cat((values, eye_values), 0) + + adj_mat = torch.sparse_coo_tensor(indices=indices, values=values, size=adj_mat.shape) + + return adj_mat + + +def norm_adj_mat_one_node_type_sparse(adj_mat): + if len(adj_mat.shape) != 2 or \ + adj_mat.shape[0] != adj_mat.shape[1]: + raise ValueError('adj_mat must be a square matrix') + + adj_mat = add_eye_sparse(adj_mat) + + adj_mat = adj_mat.coalesce() + indices = adj_mat.indices() + values = adj_mat.values() + degrees = torch.zeros(adj_mat.shape[0]) + degrees = degrees.index_add(0, indices[0], values.to(degrees.dtype)) + print('degrees:', degrees) + print('values:', values) + values = values.to(degrees.dtype) / degrees[indices[0]] + adj_mat = torch.sparse_coo_tensor(indices=indices, values=values, size=adj_mat.shape) + + return adj_mat + + +def norm_adj_mat_one_node_type_dense(adj_mat): + if not isinstance(adj_mat, torch.Tensor): + raise ValueError('adj_mat must be a torch.Tensor') + + if adj_mat.is_sparse: + raise ValueError('adj_mat must be dense') + + if len(adj_mat.shape) != 2 or \ + adj_mat.shape[0] != adj_mat.shape[1]: + raise ValueError('adj_mat must be a square matrix') + + adj_mat = adj_mat + torch.eye(adj_mat.shape[0], dtype=adj_mat.dtype) + degrees = adj_mat.sum(1).view(-1, 1).to(torch.float32) + adj_mat = adj_mat.to(degrees.dtype) / degrees + + return adj_mat + + +def norm_adj_mat_one_node_type(adj_mat): + if adj_mat.is_sparse: + return norm_adj_mat_one_node_type_sparse(adj_mat) + else: + return norm_adj_mat_one_node_type_dense(adj_mat) + + +# def norm_adj_mat_one_node_type(adj): +# adj = sp.coo_matrix(adj) +# assert adj.shape[0] == adj.shape[1] +# adj_ = adj + sp.eye(adj.shape[0]) +# rowsum = np.array(adj_.sum(1)) +# degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten() +# degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt) +# adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt) +# return adj_normalized def norm_adj_mat_two_node_types(adj): diff --git a/src/icosagon/trainprep.py b/src/icosagon/trainprep.py index a999dec..e1766f3 100644 --- a/src/icosagon/trainprep.py +++ b/src/icosagon/trainprep.py @@ -11,7 +11,9 @@ from typing import Any, \ List, \ Tuple, \ Dict -from .data import NodeType +from .data import NodeType, \ + RelationType, \ + Data from collections import defaultdict from .normalize import norm_adj_mat_one_node_type, \ norm_adj_mat_two_node_types @@ -73,7 +75,7 @@ def train_val_test_split_edges(edges: torch.Tensor, return TrainValTest(edges_train, edges_val, edges_test) -def get_edges_and_degrees(adj_mat): +def get_edges_and_degrees(adj_mat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: if adj_mat.is_sparse: adj_mat = adj_mat.coalesce() degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64) @@ -109,23 +111,35 @@ def prepare_adj_mat(adj_mat: torch.Tensor, return adj_mat_train, edges_pos, edges_neg -def prepare_relation(r, ratios): +def prepare_relation_type(r: RelationType, + ratios: TrainValTest) -> PreparedRelationType: + + if not isinstance(r, RelationType): + raise ValueError('r must be a RelationType') + + if not isinstance(ratios, TrainValTest): + raise ValueError('ratios must be a TrainValTest') + adj_mat = r.adjacency_matrix - adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat) + adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat, ratios) + print('adj_mat_train:', adj_mat_train) if r.node_type_row == r.node_type_column: adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train) else: adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train) - return PreparedRelation(r.name, r.node_type_row, r.node_type_column, + return PreparedRelationType(r.name, r.node_type_row, r.node_type_column, adj_mat_train, edges_pos, edges_neg) -def prepare_training(data): +def prepare_training(data: Data) -> PreparedData: + if not isinstance(data, Data): + raise ValueError('data must be of class Data') + relation_types = defaultdict(lambda: defaultdict(list)) for (node_type_row, node_type_column), rels in data.relation_types: for r in rels: relation_types[node_type_row][node_type_column].append( - prep_relation(r)) + prep_relation_type(r)) return PreparedData(data.node_types, relation_types) diff --git a/tests/icosagon/test_normalize.py b/tests/icosagon/test_normalize.py new file mode 100644 index 0000000..63452b9 --- /dev/null +++ b/tests/icosagon/test_normalize.py @@ -0,0 +1,95 @@ +from icosagon.normalize import add_eye_sparse, \ + norm_adj_mat_one_node_type_sparse, \ + norm_adj_mat_one_node_type_dense, \ + norm_adj_mat_one_node_type +import decagon_pytorch.normalize +import torch +import pytest +import numpy as np + + +def test_add_eye_sparse_01(): + adj_mat_dense = torch.rand((10, 10)) + adj_mat_sparse = adj_mat_dense.to_sparse() + + adj_mat_dense += torch.eye(10) + adj_mat_sparse = add_eye_sparse(adj_mat_sparse) + + assert torch.all(adj_mat_sparse.to_dense() == adj_mat_dense) + + +def test_add_eye_sparse_02(): + adj_mat_dense = torch.rand((10, 20)) + adj_mat_sparse = adj_mat_dense.to_sparse() + + with pytest.raises(ValueError): + _ = add_eye_sparse(adj_mat_sparse) + + +def test_add_eye_sparse_03(): + adj_mat_dense = torch.rand((10, 10)) + + with pytest.raises(ValueError): + _ = add_eye_sparse(adj_mat_dense) + + +def test_add_eye_sparse_04(): + adj_mat_dense = np.random.rand(10, 10) + + with pytest.raises(ValueError): + _ = add_eye_sparse(adj_mat_dense) + + +def test_norm_adj_mat_one_node_type_sparse_01(): + adj_mat = torch.rand((10, 10)) + adj_mat = (adj_mat > .5) + adj_mat = adj_mat.to_sparse() + _ = norm_adj_mat_one_node_type_sparse(adj_mat) + + +def test_norm_adj_mat_one_node_type_sparse_02(): + adj_mat_dense = torch.rand((10, 10)) + adj_mat_dense = (adj_mat_dense > .5) + adj_mat_sparse = adj_mat_dense.to_sparse() + adj_mat_sparse = norm_adj_mat_one_node_type_sparse(adj_mat_sparse) + adj_mat_dense = norm_adj_mat_one_node_type_dense(adj_mat_dense) + assert torch.all(adj_mat_sparse.to_dense() == adj_mat_dense) + + +def test_norm_adj_mat_one_node_type_dense_01(): + adj_mat = torch.rand((10, 10)) + adj_mat = (adj_mat > .5) + _ = norm_adj_mat_one_node_type_dense(adj_mat) + + +def test_norm_adj_mat_one_node_type_dense_02(): + adj_mat = torch.tensor([ + [0, 1, 1, 0], # 3 + [1, 0, 1, 0], # 3 + [1, 1, 0, 1], # 4 + [0, 0, 1, 0] # 2 + ]) + expect = np.array([ + [1/3, 1/3, 1/3, 0], + [1/3, 1/3, 1/3, 0], + [1/4, 1/4, 1/4, 1/4], + [0, 0, 1/2, 1/2] + ], dtype=np.float32) + res = decagon_pytorch.normalize.norm_adj_mat_one_node_type(adj_mat) + res = res.todense().astype(np.float32) + print('res:', res) + print('expect:', expect) + assert torch.all(res == expect) + + +@pytest.mark.skip +def test_norm_adj_mat_one_node_type_dense_03(): + adj_mat = torch.rand((10, 10)) + adj_mat = (adj_mat > .5) + adj_mat_dec = decagon_pytorch.normalize.norm_adj_mat_one_node_type(adj_mat) + adj_mat_ico = norm_adj_mat_one_node_type_dense(adj_mat) + adj_mat_dec = adj_mat_dec.todense() + adj_mat_ico = adj_mat_ico.detach().cpu().numpy() + print('adj_mat_dec:', adj_mat_dec) + print('adj_mat_ico:', adj_mat_ico) + assert np.all(adj_mat_dec == adj_mat_ico) diff --git a/tests/icosagon/test_trainprep.py b/tests/icosagon/test_trainprep.py index ae9c562..967bb1e 100644 --- a/tests/icosagon/test_trainprep.py +++ b/tests/icosagon/test_trainprep.py @@ -7,11 +7,13 @@ from icosagon.trainprep import TrainValTest, \ train_val_test_split_edges, \ get_edges_and_degrees, \ - prepare_adj_mat + prepare_adj_mat, \ + prepare_relation_type import torch import pytest import numpy as np from itertools import chain +from icosagon.data import RelationType def test_train_val_test_split_edges_01(): @@ -100,17 +102,23 @@ def test_prepare_adj_mat_02(): assert len(edges.shape) == 2 assert edges.shape[1] == 2 -# def prepare_adj_mat(adj_mat: torch.Tensor, -# ratios: TrainValTest) -> Tuple[TrainValTest, TrainValTest]: -# -# degrees = adj_mat.sum(0) -# edges_pos = torch.nonzero(adj_mat) -# -# neg_neighbors = fixed_unigram_candidate_sampler(edges_pos[:, 1], -# len(edges), degrees, 0.75) -# edges_neg = torch.cat((edges_pos[:, 0], neg_neighbors.view(-1, 1)), 1) + +def test_prepare_relation_type_01(): + adj_mat = (torch.rand((10, 10)) > .5) + r = RelationType('Test', 0, 0, adj_mat) + ratios = TrainValTest(.8, .1, .1) + _ = prepare_relation_type(r, ratios) + + + +# def prepare_relation(r, ratios): +# adj_mat = r.adjacency_matrix +# adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat) # -# edges_pos = train_val_test_split_edges(edges_pos, ratios) -# edges_neg = train_val_test_split_edges(edges_neg, ratios) +# if r.node_type_row == r.node_type_column: +# adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train) +# else: +# adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train) # -# return edges_pos, edges_neg +# return PreparedRelation(r.name, r.node_type_row, r.node_type_column, +# adj_mat_train, edges_pos, edges_neg)