IF YOU WOULD LIKE TO GET AN ACCOUNT, please write an email to s dot adaszewski at gmail dot com. User accounts are meant only to report issues and/or generate pull requests. This is a purpose-specific Git hosting for ADARED projects. Thank you for your understanding!
Browse Source

Start rework normalize.

master
Stanislaw Adaszewski 4 years ago
parent
commit
05b1ecf47a
5 changed files with 220 additions and 30 deletions
  1. +1
    -1
      src/icosagon/data.py
  2. +82
    -9
      src/icosagon/normalize.py
  3. +21
    -7
      src/icosagon/trainprep.py
  4. +95
    -0
      tests/icosagon/test_normalize.py
  5. +21
    -13
      tests/icosagon/test_trainprep.py

+ 1
- 1
src/icosagon/data.py View File

@@ -21,7 +21,7 @@ class RelationType(object):
node_type_row: int node_type_row: int
node_type_column: int node_type_column: int
adjacency_matrix: torch.Tensor adjacency_matrix: torch.Tensor
is_autogenerated: bool
is_autogenerated: bool = False
class Data(object): class Data(object):


+ 82
- 9
src/icosagon/normalize.py View File

@@ -6,17 +6,90 @@
import numpy as np import numpy as np
import scipy.sparse as sp import scipy.sparse as sp
import torch
def norm_adj_mat_one_node_type(adj):
adj = sp.coo_matrix(adj)
assert adj.shape[0] == adj.shape[1]
adj_ = adj + sp.eye(adj.shape[0])
rowsum = np.array(adj_.sum(1))
degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten()
degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt)
adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt)
return adj_normalized
def add_eye_sparse(adj_mat: torch.Tensor) -> torch.Tensor:
if not isinstance(adj_mat, torch.Tensor):
raise ValueError('adj_mat must be a torch.Tensor')
if not adj_mat.is_sparse:
raise ValueError('adj_mat must be sparse')
if len(adj_mat.shape) != 2 or \
adj_mat.shape[0] != adj_mat.shape[1]:
raise ValueError('adj_mat must be a square matrix')
adj_mat = adj_mat.coalesce()
indices = adj_mat.indices()
values = adj_mat.values()
eye_indices = torch.arange(adj_mat.shape[0], dtype=indices.dtype).view(1, -1)
eye_indices = torch.cat((eye_indices, eye_indices), 0)
eye_values = torch.ones(adj_mat.shape[0], dtype=values.dtype)
indices = torch.cat((indices, eye_indices), 1)
values = torch.cat((values, eye_values), 0)
adj_mat = torch.sparse_coo_tensor(indices=indices, values=values, size=adj_mat.shape)
return adj_mat
def norm_adj_mat_one_node_type_sparse(adj_mat):
if len(adj_mat.shape) != 2 or \
adj_mat.shape[0] != adj_mat.shape[1]:
raise ValueError('adj_mat must be a square matrix')
adj_mat = add_eye_sparse(adj_mat)
adj_mat = adj_mat.coalesce()
indices = adj_mat.indices()
values = adj_mat.values()
degrees = torch.zeros(adj_mat.shape[0])
degrees = degrees.index_add(0, indices[0], values.to(degrees.dtype))
print('degrees:', degrees)
print('values:', values)
values = values.to(degrees.dtype) / degrees[indices[0]]
adj_mat = torch.sparse_coo_tensor(indices=indices, values=values, size=adj_mat.shape)
return adj_mat
def norm_adj_mat_one_node_type_dense(adj_mat):
if not isinstance(adj_mat, torch.Tensor):
raise ValueError('adj_mat must be a torch.Tensor')
if adj_mat.is_sparse:
raise ValueError('adj_mat must be dense')
if len(adj_mat.shape) != 2 or \
adj_mat.shape[0] != adj_mat.shape[1]:
raise ValueError('adj_mat must be a square matrix')
adj_mat = adj_mat + torch.eye(adj_mat.shape[0], dtype=adj_mat.dtype)
degrees = adj_mat.sum(1).view(-1, 1).to(torch.float32)
adj_mat = adj_mat.to(degrees.dtype) / degrees
return adj_mat
def norm_adj_mat_one_node_type(adj_mat):
if adj_mat.is_sparse:
return norm_adj_mat_one_node_type_sparse(adj_mat)
else:
return norm_adj_mat_one_node_type_dense(adj_mat)
# def norm_adj_mat_one_node_type(adj):
# adj = sp.coo_matrix(adj)
# assert adj.shape[0] == adj.shape[1]
# adj_ = adj + sp.eye(adj.shape[0])
# rowsum = np.array(adj_.sum(1))
# degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten()
# degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt)
# adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt)
# return adj_normalized
def norm_adj_mat_two_node_types(adj): def norm_adj_mat_two_node_types(adj):


+ 21
- 7
src/icosagon/trainprep.py View File

@@ -11,7 +11,9 @@ from typing import Any, \
List, \ List, \
Tuple, \ Tuple, \
Dict Dict
from .data import NodeType
from .data import NodeType, \
RelationType, \
Data
from collections import defaultdict from collections import defaultdict
from .normalize import norm_adj_mat_one_node_type, \ from .normalize import norm_adj_mat_one_node_type, \
norm_adj_mat_two_node_types norm_adj_mat_two_node_types
@@ -73,7 +75,7 @@ def train_val_test_split_edges(edges: torch.Tensor,
return TrainValTest(edges_train, edges_val, edges_test) return TrainValTest(edges_train, edges_val, edges_test)
def get_edges_and_degrees(adj_mat):
def get_edges_and_degrees(adj_mat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
if adj_mat.is_sparse: if adj_mat.is_sparse:
adj_mat = adj_mat.coalesce() adj_mat = adj_mat.coalesce()
degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64) degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64)
@@ -109,23 +111,35 @@ def prepare_adj_mat(adj_mat: torch.Tensor,
return adj_mat_train, edges_pos, edges_neg return adj_mat_train, edges_pos, edges_neg
def prepare_relation(r, ratios):
def prepare_relation_type(r: RelationType,
ratios: TrainValTest) -> PreparedRelationType:
if not isinstance(r, RelationType):
raise ValueError('r must be a RelationType')
if not isinstance(ratios, TrainValTest):
raise ValueError('ratios must be a TrainValTest')
adj_mat = r.adjacency_matrix adj_mat = r.adjacency_matrix
adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat)
adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat, ratios)
print('adj_mat_train:', adj_mat_train)
if r.node_type_row == r.node_type_column: if r.node_type_row == r.node_type_column:
adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train) adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
else: else:
adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train) adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)
return PreparedRelation(r.name, r.node_type_row, r.node_type_column,
return PreparedRelationType(r.name, r.node_type_row, r.node_type_column,
adj_mat_train, edges_pos, edges_neg) adj_mat_train, edges_pos, edges_neg)
def prepare_training(data):
def prepare_training(data: Data) -> PreparedData:
if not isinstance(data, Data):
raise ValueError('data must be of class Data')
relation_types = defaultdict(lambda: defaultdict(list)) relation_types = defaultdict(lambda: defaultdict(list))
for (node_type_row, node_type_column), rels in data.relation_types: for (node_type_row, node_type_column), rels in data.relation_types:
for r in rels: for r in rels:
relation_types[node_type_row][node_type_column].append( relation_types[node_type_row][node_type_column].append(
prep_relation(r))
prep_relation_type(r))
return PreparedData(data.node_types, relation_types) return PreparedData(data.node_types, relation_types)

+ 95
- 0
tests/icosagon/test_normalize.py View File

@@ -0,0 +1,95 @@
from icosagon.normalize import add_eye_sparse, \
norm_adj_mat_one_node_type_sparse, \
norm_adj_mat_one_node_type_dense, \
norm_adj_mat_one_node_type
import decagon_pytorch.normalize
import torch
import pytest
import numpy as np
def test_add_eye_sparse_01():
adj_mat_dense = torch.rand((10, 10))
adj_mat_sparse = adj_mat_dense.to_sparse()
adj_mat_dense += torch.eye(10)
adj_mat_sparse = add_eye_sparse(adj_mat_sparse)
assert torch.all(adj_mat_sparse.to_dense() == adj_mat_dense)
def test_add_eye_sparse_02():
adj_mat_dense = torch.rand((10, 20))
adj_mat_sparse = adj_mat_dense.to_sparse()
with pytest.raises(ValueError):
_ = add_eye_sparse(adj_mat_sparse)
def test_add_eye_sparse_03():
adj_mat_dense = torch.rand((10, 10))
with pytest.raises(ValueError):
_ = add_eye_sparse(adj_mat_dense)
def test_add_eye_sparse_04():
adj_mat_dense = np.random.rand(10, 10)
with pytest.raises(ValueError):
_ = add_eye_sparse(adj_mat_dense)
def test_norm_adj_mat_one_node_type_sparse_01():
adj_mat = torch.rand((10, 10))
adj_mat = (adj_mat > .5)
adj_mat = adj_mat.to_sparse()
_ = norm_adj_mat_one_node_type_sparse(adj_mat)
def test_norm_adj_mat_one_node_type_sparse_02():
adj_mat_dense = torch.rand((10, 10))
adj_mat_dense = (adj_mat_dense > .5)
adj_mat_sparse = adj_mat_dense.to_sparse()
adj_mat_sparse = norm_adj_mat_one_node_type_sparse(adj_mat_sparse)
adj_mat_dense = norm_adj_mat_one_node_type_dense(adj_mat_dense)
assert torch.all(adj_mat_sparse.to_dense() == adj_mat_dense)
def test_norm_adj_mat_one_node_type_dense_01():
adj_mat = torch.rand((10, 10))
adj_mat = (adj_mat > .5)
_ = norm_adj_mat_one_node_type_dense(adj_mat)
def test_norm_adj_mat_one_node_type_dense_02():
adj_mat = torch.tensor([
[0, 1, 1, 0], # 3
[1, 0, 1, 0], # 3
[1, 1, 0, 1], # 4
[0, 0, 1, 0] # 2
])
expect = np.array([
[1/3, 1/3, 1/3, 0],
[1/3, 1/3, 1/3, 0],
[1/4, 1/4, 1/4, 1/4],
[0, 0, 1/2, 1/2]
], dtype=np.float32)
res = decagon_pytorch.normalize.norm_adj_mat_one_node_type(adj_mat)
res = res.todense().astype(np.float32)
print('res:', res)
print('expect:', expect)
assert torch.all(res == expect)
@pytest.mark.skip
def test_norm_adj_mat_one_node_type_dense_03():
adj_mat = torch.rand((10, 10))
adj_mat = (adj_mat > .5)
adj_mat_dec = decagon_pytorch.normalize.norm_adj_mat_one_node_type(adj_mat)
adj_mat_ico = norm_adj_mat_one_node_type_dense(adj_mat)
adj_mat_dec = adj_mat_dec.todense()
adj_mat_ico = adj_mat_ico.detach().cpu().numpy()
print('adj_mat_dec:', adj_mat_dec)
print('adj_mat_ico:', adj_mat_ico)
assert np.all(adj_mat_dec == adj_mat_ico)

+ 21
- 13
tests/icosagon/test_trainprep.py View File

@@ -7,11 +7,13 @@
from icosagon.trainprep import TrainValTest, \ from icosagon.trainprep import TrainValTest, \
train_val_test_split_edges, \ train_val_test_split_edges, \
get_edges_and_degrees, \ get_edges_and_degrees, \
prepare_adj_mat
prepare_adj_mat, \
prepare_relation_type
import torch import torch
import pytest import pytest
import numpy as np import numpy as np
from itertools import chain from itertools import chain
from icosagon.data import RelationType
def test_train_val_test_split_edges_01(): def test_train_val_test_split_edges_01():
@@ -100,17 +102,23 @@ def test_prepare_adj_mat_02():
assert len(edges.shape) == 2 assert len(edges.shape) == 2
assert edges.shape[1] == 2 assert edges.shape[1] == 2
# def prepare_adj_mat(adj_mat: torch.Tensor,
# ratios: TrainValTest) -> Tuple[TrainValTest, TrainValTest]:
#
# degrees = adj_mat.sum(0)
# edges_pos = torch.nonzero(adj_mat)
#
# neg_neighbors = fixed_unigram_candidate_sampler(edges_pos[:, 1],
# len(edges), degrees, 0.75)
# edges_neg = torch.cat((edges_pos[:, 0], neg_neighbors.view(-1, 1)), 1)
def test_prepare_relation_type_01():
adj_mat = (torch.rand((10, 10)) > .5)
r = RelationType('Test', 0, 0, adj_mat)
ratios = TrainValTest(.8, .1, .1)
_ = prepare_relation_type(r, ratios)
# def prepare_relation(r, ratios):
# adj_mat = r.adjacency_matrix
# adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat)
# #
# edges_pos = train_val_test_split_edges(edges_pos, ratios)
# edges_neg = train_val_test_split_edges(edges_neg, ratios)
# if r.node_type_row == r.node_type_column:
# adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
# else:
# adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)
# #
# return edges_pos, edges_neg
# return PreparedRelation(r.name, r.node_type_row, r.node_type_column,
# adj_mat_train, edges_pos, edges_neg)

Loading…
Cancel
Save