IF YOU WOULD LIKE TO GET AN ACCOUNT, please write an email to s dot adaszewski at gmail dot com. User accounts are meant only for reporting issues and/or submitting pull requests. This is a purpose-specific Git hosting service for ADARED projects. Thank you for your understanding!
Browse Source

Start rework normalize.

master
Stanislaw Adaszewski 3 years ago
parent
commit
05b1ecf47a
5 changed files with 220 additions and 30 deletions
  1. +1
    -1
      src/icosagon/data.py
  2. +82
    -9
      src/icosagon/normalize.py
  3. +21
    -7
      src/icosagon/trainprep.py
  4. +95
    -0
      tests/icosagon/test_normalize.py
  5. +21
    -13
      tests/icosagon/test_trainprep.py

+ 1
- 1
src/icosagon/data.py View File

@@ -21,7 +21,7 @@ class RelationType(object):
node_type_row: int
node_type_column: int
adjacency_matrix: torch.Tensor
is_autogenerated: bool
is_autogenerated: bool = False
class Data(object):


+ 82
- 9
src/icosagon/normalize.py View File

@@ -6,17 +6,90 @@
import numpy as np
import scipy.sparse as sp
import torch
def norm_adj_mat_one_node_type(adj):
    """Normalize a square adjacency matrix by inverse square-root degrees.

    The input is converted to a scipy COO matrix, the identity is added
    (self-loops), and the result is scaled on both sides by the diagonal
    matrix of ``rowsum ** -0.5``. Note that the product is transposed
    between the two scalings, exactly as written below.

    Args:
        adj: Anything accepted by ``scipy.sparse.coo_matrix``; must be square.

    Returns:
        A scipy sparse matrix holding the normalized adjacency.

    Raises:
        AssertionError: If the matrix is not square.
    """
    adj = sp.coo_matrix(adj)
    assert adj.shape[0] == adj.shape[1]

    n_nodes = adj.shape[0]
    with_self_loops = adj + sp.eye(n_nodes)

    # Row sums of (A + I), turned into the diagonal scaling matrix D^-1/2.
    row_degrees = np.array(with_self_loops.sum(1))
    inv_sqrt_degrees = sp.diags(np.power(row_degrees, -0.5).flatten())

    scaled_once = with_self_loops.dot(inv_sqrt_degrees)
    return scaled_once.transpose().dot(inv_sqrt_degrees)
def add_eye_sparse(adj_mat: torch.Tensor) -> torch.Tensor:
    """Return ``adj_mat + I`` for a square sparse COO tensor.

    The identity is appended as extra ``(i, i)`` entries instead of being
    merged into existing ones, so the returned tensor is not coalesced;
    callers that need unique indices should call ``.coalesce()`` on it.

    Args:
        adj_mat: A 2-D, square, sparse ``torch.Tensor``.

    Returns:
        A sparse COO tensor of the same shape with ones added on the diagonal.

    Raises:
        ValueError: If ``adj_mat`` is not a tensor, is not sparse, or is not
            a square matrix.
    """
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    if not adj_mat.is_sparse:
        raise ValueError('adj_mat must be sparse')

    if len(adj_mat.shape) != 2 or \
            adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    adj_mat = adj_mat.coalesce()
    indices = adj_mat.indices()
    values = adj_mat.values()
    n_nodes = adj_mat.shape[0]

    # Build the identity on the same device as the input; otherwise the
    # concatenations below fail for tensors that do not live on the CPU.
    eye_indices = torch.arange(n_nodes, dtype=indices.dtype,
                               device=indices.device).view(1, -1)
    eye_indices = torch.cat((eye_indices, eye_indices), 0)
    eye_values = torch.ones(n_nodes, dtype=values.dtype,
                            device=values.device)

    indices = torch.cat((indices, eye_indices), 1)
    values = torch.cat((values, eye_values), 0)

    return torch.sparse_coo_tensor(indices=indices, values=values,
                                   size=adj_mat.shape)
def norm_adj_mat_one_node_type_sparse(adj_mat):
    """Add self-loops to a sparse adjacency matrix and normalize its rows.

    After the identity is added, every stored entry is divided by the sum
    of the entries in its row.

    Args:
        adj_mat: A 2-D, square, sparse ``torch.Tensor``.

    Returns:
        A sparse COO tensor of the same shape whose values use torch's
        default floating-point dtype.

    Raises:
        ValueError: If ``adj_mat`` is not a tensor, is not sparse, or is not
            a square matrix.
    """
    # Validate the type first so that non-tensor input produces the same
    # ValueError as the other normalizers instead of an AttributeError.
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    if not adj_mat.is_sparse:
        raise ValueError('adj_mat must be sparse')

    if len(adj_mat.shape) != 2 or \
            adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    # Coalescing merges the appended diagonal entries with existing ones.
    adj_mat = add_eye_sparse(adj_mat).coalesce()
    indices = adj_mat.indices()
    values = adj_mat.values()
    row_indices = indices[0]

    # Accumulate per-row sums on the same device as the values.
    degrees = torch.zeros(adj_mat.shape[0], device=values.device)
    values = values.to(degrees.dtype)
    degrees = degrees.index_add(0, row_indices, values)

    values = values / degrees[row_indices]
    return torch.sparse_coo_tensor(indices=indices, values=values,
                                   size=adj_mat.shape)
def norm_adj_mat_one_node_type_dense(adj_mat):
    """Add self-loops to a dense adjacency matrix and normalize its rows.

    The identity is added in the input's own dtype, then every row is
    divided by its sum.

    Args:
        adj_mat: A 2-D, square, dense ``torch.Tensor``.

    Returns:
        A dense ``torch.float32`` tensor of the same shape.

    Raises:
        ValueError: If ``adj_mat`` is not a tensor, is sparse, or is not a
            square matrix.
    """
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    if adj_mat.is_sparse:
        raise ValueError('adj_mat must be dense')

    if len(adj_mat.shape) != 2 or \
            adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    # Create the identity on the input's device so the addition also works
    # for tensors that do not live on the CPU.
    eye = torch.eye(adj_mat.shape[0], dtype=adj_mat.dtype,
                    device=adj_mat.device)
    adj_mat = adj_mat + eye

    # Column vector of row sums, so the division broadcasts across each row.
    degrees = adj_mat.sum(1).view(-1, 1).to(torch.float32)
    return adj_mat.to(degrees.dtype) / degrees
def norm_adj_mat_one_node_type(adj_mat):
    """Normalize a single-node-type adjacency matrix.

    Dispatches on ``adj_mat.is_sparse`` to the sparse or the dense
    implementation and returns whatever that implementation returns.
    """
    normalize = norm_adj_mat_one_node_type_sparse if adj_mat.is_sparse \
        else norm_adj_mat_one_node_type_dense
    return normalize(adj_mat)
# def norm_adj_mat_one_node_type(adj):
# adj = sp.coo_matrix(adj)
# assert adj.shape[0] == adj.shape[1]
# adj_ = adj + sp.eye(adj.shape[0])
# rowsum = np.array(adj_.sum(1))
# degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten()
# degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt)
# adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt)
# return adj_normalized
def norm_adj_mat_two_node_types(adj):


+ 21
- 7
src/icosagon/trainprep.py View File

@@ -11,7 +11,9 @@ from typing import Any, \
List, \
Tuple, \
Dict
from .data import NodeType
from .data import NodeType, \
RelationType, \
Data
from collections import defaultdict
from .normalize import norm_adj_mat_one_node_type, \
norm_adj_mat_two_node_types
@@ -73,7 +75,7 @@ def train_val_test_split_edges(edges: torch.Tensor,
return TrainValTest(edges_train, edges_val, edges_test)
def get_edges_and_degrees(adj_mat):
def get_edges_and_degrees(adj_mat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
if adj_mat.is_sparse:
adj_mat = adj_mat.coalesce()
degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64)
@@ -109,23 +111,35 @@ def prepare_adj_mat(adj_mat: torch.Tensor,
return adj_mat_train, edges_pos, edges_neg
def prepare_relation_type(r: RelationType,
        ratios: TrainValTest) -> PreparedRelationType:
    """Prepare one relation type for training.

    The relation's adjacency matrix is passed to ``prepare_adj_mat``
    together with ``ratios``; the returned training adjacency matrix is
    then normalized with the one-node-type normalizer when the row and
    column node types are equal, and with the two-node-types normalizer
    otherwise.

    Args:
        r: The relation type to prepare.
        ratios: A ``TrainValTest`` forwarded to ``prepare_adj_mat``
            (presumably the train/validation/test split fractions --
            confirm against ``prepare_adj_mat``).

    Returns:
        A ``PreparedRelationType`` carrying the relation's name and node
        types, the normalized training adjacency matrix, and the positive
        and negative edges returned by ``prepare_adj_mat``.

    Raises:
        ValueError: If ``r`` is not a ``RelationType`` or ``ratios`` is not
            a ``TrainValTest``.
    """
    if not isinstance(r, RelationType):
        raise ValueError('r must be a RelationType')

    if not isinstance(ratios, TrainValTest):
        raise ValueError('ratios must be a TrainValTest')

    adj_mat = r.adjacency_matrix
    adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat, ratios)

    if r.node_type_row == r.node_type_column:
        adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
    else:
        adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)

    return PreparedRelationType(r.name, r.node_type_row, r.node_type_column,
        adj_mat_train, edges_pos, edges_neg)
def prepare_training(data: Data, ratios: TrainValTest = None) -> PreparedData:
    """Prepare every relation type of ``data`` for training.

    Each relation is run through ``prepare_relation_type`` and the results
    are grouped in a nested mapping indexed first by row node type and then
    by column node type.

    Args:
        data: The dataset to prepare.
        ratios: A ``TrainValTest`` forwarded to ``prepare_relation_type``.
            The default of ``None`` only keeps the previous one-argument
            call form importable; ``prepare_relation_type`` rejects it with
            a ``ValueError``, so callers must pass a real value.

    Returns:
        A ``PreparedData`` built from ``data.node_types`` and the prepared
        relation types.

    Raises:
        ValueError: If ``data`` is not a ``Data`` instance.
    """
    if not isinstance(data, Data):
        raise ValueError('data must be of class Data')

    relation_types = defaultdict(lambda: defaultdict(list))
    # NOTE(review): this unpacking assumes that iterating
    # data.relation_types yields ((row, column), relations) pairs. If it is
    # a dict keyed by (row, column), this should iterate over .items() --
    # confirm against the Data class.
    for (node_type_row, node_type_column), rels in data.relation_types:
        for r in rels:
            relation_types[node_type_row][node_type_column].append(
                prepare_relation_type(r, ratios))
    return PreparedData(data.node_types, relation_types)

+ 95
- 0
tests/icosagon/test_normalize.py View File

@@ -0,0 +1,95 @@
from icosagon.normalize import add_eye_sparse, \
norm_adj_mat_one_node_type_sparse, \
norm_adj_mat_one_node_type_dense, \
norm_adj_mat_one_node_type
import decagon_pytorch.normalize
import torch
import pytest
import numpy as np
def test_add_eye_sparse_01():
    """Sparse identity addition must match the dense computation."""
    dense = torch.rand((10, 10))
    result = add_eye_sparse(dense.to_sparse())
    expected = dense + torch.eye(10)
    assert torch.all(result.to_dense() == expected)
def test_add_eye_sparse_02():
    """A non-square sparse matrix must be rejected."""
    rectangular = torch.rand((10, 20)).to_sparse()
    with pytest.raises(ValueError):
        add_eye_sparse(rectangular)
def test_add_eye_sparse_03():
    """A dense tensor must be rejected."""
    dense = torch.rand((10, 10))
    with pytest.raises(ValueError):
        add_eye_sparse(dense)
def test_add_eye_sparse_04():
    """A numpy array (not a torch tensor) must be rejected."""
    not_a_tensor = np.random.rand(10, 10)
    with pytest.raises(ValueError):
        add_eye_sparse(not_a_tensor)
def test_norm_adj_mat_one_node_type_sparse_01():
    """Smoke test: sparse normalization runs on a random boolean matrix."""
    mask = torch.rand((10, 10)) > .5
    norm_adj_mat_one_node_type_sparse(mask.to_sparse())
def test_norm_adj_mat_one_node_type_sparse_02():
    """Sparse and dense normalization must agree on the same input."""
    mask = torch.rand((10, 10)) > .5
    from_sparse = norm_adj_mat_one_node_type_sparse(mask.to_sparse())
    from_dense = norm_adj_mat_one_node_type_dense(mask)
    assert torch.all(from_sparse.to_dense() == from_dense)
def test_norm_adj_mat_one_node_type_dense_01():
    """Smoke test: dense normalization runs on a random boolean matrix."""
    mask = torch.rand((10, 10)) > .5
    norm_adj_mat_one_node_type_dense(mask)
def test_norm_adj_mat_one_node_type_dense_02():
    """Dense normalization of a small graph matches hand-computed values.

    The expected matrix is (A + I) with every row divided by its sum; the
    row sums are noted next to each input row.
    """
    adj_mat = torch.tensor([
        [0, 1, 1, 0],  # 3
        [1, 0, 1, 0],  # 3
        [1, 1, 0, 1],  # 4
        [0, 0, 1, 0]   # 2
    ])
    expect = np.array([
        [1/3, 1/3, 1/3, 0],
        [1/3, 1/3, 1/3, 0],
        [1/4, 1/4, 1/4, 1/4],
        [0, 0, 1/2, 1/2]
    ], dtype=np.float32)

    # Exercise the function this test is named after; the previous body
    # called the decagon_pytorch scipy normalizer instead and then passed a
    # numpy comparison to torch.all.
    res = norm_adj_mat_one_node_type_dense(adj_mat)
    res = res.detach().cpu().numpy()

    # Tolerant comparison: float32 division need not be bit-identical to
    # the float32-rounded literals above. Mismatches are reported by numpy.
    np.testing.assert_allclose(res, expect, rtol=1e-6)
@pytest.mark.skip
def test_norm_adj_mat_one_node_type_dense_03():
    """Compare the decagon_pytorch normalizer with the icosagon dense one.

    Skipped unconditionally via the decorator above.
    """
    mask = torch.rand((10, 10)) > .5

    reference = decagon_pytorch.normalize.norm_adj_mat_one_node_type(mask)
    candidate = norm_adj_mat_one_node_type_dense(mask)

    reference = reference.todense()
    candidate = candidate.detach().cpu().numpy()

    print('adj_mat_dec:', reference)
    print('adj_mat_ico:', candidate)
    assert np.all(reference == candidate)

+ 21
- 13
tests/icosagon/test_trainprep.py View File

@@ -7,11 +7,13 @@
from icosagon.trainprep import TrainValTest, \
train_val_test_split_edges, \
get_edges_and_degrees, \
prepare_adj_mat
prepare_adj_mat, \
prepare_relation_type
import torch
import pytest
import numpy as np
from itertools import chain
from icosagon.data import RelationType
def test_train_val_test_split_edges_01():
@@ -100,17 +102,23 @@ def test_prepare_adj_mat_02():
assert len(edges.shape) == 2
assert edges.shape[1] == 2
# def prepare_adj_mat(adj_mat: torch.Tensor,
# ratios: TrainValTest) -> Tuple[TrainValTest, TrainValTest]:
#
# degrees = adj_mat.sum(0)
# edges_pos = torch.nonzero(adj_mat)
#
# neg_neighbors = fixed_unigram_candidate_sampler(edges_pos[:, 1],
# len(edges), degrees, 0.75)
# edges_neg = torch.cat((edges_pos[:, 0], neg_neighbors.view(-1, 1)), 1)
def test_prepare_relation_type_01():
    """Smoke test: a random same-node-type relation can be prepared."""
    random_adjacency = torch.rand((10, 10)) > .5
    relation = RelationType('Test', 0, 0, random_adjacency)
    split = TrainValTest(.8, .1, .1)
    prepare_relation_type(relation, split)
# def prepare_relation(r, ratios):
# adj_mat = r.adjacency_matrix
# adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat)
#
# edges_pos = train_val_test_split_edges(edges_pos, ratios)
# edges_neg = train_val_test_split_edges(edges_neg, ratios)
# if r.node_type_row == r.node_type_column:
# adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
# else:
# adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)
#
# return edges_pos, edges_neg
# return PreparedRelation(r.name, r.node_type_row, r.node_type_column,
# adj_mat_train, edges_pos, edges_neg)

Loading…
Cancel
Save