@@ -21,7 +21,7 @@ class RelationType(object): | |||
node_type_row: int | |||
node_type_column: int | |||
adjacency_matrix: torch.Tensor | |||
is_autogenerated: bool | |||
is_autogenerated: bool = False | |||
class Data(object): | |||
@@ -6,17 +6,90 @@ | |||
import numpy as np | |||
import scipy.sparse as sp | |||
import torch | |||
def norm_adj_mat_one_node_type(adj):
    """Normalize a square adjacency matrix using scipy sparse operations.

    An identity matrix is added to ``adj``, the row sums of the result are
    raised to the power -0.5 and placed on a diagonal matrix ``D``, and
    ``(A_ . D)^T . D`` is returned, where ``A_`` is ``adj`` plus identity.

    :param adj: anything accepted by ``scipy.sparse.coo_matrix``
    :return: the normalized matrix as a scipy sparse matrix
    :raises AssertionError: if ``adj`` is not square
    """
    coo = sp.coo_matrix(adj)
    n_rows, n_cols = coo.shape
    assert n_rows == n_cols

    # Add the identity so that every row sum includes a diagonal entry.
    with_loops = coo + sp.eye(n_rows)
    row_totals = np.array(with_loops.sum(1))
    inv_sqrt_degree = sp.diags(np.power(row_totals, -0.5).flatten())

    scaled = with_loops.dot(inv_sqrt_degree)
    return scaled.transpose().dot(inv_sqrt_degree)
def add_eye_sparse(adj_mat: torch.Tensor) -> torch.Tensor:
    """Add an identity matrix to a square sparse COO tensor.

    The diagonal entries are appended to the coalesced indices/values of
    ``adj_mat``; the returned tensor is therefore NOT coalesced, and an
    already present diagonal entry shows up as a duplicate index alongside
    the newly added one.

    :param adj_mat: square, two-dimensional sparse tensor
    :return: sparse tensor of the same shape holding ``adj_mat`` plus identity
    :raises ValueError: if ``adj_mat`` is not a ``torch.Tensor``, is not
        sparse, or is not a square matrix
    """
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    if not adj_mat.is_sparse:
        raise ValueError('adj_mat must be sparse')

    if len(adj_mat.shape) != 2 or \
        adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    adj_mat = adj_mat.coalesce()
    indices = adj_mat.indices()
    values = adj_mat.values()
    n = adj_mat.shape[0]

    # Build the diagonal on the same device as the input; otherwise the
    # concatenations below fail for tensors that do not live on the
    # default device.
    diag = torch.arange(n, dtype=indices.dtype, device=indices.device)
    eye_indices = torch.stack((diag, diag), 0)
    eye_values = torch.ones(n, dtype=values.dtype, device=values.device)

    indices = torch.cat((indices, eye_indices), 1)
    values = torch.cat((values, eye_values), 0)

    return torch.sparse_coo_tensor(indices=indices, values=values,
        size=adj_mat.shape)
def norm_adj_mat_one_node_type_sparse(adj_mat):
    """Row-normalize a square sparse adjacency matrix after adding identity.

    The identity is added with ``add_eye_sparse`` and the result coalesced.
    Each stored value is then divided by the sum of the values in its row.
    Values are converted to the default floating point dtype of
    ``torch.zeros`` before dividing.

    :param adj_mat: square, two-dimensional sparse tensor
    :return: sparse tensor of the same shape with row-normalized values
    :raises ValueError: if ``adj_mat`` is not a square matrix, or if
        ``add_eye_sparse`` rejects it
    """
    if len(adj_mat.shape) != 2 or \
        adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    adj_mat = add_eye_sparse(adj_mat).coalesce()
    indices = adj_mat.indices()
    values = adj_mat.values()
    rows = indices[0]

    # Accumulate per-row sums on the same device as the matrix values so
    # that index_add and the division below do not mix devices.
    degrees = torch.zeros(adj_mat.shape[0], device=values.device)
    weights = values.to(degrees.dtype)
    degrees = degrees.index_add(0, rows, weights)

    normalized = weights / degrees[rows]
    return torch.sparse_coo_tensor(indices=indices, values=normalized,
        size=adj_mat.shape)
def norm_adj_mat_one_node_type_dense(adj_mat):
    """Row-normalize a square dense adjacency matrix after adding identity.

    An identity matrix of the same dtype is added to ``adj_mat`` and every
    row of the result is divided by its row sum. The output is float32.

    :param adj_mat: square, two-dimensional dense tensor
    :return: dense float32 tensor of the same shape
    :raises ValueError: if ``adj_mat`` is not a ``torch.Tensor``, is sparse,
        or is not a square matrix
    """
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    if adj_mat.is_sparse:
        raise ValueError('adj_mat must be dense')

    if len(adj_mat.shape) != 2 or \
        adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError('adj_mat must be a square matrix')

    # Create the identity on the input's device so the addition also works
    # for tensors that do not live on the default device.
    eye = torch.eye(adj_mat.shape[0], dtype=adj_mat.dtype,
        device=adj_mat.device)
    adj_mat = adj_mat + eye

    # Column vector of row sums, broadcast across each row in the division.
    degrees = adj_mat.sum(1).view(-1, 1).to(torch.float32)
    return adj_mat.to(degrees.dtype) / degrees
def norm_adj_mat_one_node_type(adj_mat):
    """Normalize a single-node-type adjacency matrix.

    Dispatches on ``adj_mat.is_sparse``: sparse tensors go to
    ``norm_adj_mat_one_node_type_sparse``, everything else to
    ``norm_adj_mat_one_node_type_dense``.

    :param adj_mat: object exposing an ``is_sparse`` attribute
    :return: whatever the selected normalization function returns
    """
    normalize = norm_adj_mat_one_node_type_sparse if adj_mat.is_sparse \
        else norm_adj_mat_one_node_type_dense
    return normalize(adj_mat)
# def norm_adj_mat_one_node_type(adj): | |||
# adj = sp.coo_matrix(adj) | |||
# assert adj.shape[0] == adj.shape[1] | |||
# adj_ = adj + sp.eye(adj.shape[0]) | |||
# rowsum = np.array(adj_.sum(1)) | |||
# degree_mat_inv_sqrt = np.power(rowsum, -0.5).flatten() | |||
# degree_mat_inv_sqrt = sp.diags(degree_mat_inv_sqrt) | |||
# adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt) | |||
# return adj_normalized | |||
def norm_adj_mat_two_node_types(adj): | |||
@@ -11,7 +11,9 @@ from typing import Any, \ | |||
List, \ | |||
Tuple, \ | |||
Dict | |||
from .data import NodeType | |||
from .data import NodeType, \ | |||
RelationType, \ | |||
Data | |||
from collections import defaultdict | |||
from .normalize import norm_adj_mat_one_node_type, \ | |||
norm_adj_mat_two_node_types | |||
@@ -73,7 +75,7 @@ def train_val_test_split_edges(edges: torch.Tensor, | |||
return TrainValTest(edges_train, edges_val, edges_test) | |||
def get_edges_and_degrees(adj_mat): | |||
def get_edges_and_degrees(adj_mat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: | |||
if adj_mat.is_sparse: | |||
adj_mat = adj_mat.coalesce() | |||
degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64) | |||
@@ -109,23 +111,35 @@ def prepare_adj_mat(adj_mat: torch.Tensor, | |||
return adj_mat_train, edges_pos, edges_neg | |||
def prepare_relation_type(r: RelationType,
    ratios: TrainValTest) -> PreparedRelationType:
    """Prepare one relation type for training.

    The relation's adjacency matrix is passed to ``prepare_adj_mat``
    together with ``ratios``, yielding the training adjacency matrix plus
    positive and negative edges. The training matrix is then normalized
    with ``norm_adj_mat_one_node_type`` when row and column node types are
    equal, and with ``norm_adj_mat_two_node_types`` otherwise.

    :param r: the relation type to prepare
    :param ratios: train/validation/test ratios forwarded to
        ``prepare_adj_mat``
    :return: a ``PreparedRelationType`` built from the relation's name and
        node types, the normalized training matrix and the edge sets
    :raises ValueError: if ``r`` is not a ``RelationType`` or ``ratios`` is
        not a ``TrainValTest``
    """
    if not isinstance(r, RelationType):
        raise ValueError('r must be a RelationType')

    if not isinstance(ratios, TrainValTest):
        raise ValueError('ratios must be a TrainValTest')

    adj_mat = r.adjacency_matrix
    adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat, ratios)

    if r.node_type_row == r.node_type_column:
        adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
    else:
        adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)

    return PreparedRelationType(r.name, r.node_type_row, r.node_type_column,
        adj_mat_train, edges_pos, edges_neg)
def prepare_training(data: Data, ratios: TrainValTest = None) -> PreparedData:
    """Prepare every relation type in ``data`` for training.

    Each relation is run through ``prepare_relation_type`` and the results
    are grouped by row node type, then by column node type.

    :param data: the data set whose relation types are to be prepared
    :param ratios: train/validation/test ratios forwarded unchanged to
        ``prepare_relation_type``, which rejects anything that is not a
        ``TrainValTest``; it must therefore be supplied whenever ``data``
        contains at least one relation
    :return: a ``PreparedData`` built from ``data.node_types`` and the
        nested mapping of prepared relation types
    :raises ValueError: if ``data`` is not a ``Data`` instance, or if
        ``prepare_relation_type`` rejects one of its arguments
    """
    if not isinstance(data, Data):
        raise ValueError('data must be of class Data')

    # NOTE(review): the layout of data.relation_types is not visible here.
    # A dict is assumed to be keyed by (row, column) pairs, so iterate its
    # items; any other iterable is expected to already yield
    # ((row, column), relations) pairs. Confirm against the Data class.
    grouped = data.relation_types
    if isinstance(grouped, dict):
        grouped = grouped.items()

    relation_types = defaultdict(lambda: defaultdict(list))
    for (node_type_row, node_type_column), rels in grouped:
        for r in rels:
            relation_types[node_type_row][node_type_column].append(
                prepare_relation_type(r, ratios))

    return PreparedData(data.node_types, relation_types)
@@ -0,0 +1,95 @@ | |||
from icosagon.normalize import add_eye_sparse, \ | |||
norm_adj_mat_one_node_type_sparse, \ | |||
norm_adj_mat_one_node_type_dense, \ | |||
norm_adj_mat_one_node_type | |||
import decagon_pytorch.normalize | |||
import torch | |||
import pytest | |||
import numpy as np | |||
def test_add_eye_sparse_01():
    """add_eye_sparse on a random 10x10 matrix matches dense + identity."""
    dense = torch.rand((10, 10))
    sparse_with_eye = add_eye_sparse(dense.to_sparse())
    expected = dense + torch.eye(10)
    assert torch.all(sparse_with_eye.to_dense() == expected)
def test_add_eye_sparse_02():
    """A non-square (10x20) sparse matrix is rejected with ValueError."""
    rect_sparse = torch.rand((10, 20)).to_sparse()
    with pytest.raises(ValueError):
        add_eye_sparse(rect_sparse)
def test_add_eye_sparse_03():
    """A dense tensor is rejected with ValueError."""
    dense_input = torch.rand((10, 10))
    with pytest.raises(ValueError):
        add_eye_sparse(dense_input)
def test_add_eye_sparse_04():
    """A NumPy array (not a torch.Tensor) is rejected with ValueError."""
    numpy_input = np.random.rand(10, 10)
    with pytest.raises(ValueError):
        add_eye_sparse(numpy_input)
def test_norm_adj_mat_one_node_type_sparse_01():
    """Smoke test: sparse normalization runs on a random boolean matrix."""
    mask = torch.rand((10, 10)) > .5
    norm_adj_mat_one_node_type_sparse(mask.to_sparse())
def test_norm_adj_mat_one_node_type_sparse_02():
    """Sparse and dense normalization agree on the same random matrix."""
    mask = torch.rand((10, 10)) > .5
    from_sparse = norm_adj_mat_one_node_type_sparse(mask.to_sparse())
    from_dense = norm_adj_mat_one_node_type_dense(mask)
    assert torch.all(from_sparse.to_dense() == from_dense)
def test_norm_adj_mat_one_node_type_dense_01():
    """Smoke test: dense normalization runs on a random boolean matrix."""
    mask = torch.rand((10, 10)) > .5
    norm_adj_mat_one_node_type_dense(mask)
def test_norm_adj_mat_one_node_type_dense_02():
    """Compare decagon_pytorch's normalization with hand-computed values.

    The trailing comment on each row of ``adj_mat`` is that row's sum
    after the identity has been added; ``expect`` is each such row divided
    by that sum.
    """
    adj_mat = torch.tensor([
        [0, 1, 1, 0], # 3
        [1, 0, 1, 0], # 3
        [1, 1, 0, 1], # 4
        [0, 0, 1, 0]  # 2
    ])
    expect = np.array([
        [1/3, 1/3, 1/3, 0],
        [1/3, 1/3, 1/3, 0],
        [1/4, 1/4, 1/4, 1/4],
        [0, 0, 1/2, 1/2]
    ], dtype=np.float32)
    res = decagon_pytorch.normalize.norm_adj_mat_one_node_type(adj_mat)
    res = res.todense().astype(np.float32)
    print('res:', res)
    print('expect:', expect)
    # res and expect are NumPy objects, so torch.all cannot be applied to
    # their comparison; use a NumPy check with a float tolerance instead.
    # NOTE(review): expect holds row-normalized values - confirm that
    # decagon_pytorch's function normalizes the same way.
    assert np.allclose(res, expect)
@pytest.mark.skip
def test_norm_adj_mat_one_node_type_dense_03():
    """Skipped: compare decagon_pytorch and icosagon dense normalization."""
    mask = torch.rand((10, 10)) > .5
    dec_result = decagon_pytorch.normalize.norm_adj_mat_one_node_type(mask)
    ico_result = norm_adj_mat_one_node_type_dense(mask)
    # Bring both results to NumPy so they can be compared element-wise.
    adj_mat_dec = dec_result.todense()
    adj_mat_ico = ico_result.detach().cpu().numpy()
    print('adj_mat_dec:', adj_mat_dec)
    print('adj_mat_ico:', adj_mat_ico)
    assert np.all(adj_mat_dec == adj_mat_ico)
@@ -7,11 +7,13 @@ | |||
from icosagon.trainprep import TrainValTest, \ | |||
train_val_test_split_edges, \ | |||
get_edges_and_degrees, \ | |||
prepare_adj_mat | |||
prepare_adj_mat, \ | |||
prepare_relation_type | |||
import torch | |||
import pytest | |||
import numpy as np | |||
from itertools import chain | |||
from icosagon.data import RelationType | |||
def test_train_val_test_split_edges_01(): | |||
@@ -100,17 +102,23 @@ def test_prepare_adj_mat_02(): | |||
assert len(edges.shape) == 2 | |||
assert edges.shape[1] == 2 | |||
# def prepare_adj_mat(adj_mat: torch.Tensor, | |||
# ratios: TrainValTest) -> Tuple[TrainValTest, TrainValTest]: | |||
# | |||
# degrees = adj_mat.sum(0) | |||
# edges_pos = torch.nonzero(adj_mat) | |||
# | |||
# neg_neighbors = fixed_unigram_candidate_sampler(edges_pos[:, 1], | |||
# len(edges), degrees, 0.75) | |||
# edges_neg = torch.cat((edges_pos[:, 0], neg_neighbors.view(-1, 1)), 1) | |||
def test_prepare_relation_type_01():
    """Smoke test: prepare_relation_type runs on a random 10x10 relation."""
    rel = RelationType('Test', 0, 0, torch.rand((10, 10)) > .5)
    prepare_relation_type(rel, TrainValTest(.8, .1, .1))
# def prepare_relation(r, ratios): | |||
# adj_mat = r.adjacency_matrix | |||
# adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat) | |||
# | |||
# edges_pos = train_val_test_split_edges(edges_pos, ratios) | |||
# edges_neg = train_val_test_split_edges(edges_neg, ratios) | |||
# if r.node_type_row == r.node_type_column: | |||
# adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train) | |||
# else: | |||
# adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train) | |||
# | |||
# return edges_pos, edges_neg | |||
# return PreparedRelation(r.name, r.node_type_row, r.node_type_column, | |||
# adj_mat_train, edges_pos, edges_neg) |