IF YOU WOULD LIKE TO GET AN ACCOUNT, please write an email to s dot adaszewski at gmail dot com. User accounts are meant only to report issues and/or generate pull requests. This is a purpose-specific Git hosting for ADARED projects. Thank you for your understanding!
Browse Source

Work on icosagon.trainprep.

master
Stanislaw Adaszewski 3 years ago
parent
commit
584bf19c3f
4 changed files with 114 additions and 25 deletions
  1. +3
    -3
      src/icosagon/sampling.py
  2. +34
    -9
      src/icosagon/trainprep.py
  3. +6
    -0
      tests/icosagon/test_data.py
  4. +71
    -13
      tests/icosagon/test_trainprep.py

+ 3
- 3
src/icosagon/sampling.py View File

@@ -13,14 +13,14 @@ from typing import List, \
def fixed_unigram_candidate_sampler(
true_classes: Union[np.array, torch.Tensor],
num_samples: int,
unigrams: List[Union[int, float]],
distortion: float = 1.):
if isinstance(true_classes, torch.Tensor):
true_classes = true_classes.detach().cpu().numpy()
if true_classes.shape[0] != num_samples:
if len(true_classes.shape) != 2:
raise ValueError('true_classes must be a 2D matrix with shape (num_samples, num_true)')
num_samples = true_classes.shape[0]
unigrams = np.array(unigrams)
if distortion != 1.:
unigrams = unigrams.astype(np.float64) ** distortion
@@ -39,4 +39,4 @@ def fixed_unigram_candidate_sampler(
mask = mask.sum(1).astype(np.bool)
# print('mask:', mask)
indices = indices[mask]
return result
return torch.tensor(result)

+ 34
- 9
src/icosagon/trainprep.py View File

@@ -13,6 +13,9 @@ from typing import Any, \
Dict
from .data import NodeType
from collections import defaultdict
from .normalize import norm_adj_mat_one_node_type, \
norm_adj_mat_two_node_types
import numpy as np
@dataclass
@@ -70,28 +73,50 @@ def train_val_test_split_edges(edges: torch.Tensor,
return TrainValTest(edges_train, edges_val, edges_test)
def get_edges_and_degrees(adj_mat):
    """Return the edge list and per-column degrees of an adjacency matrix.

    Args:
        adj_mat: Adjacency matrix as a dense or sparse torch tensor.

    Returns:
        A tuple ``(edges_pos, degrees)``. ``edges_pos`` holds one
        ``(row, column)`` index pair per row. ``degrees`` has one entry
        per column: the number of stored entries in that column for
        sparse input, or the column sum for dense input.
    """
    if adj_mat.is_sparse:
        # Coalescing merges duplicate indices so each edge is counted once.
        adj_mat = adj_mat.coalesce()
        indices = adj_mat.indices()
        col_idx = indices[1]
        # Allocate on the same device as the indices: index_add needs
        # all of its operands to live on one device.
        degrees = torch.zeros(adj_mat.shape[1], dtype=torch.int64,
            device=col_idx.device)
        degrees = degrees.index_add(0, col_idx, torch.ones_like(col_idx))
        edges_pos = indices.transpose(0, 1)
    else:
        degrees = adj_mat.sum(0)
        edges_pos = torch.nonzero(adj_mat)
    return edges_pos, degrees
def prepare_adj_mat(adj_mat: torch.Tensor,
    ratios: TrainValTest) -> Tuple[torch.Tensor, TrainValTest, TrainValTest]:
    """Split an adjacency matrix into train/val/test positive and negative edges.

    Positive edges and column degrees are taken from ``adj_mat`` via
    ``get_edges_and_degrees``. One negative edge is built per positive
    edge by pairing its row index with a column drawn by
    ``fixed_unigram_candidate_sampler`` (degrees as unigrams, distortion
    0.75). Both edge lists are then split with
    ``train_val_test_split_edges``.

    Args:
        adj_mat: Adjacency matrix, dense or sparse.
        ratios: Train/validation/test ratios passed to
            ``train_val_test_split_edges``.

    Returns:
        ``(adj_mat_train, edges_pos, edges_neg)`` where ``adj_mat_train``
        is a sparse matrix containing only the training positive edges
        and the other two are ``TrainValTest`` splits of edge lists.

    Raises:
        ValueError: If ``adj_mat`` is not a ``torch.Tensor``.
    """
    if not isinstance(adj_mat, torch.Tensor):
        raise ValueError('adj_mat must be a torch.Tensor')

    edges_pos, degrees = get_edges_and_degrees(adj_mat)

    neg_neighbors = fixed_unigram_candidate_sampler(
        edges_pos[:, 1].view(-1, 1), degrees, 0.75)
    edges_neg = torch.cat((edges_pos[:, 0].view(-1, 1), neg_neighbors.view(-1, 1)), 1)

    edges_pos = train_val_test_split_edges(edges_pos, ratios)
    edges_neg = train_val_test_split_edges(edges_neg, ratios)

    train_edges = edges_pos.train
    # Pass the size explicitly: otherwise it is inferred from the largest
    # index present in the training edges and may be smaller than adj_mat.
    adj_mat_train = torch.sparse_coo_tensor(indices=train_edges.transpose(0, 1),
        values=torch.ones(len(train_edges), dtype=adj_mat.dtype,
            device=train_edges.device),
        size=adj_mat.shape)

    return adj_mat_train, edges_pos, edges_neg
def prepare_relation(r, ratios):
    """Build a PreparedRelation from a relation and train/val/test ratios.

    Args:
        r: Relation object; this function reads its ``adjacency_matrix``,
            ``name``, ``node_type_row`` and ``node_type_column`` attributes.
        ratios: Train/validation/test ratios forwarded to ``prepare_adj_mat``.

    Returns:
        A ``PreparedRelation`` holding the normalized training adjacency
        matrix and the positive/negative edge splits.
    """
    adj_mat = r.adjacency_matrix
    # ratios is a required argument of prepare_adj_mat.
    adj_mat_train, edges_pos, edges_neg = prepare_adj_mat(adj_mat, ratios)

    # Relations within one node type and relations between two node types
    # use different normalization helpers.
    if r.node_type_row == r.node_type_column:
        adj_mat_train = norm_adj_mat_one_node_type(adj_mat_train)
    else:
        adj_mat_train = norm_adj_mat_two_node_types(adj_mat_train)

    return PreparedRelation(r.name, r.node_type_row, r.node_type_column,
        adj_mat_train, edges_pos, edges_neg)


+ 6
- 0
tests/icosagon/test_data.py View File

@@ -1,3 +1,9 @@
#
# Copyright (C) Stanislaw Adaszewski, 2020
# License: GPLv3
#
from icosagon import Data
import torch
import pytest


+ 71
- 13
tests/icosagon/test_trainprep.py View File

@@ -1,8 +1,17 @@
#
# Copyright (C) Stanislaw Adaszewski, 2020
# License: GPLv3
#
from icosagon.trainprep import TrainValTest, \
train_val_test_split_edges
train_val_test_split_edges, \
get_edges_and_degrees, \
prepare_adj_mat
import torch
import pytest
import numpy as np
from itertools import chain
def test_train_val_test_split_edges_01():
@@ -43,16 +52,65 @@ def test_train_val_test_split_edges_01():
res.test.shape == (0, 2)
def test_train_val_test_split_edges_02():
    """Every edge found in any split must come from the input edge list."""
    source = torch.randint(0, 30, (30, 2))
    ratios = TrainValTest(.8, .1, .1)
    split = train_val_test_split_edges(source, ratios)
    originals = [tuple(row) for row in source]
    produced = [tuple(row) for row in chain(split.train, split.val, split.test)]
    for edge in produced:
        assert edge in originals
def test_get_edges_and_degrees_01():
    """Dense and sparse inputs must give the same degrees and the same edges."""
    adj_mat_dense = (torch.rand((10, 10)) > .5)
    adj_mat_sparse = adj_mat_dense.to_sparse()
    edges_dense, degrees_dense = get_edges_and_degrees(adj_mat_dense)
    edges_sparse, degrees_sparse = get_edges_and_degrees(adj_mat_sparse)
    assert torch.all(degrees_dense == degrees_sparse)
    # Convert to tuples of plain ints so comparison is by value. The sparse
    # list must be built from the sparse result, not from the dense one.
    edges_dense = [tuple(a) for a in edges_dense.tolist()]
    edges_sparse = [tuple(a) for a in edges_sparse.tolist()]
    assert len(edges_dense) == len(edges_sparse)
    assert all(a in edges_dense for a in edges_sparse)
    assert all(a in edges_sparse for a in edges_dense)
def test_prepare_adj_mat_01():
    """Smoke test: prepare_adj_mat runs on a random sparse matrix."""
    dense = torch.rand((10, 10)) > .5
    sparse = dense.to_sparse()
    split_ratios = TrainValTest(.8, .1, .1)
    prepare_adj_mat(sparse, split_ratios)
def test_prepare_adj_mat_02():
    """Check the types and shapes of everything prepare_adj_mat returns."""
    adj_mat = (torch.rand((10, 10)) > .5).to_sparse()
    ratios = TrainValTest(.8, .1, .1)

    prepared = prepare_adj_mat(adj_mat, ratios)
    adj_mat_train, edges_pos, edges_neg = prepared

    # The training matrix is sparse and matches the input shape and dtype.
    assert isinstance(adj_mat_train, torch.Tensor)
    assert adj_mat_train.is_sparse
    assert adj_mat_train.shape == adj_mat.shape
    assert adj_mat_train.dtype == adj_mat.dtype

    splits = (edges_pos, edges_neg)
    for split in splits:
        assert isinstance(split, TrainValTest)

    # Each part of each split is an (n, 2) tensor of edges.
    for part in ('train', 'val', 'test'):
        for split in splits:
            edges = getattr(split, part)
            assert isinstance(edges, torch.Tensor)
            assert edges.dim() == 2
            assert edges.shape[1] == 2
# if ratios.train + ratios.val + ratios.test != 1.0:
# raise ValueError('Train, validation and test ratios must add up to 1')
#
# order = torch.randperm(len(edges))
# edges = edges[order, :]
# n = round(len(edges) * ratios.train)
# edges_train = edges[:n]
# n_1 = round(len(edges) * (ratios.train + ratios.val))
# edges_val = edges[n:n_1]
# edges_test = edges[n_1:]
#
# return TrainValTest(edges_train, edges_val, edges_test)
# def prepare_adj_mat(adj_mat: torch.Tensor,
# ratios: TrainValTest) -> Tuple[TrainValTest, TrainValTest]:
#
# degrees = adj_mat.sum(0)
# edges_pos = torch.nonzero(adj_mat)
#
# neg_neighbors = fixed_unigram_candidate_sampler(edges_pos[:, 1],
# len(edges), degrees, 0.75)
# edges_neg = torch.cat((edges_pos[:, 0], neg_neighbors.view(-1, 1)), 1)
#
# edges_pos = train_val_test_split_edges(edges_pos, ratios)
# edges_neg = train_val_test_split_edges(edges_neg, ratios)
#
# return edges_pos, edges_neg

Loading…
Cancel
Save