IF YOU WOULD LIKE TO GET AN ACCOUNT, please write an email to s dot adaszewski at gmail dot com. User accounts are meant only to report issues and/or generate pull requests. This is a purpose-specific Git hosting for ADARED projects. Thank you for your understanding!
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

test_sampling.py 4.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. import tensorflow as tf
  2. import numpy as np
  3. from collections import defaultdict
  4. import torch
  5. import torch.utils.data
  6. from typing import List, \
  7. Union
  8. import icosagon.sampling
  9. import scipy.stats
  10. def test_unigram_01():
  11. range_max = 7
  12. distortion = 0.75
  13. batch_size = 500
  14. unigrams = [ 1, 3, 2, 1, 2, 1, 3]
  15. num_true = 1
  16. true_classes = np.zeros((batch_size, num_true), dtype=np.int64)
  17. for i in range(batch_size):
  18. true_classes[i, 0] = i % range_max
  19. true_classes = tf.convert_to_tensor(true_classes)
  20. neg_samples, _, _ = tf.nn.fixed_unigram_candidate_sampler(
  21. true_classes=true_classes,
  22. num_true=num_true,
  23. num_sampled=batch_size,
  24. unique=False,
  25. range_max=range_max,
  26. distortion=distortion,
  27. unigrams=unigrams)
  28. assert neg_samples.shape == (batch_size,)
  29. for i in range(batch_size):
  30. assert neg_samples[i] != true_classes[i, 0]
  31. counts = defaultdict(int)
  32. with tf.Session() as sess:
  33. neg_samples = neg_samples.eval()
  34. for x in neg_samples:
  35. counts[x] += 1
  36. print('counts:', counts)
  37. assert counts[0] < counts[1] and \
  38. counts[0] < counts[2] and \
  39. counts[0] < counts[4] and \
  40. counts[0] < counts[6]
  41. assert counts[2] < counts[1] and \
  42. counts[0] < counts[6]
  43. assert counts[3] < counts[1] and \
  44. counts[3] < counts[2] and \
  45. counts[3] < counts[4] and \
  46. counts[3] < counts[6]
  47. assert counts[4] < counts[1] and \
  48. counts[4] < counts[6]
  49. assert counts[5] < counts[1] and \
  50. counts[5] < counts[2] and \
  51. counts[5] < counts[4] and \
  52. counts[5] < counts[6]
  53. def test_unigram_02():
  54. range_max = 7
  55. distortion = 0.75
  56. batch_size = 500
  57. unigrams = [ 1, 3, 2, 1, 2, 1, 3]
  58. num_true = 1
  59. true_classes = np.zeros((batch_size, num_true), dtype=np.int64)
  60. for i in range(batch_size):
  61. true_classes[i, 0] = i % range_max
  62. true_classes = torch.tensor(true_classes)
  63. neg_samples = icosagon.sampling.fixed_unigram_candidate_sampler(
  64. true_classes=true_classes,
  65. unigrams=unigrams,
  66. distortion=distortion)
  67. assert neg_samples.shape == (batch_size,)
  68. for i in range(batch_size):
  69. assert neg_samples[i] != true_classes[i, 0]
  70. counts = defaultdict(int)
  71. for x in neg_samples:
  72. counts[x.item()] += 1
  73. print('counts:', counts)
  74. assert counts[0] < counts[1] and \
  75. counts[0] < counts[2] and \
  76. counts[0] < counts[4] and \
  77. counts[0] < counts[6]
  78. assert counts[2] < counts[1] and \
  79. counts[0] < counts[6]
  80. assert counts[3] < counts[1] and \
  81. counts[3] < counts[2] and \
  82. counts[3] < counts[4] and \
  83. counts[3] < counts[6]
  84. assert counts[4] < counts[1] and \
  85. counts[4] < counts[6]
  86. assert counts[5] < counts[1] and \
  87. counts[5] < counts[2] and \
  88. counts[5] < counts[4] and \
  89. counts[5] < counts[6]
  90. def test_unigram_03():
  91. range_max = 7
  92. distortion = 0.75
  93. batch_size = 25
  94. unigrams = [ 1, 3, 2, 1, 2, 1, 3]
  95. num_true = 1
  96. true_classes = np.zeros((batch_size, num_true), dtype=np.int64)
  97. for i in range(batch_size):
  98. true_classes[i, 0] = i % range_max
  99. true_classes_tf = tf.convert_to_tensor(true_classes)
  100. true_classes_torch = torch.tensor(true_classes)
  101. counts_tf = defaultdict(list)
  102. counts_torch = defaultdict(list)
  103. for i in range(10):
  104. neg_samples, _, _ = tf.nn.fixed_unigram_candidate_sampler(
  105. true_classes=true_classes_tf,
  106. num_true=num_true,
  107. num_sampled=batch_size,
  108. unique=False,
  109. range_max=range_max,
  110. distortion=distortion,
  111. unigrams=unigrams)
  112. counts = defaultdict(int)
  113. with tf.Session() as sess:
  114. neg_samples = neg_samples.eval()
  115. for x in neg_samples:
  116. counts[x.item()] += 1
  117. for k, v in counts.items():
  118. counts_tf[k].append(v)
  119. neg_samples = icosagon.sampling.fixed_unigram_candidate_sampler(
  120. true_classes=true_classes,
  121. distortion=distortion,
  122. unigrams=unigrams)
  123. counts = defaultdict(int)
  124. for x in neg_samples:
  125. counts[x.item()] += 1
  126. for k, v in counts.items():
  127. counts_torch[k].append(v)
  128. for i in range(range_max):
  129. print('counts_tf[%d]:' % i, counts_tf[i])
  130. print('counts_torch[%d]:' % i, counts_torch[i])
  131. for i in range(range_max):
  132. statistic, pvalue = scipy.stats.ttest_ind(counts_tf[i], counts_torch[i])
  133. assert pvalue * range_max > .05