# test_sketches.py (3.9 KB)

  1. """Tests for _sketches.py."""
  2. import numpy as np
  3. from numpy.testing import assert_, assert_equal
  4. from scipy.linalg import clarkson_woodruff_transform
  5. from scipy.linalg._sketches import cwt_matrix
  6. from scipy.sparse import issparse, rand
  7. from scipy.sparse.linalg import norm
  8. class TestClarksonWoodruffTransform:
  9. """
  10. Testing the Clarkson Woodruff Transform
  11. """
  12. # set seed for generating test matrices
  13. rng = np.random.RandomState(seed=1179103485)
  14. # Test matrix parameters
  15. n_rows = 2000
  16. n_cols = 100
  17. density = 0.1
  18. # Sketch matrix dimensions
  19. n_sketch_rows = 200
  20. # Seeds to test with
  21. seeds = [1755490010, 934377150, 1391612830, 1752708722, 2008891431,
  22. 1302443994, 1521083269, 1501189312, 1126232505, 1533465685]
  23. A_dense = rng.randn(n_rows, n_cols)
  24. A_csc = rand(
  25. n_rows, n_cols, density=density, format='csc', random_state=rng,
  26. )
  27. A_csr = rand(
  28. n_rows, n_cols, density=density, format='csr', random_state=rng,
  29. )
  30. A_coo = rand(
  31. n_rows, n_cols, density=density, format='coo', random_state=rng,
  32. )
  33. # Collect the test matrices
  34. test_matrices = [
  35. A_dense, A_csc, A_csr, A_coo,
  36. ]
  37. # Test vector with norm ~1
  38. x = rng.randn(n_rows, 1) / np.sqrt(n_rows)
  39. def test_sketch_dimensions(self):
  40. for A in self.test_matrices:
  41. for seed in self.seeds:
  42. sketch = clarkson_woodruff_transform(
  43. A, self.n_sketch_rows, seed=seed
  44. )
  45. assert_(sketch.shape == (self.n_sketch_rows, self.n_cols))
  46. def test_seed_returns_identical_transform_matrix(self):
  47. for A in self.test_matrices:
  48. for seed in self.seeds:
  49. S1 = cwt_matrix(
  50. self.n_sketch_rows, self.n_rows, seed=seed
  51. ).toarray()
  52. S2 = cwt_matrix(
  53. self.n_sketch_rows, self.n_rows, seed=seed
  54. ).toarray()
  55. assert_equal(S1, S2)
  56. def test_seed_returns_identically(self):
  57. for A in self.test_matrices:
  58. for seed in self.seeds:
  59. sketch1 = clarkson_woodruff_transform(
  60. A, self.n_sketch_rows, seed=seed
  61. )
  62. sketch2 = clarkson_woodruff_transform(
  63. A, self.n_sketch_rows, seed=seed
  64. )
  65. if issparse(sketch1):
  66. sketch1 = sketch1.toarray()
  67. if issparse(sketch2):
  68. sketch2 = sketch2.toarray()
  69. assert_equal(sketch1, sketch2)
  70. def test_sketch_preserves_frobenius_norm(self):
  71. # Given the probabilistic nature of the sketches
  72. # we run the test multiple times and check that
  73. # we pass all/almost all the tries.
  74. n_errors = 0
  75. for A in self.test_matrices:
  76. if issparse(A):
  77. true_norm = norm(A)
  78. else:
  79. true_norm = np.linalg.norm(A)
  80. for seed in self.seeds:
  81. sketch = clarkson_woodruff_transform(
  82. A, self.n_sketch_rows, seed=seed,
  83. )
  84. if issparse(sketch):
  85. sketch_norm = norm(sketch)
  86. else:
  87. sketch_norm = np.linalg.norm(sketch)
  88. if np.abs(true_norm - sketch_norm) > 0.1 * true_norm:
  89. n_errors += 1
  90. assert_(n_errors == 0)
  91. def test_sketch_preserves_vector_norm(self):
  92. n_errors = 0
  93. n_sketch_rows = int(np.ceil(2. / (0.01 * 0.5**2)))
  94. true_norm = np.linalg.norm(self.x)
  95. for seed in self.seeds:
  96. sketch = clarkson_woodruff_transform(
  97. self.x, n_sketch_rows, seed=seed,
  98. )
  99. sketch_norm = np.linalg.norm(sketch)
  100. if np.abs(true_norm - sketch_norm) > 0.5 * true_norm:
  101. n_errors += 1
  102. assert_(n_errors == 0)