You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.8 KiB
54 lines
1.8 KiB
import numpy as np
|
|
import pytest
|
|
from sklearn.decomposition import TruncatedSVD
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
class TestSVDBenchmark:
    """Benchmarks for truncated SVD on a small synthetic vote matrix."""

    @pytest.mark.benchmark
    def test_svd_on_synthetic_vote_matrix(self, benchmark):
        """Benchmark SVD decomposition on a 100x20 synthetic vote matrix.

        The matrix entries are drawn from {-1, 0, 1} with a fixed seed, and
        the decomposition keeps 5 components.

        Args:
            benchmark: Callable fixture that runs the function under
                measurement and hands back its return value (presumably
                supplied by the benchmark plugin in use -- confirm against
                the project's test configuration).
        """
        n_rows, n_cols, n_components = 100, 20, 5

        # A local generator keeps the input reproducible without reseeding
        # NumPy's global RNG, which would leak into every test that runs
        # afterwards in the same process.
        rng = np.random.default_rng(42)
        vote_matrix = rng.choice([-1, 0, 1], size=(n_rows, n_cols))

        def run_svd():
            # The estimator is built inside the timed callable, so
            # construction and fitting are measured together.
            svd = TruncatedSVD(n_components=n_components, random_state=42)
            return svd.fit_transform(vote_matrix)

        result = benchmark(run_svd)
        assert result.shape == (n_rows, n_components)
|
|
|
|
|
|
class TestSimilarityBenchmark:
    """Benchmarks for pairwise cosine similarity at three input sizes."""

    @staticmethod
    def _run_cosine_benchmark(benchmark, n_vectors, dim):
        """Benchmark cosine_similarity on seeded float32 data.

        Args:
            benchmark: Callable fixture that runs the function under
                measurement and hands back its return value (presumably
                supplied by the benchmark plugin in use -- confirm against
                the project's test configuration).
            n_vectors: Number of row vectors in the input matrix.
            dim: Length of each vector.
        """
        # Seeded local generator: every run measures the same input, and
        # the data no longer depends on whatever touched the global RNG
        # earlier in the session.
        rng = np.random.default_rng(42)
        vectors = rng.standard_normal((n_vectors, dim), dtype=np.float32)

        def run_similarity():
            return cosine_similarity(vectors)

        result = benchmark(run_similarity)
        # With a single argument the similarity is computed between all
        # pairs of rows, hence a square result.
        assert result.shape == (n_vectors, n_vectors)

    @pytest.mark.benchmark
    def test_cosine_similarity_small(self, benchmark):
        """Benchmark cosine similarity on 50 vectors of dimension 10."""
        self._run_cosine_benchmark(benchmark, n_vectors=50, dim=10)

    @pytest.mark.benchmark
    def test_cosine_similarity_medium(self, benchmark):
        """Benchmark cosine similarity on 200 vectors of dimension 50."""
        self._run_cosine_benchmark(benchmark, n_vectors=200, dim=50)

    @pytest.mark.benchmark
    def test_cosine_similarity_large(self, benchmark):
        """Benchmark cosine similarity on 500 vectors of dimension 100."""
        self._run_cosine_benchmark(benchmark, n_vectors=500, dim=100)
|
|
|