You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
motief/tests/benchmark/test_pipeline_benchmarks.py

54 lines
1.8 KiB

import numpy as np
import pytest
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
class TestSVDBenchmark:
    """Benchmark truncated SVD on a synthetic vote matrix."""

    @pytest.mark.benchmark
    def test_svd_on_synthetic_vote_matrix(self, benchmark):
        """Benchmark SVD decomposition on a 100x20 synthetic vote matrix."""
        # A local Generator keeps the input reproducible without reseeding
        # NumPy's global RNG, which would otherwise leak a fixed seed into
        # every test that runs later in the same process.
        rng = np.random.default_rng(42)
        # Entries are drawn from {-1, 0, 1}.
        vote_matrix = rng.choice([-1, 0, 1], size=(100, 20))

        def run_svd():
            # The estimator is built inside the timed callable, so its
            # construction is part of what gets measured.
            svd = TruncatedSVD(n_components=5, random_state=42)
            return svd.fit_transform(vote_matrix)

        result = benchmark(run_svd)
        # 100 rows projected onto the 5 requested components.
        assert result.shape == (100, 5)
class TestSimilarityBenchmark:
    """Benchmark pairwise cosine similarity at three input sizes."""

    @staticmethod
    def _benchmark_cosine_similarity(benchmark, n_vectors, n_dims):
        """Time ``cosine_similarity`` on an ``(n_vectors, n_dims)`` float32 matrix.

        Args:
            benchmark: The ``benchmark`` fixture passed to the calling test.
            n_vectors: Number of row vectors to generate.
            n_dims: Dimension of each vector.
        """
        # Seeded local Generator: the benchmarked input is identical on every
        # run and independent of test ordering or global RNG state.
        rng = np.random.default_rng(42)
        vectors = rng.standard_normal((n_vectors, n_dims)).astype(np.float32)

        def run_similarity():
            return cosine_similarity(vectors)

        result = benchmark(run_similarity)
        # One similarity score for every pair of input rows.
        assert result.shape == (n_vectors, n_vectors)

    @pytest.mark.benchmark
    def test_cosine_similarity_small(self, benchmark):
        """Benchmark cosine similarity on 50 vectors of dimension 10."""
        self._benchmark_cosine_similarity(benchmark, 50, 10)

    @pytest.mark.benchmark
    def test_cosine_similarity_medium(self, benchmark):
        """Benchmark cosine similarity on 200 vectors of dimension 50."""
        self._benchmark_cosine_similarity(benchmark, 200, 50)

    @pytest.mark.benchmark
    def test_cosine_similarity_large(self, benchmark):
        """Benchmark cosine similarity on 500 vectors of dimension 100."""
        self._benchmark_cosine_similarity(benchmark, 500, 100)