feat: add benchmark suite for pipeline operations

- Add pytest-benchmark to dev dependencies
- Benchmark SVD decomposition on synthetic vote matrix
- Benchmark cosine similarity at small/medium/large scales

P5-003: Benchmark suite
main
Sven Geboers 4 weeks ago
parent e352d7c7bc
commit 14921e9256
  1. 1
      pyproject.toml
  2. 54
      tests/benchmark/test_pipeline_benchmarks.py
  3. 32
      uv.lock

@ -25,6 +25,7 @@ dependencies = [
dev = [
"pytest>=9.0.2",
"pyright>=1.1.408",
"pytest-benchmark>=4.0.0",
]
[tool.ruff.lint]

@ -0,0 +1,54 @@
import numpy as np
import pytest
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
class TestSVDBenchmark:
    """Benchmark for truncated SVD on a synthetic vote matrix."""

    @pytest.mark.benchmark
    def test_svd_on_synthetic_vote_matrix(self, benchmark):
        """Benchmark SVD decomposition on a 100x20 synthetic vote matrix.

        The matrix entries are drawn from {-1, 0, 1}; the decomposition keeps
        5 components, so the transformed output must be 100x5.
        """
        # Use a local, seeded Generator instead of np.random.seed(): the input
        # stays reproducible without reseeding NumPy's global RNG, which would
        # otherwise change the random stream seen by tests that run later.
        rng = np.random.default_rng(42)
        vote_matrix = rng.choice([-1, 0, 1], size=(100, 20))

        def run_svd():
            # Build a fresh estimator on every call so each timed round
            # performs the complete fit + transform.
            svd = TruncatedSVD(n_components=5, random_state=42)
            return svd.fit_transform(vote_matrix)

        result = benchmark(run_svd)
        assert result.shape == (100, 5)
class TestSimilarityBenchmark:
    """Benchmarks for pairwise cosine similarity at three input sizes."""

    @staticmethod
    def _seeded_vectors(n_vectors, n_dims, seed=42):
        """Return a reproducible (n_vectors, n_dims) float32 normal sample.

        A local seeded Generator is used so that every benchmark run times the
        same input, independent of test ordering and of whatever state the
        global NumPy RNG happens to be in.
        """
        rng = np.random.default_rng(seed)
        return rng.standard_normal((n_vectors, n_dims)).astype(np.float32)

    @pytest.mark.benchmark
    def test_cosine_similarity_small(self, benchmark):
        """Benchmark cosine similarity on 50 vectors of dimension 10."""
        vectors = self._seeded_vectors(50, 10)

        def run_similarity():
            return cosine_similarity(vectors)

        result = benchmark(run_similarity)
        # Pairwise similarity of N vectors yields an N x N matrix.
        assert result.shape == (50, 50)

    @pytest.mark.benchmark
    def test_cosine_similarity_medium(self, benchmark):
        """Benchmark cosine similarity on 200 vectors of dimension 50."""
        vectors = self._seeded_vectors(200, 50)

        def run_similarity():
            return cosine_similarity(vectors)

        result = benchmark(run_similarity)
        assert result.shape == (200, 200)

    @pytest.mark.benchmark
    def test_cosine_similarity_large(self, benchmark):
        """Benchmark cosine similarity on 500 vectors of dimension 100."""
        vectors = self._seeded_vectors(500, 100)

        def run_similarity():
            return cosine_similarity(vectors)

        result = benchmark(run_similarity)
        assert result.shape == (500, 500)

@ -949,6 +949,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f7/af/ab3c51ab7507a7325e98ffe691d9495ee3d3aa5f589afad65ec920d39821/protobuf-6.31.1-py3-none-any.whl", hash = "sha256:720a6c7e6b77288b85063569baae8536671b39f15cc22037ec7045658d80489e", size = 168724, upload-time = "2025-05-28T19:25:53.926Z" },
]
[[package]]
name = "py-cpuinfo"
version = "9.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
]
[[package]]
name = "pyarrow"
version = "21.0.0"
@ -1099,6 +1108,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
[[package]]
name = "pytest-benchmark"
version = "5.2.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "py-cpuinfo" },
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@ -1382,7 +1404,6 @@ dependencies = [
{ name = "matplotlib" },
{ name = "openai" },
{ name = "plotly" },
{ name = "pyright" },
{ name = "requests" },
{ name = "schedule" },
{ name = "scikit-learn" },
@ -1393,7 +1414,9 @@ dependencies = [
[package.dev-dependencies]
dev = [
{ name = "pyright" },
{ name = "pytest" },
{ name = "pytest-benchmark" },
]
[package.metadata]
@ -1405,7 +1428,6 @@ requires-dist = [
{ name = "matplotlib", specifier = ">=3.8" },
{ name = "openai", specifier = ">=1.99.7" },
{ name = "plotly", specifier = ">=5.0" },
{ name = "pyright", specifier = ">=1.1.408" },
{ name = "requests", specifier = ">=2.32.4" },
{ name = "schedule", specifier = ">=1.2.2" },
{ name = "scikit-learn", specifier = ">=1.8.0" },
@ -1415,7 +1437,11 @@ requires-dist = [
]
[package.metadata.requires-dev]
dev = [{ name = "pytest", specifier = ">=9.0.2" }]
dev = [
{ name = "pyright", specifier = ">=1.1.408" },
{ name = "pytest", specifier = ">=9.0.2" },
{ name = "pytest-benchmark", specifier = ">=4.0.0" },
]
[[package]]
name = "streamlit"

Loading…
Cancel
Save