From 2eb620bb33808304772b432e9524c8806e838b9a Mon Sep 17 00:00:00 2001 From: Siarhei Fedartsou Date: Sat, 15 Jun 2024 23:00:30 +0200 Subject: [PATCH] Calculate confidence interval for benchmark measurements --- .github/workflows/osrm-backend.yml | 2 +- scripts/ci/e2e_benchmark.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/osrm-backend.yml b/.github/workflows/osrm-backend.yml index 70565905a..50ec17721 100644 --- a/.github/workflows/osrm-backend.yml +++ b/.github/workflows/osrm-backend.yml @@ -676,7 +676,7 @@ jobs: path: pr - name: Install dependencies run: | - python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4" + python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4" "scipy==1.13.1" sudo apt-get update -y && sudo apt-get install ccache - name: Prepare data run: | diff --git a/scripts/ci/e2e_benchmark.py b/scripts/ci/e2e_benchmark.py index 025fc7160..e380d45dc 100644 --- a/scripts/ci/e2e_benchmark.py +++ b/scripts/ci/e2e_benchmark.py @@ -7,6 +7,7 @@ import csv import numpy as np import time import argparse +from scipy import stats class BenchmarkRunner: def __init__(self, gps_traces_file_path): @@ -82,7 +83,7 @@ def calculate_confidence_interval(data): assert len(data) == 5, f"Shape: {data.shape}" mean = np.mean(data) std_err = np.std(data, ddof=1) / np.sqrt(len(data)) - h = std_err * 1.96 # 95% confidence interval + h = std_err * stats.t.ppf((1 + 0.95) / 2., len(data) - 1) # 95% confidence interval using t-distribution return mean, h def main():