From 2eb620bb33808304772b432e9524c8806e838b9a Mon Sep 17 00:00:00 2001
From: Siarhei Fedartsou <siarhei.fedartsou@gmail.com>
Date: Sat, 15 Jun 2024 23:00:30 +0200
Subject: [PATCH] Calculate confidence interval for benchmark measurements using t-distribution

---
 .github/workflows/osrm-backend.yml | 2 +-
 scripts/ci/e2e_benchmark.py        | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/osrm-backend.yml b/.github/workflows/osrm-backend.yml
index 70565905a..50ec17721 100644
--- a/.github/workflows/osrm-backend.yml
+++ b/.github/workflows/osrm-backend.yml
@@ -676,7 +676,7 @@ jobs:
           path: pr
       - name: Install dependencies
         run: | 
-          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4" 
+          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4" "scipy==1.13.1"
           sudo apt-get update -y && sudo apt-get install ccache
       - name: Prepare data
         run: |
diff --git a/scripts/ci/e2e_benchmark.py b/scripts/ci/e2e_benchmark.py
index 025fc7160..e380d45dc 100644
--- a/scripts/ci/e2e_benchmark.py
+++ b/scripts/ci/e2e_benchmark.py
@@ -7,6 +7,7 @@ import csv
 import numpy as np
 import time
 import argparse
+from scipy import stats
 
 class BenchmarkRunner:
     def __init__(self, gps_traces_file_path):
@@ -82,7 +83,7 @@ def calculate_confidence_interval(data):
     assert len(data) == 5, f"Shape: {data.shape}"
     mean = np.mean(data)
     std_err = np.std(data, ddof=1) / np.sqrt(len(data))
-    h = std_err * 1.96 # 95% confidence interval
+    h = std_err * stats.t.ppf((1 + 0.95) / 2., len(data) - 1)  # 95% confidence interval using t-distribution
     return mean, h
 
 def main():