From 91459df87ef655346d576109e9e027ee882cd570 Mon Sep 17 00:00:00 2001
From: Siarhei Fedartsou <siarhei.fedartsou@gmail.com>
Date: Sun, 16 Jun 2024 11:03:36 +0200
Subject: [PATCH] Calculate confidence interval for benchmark measurements

---
 scripts/ci/e2e_benchmark.py  | 20 +++++++++++++++-----
 scripts/ci/run_benchmarks.sh |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/scripts/ci/e2e_benchmark.py b/scripts/ci/e2e_benchmark.py
index 7b835e1ea..bc4ba87b9 100644
--- a/scripts/ci/e2e_benchmark.py
+++ b/scripts/ci/e2e_benchmark.py
@@ -91,12 +91,21 @@ class BenchmarkRunner:
         else:
             raise Exception(f"Unknown benchmark: {benchmark_name}")
 
+def bootstrap_confidence_interval(data, num_samples=1000, confidence_level=0.95):
+    """Percentile-bootstrap CI for the mean of 1-D `data`; returns (mean of data, lower, upper)."""
+    # One vectorized draw (one row per resample) replaces the per-resample Python loop.
+    resamples = np.random.choice(data, size=(num_samples, len(data)), replace=True)
+    means = resamples.mean(axis=1)
+    lower_bound = np.percentile(means, (1 - confidence_level) / 2 * 100)
+    upper_bound = np.percentile(means, (1 + confidence_level) / 2 * 100)
+    # Report the mean of the data itself; averaging the resampled means only adds resampling noise.
+    return np.mean(data), lower_bound, upper_bound
+
 def calculate_confidence_interval(data):
-    #assert len(data) == 5, f"Shape: {data.shape}"
-    mean = np.mean(data)
-    std_err = np.std(data, ddof=1) / np.sqrt(len(data))
-    h = std_err * stats.t.ppf((1 + 0.95) / 2., len(data) - 1)  # 95% confidence interval using t-distribution
-    return mean, h, np.min(data)
+    center, ci_low, ci_high = bootstrap_confidence_interval(data)  # mean estimate plus bootstrap CI bounds
+    half_width = (ci_high - ci_low) / 2  # half the interval width, returned as the +/- margin
+    return center, half_width, np.min(data)
+
 
 def main():
     parser = argparse.ArgumentParser(description='Run GPS benchmark tests.')
@@ -108,6 +117,7 @@ def main():
 
     args = parser.parse_args()
 
+    np.random.seed(42)
 
     runner = BenchmarkRunner(args.gps_traces_file_path)
     
diff --git a/scripts/ci/run_benchmarks.sh b/scripts/ci/run_benchmarks.sh
index e1ebfa921..f117f95a3 100755
--- a/scripts/ci/run_benchmarks.sh
+++ b/scripts/ci/run_benchmarks.sh
@@ -101,7 +101,7 @@ function run_benchmarks_for_folder {
         fi
 
         for METHOD in route nearest trip table match; do
-            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 15 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
+            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 5 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
         done
 
         kill -9 $OSRM_ROUTED_PID