Calculate confidence interval for benchmark measurements

2024-06-16 11:03:36 +02:00 · 2024-06-16 11:03:36 +02:00 · 91459df87e
commit 91459df87e
parent 685f4cca3c
2 changed files with 16 additions and 6 deletions
--- a/scripts/ci/e2e_benchmark.py
+++ b/scripts/ci/e2e_benchmark.py
@@ -91,12 +91,21 @@ class BenchmarkRunner:
        else:
            raise Exception(f"Unknown benchmark: {benchmark_name}")

+def bootstrap_confidence_interval(data, num_samples=1000, confidence_level=0.95):
+    means = []
+    for _ in range(num_samples):
+        sample = np.random.choice(data, size=len(data), replace=True)
+        means.append(np.mean(sample))
+    lower_bound = np.percentile(means, (1 - confidence_level) / 2 * 100)
+    upper_bound = np.percentile(means, (1 + confidence_level) / 2 * 100)
+    mean = np.mean(means)
+    return mean, lower_bound, upper_bound
+
 def calculate_confidence_interval(data):
-    #assert len(data) == 5, f"Shape: {data.shape}"
-    mean = np.mean(data)
-    std_err = np.std(data, ddof=1) / np.sqrt(len(data))
-    h = std_err * stats.t.ppf((1 + 0.95) / 2., len(data) - 1)  # 95% confidence interval using t-distribution
-    return mean, h, np.min(data)
+    mean, lower, upper = bootstrap_confidence_interval(data)
+    min_value = np.min(data)
+    return mean, (upper - lower) / 2, min_value
+

 def main():
    parser = argparse.ArgumentParser(description='Run GPS benchmark tests.')
@@ -108,6 +117,7 @@ def main():

    args = parser.parse_args()

+    np.random.seed(42)

    runner = BenchmarkRunner(args.gps_traces_file_path)
    
--- a/scripts/ci/run_benchmarks.sh
+++ b/scripts/ci/run_benchmarks.sh
@@ -101,7 +101,7 @@ function run_benchmarks_for_folder {
        fi

        for METHOD in route nearest trip table match; do
-            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 15 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
+            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 5 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
        done

        kill -9 $OSRM_ROUTED_PID