From 91459df87ef655346d576109e9e027ee882cd570 Mon Sep 17 00:00:00 2001
From: Siarhei Fedartsou
Date: Sun, 16 Jun 2024 11:03:36 +0200
Subject: [PATCH] Calculate confidence interval for benchmark measurements

---
# Review notes (free text between the "---" line and the first "diff --git"
# header is not part of the commit message or of the diff itself):
#
# * e2e_benchmark.py: adds bootstrap_confidence_interval(), which resamples
#   `data` with replacement `num_samples` times (default 1000), records the
#   mean of each resample, and returns the mean of those means together with
#   the lower and upper percentile bounds for `confidence_level`
#   (default 0.95).
# * calculate_confidence_interval() no longer computes a t-distribution
#   interval; it now returns
#   (bootstrap mean, (upper - lower) / 2, np.min(data)).
# * main() calls np.random.seed(42) before constructing BenchmarkRunner;
#   presumably this makes the resampling repeatable between runs -- confirm.
# * run_benchmarks.sh: the --iterations value passed to e2e_benchmark.py
#   drops from 15 to 5.
# NOTE(review): indentation and blank context lines in the hunks below were
#   restored by hand; check them against the target tree before applying.
 scripts/ci/e2e_benchmark.py  | 20 +++++++++++++++-----
 scripts/ci/run_benchmarks.sh |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/scripts/ci/e2e_benchmark.py b/scripts/ci/e2e_benchmark.py
index 7b835e1ea..bc4ba87b9 100644
--- a/scripts/ci/e2e_benchmark.py
+++ b/scripts/ci/e2e_benchmark.py
@@ -91,12 +91,21 @@ class BenchmarkRunner:
         else:
             raise Exception(f"Unknown benchmark: {benchmark_name}")
 
+def bootstrap_confidence_interval(data, num_samples=1000, confidence_level=0.95):
+    means = []
+    for _ in range(num_samples):
+        sample = np.random.choice(data, size=len(data), replace=True)
+        means.append(np.mean(sample))
+    lower_bound = np.percentile(means, (1 - confidence_level) / 2 * 100)
+    upper_bound = np.percentile(means, (1 + confidence_level) / 2 * 100)
+    mean = np.mean(means)
+    return mean, lower_bound, upper_bound
+
 def calculate_confidence_interval(data):
-    #assert len(data) == 5, f"Shape: {data.shape}"
-    mean = np.mean(data)
-    std_err = np.std(data, ddof=1) / np.sqrt(len(data))
-    h = std_err * stats.t.ppf((1 + 0.95) / 2., len(data) - 1) # 95% confidence interval using t-distribution
-    return mean, h, np.min(data)
+    mean, lower, upper = bootstrap_confidence_interval(data)
+    min_value = np.min(data)
+    return mean, (upper - lower) / 2, min_value
+
 
 def main():
     parser = argparse.ArgumentParser(description='Run GPS benchmark tests.')
@@ -108,6 +117,7 @@ def main():
 
     args = parser.parse_args()
 
+    np.random.seed(42)
 
     runner = BenchmarkRunner(args.gps_traces_file_path)
 
diff --git a/scripts/ci/run_benchmarks.sh b/scripts/ci/run_benchmarks.sh
index e1ebfa921..f117f95a3 100755
--- a/scripts/ci/run_benchmarks.sh
+++ b/scripts/ci/run_benchmarks.sh
@@ -101,7 +101,7 @@ function run_benchmarks_for_folder {
         fi
 
         for METHOD in route nearest trip table match; do
-            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 15 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
+            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 5 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
         done
 
         kill -9 $OSRM_ROUTED_PID