Calculate confidence interval for benchmark measurements

2024-06-15 19:31:22 +02:00 · 2024-06-15 19:31:22 +02:00 · 5fb9e65fbb
commit 5fb9e65fbb
parent a119287878
3 changed files with 86 additions and 36 deletions
--- a/.github/workflows/osrm-backend.yml
+++ b/.github/workflows/osrm-backend.yml
@ -721,9 +721,15 @@ jobs:
          make -j$(nproc) benchmarks
          cd ..
          make -C test/data 
-      - name: Run Benchmarks
+      - name: Run Base Benchmarks
        run: |
-          ./pr/scripts/ci/run_benchmarks.sh base pr
+          # we intentionally use scripts from PR branch to be able to update them and see results in the same PR
+          ./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/base -r $(pwd)/base_results -s $(pwd)/pr -b $(pwd)/base/build -o ~/data.osm.pbf -g ~/gps_traces.csv
+          
+      - name: Run PR Benchmarks
+        run: |
+          # same PR-branch scripts as the base run; only the sources and binaries under test differ
+          ./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/pr -r $(pwd)/pr_results -s $(pwd)/pr -b $(pwd)/pr/build -o ~/data.osm.pbf -g ~/gps_traces.csv
+          
      - name: Post Benchmark Results
        run: |
          python3 pr/scripts/ci/post_benchmark_results.py base_results pr_results
--- a/scripts/ci/e2e_benchmark.py
+++ b/scripts/ci/e2e_benchmark.py
@ -9,11 +9,11 @@ import time
 import argparse

 class BenchmarkRunner:
-    def __init__(self):
+    def __init__(self, gps_traces_file_path):
        self.coordinates = []
        self.tracks = defaultdict(list)

-        gps_traces_file_path = os.path.expanduser('~/gps_traces.csv')
+        gps_traces_file_path = os.path.expanduser(gps_traces_file_path)
        with open(gps_traces_file_path, 'r') as file:
            reader = csv.DictReader(file)
            for row in reader:
@ -90,11 +90,12 @@ def main():
    parser.add_argument('--method', type=str, required=True, choices=['route', 'table', 'match', 'nearest', 'trip'], help='Benchmark method')
    parser.add_argument('--num_requests', type=int, required=True, help='Number of requests to perform')
    parser.add_argument('--iterations', type=int, default=5, required=True, help='Number of iterations to run the benchmark')
+    parser.add_argument('--gps_traces_file_path', type=str, required=True, help='Path to the GPS traces file')

    args = parser.parse_args()


-    runner = BenchmarkRunner()
+    runner = BenchmarkRunner(args.gps_traces_file_path)
    
    all_times = []
    for _ in range(args.iterations):
@ -110,13 +111,13 @@ def main():
    perc_99_time, perc_99_ci = calculate_confidence_interval(np.percentile(all_times, 99, axis=0))
    max_time, max_ci = calculate_confidence_interval(np.max(all_times, axis=0))

-    print(f'Total: {total_time}ms ± {total_ci:.2f}ms')
-    print(f"Min time: {min_time}ms ± {min_ci:.2f}ms")
+    print(f'Total: {total_time:.2f}ms ± {total_ci:.2f}ms')
+    print(f"Min time: {min_time:.2f}ms ± {min_ci:.2f}ms")
    print(f"Mean time: {mean_time:.2f}ms ± {mean_ci:.2f}ms")
-    print(f"Median time: {median_time}ms ± {median_ci:.2f}ms")
+    print(f"Median time: {median_time:.2f}ms ± {median_ci:.2f}ms")
    print(f"95th percentile: {perc_95_time:.2f}ms ± {perc_95_ci:.2f}ms")
    print(f"99th percentile: {perc_99_time:.2f}ms ± {perc_99_ci:.2f}ms")
-    print(f"Max time: {max_time}ms ± {max_ci:.2f}ms")
+    print(f"Max time: {max_time:.2f}ms ± {max_ci:.2f}ms")

 if __name__ == '__main__':
    main()
--- a/scripts/ci/run_benchmarks.sh
+++ b/scripts/ci/run_benchmarks.sh
@ -1,50 +1,94 @@
 #!/bin/bash
 set -eou pipefail

+# Print the expected invocation (all six options are mandatory) and exit with status 1.
+function usage {
+    echo "Usage: $0 -f <folder> -r <results_folder> -s <scripts_folder> -b <binaries_folder> -o <osm_pbf> -g <gps_traces>"
+    exit 1
+}
+
+# Parse command-line options. Every option takes a value, so each letter in the
+# optstring is followed by ':'. The leading ':' selects silent error reporting,
+# which lets the '\?' and ':' arms below print their own messages.
+while getopts ":f:r:s:b:o:g:" opt; do
+  case $opt in
+    f) FOLDER="$OPTARG"
+    ;;
+    r) RESULTS_FOLDER="$OPTARG"
+    ;;
+    s) SCRIPTS_FOLDER="$OPTARG"
+    ;;
+    b) BINARIES_FOLDER="$OPTARG"
+    ;;
+    o) OSM_PBF="$OPTARG"
+    ;;
+    g) GPS_TRACES="$OPTARG"
+    ;;
+    \?) echo "Invalid option -$OPTARG" >&2
+        usage
+    ;;
+    :) echo "Option -$OPTARG requires an argument." >&2
+        usage
+    ;;
+  esac
+done
+
+# All six options are required; "${VAR:-}" keeps 'set -u' from aborting when one was never set.
+if [ -z "${FOLDER:-}" ] || [ -z "${RESULTS_FOLDER:-}" ] || [ -z "${SCRIPTS_FOLDER:-}" ] || [ -z "${BINARIES_FOLDER:-}" ] || [ -z "${OSM_PBF:-}" ] || [ -z "${GPS_TRACES:-}" ]; then
+    usage
+fi
+
 function measure_peak_ram_and_time {
    COMMAND=$1
    OUTPUT_FILE=$2
+    if [ "$(uname)" == "Darwin" ]; then
+        # on macOS time has different parameters, so simply run command on macOS
+        $COMMAND > /dev/null 2>&1
+    else
+        OUTPUT=$(/usr/bin/time -f "%e %M" $COMMAND 2>&1 | tail -n 1)

-    OUTPUT=$(/usr/bin/time -f "%e %M" $COMMAND 2>&1 | tail -n 1)
-
-    TIME=$(echo $OUTPUT | awk '{print $1}')
-    PEAK_RAM_KB=$(echo $OUTPUT | awk '{print $2}')
-    PEAK_RAM_MB=$(echo "scale=2; $PEAK_RAM_KB / 1024" | bc)
-    echo "Time: ${TIME}s Peak RAM: ${PEAK_RAM_MB}MB" > $OUTPUT_FILE
+        TIME=$(echo $OUTPUT | awk '{print $1}')
+        PEAK_RAM_KB=$(echo $OUTPUT | awk '{print $2}')
+        PEAK_RAM_MB=$(echo "scale=2; $PEAK_RAM_KB / 1024" | bc)
+        echo "Time: ${TIME}s Peak RAM: ${PEAK_RAM_MB}MB" > $OUTPUT_FILE
+    fi
 }

 function run_benchmarks_for_folder {
    echo "Running benchmarks for $1"

-    FOLDER=$1
-    RESULTS_FOLDER=$2
-    SCRIPTS_FOLDER=$3

    mkdir -p $RESULTS_FOLDER

-    BENCHMARKS_FOLDER="$FOLDER/build/src/benchmarks"
+    BENCHMARKS_FOLDER="$BINARIES_FOLDER/src/benchmarks"
+    echo "Running match-bench MLD"
+    $BENCHMARKS_FOLDER/match-bench "$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
+    echo "Running match-bench CH"
+    $BENCHMARKS_FOLDER/match-bench "$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
+    echo "Running route-bench MLD"
+    $BENCHMARKS_FOLDER/route-bench "$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
+    echo "Running route-bench CH"
+    $BENCHMARKS_FOLDER/route-bench "$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
+    echo "Running alias"
+    $BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
+    echo "Running json-render-bench"
+    $BENCHMARKS_FOLDER/json-render-bench  "$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
+    echo "Running packedvector-bench"
+    $BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
+    echo "Running rtree-bench"
+    $BENCHMARKS_FOLDER/rtree-bench "$FOLDER/test/data/monaco.osrm.ramIndex" "$FOLDER/test/data/monaco.osrm.fileIndex" "$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"

-    ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
-    ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
-    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
-    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
-    ./$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
-    ./$BENCHMARKS_FOLDER/json-render-bench  "./$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
-    ./$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
-    ./$BENCHMARKS_FOLDER/rtree-bench "./$FOLDER/test/data/monaco.osrm.ramIndex" "./$FOLDER/test/data/monaco.osrm.fileIndex" "./$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"
-
-    BINARIES_FOLDER="$FOLDER/build"
-
-    cp ~/data.osm.pbf $FOLDER
+    cp -rf $OSM_PBF $FOLDER/data.osm.pbf

+    # echo "Running osrm-extract"
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-extract -p $FOLDER/profiles/car.lua $FOLDER/data.osm.pbf" "$RESULTS_FOLDER/osrm_extract.bench"
+    # echo "Running osrm-partition"
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-partition $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_partition.bench"
+    # echo "Running osrm-customize"
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_customize.bench"
+    # echo "Running osrm-contract"
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_contract.bench"

    for BENCH in nearest table trip route match; do
-        ./$BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" mld ~/gps_traces.csv ${BENCH} > "$RESULTS_FOLDER/random_${BENCH}_mld.bench" || true
-        ./$BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" ch ~/gps_traces.csv ${BENCH}  > "$RESULTS_FOLDER/random_${BENCH}_ch.bench" || true
+        echo "Running random $BENCH MLD"
+        $BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" mld $GPS_TRACES ${BENCH} > "$RESULTS_FOLDER/random_${BENCH}_mld.bench" || true
+        echo "Running random $BENCH CH"
+        $BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" ch $GPS_TRACES ${BENCH}  > "$RESULTS_FOLDER/random_${BENCH}_ch.bench" || true
    done


@ -60,13 +104,12 @@ function run_benchmarks_for_folder {
        fi

        for METHOD in route nearest trip table match; do
-            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --num_requests 1000 > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
+            python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 5 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
        done

        kill -9 $OSRM_ROUTED_PID
    done
 }

-run_benchmarks_for_folder $1 "${1}_results" $2
-run_benchmarks_for_folder $2 "${2}_results" $2
+# The function still expands $1 in its banner line; pass the folder so 'set -u'
+# does not abort on an unbound positional parameter.
+run_benchmarks_for_folder "$FOLDER"