Make e2e benchmarks more stable (#6927)

Siarhei Fedartsou 2024-06-08 22:27:12 +02:00 committed by GitHub
parent 99cb17aed3
commit a2915f78c5
5 changed files with 121 additions and 128 deletions

.github/workflows/osrm-backend.yml

@@ -656,7 +656,7 @@ jobs:
           path: pr
       - name: Install dependencies
         run: |
-          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "locust==2.28.0"
+          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4"
           sudo apt-get update -y && sudo apt-get install ccache
       - name: Prepare data
         run: |
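The dependency change above swaps locust for numpy: load generation now happens in plain Python (see e2e_benchmark.py below), and numpy is only needed to summarize the measured latencies. A minimal sketch of the kind of summary it enables (the sample values are made up):

import numpy as np

# hypothetical latencies in ms from a benchmark run
times = [12.1, 13.4, 11.8, 45.2, 12.9]
print(f"Median time: {np.median(times)}ms")
print(f"95th percentile: {np.percentile(times, 95)}ms")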

scripts/ci/e2e_benchmark.py (new file, 102 lines)

@@ -0,0 +1,102 @@
import argparse
import csv
import os
import random
import time
from collections import defaultdict

import numpy as np
import requests


class BenchmarkRunner:
    def __init__(self):
        self.coordinates = []
        self.tracks = defaultdict(list)

        gps_traces_file_path = os.path.expanduser('~/gps_traces.csv')
        with open(gps_traces_file_path, 'r') as file:
            reader = csv.DictReader(file)
            for row in reader:
                coord = (float(row['Latitude']), float(row['Longitude']))
                self.coordinates.append(coord)
                self.tracks[row['TrackID']].append(coord)
        self.track_ids = list(self.tracks.keys())

    def run(self, benchmark_name, host, num_requests, warmup_requests=50):
        # warm up the server so the measured requests hit warm caches
        for _ in range(warmup_requests):
            url = self.make_url(host, benchmark_name)
            _ = requests.get(url)

        times = []
        for _ in range(num_requests):
            url = self.make_url(host, benchmark_name)
            start_time = time.time()
            response = requests.get(url)
            end_time = time.time()
            if response.status_code != 200:
                if benchmark_name == 'match':
                    code = response.json()['code']
                    if code == 'NoSegment' or code == 'NoMatch':
                        continue
                raise Exception(f"Error: {response.status_code} {response.text}")
            times.append((end_time - start_time) * 1000)  # convert to ms

        return times

    def make_url(self, host, benchmark_name):
        if benchmark_name == 'route':
            start = random.choice(self.coordinates)
            end = random.choice(self.coordinates)
            start_coord = f"{start[1]:.6f},{start[0]:.6f}"
            end_coord = f"{end[1]:.6f},{end[0]:.6f}"
            return f"{host}/route/v1/driving/{start_coord};{end_coord}?overview=full&steps=true"
        elif benchmark_name == 'table':
            num_coords = random.randint(3, 100)
            selected_coords = random.sample(self.coordinates, num_coords)
            coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
            return f"{host}/table/v1/driving/{coords_str}"
        elif benchmark_name == 'match':
            num_coords = random.randint(50, 100)
            track_id = random.choice(self.track_ids)
            track_coords = self.tracks[track_id][:num_coords]
            coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in track_coords])
            radiuses_str = ";".join([f"{random.randint(5, 20)}" for _ in range(len(track_coords))])
            return f"{host}/match/v1/driving/{coords_str}?steps=true&radiuses={radiuses_str}"
        elif benchmark_name == 'nearest':
            coord = random.choice(self.coordinates)
            coord_str = f"{coord[1]:.6f},{coord[0]:.6f}"
            return f"{host}/nearest/v1/driving/{coord_str}"
        elif benchmark_name == 'trip':
            num_coords = random.randint(2, 10)
            selected_coords = random.sample(self.coordinates, num_coords)
            coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
            return f"{host}/trip/v1/driving/{coords_str}?steps=true"
        else:
            raise Exception(f"Unknown benchmark: {benchmark_name}")


def main():
    parser = argparse.ArgumentParser(description='Run GPS benchmark tests.')
    parser.add_argument('--host', type=str, required=True, help='Host URL')
    parser.add_argument('--method', type=str, required=True, choices=['route', 'table', 'match', 'nearest', 'trip'], help='Benchmark method')
    parser.add_argument('--num_requests', type=int, required=True, help='Number of requests to perform')
    args = parser.parse_args()

    random.seed(42)

    runner = BenchmarkRunner()
    times = runner.run(args.method, args.host, args.num_requests)

    print(f'Total: {np.sum(times)}ms')
    print(f"Min time: {np.min(times)}ms")
    print(f"Mean time: {np.mean(times)}ms")
    print(f"Median time: {np.median(times)}ms")
    print(f"95th percentile: {np.percentile(times, 95)}ms")
    print(f"99th percentile: {np.percentile(times, 99)}ms")
    print(f"Max time: {np.max(times)}ms")


if __name__ == '__main__':
    main()
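Because the RNG is seeded with a fixed value, repeated runs issue the identical request sequence, so two server builds can be compared on exactly the same workload. A sketch of such a comparison, assuming both servers are already running (the second host URL is illustrative):

import random
import numpy as np

# hypothetical: compare a base and a PR build on the identical request sequence
random.seed(42)
base_times = BenchmarkRunner().run('route', 'http://localhost:5000', 1000)
random.seed(42)  # re-seed so the second server sees the same URLs
pr_times = BenchmarkRunner().run('route', 'http://localhost:5001', 1000)

print(f"median delta: {np.median(pr_times) - np.median(base_times):.3f}ms")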

scripts/ci/locustfile.py (deleted)

@@ -1,74 +0,0 @@
import csv
import os
import random
from collections import defaultdict

from locust import HttpUser, TaskSet, task, between


class OSRMTasks(TaskSet):
    def on_start(self):
        random.seed(42)

        self.coordinates = []
        self.tracks = defaultdict(list)

        gps_traces_file_path = os.path.expanduser('~/gps_traces.csv')
        with open(gps_traces_file_path, 'r') as file:
            reader = csv.DictReader(file)
            for row in reader:
                coord = (float(row['Latitude']), float(row['Longitude']))
                self.coordinates.append(coord)
                self.tracks[row['TrackID']].append(coord)
        self.track_ids = list(self.tracks.keys())

    @task
    def get_route(self):
        start = random.choice(self.coordinates)
        end = random.choice(self.coordinates)
        start_coord = f"{start[1]:.6f},{start[0]:.6f}"
        end_coord = f"{end[1]:.6f},{end[0]:.6f}"
        self.client.get(f"/route/v1/driving/{start_coord};{end_coord}?overview=full&steps=true", name="route")

    @task
    def get_table(self):
        num_coords = random.randint(3, 100)
        selected_coords = random.sample(self.coordinates, num_coords)
        coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
        self.client.get(f"/table/v1/driving/{coords_str}", name="table")

    @task
    def get_match(self):
        num_coords = random.randint(50, 100)
        track_id = random.choice(self.track_ids)
        track_coords = self.tracks[track_id][:num_coords]
        coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in track_coords])
        radiuses_str = ";".join([f"{random.randint(5, 20)}" for _ in range(len(track_coords))])
        with self.client.get(f"/match/v1/driving/{coords_str}?steps=true&radiuses={radiuses_str}", name="match", catch_response=True) as response:
            if response.status_code == 400:
                j = response.json()
                # some requests are expected to fail with these errors: map matching fails sometimes
                if j['code'] == 'NoSegment' or j['code'] == 'NoMatch':
                    response.success()

    @task
    def get_nearest(self):
        coord = random.choice(self.coordinates)
        coord_str = f"{coord[1]:.6f},{coord[0]:.6f}"
        self.client.get(f"/nearest/v1/driving/{coord_str}", name="nearest")

    @task
    def get_trip(self):
        num_coords = random.randint(2, 10)
        selected_coords = random.sample(self.coordinates, num_coords)
        coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
        self.client.get(f"/trip/v1/driving/{coords_str}?steps=true", name="trip")


class OSRMUser(HttpUser):
    tasks = [OSRMTasks]
    # random wait time between requests so the server is not loaded at 100%
    wait_time = between(0.05, 0.5)
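The deleted locustfile was time-boxed (--run-time 1m in the harness below) rather than request-count-boxed, with a random 0.05-0.5s pause between requests, so how many requests got measured depended on scheduling jitter on the CI runner. A toy illustration of that source of noise, using only the standard library:

import random
import time

# toy model: a time-boxed run measures however many requests fit into the budget,
# so the sample size itself is noisy; a count-boxed run always measures N requests
def time_boxed(budget_s):
    done = 0
    deadline = time.time() + budget_s
    while time.time() < deadline:
        time.sleep(random.uniform(0.05, 0.5))  # locust-style random pacing
        done += 1
    return done

print(time_boxed(5), time_boxed(5))  # typically two different counts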

scripts/ci/process_locust_benchmark_results.py (deleted)

@@ -1,31 +0,0 @@
import csv
import sys


def main(locust_csv_base_name, suffix, output_folder):
    with open(f"{locust_csv_base_name}_stats.csv", 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            name = row['Name']
            if name == 'Aggregated':
                continue
            statistics = f'''
requests: {row['Request Count']}
failures: {row['Failure Count']}
req/s: {float(row['Requests/s']):.3f}
avg: {float(row['Average Response Time']):.3f}ms
50%: {row['50%']}ms
75%: {row['75%']}ms
95%: {row['95%']}ms
98%: {row['98%']}ms
99%: {row['99%']}ms
min: {float(row['Min Response Time']):.3f}ms
max: {float(row['Max Response Time']):.3f}ms
'''
            with open(f"{output_folder}/e2e_{name}_{suffix}.bench", 'w') as f:
                f.write(statistics)


if __name__ == '__main__':
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <locust csv base name> <suffix> <output folder>")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])
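This post-processing step is no longer needed: e2e_benchmark.py prints its own summary, which the harness below redirects straight into $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench. A sketch of reading such a file back for comparison, assuming the "Metric: 12.3ms" format printed above (the helper and paths are hypothetical):

import re

def read_bench(path):
    # hypothetical helper: parse lines like "Median time: 12.3ms" written by e2e_benchmark.py
    stats = {}
    with open(path) as f:
        for line in f:
            m = re.match(r'(.+?): ([0-9.]+)ms', line.strip())
            if m:
                stats[m.group(1)] = float(m.group(2))
    return stats

base = read_bench('base_results/e2e_route_ch.bench')
pr = read_bench('pr_results/e2e_route_ch.bench')
print(pr['Median time'] - base['Median time'])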

scripts/ci/run_benchmarks.sh

@@ -18,6 +18,7 @@ function run_benchmarks_for_folder
    FOLDER=$1
    RESULTS_FOLDER=$2
+   SCRIPTS_FOLDER=$3

    mkdir -p $RESULTS_FOLDER
@@ -41,32 +42,27 @@ function run_benchmarks_for_folder
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_customize.bench"
    measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_contract.bench"

-   if [ -f "$FOLDER/scripts/ci/locustfile.py" ]; then
-       for ALGORITHM in mld ch; do
-           $BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm &
-           OSRM_ROUTED_PID=$!
+   for ALGORITHM in ch mld; do
+       $BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm &
+       OSRM_ROUTED_PID=$!

-           # wait for osrm-routed to start
-           curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true"
-           locust -f $FOLDER/scripts/ci/locustfile.py \
-               --headless \
-               --processes -1 \
-               --users 10 \
-               --spawn-rate 1 \
-               --host http://localhost:5000 \
-               --run-time 1m \
-               --csv=locust_results_$ALGORITHM \
-               --loglevel ERROR
+       # wait for osrm-routed to start
+       if ! curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true"; then
+           echo "osrm-routed failed to start for algorithm $ALGORITHM"
+           kill -9 $OSRM_ROUTED_PID
+           continue
+       fi

-           python3 $FOLDER/scripts/ci/process_locust_benchmark_results.py locust_results_$ALGORITHM $ALGORITHM $RESULTS_FOLDER
-           kill -0 $OSRM_ROUTED_PID
-       done
-   fi
+       for METHOD in route nearest trip table match; do
+           python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --num_requests 1000 > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
+       done
+
+       kill -9 $OSRM_ROUTED_PID
+   done
}
-run_benchmarks_for_folder $1 "${1}_results"
-run_benchmarks_for_folder $2 "${2}_results"
+run_benchmarks_for_folder $1 "${1}_results" $2
+run_benchmarks_for_folder $2 "${2}_results" $2
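The harness now health-checks osrm-routed before benchmarking and skips the algorithm on failure instead of measuring a dead server. A Python sketch of the equivalent readiness check, in case it ever needs to move into the benchmark script itself (the function name is illustrative):

import time
import requests

def wait_for_server(url, retries=10, delay_s=3):
    # illustrative equivalent of the harness's `curl --retry 10 --retry-delay 3 --retry-all-errors`
    for _ in range(retries):
        try:
            if requests.get(url).status_code == 200:
                return True
        except requests.RequestException:
            pass
        time.sleep(delay_s)
    return False

ok = wait_for_server("http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true")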