Implement end to end benchmark (#6910)

Siarhei Fedartsou 2024-06-05 21:39:10 +02:00 committed by GitHub
parent 2725202771
commit c7ee1a59eb
7 changed files with 261 additions and 10 deletions

View File

@@ -377,12 +377,11 @@ jobs:
key: v4-test-${{ matrix.name }}-${{ github.sha }}
restore-keys: |
v4-test-${{ matrix.name }}-
- name: Prepare environment
run: |
echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
mkdir -p $HOME/.ccache
PACKAGE_JSON_VERSION=$(node -e "console.log(require('./package.json').version)")
echo PUBLISH=$([[ "${GITHUB_REF:-}" == "refs/tags/v${PACKAGE_JSON_VERSION}" ]] && echo "On" || echo "Off") >> $GITHUB_ENV
echo "OSRM_INSTALL_DIR=${GITHUB_WORKSPACE}/install-osrm" >> $GITHUB_ENV
@@ -490,7 +489,7 @@ jobs:
run: |
echo "Using ${JOBS} jobs"
pushd ${OSRM_BUILD_DIR}
ccache --zero-stats
cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DENABLE_CONAN=${ENABLE_CONAN:-OFF} \
@@ -508,6 +507,7 @@ jobs:
if [[ "${NODE_PACKAGE_TESTS_ONLY}" != "ON" ]]; then
make tests --jobs=${JOBS}
make benchmarks --jobs=${JOBS}
sudo make install
if [[ "${RUNNER_OS}" == "Linux" ]]; then
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OSRM_INSTALL_DIR}/lib" >> $GITHUB_ENV
@@ -628,6 +628,13 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_REPOSITORY: ${{ github.repository }}
steps:
- name: Enable data.osm.pbf cache
uses: actions/cache@v4
with:
path: ~/data.osm.pbf
key: v1-data-osm-pbf
restore-keys: |
v1-data-osm-pbf
- name: Enable compiler cache
uses: actions/cache@v4
with:
@@ -648,9 +655,15 @@ jobs:
ref: ${{ github.head_ref }}
path: pr
- name: Install dependencies
run: |
python3 -m pip install "conan<2.0.0" "requests==2.31.0"
run: |
python3 -m pip install "conan<2.0.0" "requests==2.31.0" "locust==2.28.0"
sudo apt-get update -y && sudo apt-get install -y ccache
- name: Prepare data
run: |
if [ ! -f "$HOME/data.osm.pbf" ]; then
wget http://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf -O ~/data.osm.pbf
fi
gunzip -c ./pr/test/data/berlin_gps_traces.csv.gz > ~/gps_traces.csv
- name: Prepare environment
run: |
echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV

View File

@@ -0,0 +1,91 @@
import requests
import xml.etree.ElementTree as ET
import csv
import sys
import argparse
def get_osm_gps_traces(min_lon, min_lat, max_lon, max_lat):
url = 'https://api.openstreetmap.org/api/0.6/trackpoints'
traces = []
lon_step = 0.25
lat_step = 0.25
current_min_lon = min_lon
while current_min_lon < max_lon:
current_max_lon = min(current_min_lon + lon_step, max_lon)
current_min_lat = min_lat
while current_min_lat < max_lat:
current_max_lat = min(current_min_lat + lat_step, max_lat)
bbox = f'{current_min_lon},{current_min_lat},{current_max_lon},{current_max_lat}'
print(f"Requesting bbox: {bbox}", file=sys.stderr)
params = {
'bbox': bbox,
'page': 0
}
headers = {
'Accept': 'application/xml'
}
response = requests.get(url, params=params, headers=headers)
if response.status_code == 200:
traces.append(response.content)
else:
print(f"Error fetching data for bbox {bbox}: {response.status_code} {response.text}", file=sys.stderr)
current_min_lat += lat_step
current_min_lon += lon_step
return traces
def parse_gpx_data(gpx_data):
try:
root = ET.fromstring(gpx_data)
except ET.ParseError as e:
print(f"Error parsing GPX data: {e}", file=sys.stderr)
return []
namespace = {'gpx': 'http://www.topografix.com/GPX/1/0'}
tracks = []
for trk in root.findall('.//gpx:trk', namespace):
track_data = []
for trkseg in trk.findall('.//gpx:trkseg', namespace):
for trkpt in trkseg.findall('gpx:trkpt', namespace):
lat = trkpt.get('lat')
lon = trkpt.get('lon')
time_el = trkpt.find('gpx:time', namespace)
time = time_el.text if time_el is not None else ''
track_data.append([lat, lon, time])
tracks.append(track_data)
return tracks
def save_to_csv(data, file):
writer = csv.writer(file)
writer.writerow(['TrackID', 'Latitude', 'Longitude', 'Time'])
writer.writerows(data)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Fetch and output OSM GPS traces for a given bounding box.')
parser.add_argument('min_lon', type=float, help='Minimum longitude of the bounding box')
parser.add_argument('min_lat', type=float, help='Minimum latitude of the bounding box')
parser.add_argument('max_lon', type=float, help='Maximum longitude of the bounding box')
parser.add_argument('max_lat', type=float, help='Maximum latitude of the bounding box')
args = parser.parse_args()
gpx_data_traces = get_osm_gps_traces(args.min_lon, args.min_lat, args.max_lon, args.max_lat)
print(f"Collected {len(gpx_data_traces)} trace segments", file=sys.stderr)
all_data = []
track_id = 0
for gpx_data in gpx_data_traces:
for track in parse_gpx_data(gpx_data):
for point in track:
all_data.append([track_id] + point)
track_id += 1
# Output all data to stdout
save_to_csv(all_data, sys.stdout)
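For reference, a hedged usage sketch of this fetcher; the script path, output file names, and the rough Berlin bounding box below are illustrative assumptions, not taken from this diff:

# regenerate the GPS traces fixture from the OSM trackpoints API
# arguments: min_lon min_lat max_lon max_lat; the CSV goes to stdout
python3 scripts/ci/download_gps_traces.py 13.08 52.33 13.77 52.68 > berlin_gps_traces.csv
gzip berlin_gps_traces.csv  # CI later gunzips the checked-in copy into ~/gps_traces.csv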

scripts/ci/locustfile.py Normal file
View File

@@ -0,0 +1,74 @@
from locust import HttpUser, TaskSet, task, between
import csv
import random
from collections import defaultdict
import os
class OSRMTasks(TaskSet):
def on_start(self):
random.seed(42)
self.coordinates = []
self.tracks = defaultdict(list)
gps_traces_file_path = os.path.expanduser('~/gps_traces.csv')
with open(gps_traces_file_path, 'r') as file:
reader = csv.DictReader(file)
for row in reader:
coord = (float(row['Latitude']), float(row['Longitude']))
self.coordinates.append(coord)
self.tracks[row['TrackID']].append(coord)
self.track_ids = list(self.tracks.keys())
@task
def get_route(self):
start = random.choice(self.coordinates)
end = random.choice(self.coordinates)
start_coord = f"{start[1]:.6f},{start[0]:.6f}"
end_coord = f"{end[1]:.6f},{end[0]:.6f}"
self.client.get(f"/route/v1/driving/{start_coord};{end_coord}?overview=full&steps=true", name="route")
@task
def get_table(self):
num_coords = random.randint(3, 100)
selected_coords = random.sample(self.coordinates, num_coords)
coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
self.client.get(f"/table/v1/driving/{coords_str}", name="table")
@task
def get_match(self):
num_coords = random.randint(50, 100)
track_id = random.choice(self.track_ids)
track_coords = self.tracks[track_id][:num_coords]
coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in track_coords])
radiuses_str = ";".join([f"{random.randint(5, 20)}" for _ in range(len(track_coords))])
with self.client.get(f"/match/v1/driving/{coords_str}?steps=true&radiuses={radiuses_str}", name="match", catch_response=True) as response:
if response.status_code == 400:
j = response.json()
# some requests are expected to fail: map matching cannot always find a match
if j['code'] in ('NoSegment', 'NoMatch'):
response.success()
@task
def get_nearest(self):
coord = random.choice(self.coordinates)
coord_str = f"{coord[1]:.6f},{coord[0]:.6f}"
self.client.get(f"/nearest/v1/driving/{coord_str}", name="nearest")
@task
def get_trip(self):
num_coords = random.randint(2, 10)
selected_coords = random.sample(self.coordinates, num_coords)
coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
self.client.get(f"/trip/v1/driving/{coords_str}?steps=true", name="trip")
class OSRMUser(HttpUser):
tasks = [OSRMTasks]
# random wait time between requests so the server is not pinned at 100% load
wait_time = between(0.05, 0.5)
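For local exploration, a hedged sketch of running this locustfile interactively; it assumes osrm-routed is already serving on port 5000 and ~/gps_traces.csv exists, as the on_start hook requires:

# interactive run: locust serves its web UI on http://localhost:8089 by default
locust -f scripts/ci/locustfile.py --host http://localhost:5000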

View File

@@ -16,8 +16,10 @@ def create_markdown_table(results):
rows = []
for result in results:
name = result['name']
base = result['base'].replace('\n', '<br/>')
pr = result['pr'].replace('\n', '<br/>')
base = result['base'] or ''
base = base.replace('\n', '<br/>')
pr = result['pr'] or ''
pr = pr.replace('\n', '<br/>')
row = f"| {name} | {base} | {pr} |"
rows.append(row)
return f"{header}\n" + "\n".join(rows)
@@ -75,7 +77,14 @@ def main():
pr_body = pr_details.get('body', '') or ''
markdown_table = create_markdown_table(benchmark_results)
new_benchmark_section = f"<!-- BENCHMARK_RESULTS_START -->\n## Benchmark Results\n{markdown_table}\n<!-- BENCHMARK_RESULTS_END -->"
new_benchmark_section = f"""
<!-- BENCHMARK_RESULTS_START -->
<details><summary><h2>Benchmark Results</h2></summary>
{markdown_table}
</details>
<!-- BENCHMARK_RESULTS_END -->
"""
if re.search(r'<!-- BENCHMARK_RESULTS_START -->.*<!-- BENCHMARK_RESULTS_END -->', pr_body, re.DOTALL):
updated_body = re.sub(

View File

@@ -0,0 +1,31 @@
import sys
import csv
def main(locust_csv_base_name, suffix, output_folder):
with open(f"{locust_csv_base_name}_stats.csv", 'r') as file:
reader = csv.DictReader(file)
for row in reader:
name = row['Name']
if name == 'Aggregated': continue
statistics = f'''
requests: {row['Request Count']}
failures: {row['Failure Count']}
req/s: {float(row['Requests/s']):.3f}
avg: {float(row['Average Response Time']):.3f}ms
50%: {row['50%']}ms
75%: {row['75%']}ms
95%: {row['95%']}ms
98%: {row['98%']}ms
99%: {row['99%']}ms
min: {float(row['Min Response Time']):.3f}ms
max: {float(row['Max Response Time']):.3f}ms
'''
with open(f"{output_folder}/e2e_{name}_{suffix}.bench", 'w') as f:
f.write(statistics)
if __name__ == '__main__':
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <locust csv base name> <suffix> <output folder>")
sys.exit(1)
main(sys.argv[1], sys.argv[2], sys.argv[3])
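A hedged usage sketch; the CSV base name and output folder are illustrative, matching the locust --csv prefix used by the benchmark script below:

# turn locust's <base>_stats.csv into one e2e_<endpoint>_<suffix>.bench file per endpoint
python3 scripts/ci/process_locust_benchmark_results.py locust_results_mld mld results_folder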

View File

@@ -13,12 +13,45 @@ function run_benchmarks_for_folder {
./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench" || true # TODO: remove `true` once this benchmark is merged to master
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench" || true # TODO: remove `true` once this benchmark is merged to master
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
./$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
./$BENCHMARKS_FOLDER/json-render-bench "./$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
./$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
./$BENCHMARKS_FOLDER/rtree-bench "./$FOLDER/test/data/monaco.osrm.ramIndex" "./$FOLDER/test/data/monaco.osrm.fileIndex" "./$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"
BINARIES_FOLDER="$FOLDER/build"
cp ~/data.osm.pbf $FOLDER
$BINARIES_FOLDER/osrm-extract -p $FOLDER/profiles/car.lua $FOLDER/data.osm.pbf
$BINARIES_FOLDER/osrm-partition $FOLDER/data.osrm
$BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm
$BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm
if [ -f "$FOLDER/scripts/ci/locustfile.py" ]; then
for ALGORITHM in mld ch; do
$BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm &
OSRM_ROUTED_PID=$!
# wait for osrm-routed to start
curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true"
locust -f $FOLDER/scripts/ci/locustfile.py \
--headless \
--processes -1 \
--users 10 \
--spawn-rate 1 \
--host http://localhost:5000 \
--run-time 1m \
--csv=locust_results_$ALGORITHM \
--loglevel ERROR
python3 $FOLDER/scripts/ci/process_locust_benchmark_results.py locust_results_$ALGORITHM $ALGORITHM $RESULTS_FOLDER
# stop osrm-routed so the next algorithm can bind to port 5000
kill $OSRM_ROUTED_PID
done
fi
}
run_benchmarks_for_folder $1 "${1}_results"
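A hedged sketch of invoking this script outside CI; the script name and checkout folder names are assumptions (the workflow checks out the base branch and the PR into separate folders and passes each folder name here):

# benchmark the PR checkout; results are written to pr_results/
./run_benchmarks.sh pr
# benchmark the base checkout for comparison; results land in base_results/
./run_benchmarks.sh base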

Binary file not shown.