diff --git a/.github/workflows/osrm-backend.yml b/.github/workflows/osrm-backend.yml index 9a35cd04a..d8eff0581 100644 --- a/.github/workflows/osrm-backend.yml +++ b/.github/workflows/osrm-backend.yml @@ -377,12 +377,11 @@ jobs: key: v4-test-${{ matrix.name }}-${{ github.sha }} restore-keys: | v4-test-${{ matrix.name }}- - - name: Prepare environment run: | echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV mkdir -p $HOME/.ccache - + PACKAGE_JSON_VERSION=$(node -e "console.log(require('./package.json').version)") echo PUBLISH=$([[ "${GITHUB_REF:-}" == "refs/tags/v${PACKAGE_JSON_VERSION}" ]] && echo "On" || echo "Off") >> $GITHUB_ENV echo "OSRM_INSTALL_DIR=${GITHUB_WORKSPACE}/install-osrm" >> $GITHUB_ENV @@ -490,7 +489,7 @@ jobs: run: | echo "Using ${JOBS} jobs" pushd ${OSRM_BUILD_DIR} - + ccache --zero-stats cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DENABLE_CONAN=${ENABLE_CONAN:-OFF} \ @@ -508,6 +507,7 @@ jobs: if [[ "${NODE_PACKAGE_TESTS_ONLY}" != "ON" ]]; then make tests --jobs=${JOBS} make benchmarks --jobs=${JOBS} + sudo make install if [[ "${RUNNER_OS}" == "Linux" ]]; then echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OSRM_INSTALL_DIR}/lib" >> $GITHUB_ENV @@ -628,6 +628,13 @@ jobs: PR_NUMBER: ${{ github.event.pull_request.number }} GITHUB_REPOSITORY: ${{ github.repository }} steps: + - name: Enable data.osm.pbf cache + uses: actions/cache@v4 + with: + path: ~/data.osm.pbf + key: v1-data-osm-pbf + restore-keys: | + v1-data-osm-pbf - name: Enable compiler cache uses: actions/cache@v4 with: @@ -648,9 +655,15 @@ jobs: ref: ${{ github.head_ref }} path: pr - name: Install dependencies - run: | - python3 -m pip install "conan<2.0.0" "requests==2.31.0" + run: | + python3 -m pip install "conan<2.0.0" "requests==2.31.0" "locust==2.28.0" sudo apt-get update -y && sudo apt-get install ccache + - name: Prepare data + run: | + if [ ! 
-f ~/data.osm.pbf ]; then + wget http://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf -O ~/data.osm.pbf + fi + gunzip -c ./pr/test/data/berlin_gps_traces.csv.gz > ~/gps_traces.csv - name: Prepare environment run: | echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV diff --git a/scripts/ci/download_gps_traces.py b/scripts/ci/download_gps_traces.py new file mode 100644 index 000000000..961acd532 --- /dev/null +++ b/scripts/ci/download_gps_traces.py @@ -0,0 +1,91 @@ +import requests +import xml.etree.ElementTree as ET +import csv +import sys +import argparse + +def get_osm_gps_traces(min_lon, min_lat, max_lon, max_lat): + url = 'https://api.openstreetmap.org/api/0.6/trackpoints' + traces = [] + + lon_step = 0.25 + lat_step = 0.25 + + current_min_lon = min_lon + + while current_min_lon < max_lon: + current_max_lon = min(current_min_lon + lon_step, max_lon) + + current_min_lat = min_lat + while current_min_lat < max_lat: + current_max_lat = min(current_min_lat + lat_step, max_lat) + + bbox = f'{current_min_lon},{current_min_lat},{current_max_lon},{current_max_lat}' + print(f"Requesting bbox: {bbox}", file=sys.stderr) + + params = { + 'bbox': bbox, + 'page': 0 + } + headers = { + 'Accept': 'application/xml' + } + + response = requests.get(url, params=params, headers=headers) + if response.status_code == 200: + traces.append(response.content) + else: + print(f"Error fetching data for bbox {bbox}: {response.status_code} {response.text}", file=sys.stderr) + + current_min_lat += lat_step + current_min_lon += lon_step + + return traces + +def parse_gpx_data(gpx_data): + try: + root = ET.fromstring(gpx_data) + except ET.ParseError as e: + print(f"Error parsing GPX data: {e}", file=sys.stderr) + return [] + namespace = {'gpx': 'http://www.topografix.com/GPX/1/0'} + + tracks = [] + for trk in root.findall('.//gpx:trk', namespace): + track_data = [] + for trkseg in trk.findall('.//gpx:trkseg', namespace): + for trkpt in trkseg.findall('gpx:trkpt', namespace): + lat = 
trkpt.get('lat') + lon = trkpt.get('lon') + time = trkpt.find('gpx:time', namespace).text if trkpt.find('gpx:time', namespace) is not None else '' + track_data.append([lat, lon, time]) + tracks.append(track_data) + return tracks + +def save_to_csv(data, file): + writer = csv.writer(file) + writer.writerow(['TrackID', 'Latitude', 'Longitude', 'Time']) + writer.writerows(data) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Fetch and output OSM GPS traces for a given bounding box.') + parser.add_argument('min_lon', type=float, help='Minimum longitude of the bounding box') + parser.add_argument('min_lat', type=float, help='Minimum latitude of the bounding box') + parser.add_argument('max_lon', type=float, help='Maximum longitude of the bounding box') + parser.add_argument('max_lat', type=float, help='Maximum latitude of the bounding box') + + args = parser.parse_args() + + gpx_data_traces = get_osm_gps_traces(args.min_lon, args.min_lat, args.max_lon, args.max_lat) + print(f"Collected {len(gpx_data_traces)} trace segments", file=sys.stderr) + + all_data = [] + track_id = 0 + for gpx_data in gpx_data_traces: + for track in parse_gpx_data(gpx_data): + for point in track: + all_data.append([track_id] + point) + track_id += 1 + + # Output all data to stdout + save_to_csv(all_data, sys.stdout) diff --git a/scripts/ci/locustfile.py b/scripts/ci/locustfile.py new file mode 100644 index 000000000..cd46aaba9 --- /dev/null +++ b/scripts/ci/locustfile.py @@ -0,0 +1,74 @@ +from locust import HttpUser, TaskSet, task, between +import csv +import random +from collections import defaultdict +import os + +class OSRMTasks(TaskSet): + def on_start(self): + random.seed(42) + + self.coordinates = [] + self.tracks = defaultdict(list) + + gps_traces_file_path = os.path.expanduser('~/gps_traces.csv') + with open(gps_traces_file_path, 'r') as file: + reader = csv.DictReader(file) + for row in reader: + coord = (float(row['Latitude']), float(row['Longitude'])) + self.coordinates.append(coord) + 
self.tracks[row['TrackID']].append(coord) + self.track_ids = list(self.tracks.keys()) + + @task + def get_route(self): + start = random.choice(self.coordinates) + end = random.choice(self.coordinates) + + start_coord = f"{start[1]:.6f},{start[0]:.6f}" + end_coord = f"{end[1]:.6f},{end[0]:.6f}" + + self.client.get(f"/route/v1/driving/{start_coord};{end_coord}?overview=full&steps=true", name="route") + + @task + def get_table(self): + num_coords = random.randint(3, 100) + selected_coords = random.sample(self.coordinates, num_coords) + coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords]) + + self.client.get(f"/table/v1/driving/{coords_str}", name="table") + + @task + def get_match(self): + num_coords = random.randint(50, 100) + track_id = random.choice(self.track_ids) + track_coords = self.tracks[track_id][:num_coords] + coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in track_coords]) + radiuses_str = ";".join([f"{random.randint(5, 20)}" for _ in range(len(track_coords))]) + + with self.client.get(f"/match/v1/driving/{coords_str}?steps=true&radiuses={radiuses_str}", name="match", catch_response=True) as response: + if response.status_code == 400: + j = response.json() + # it is expected that some of requests will fail with such error: map matching fails sometimes + if j['code'] == 'NoSegment' or j['code'] == 'NoMatch': + response.success() + + @task + def get_nearest(self): + coord = random.choice(self.coordinates) + coord_str = f"{coord[1]:.6f},{coord[0]:.6f}" + + self.client.get(f"/nearest/v1/driving/{coord_str}", name="nearest") + + @task + def get_trip(self): + num_coords = random.randint(2, 10) + selected_coords = random.sample(self.coordinates, num_coords) + coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords]) + + self.client.get(f"/trip/v1/driving/{coords_str}?steps=true", name="trip") + +class OSRMUser(HttpUser): + tasks = [OSRMTasks] + # random wait time between requests to 
not load server for 100% + wait_time = between(0.05, 0.5) diff --git a/scripts/ci/post_benchmark_results.py b/scripts/ci/post_benchmark_results.py index a5dc38aa5..339534a19 100644 --- a/scripts/ci/post_benchmark_results.py +++ b/scripts/ci/post_benchmark_results.py @@ -16,8 +16,10 @@ def create_markdown_table(results): rows = [] for result in results: name = result['name'] - base = result['base'].replace('\n', '
<br/>') - pr = result['pr'].replace('\n', '
<br/>') + base = result['base'] or '' + base = base.replace('\n', '
<br/>') + pr = result['pr'] or '' + pr = pr.replace('\n', '
<br/>') + row = f"| {name} | {base} | {pr} |" rows.append(row) return f"{header}\n" + "\n".join(rows) @@ -75,7 +77,14 @@ def main(): pr_body = pr_details.get('body', '') or '' markdown_table = create_markdown_table(benchmark_results) - new_benchmark_section = f"\n## Benchmark Results\n{markdown_table}\n" + new_benchmark_section = f""" + +

Benchmark Results

+ +{markdown_table} +
+ +""" if re.search(r'.*', pr_body, re.DOTALL): updated_body = re.sub( diff --git a/scripts/ci/process_locust_benchmark_results.py b/scripts/ci/process_locust_benchmark_results.py new file mode 100644 index 000000000..ad71daf7f --- /dev/null +++ b/scripts/ci/process_locust_benchmark_results.py @@ -0,0 +1,31 @@ +import sys +import csv + +def main(locust_csv_base_name, suffix, output_folder): + with open(f"{locust_csv_base_name}_stats.csv", 'r') as file: + reader = csv.DictReader(file) + for row in reader: + name = row['Name'] + if name == 'Aggregated': continue + + statistics = f''' +requests: {row['Request Count']} +failures: {row['Failure Count']} +req/s: {float(row['Requests/s']):.3f}req/s +avg: {float(row['Average Response Time']):.3f}ms +50%: {row['50%']}ms +75%: {row['75%']}ms +95%: {row['95%']}ms +98%: {row['98%']}ms +99%: {row['99%']}ms +min: {float(row['Min Response Time']):.3f}ms +max: {float(row['Max Response Time']):.3f}ms +''' + with open(f"{output_folder}/e2e_{name}_{suffix}.bench", 'w') as f: + f.write(statistics) + +if __name__ == '__main__': + if len(sys.argv) != 4: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + main(sys.argv[1], sys.argv[2], sys.argv[3]) \ No newline at end of file diff --git a/scripts/ci/run_benchmarks.sh b/scripts/ci/run_benchmarks.sh index 6aea4e089..51ced3d53 100755 --- a/scripts/ci/run_benchmarks.sh +++ b/scripts/ci/run_benchmarks.sh @@ -13,12 +13,45 @@ function run_benchmarks_for_folder { ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench" ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench" - ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench" || true # TODO: remove `true` when this benchmark will be merged to master - ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench" || true # TODO: remove 
`true` when this benchmark will be merged to master + ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench" + ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench" ./$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench" ./$BENCHMARKS_FOLDER/json-render-bench "./$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench" ./$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench" ./$BENCHMARKS_FOLDER/rtree-bench "./$FOLDER/test/data/monaco.osrm.ramIndex" "./$FOLDER/test/data/monaco.osrm.fileIndex" "./$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench" + + BINARIES_FOLDER="$FOLDER/build" + + cp ~/data.osm.pbf $FOLDER + $BINARIES_FOLDER/osrm-extract -p $FOLDER/profiles/car.lua $FOLDER/data.osm.pbf + $BINARIES_FOLDER/osrm-partition $FOLDER/data.osrm + $BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm + $BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm + + if [ -f "$FOLDER/scripts/ci/locustfile.py" ]; then + for ALGORITHM in mld ch; do + $BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm & + OSRM_ROUTED_PID=$! 
+ + # wait for osrm-routed to start + curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true" + locust -f $FOLDER/scripts/ci/locustfile.py \ + --headless \ + --processes -1 \ + --users 10 \ + --spawn-rate 1 \ + --host http://localhost:5000 \ + --run-time 1m \ + --csv=locust_results_$ALGORITHM \ + --loglevel ERROR + + python3 $FOLDER/scripts/ci/process_locust_benchmark_results.py locust_results_$ALGORITHM $ALGORITHM $RESULTS_FOLDER + + + kill $OSRM_ROUTED_PID + done + fi + } run_benchmarks_for_folder $1 "${1}_results" diff --git a/test/data/berlin_gps_traces.csv.gz b/test/data/berlin_gps_traces.csv.gz new file mode 100644 index 000000000..842d1cc82 Binary files /dev/null and b/test/data/berlin_gps_traces.csv.gz differ