Implement end to end benchmark (#6910)

SiarheiFedartsou · web-flow · commit c7ee1a59eb20 · 2024-06-05T21:39:10.000+02:00
diff --git a/.github/workflows/osrm-backend.yml b/.github/workflows/osrm-backend.yml
@@ -377,12 +377,11 @@ jobs:
         key: v4-test-${{ matrix.name }}-${{ github.sha }}
         restore-keys: |
           v4-test-${{ matrix.name }}-
-
     - name: Prepare environment
       run: |
         echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
         mkdir -p $HOME/.ccache
-        
+
         PACKAGE_JSON_VERSION=$(node -e "console.log(require('./package.json').version)")
         echo PUBLISH=$([[ "${GITHUB_REF:-}" == "refs/tags/v${PACKAGE_JSON_VERSION}" ]] && echo "On" || echo "Off") >> $GITHUB_ENV
         echo "OSRM_INSTALL_DIR=${GITHUB_WORKSPACE}/install-osrm" >> $GITHUB_ENV
@@ -490,7 +489,7 @@ jobs:
       run: |
         echo "Using ${JOBS} jobs"
         pushd ${OSRM_BUILD_DIR}
-
+        
         ccache --zero-stats
         cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
                  -DENABLE_CONAN=${ENABLE_CONAN:-OFF} \
@@ -508,6 +507,7 @@ jobs:
         if [[ "${NODE_PACKAGE_TESTS_ONLY}" != "ON" ]]; then
           make tests --jobs=${JOBS}
           make benchmarks --jobs=${JOBS}
+
           sudo make install
           if [[ "${RUNNER_OS}" == "Linux" ]]; then
             echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OSRM_INSTALL_DIR}/lib" >> $GITHUB_ENV
@@ -628,6 +628,13 @@ jobs:
       PR_NUMBER: ${{ github.event.pull_request.number }}
       GITHUB_REPOSITORY: ${{ github.repository }}
     steps: 
+      - name: Enable data.osm.pbf cache
+        uses: actions/cache@v4
+        with:
+          path: ~/data.osm.pbf
+          key: v1-data-osm-pbf
+          restore-keys: |
+            v1-data-osm-pbf
       - name: Enable compiler cache
         uses: actions/cache@v4
         with:
@@ -648,9 +655,15 @@ jobs:
           ref: ${{ github.head_ref }}
           path: pr
       - name: Install dependencies
-        run: |
-          python3 -m pip install "conan<2.0.0" "requests==2.31.0"
+        run: | 
+          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "locust==2.28.0"
           sudo apt-get update -y && sudo apt-get install ccache
+      - name: Prepare data
+        run: |
+          if [ ! -f "$HOME/data.osm.pbf" ]; then  # a quoted "~" is not expanded, so use $HOME to actually see the cached file
+            wget http://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf -O ~/data.osm.pbf
+          fi
+          gunzip -c ./pr/test/data/berlin_gps_traces.csv.gz > ~/gps_traces.csv
       - name: Prepare environment
         run: |
           echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
diff --git a/scripts/ci/download_gps_traces.py b/scripts/ci/download_gps_traces.py
@@ -0,0 +1,91 @@
+import requests
+import xml.etree.ElementTree as ET
+import csv
+import sys
+import argparse
+
+def get_osm_gps_traces(min_lon, min_lat, max_lon, max_lat):
+    """Download the raw GPX trackpoint documents covering a bounding box.
+
+    The box is walked in 0.25-degree tiles and, for each tile, page 0 of the
+    OSM trackpoints API is requested. Tiles that fail are reported on stderr
+    and skipped, so one bad tile does not discard what was already collected.
+
+    Returns a list with the raw response body of every successful request.
+    """
+    url = 'https://api.openstreetmap.org/api/0.6/trackpoints'
+    headers = {'Accept': 'application/xml'}
+    lon_step = 0.25
+    lat_step = 0.25
+    traces = []
+
+    current_min_lon = min_lon
+    while current_min_lon < max_lon:
+        current_max_lon = min(current_min_lon + lon_step, max_lon)
+        current_min_lat = min_lat
+        while current_min_lat < max_lat:
+            current_max_lat = min(current_min_lat + lat_step, max_lat)
+            bbox = f'{current_min_lon},{current_min_lat},{current_max_lon},{current_max_lat}'
+            print(f"Requesting bbox: {bbox}", file=sys.stderr)
+            try:
+                # Without a timeout a single stalled connection would hang the whole run.
+                response = requests.get(url, params={'bbox': bbox, 'page': 0}, headers=headers, timeout=60)
+            except requests.RequestException as e:
+                print(f"Error fetching data for bbox {bbox}: {e}", file=sys.stderr)
+            else:
+                if response.status_code == 200:
+                    traces.append(response.content)
+                else:
+                    print(f"Error fetching data for bbox {bbox}: {response.status_code} {response.text}", file=sys.stderr)
+            current_min_lat += lat_step
+        current_min_lon += lon_step
+
+    return traces
+
+def parse_gpx_data(gpx_data):
+    """Parse a GPX 1.0 document into a list of tracks, each a list of [lat, lon, time] strings."""
+    try:
+        root = ET.fromstring(gpx_data)
+    except ET.ParseError as e:
+        print(f"Error parsing GPX data: {e}", file=sys.stderr)
+        return []
+    namespace = {'gpx': 'http://www.topografix.com/GPX/1/0'}
+
+    tracks = []
+    for trk in root.findall('.//gpx:trk', namespace):
+        track_data = []
+        for trkseg in trk.findall('.//gpx:trkseg', namespace):
+            for trkpt in trkseg.findall('gpx:trkpt', namespace):
+                # <time> is namespaced like its parent; an unqualified find('time') never matches it.
+                time = trkpt.findtext('gpx:time', default='', namespaces=namespace)
+                track_data.append([trkpt.get('lat'), trkpt.get('lon'), time])
+        tracks.append(track_data)
+    return tracks
+
+def save_to_csv(data, file):
+    """Write the TrackID/Latitude/Longitude/Time header and then every row of *data* to *file* as CSV."""
+    header = ['TrackID', 'Latitude', 'Longitude', 'Time']
+    csv.writer(file).writerows([header, *data])
+
+if __name__ == '__main__':
+    # Command-line entry point: four floats describing the bounding box, CSV goes to stdout.
+    parser = argparse.ArgumentParser(description='Fetch and output OSM GPS traces for a given bounding box.')
+    for arg_name, arg_help in (
+        ('min_lon', 'Minimum longitude of the bounding box'),
+        ('min_lat', 'Minimum latitude of the bounding box'),
+        ('max_lon', 'Maximum longitude of the bounding box'),
+        ('max_lat', 'Maximum latitude of the bounding box'),
+    ):
+        parser.add_argument(arg_name, type=float, help=arg_help)
+    args = parser.parse_args()
+
+    gpx_data_traces = get_osm_gps_traces(args.min_lon, args.min_lat, args.max_lon, args.max_lat)
+    print(f"Collected {len(gpx_data_traces)} trace segments", file=sys.stderr)
+    # Flatten all tracks into rows; track ids keep counting across responses.
+    all_data = []
+    track_id = 0
+    for gpx_data in gpx_data_traces:
+        for track in parse_gpx_data(gpx_data):
+            all_data.extend([track_id] + point for point in track)
+            track_id += 1
+    save_to_csv(all_data, sys.stdout)
diff --git a/scripts/ci/locustfile.py b/scripts/ci/locustfile.py
@@ -0,0 +1,74 @@
+from locust import HttpUser, TaskSet, task, between
+import csv
+import random
+from collections import defaultdict
+import os
+
+class OSRMTasks(TaskSet):
+    """Locust tasks that query the OSRM HTTP services with recorded GPS points."""
+
+    def on_start(self):
+        """Seed the RNG and load ~/gps_traces.csv into a flat list and per-track lists."""
+        random.seed(42)  # fixed seed so the generated request mix is repeatable
+        self.coordinates = []
+        self.tracks = defaultdict(list)
+        with open(os.path.expanduser('~/gps_traces.csv'), 'r') as file:
+            for row in csv.DictReader(file):
+                coord = (float(row['Latitude']), float(row['Longitude']))
+                self.coordinates.append(coord)
+                self.tracks[row['TrackID']].append(coord)
+        self.track_ids = list(self.tracks.keys())
+
+    @staticmethod
+    def _lonlat(coords):
+        """Format (lat, lon) pairs as the `lon,lat;lon,lat` URL path segment."""
+        return ";".join(f"{lon:.6f},{lat:.6f}" for lat, lon in coords)
+
+    def _sample(self, low, high):
+        """Pick between *low* and *high* distinct points, capped at the number loaded."""
+        count = random.randint(low, high)
+        # random.sample raises ValueError when asked for more items than exist.
+        return random.sample(self.coordinates, min(count, len(self.coordinates)))
+
+    @task
+    def get_route(self):
+        """Request a route between two random points."""
+        start = random.choice(self.coordinates)
+        end = random.choice(self.coordinates)
+        self.client.get(f"/route/v1/driving/{self._lonlat([start, end])}?overview=full&steps=true", name="route")
+
+    @task
+    def get_table(self):
+        """Request a table for 3-100 random points."""
+        self.client.get(f"/table/v1/driving/{self._lonlat(self._sample(3, 100))}", name="table")
+
+    @task
+    def get_match(self):
+        """Request map matching for the first 50-100 points of a random track."""
+        num_coords = random.randint(50, 100)
+        track_id = random.choice(self.track_ids)
+        track_coords = self.tracks[track_id][:num_coords]
+        radiuses_str = ";".join(str(random.randint(5, 20)) for _ in track_coords)
+        url = f"/match/v1/driving/{self._lonlat(track_coords)}?steps=true&radiuses={radiuses_str}"
+        with self.client.get(url, name="match", catch_response=True) as response:
+            if response.status_code == 400:
+                # Map matching is expected to fail for some traces; those
+                # responses must not be counted as benchmark failures.
+                if response.json().get('code') in ('NoSegment', 'NoMatch'):
+                    response.success()
+
+    @task
+    def get_nearest(self):
+        """Query the nearest service for one random point."""
+        coord = random.choice(self.coordinates)
+        self.client.get(f"/nearest/v1/driving/{self._lonlat([coord])}", name="nearest")
+
+    @task
+    def get_trip(self):
+        """Query the trip service for 2-10 random points."""
+        self.client.get(f"/trip/v1/driving/{self._lonlat(self._sample(2, 10))}?steps=true", name="trip")
+
+class OSRMUser(HttpUser):
+    tasks = [OSRMTasks]
+    # Random pause between consecutive requests so the server is not driven at 100% load.
+    wait_time = between(0.05, 0.5)
diff --git a/scripts/ci/post_benchmark_results.py b/scripts/ci/post_benchmark_results.py
@@ -16,8 +16,10 @@ def create_markdown_table(results):
     rows = []
     for result in results:
         name = result['name']
-        base = result['base'].replace('\n', '<br/>')
-        pr = result['pr'].replace('\n', '<br/>')
+        base = result['base'] or ''
+        base = base.replace('\n', '<br/>')
+        pr = result['pr'] or ''
+        pr = pr.replace('\n', '<br/>')
         row = f"| {name} | {base} | {pr} |"
         rows.append(row)
     return f"{header}\n" + "\n".join(rows)
@@ -75,7 +77,14 @@ def main():
     pr_body = pr_details.get('body', '') or ''
 
     markdown_table = create_markdown_table(benchmark_results)
-    new_benchmark_section = f"<!-- BENCHMARK_RESULTS_START -->\n## Benchmark Results\n{markdown_table}\n<!-- BENCHMARK_RESULTS_END -->"
+    new_benchmark_section = f"""
+<!-- BENCHMARK_RESULTS_START -->
+<details><summary><h2>Benchmark Results</h2></summary>
+
+{markdown_table}
+</details>
+<!-- BENCHMARK_RESULTS_END -->
+"""
 
     if re.search(r'<!-- BENCHMARK_RESULTS_START -->.*<!-- BENCHMARK_RESULTS_END -->', pr_body, re.DOTALL):
         updated_body = re.sub(
diff --git a/scripts/ci/process_locust_benchmark_results.py b/scripts/ci/process_locust_benchmark_results.py
@@ -0,0 +1,31 @@
+import sys
+import csv
+
+def main(locust_csv_base_name, suffix, output_folder):
+    """Write one e2e_<name>_<suffix>.bench summary per row of the stats CSV, skipping 'Aggregated'."""
+    with open(f"{locust_csv_base_name}_stats.csv", 'r', newline='') as file:  # newline='' per csv docs
+        for row in csv.DictReader(file):
+            name = row['Name']
+            if name == 'Aggregated':
+                continue
+            statistics = f'''
+requests: {row['Request Count']}
+failures: {row['Failure Count']}
+req/s: {float(row['Requests/s']):.3f}req/s
+avg: {float(row['Average Response Time']):.3f}ms
+50%: {row['50%']}ms
+75%: {row['75%']}ms
+95%: {row['95%']}ms
+98%: {row['98%']}ms
+99%: {row['99%']}ms
+min: {float(row['Min Response Time']):.3f}ms
+max: {float(row['Max Response Time']):.3f}ms
+'''
+            with open(f"{output_folder}/e2e_{name}_{suffix}.bench", 'w') as f:
+                f.write(statistics)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 4:  # script name plus exactly three positional arguments
+        print(f"Usage: {sys.argv[0]} <locust csv base name> <suffix> <output folder>")
+        sys.exit(1)
+    main(*sys.argv[1:4])
diff --git a/scripts/ci/run_benchmarks.sh b/scripts/ci/run_benchmarks.sh
@@ -13,12 +13,45 @@ function run_benchmarks_for_folder {
 
     ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
     ./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
-    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench" || true # TODO: remove `true` when this benchmark will be merged to master
-    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench" || true # TODO: remove `true` when this benchmark will be merged to master
+    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
+    ./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
     ./$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
     ./$BENCHMARKS_FOLDER/json-render-bench  "./$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
     ./$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
     ./$BENCHMARKS_FOLDER/rtree-bench "./$FOLDER/test/data/monaco.osrm.ramIndex" "./$FOLDER/test/data/monaco.osrm.fileIndex" "./$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"
+
+    BINARIES_FOLDER="$FOLDER/build"
+
+    cp ~/data.osm.pbf $FOLDER
+    $BINARIES_FOLDER/osrm-extract -p $FOLDER/profiles/car.lua $FOLDER/data.osm.pbf
+    $BINARIES_FOLDER/osrm-partition $FOLDER/data.osrm
+    $BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm
+    $BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm
+
+    if [ -f "$FOLDER/scripts/ci/locustfile.py" ]; then
+        for ALGORITHM in mld ch; do
+            $BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm &
+            OSRM_ROUTED_PID=$!
+
+            # wait for osrm-routed to start
+            curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true"
+            locust -f $FOLDER/scripts/ci/locustfile.py \
+                --headless \
+                --processes -1 \
+                --users 10 \
+                --spawn-rate 1 \
+                --host http://localhost:5000 \
+                --run-time 1m \
+                --csv=locust_results_$ALGORITHM \
+                --loglevel ERROR
+
+            python3 $FOLDER/scripts/ci/process_locust_benchmark_results.py locust_results_$ALGORITHM $ALGORITHM $RESULTS_FOLDER
+            # Stop this server and reap it so the next algorithm can bind port 5000 (`kill -0` only probes the PID).
+            kill $OSRM_ROUTED_PID
+            wait $OSRM_ROUTED_PID 2>/dev/null || true
+        done
+    fi
+
 }
 
 run_benchmarks_for_folder $1 "${1}_results"
diff --git a/test/data/berlin_gps_traces.csv.gz b/test/data/berlin_gps_traces.csv.gz