From acde702ac47ac70e2f3a4d6b722f9ca78928021b Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Mon, 28 Apr 2025 03:58:29 -0700 Subject: [PATCH 1/2] Updates and fixes --- README.md | 1 + configs/BENCH-CONFIG-SPEC.md | 166 ++++++++++++++++ configs/README.md | 168 ++--------------- configs/experiments/README.md | 5 + sklbench/benchmarks/common.py | 10 +- sklbench/benchmarks/custom_function.py | 15 +- sklbench/benchmarks/sklearn_estimator.py | 26 +-- sklbench/datasets/README.md | 4 + sklbench/datasets/__init__.py | 13 +- sklbench/datasets/downloaders.py | 4 +- sklbench/datasets/loaders.py | 9 +- sklbench/datasets/transformer.py | 2 +- sklbench/emulators/svs/neighbors.py | 10 +- sklbench/report/arguments.py | 12 +- sklbench/report/compatibility.py | 182 ++++++++++-------- sklbench/report/implementation.py | 80 ++++++-- sklbench/runner/commands_helper.py | 6 +- sklbench/runner/implementation.py | 22 ++- sklbench/utils/bench_case.py | 2 +- sklbench/utils/config.py | 6 +- sklbench/utils/custom_types.py | 2 + sklbench/utils/env.py | 17 ++ sklbench/utils/logger.py | 2 +- sklbench/utils/measurement.py | 229 ++++++++++++++++++++--- sklbench/utils/special_params.py | 16 +- 25 files changed, 678 insertions(+), 331 deletions(-) create mode 100644 configs/BENCH-CONFIG-SPEC.md create mode 100644 configs/experiments/README.md diff --git a/README.md b/README.md index 7a8c8078..80c8ef57 100755 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ flowchart TB ## 📑 Documentation [Scikit-learn_bench](README.md): - [Configs](configs/README.md) + - [Benchmarking Config Specification](configs/BENCH-CONFIG-SPEC.md) - [Benchmarks Runner](sklbench/runner/README.md) - [Report Generator](sklbench/report/README.md) - [Benchmarks](sklbench/benchmarks/README.md) diff --git a/configs/BENCH-CONFIG-SPEC.md b/configs/BENCH-CONFIG-SPEC.md new file mode 100644 index 00000000..e6b7eb40 --- /dev/null +++ b/configs/BENCH-CONFIG-SPEC.md @@ -0,0 +1,166 @@ +# Benchmarking Configs Specification + +## Config Structure + +Benchmark config files are written in JSON format and have a few reserved keys: + - `INCLUDE` - Other configuration files whose parameter sets to include + - `PARAMETERS_SETS` - Benchmark parameters within each set + - `TEMPLATES` - List different setups with parameters sets template-specific parameters + - `SETS` - List parameters sets to include in the template + +Configs heavily utilize lists of scalar values and dictionaries to avoid duplication of cases. + +Formatting specification: +```json +{ + "INCLUDE": [ + "another_config_file_path_0" + ... + ], + "PARAMETERS_SETS": { + "parameters_set_name_0": Dict or List[Dict] of any JSON-serializable with any level of nesting, + ... + }, + "TEMPLATES": { + "template_name_0": { + "SETS": ["parameters_set_name_0", ...], + Dict of any JSON-serializable with any level of nesting overwriting parameter sets + }, + ... 
+ } +} +``` + +Example +```json +{ + "PARAMETERS_SETS": { + "estimator parameters": { + "algorithm": { + "estimator": "LinearRegression", + "estimator_params": { + "fit_intercept": false + } + } + }, + "regression data": { + "data": [ + { "source": "fetch_openml", "id": 1430 }, + { "dataset": "california_housing" } + ] + } + }, + "TEMPLATES": { + "linear regression": { + "SETS": ["estimator parameters", "regression data"], + "algorithm": { + "library": ["sklearn", "sklearnex", "cuml"] + } + } + } +} +``` + +## Common Parameters + +Configs have the three highest parameter keys: + - `bench` - Specifies a workflow of the benchmark, such as parameters of measurement or profiling + - `algorithm` - Specifies measured entity parameters + - `data` - Specifies data parameters to use + +| Parameter keys | Default value | Choices | Description | +|:---------------|:--------------|:--------|:------------| +|

**Benchmark workflow parameters**
|||| +| `bench`:`taskset` | None | | Value for `-c` argument of `taskset` utility used over benchmark subcommand. | +| `bench`:`vtune_profiling` | None | | Analysis type for `collect` argument of Intel(R) VTune* Profiler tool. Linux* OS only. | +| `bench`:`vtune_results_directory` | `_vtune_results` | | Directory path to store Intel(R) VTune* Profiler results. | +| `bench`:`n_runs` | `10` | | Number of runs for measured entity. | +| `bench`:`time_limit` | `3600` | | Time limit in seconds before the benchmark early stop. | +| `bench`:`memory_profile` | False | | Profiles memory usage of benchmark process. | +| `bench`:`flush_cache` | False | | Flushes cache before every time measurement if enabled. | +| `bench`:`cpu_profile` | False | | Profiles average CPU load during benchmark run. | +| `bench`:`distributor` | None | None, `mpi` | Library used to handle distributed algorithm. | +| `bench`:`mpi_params` | Empty dict | | Parameters for `mpirun` command of MPI library. | +|

**Data parameters**
|||| +| `data`:`cache_directory` | `data_cache` | | Directory path to store cached datasets for fast loading. | +| `data`:`raw_cache_directory` | `data`:`cache_directory` + "raw" | | Directory path to store downloaded raw datasets. | +| `data`:`dataset` | None | | Name of dataset to use from implemented dataset loaders. | +| `data`:`source` | None | `fetch_openml`, `make_regression`, `make_classification`, `make_blobs` | Data source to use for loading or synthetic generation. | +| `data`:`id` | None | | OpenML data id for `fetch_openml` source. | +| `data`:`preprocessing_kwargs`:`replace_nan` | `median` | `median`, `mean` | Value to replace NaNs in preprocessed data. | +| `data`:`preprocessing_kwargs`:`category_encoding` | `ordinal` | `ordinal`, `onehot`, `drop`, `ignore` | How to encode categorical features in preprocessed data. | +| `data`:`preprocessing_kwargs`:`normalize` | False | | Enables normalization of preprocessed data. | +| `data`:`preprocessing_kwargs`:`force_for_sparse` | True | | Forces preprocessing for sparse data formats. | +| `data`:`split_kwargs` | Empty `dict` or default split from dataset description | | Data split parameters for `train_test_split` function. | +| `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. | +| `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: contiguous(C) or Fortran. | +| `data`:`dtype` | `float64` | | Data type to use in benchmark. | +| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. | +|

**Algorithm parameters**
|||| +| `algorithm`:`library` | None | | Python module containing measured entity (class or function). | +| `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. | + +## Benchmark-Specific Parameters + +### `Scikit-learn Estimator` + +| Parameter keys | Default value | Choices | Description | +|:---------------|:--------------|:--------|:------------| +| `algorithm`:`estimator` | None | | Name of measured estimator. | +| `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. | +| `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). | +| `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. | +| `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. | +| `bench`:`ensure_sklearnex_patching` | True | | If True, warns about sklearnex patching failures. | + +### `Function` + +| Parameter keys | Default value | Choices | Description | +|:---------------|:--------------|:--------|:------------| +| `algorithm`:`function` | None | | Name of measured function. | +| `algorithm`:`args_order` | `x_train\|y_train` | Any in format `{subset_0}\|..\|{subset_n}` | Arguments order for measured function. | +| `algorithm`:`kwargs` | Empty `dict` | | Named arguments for measured function. | + +## Special Value + +You can define some parameters as specific from other parameters or properties with `[SPECIAL_VALUE]` prefix in string value: +```json +... "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } ... +... "generation_kwargs": { "n_informative": "[SPECIAL_VALUE]0.5" } ... +``` + +List of available special values: + +| Parameter keys | Benchmark type[s] | Special value | Description | +|:---------------|:------------------|:--------------|:------------| +| `data`:`dataset` | all | `all_named` | Sets datasets to use as list of all named datasets available in loaders. | +| `data`:`generation_kwargs`:`n_informative` | all | *float* value in [0, 1] range | Sets datasets to use as list of all named datasets available in loaders. | +| `bench`:`taskset` | all | Specification of numa nodes in `numa:{numa_node_0}[\|{numa_node_1}...]` format | Sets CPUs affinity using `taskset` utility. | +| `algorithm`:`estimator_params`:`n_jobs` | sklearn_estimator | `physical_cpus`, `logical_cpus`, or ratio of previous ones in format `{type}_cpus:{ratio}` where `ratio` is float | Sets `n_jobs` parameter to a number of physical/logical CPUs or ratio of them for an estimator. | +| `algorithm`:`estimator_params`:`scale_pos_weight` | sklearn_estimator | `auto` | Sets `scale_pos_weight` parameter to `sum(negative instances) / sum(positive instances)` value for estimator. | +| `algorithm`:`estimator_params`:`n_clusters` | sklearn_estimator | `auto` | Sets `n_clusters` parameter to number of clusters or classes from dataset description for estimator. | +| `algorithm`:`estimator_params`:`eps` | sklearn_estimator | `distances_quantile:{quantile}` format where quantile is *float* value in [0, 1] range | Computes `eps` parameter as quantile value of distances in `x_train` matrix for estimator. | + +## Range of Values + +You can define some parameters as a range of values with the `[RANGE]` prefix in string value: +```json +... "generation_kwargs": {"n_features": "[RANGE]pow:2:5:6"} ... 
+``` + +Supported ranges: + + - `add:start{int}:end{int}:step{int}` - Arithmetic progression (Sequence: start + step * i <= end) + - `mul:current{int}:end{int}:step{int}` - Geometric progression (Sequence: current * step <= end) + - `pow:base{int}:start{int}:end{int}[:step{int}=1]` - Powers of base number + +## Removal of Values + +You can remove specific parameter from subset of cases when stacking parameters sets using `[REMOVE]` parameter value: + +```json +... "estimator_params": { "n_jobs": "[REMOVE]" } ... +``` + +--- +[Documentation tree](../README.md#-documentation) diff --git a/configs/README.md b/configs/README.md index 8d3c5ac2..4c31849b 100644 --- a/configs/README.md +++ b/configs/README.md @@ -10,166 +10,20 @@ The configuration file (config) defines: Configs are split into subdirectories and files by benchmark scope and algorithm. -# Benchmarking Configs Specification +# Benchmarking Config Scopes -## Config Structure +| Scope (Folder) | Description | +|:---------------|:---------------| +| `common` | Defines common parameters for other scopes | +| `experiments` | Configurations for specific performance-profiling experiments | +| `regular` | Configurations used to regularly track performance changes | +| `weekly` | Configurations with high-load cases used to track performance changes at longer intervals | +| `spmd` | Configurations used to track the performance of SPMD algorithms | +| `testing` | Configurations used in testing `scikit-learn_bench` | -Benchmark config files are written in JSON format and have a few reserved keys: - - `INCLUDE` - Other configuration files whose parameter sets to include - - `PARAMETERS_SETS` - Benchmark parameters within each set - - `TEMPLATES` - List different setups with parameters sets template-specific parameters - - `SETS` - List parameters sets to include in the template +# Benchmarking Config Specification -Configs heavily utilize lists of scalar values and dictionaries to avoid duplication of cases. - -Formatting specification: -```json -{ - "INCLUDE": [ - "another_config_file_path_0" - ... - ] - "PARAMETERS_SETS": { - "parameters_set_name_0": Dict or List[Dict] of any JSON-serializable with any level of nesting, - ... - }, - "TEMPLATES": { - "template_name_0": { - "SETS": ["parameters_set_name_0", ...], - Dict of any JSON-serializable with any level of nesting overwriting parameter sets - }, - ... - } -} -``` - -Example -```json -{ - "PARAMETERS_SETS": { - "estimator parameters": { - "algorithm": { - "estimator": "LinearRegression", - "estimator_params": { - "fit_intercept": false - } - } - }, - "regression data": { - "data": [ - { "source": "fetch_openml", "id": 1430 }, - { "dataset": "california_housing" } - ] - } - }, - "TEMPLATES": { - "linear regression": { - "SETS": ["estimator parameters", "regression data"], - "algorithm": { - "library": ["sklearn", "sklearnex", "cuml"] - } - } - } -} -``` - -## Common Parameters - -Configs have the three highest parameter keys: - - `bench` - Specifies a workflow of the benchmark, such as parameters of measurement or profiling - - `algorithm` - Specifies measured entity parameters - - `data` - Specifies data parameters to use - -| Parameter keys | Default value | Choices | Description | -|:---------------|:--------------|:--------|:------------| -|

**Benchmark workflow parameters**
|||| -| `bench`:`taskset` | None | | Value for `-c` argument of `taskset` utility used over benchmark subcommand. | -| `bench`:`vtune_profiling` | None | | Analysis type for `collect` argument of Intel(R) VTune* Profiler tool. Linux* OS only. | -| `bench`:`vtune_results_directory` | `_vtune_results` | | Directory path to store Intel(R) VTune* Profiler results. | -| `bench`:`n_runs` | `10` | | Number of runs for measured entity. | -| `bench`:`time_limit` | `3600` | | Time limit in seconds before the benchmark early stop. | -| `bench`:`distributor` | None | None, `mpi` | Library used to handle distributed algorithm. | -| `bench`:`mpi_params` | Empty dict | | Parameters for `mpirun` command of MPI library. | -|

**Data parameters**
|||| -| `data`:`cache_directory` | `data_cache` | | Directory path to store cached datasets for fast loading. | -| `data`:`raw_cache_directory` | `data`:`cache_directory` + "raw" | | Directory path to store downloaded raw datasets. | -| `data`:`dataset` | None | | Name of dataset to use from implemented dataset loaders. | -| `data`:`source` | None | `fetch_openml`, `make_regression`, `make_classification`, `make_blobs` | Data source to use for loading or synthetic generation. | -| `data`:`id` | None | | OpenML data id for `fetch_openml` source. | -| `data`:`preprocessing_kwargs`:`replace_nan` | `median` | `median`, `mean` | Value to replace NaNs in preprocessed data. | -| `data`:`preprocessing_kwargs`:`category_encoding` | `ordinal` | `ordinal`, `onehot`, `drop`, `ignore` | How to encode categorical features in preprocessed data. | -| `data`:`preprocessing_kwargs`:`normalize` | False | | Enables normalization of preprocessed data. | -| `data`:`preprocessing_kwargs`:`force_for_sparse` | True | | Forces preprocessing for sparse data formats. | -| `data`:`split_kwargs` | Empty `dict` or default split from dataset description | | Data split parameters for `train_test_split` function. | -| `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. | -| `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: contiguous(C) or Fortran. | -| `data`:`dtype` | `float64` | | Data type to use in benchmark. | -| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in distributed algorithm. `None` type means usage of all data without split on all machines. `rank_based` type splits the data equally between machines with split sequence based on rank id from MPI. | -|

**Algorithm parameters**
|||| -| `algorithm`:`library` | None | | Python module containing measured entity (class or function). | -| `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. | - -## Benchmark-Specific Parameters - -### `Scikit-learn Estimator` - -| Parameter keys | Default value | Choices | Description | -|:---------------|:--------------|:--------|:------------| -| `algorithm`:`estimator` | None | | Name of measured estimator. | -| `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. | -| `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). | -| `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. | -| `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. | -| `bench`:`ensure_sklearnex_patching` | True | | If True, warns about sklearnex patching failures. | - -### `Function` - -| Parameter keys | Default value | Choices | Description | -|:---------------|:--------------|:--------|:------------| -| `algorithm`:`function` | None | | Name of measured function. | -| `algorithm`:`args_order` | `x_train\|y_train` | Any in format `{subset_0}\|..\|{subset_n}` | Arguments order for measured function. | -| `algorithm`:`kwargs` | Empty `dict` | | Named arguments for measured function. | - -## Special Value - -You can define some parameters as specific from other parameters or properties with `[SPECIAL_VALUE]` prefix in string value: -```json -... "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } ... -... "generation_kwargs": { "n_informative": "[SPECIAL_VALUE]0.5" } ... -``` - -List of available special values: - -| Parameter keys | Benchmark type[s] | Special value | Description | -|:---------------|:------------------|:--------------|:------------| -| `data`:`dataset` | all | `all_named` | Sets datasets to use as list of all named datasets available in loaders. | -| `data`:`generation_kwargs`:`n_informative` | all | *float* value in [0, 1] range | Sets datasets to use as list of all named datasets available in loaders. | -| `bench`:`taskset` | all | Specification of numa nodes in `numa:{numa_node_0}[\|{numa_node_1}...]` format | Sets CPUs affinity using `taskset` utility. | -| `algorithm`:`estimator_params`:`n_jobs` | sklearn_estimator | `physical_cpus`, `logical_cpus`, or ratio of previous ones in format `{type}_cpus:{ratio}` where `ratio` is float | Sets `n_jobs` parameter to a number of physical/logical CPUs or ratio of them for an estimator. | -| `algorithm`:`estimator_params`:`scale_pos_weight` | sklearn_estimator | `auto` | Sets `scale_pos_weight` parameter to `sum(negative instances) / sum(positive instances)` value for estimator. | -| `algorithm`:`estimator_params`:`n_clusters` | sklearn_estimator | `auto` | Sets `n_clusters` parameter to number of clusters or classes from dataset description for estimator. | -| `algorithm`:`estimator_params`:`eps` | sklearn_estimator | `distances_quantile:{quantile}` format where quantile is *float* value in [0, 1] range | Computes `eps` parameter as quantile value of distances in `x_train` matrix for estimator. | - -## Range of Values - -You can define some parameters as a range of values with the `[RANGE]` prefix in string value: -```json -... "generation_kwargs": {"n_features": "[RANGE]pow:2:5:6"} ... 
-``` - -Supported ranges: - - - `add:start{int}:end{int}:step{int}` - Arithmetic progression (Sequence: start + step * i <= end) - - `mul:current{int}:end{int}:step{int}` - Geometric progression (Sequence: current * step <= end) - - `pow:base{int}:start{int}:end{int}[:step{int}=1]` - Powers of base number - -## Removal of Values - -You can remove specific parameter from subset of cases when stacking parameters sets using `[REMOVE]` parameter value: - -```json -... "estimator_params": { "n_jobs": "[REMOVE]" } ... -``` +Refer to [`Benchmarking Config Specification`](BENCH-CONFIG-SPEC.md) for the details how to read and write benchmarking configs in `scikit-learn_bench`. --- [Documentation tree](../README.md#-documentation) diff --git a/configs/experiments/README.md b/configs/experiments/README.md new file mode 100644 index 00000000..2b6225c5 --- /dev/null +++ b/configs/experiments/README.md @@ -0,0 +1,5 @@ +# Experimental Configs + +`daal4py_svd`: tests performance scalability of `daal4py.svd` algorithm + +`nearest_neighbors`: tests performance of neighbors search implementations from `sklearnex`, `sklearn`, `raft`, `faiss` and `svs`. diff --git a/sklbench/benchmarks/common.py b/sklbench/benchmarks/common.py index 7f81386e..1df1e1a5 100644 --- a/sklbench/benchmarks/common.py +++ b/sklbench/benchmarks/common.py @@ -29,9 +29,13 @@ def enrich_result(result: Dict, bench_case: BenchCase) -> Dict: result.update( { "dataset": get_data_name(bench_case, shortened=True), - "library": get_bench_case_value(bench_case, "algorithm:library").replace( - "sklbench.emulators.", "" - ), + "library": get_bench_case_value(bench_case, "algorithm:library") + .replace( + # skipping emulators namespace for conciseness + "sklbench.emulators.", + "", + ) + .replace(".utils", ""), "device": get_bench_case_value(bench_case, "algorithm:device"), } ) diff --git a/sklbench/benchmarks/custom_function.py b/sklbench/benchmarks/custom_function.py index 25abb900..287cbfc8 100644 --- a/sklbench/benchmarks/custom_function.py +++ b/sklbench/benchmarks/custom_function.py @@ -62,14 +62,6 @@ def get_function_args(bench_case: BenchCase, x_train, y_train, x_test, y_test) - return args -def measure_function_instance(bench_case, function_instance, args: Tuple, kwargs: Dict): - metrics = dict() - metrics["time[ms]"], metrics["time std[ms]"], _ = measure_case( - bench_case, function_instance, *args, **kwargs - ) - return metrics - - def main(bench_case: BenchCase, filters: List[BenchCase]): library_name = get_bench_case_value(bench_case, "algorithm:library") function_name = get_bench_case_value(bench_case, "algorithm:function") @@ -93,12 +85,13 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): logger.warning("Benchmarking case was filtered.") return list() - metrics = measure_function_instance( + metrics = measure_case( bench_case, function_instance, - function_args, - get_bench_case_value(bench_case, "algorithm:kwargs", dict()), + *function_args, + **get_bench_case_value(bench_case, "algorithm:kwargs", dict()), ) + result = { "task": "utility", "function": function_name, diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index f9c0a75e..1d49722c 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -425,21 +425,21 @@ def measure_sklearn_estimator( if enable_modelbuilders and stage == "inference": import daal4py - daal_model = daal4py.mb.convert_model( - estimator_instance.get_booster() - ) + if hasattr(estimator_instance, 
"get_booster"): + # XGBoost branch + daal_model = daal4py.mb.convert_model( + estimator_instance.get_booster() + ) + elif hasattr(estimator_instance, "booster_"): + # LightGBM branch + daal_model = daal4py.mb.convert_model(estimator_instance.booster_) + else: + raise ValueError( + "Unable to get convert model to daal4py GBT format." + ) method_instance = getattr(daal_model, method) - metrics[method] = dict() - ( - metrics[method]["time[ms]"], - metrics[method]["time std[ms]"], - _, - ) = measure_case(bench_case, method_instance, *data_args) - if batch_size is not None: - metrics[method]["throughput[samples/ms]"] = ( - (data_args[0].shape[0] // batch_size) * batch_size - ) / metrics[method]["time[ms]"] + metrics[method] = measure_case(bench_case, method_instance, *data_args) if ensure_sklearnex_patching: full_method_name = f"{estimator_class.__name__}.{method}" sklearnex_logging_stream.seek(0) diff --git a/sklbench/datasets/README.md b/sklbench/datasets/README.md index 8589a019..b5fe50cc 100644 --- a/sklbench/datasets/README.md +++ b/sklbench/datasets/README.md @@ -10,9 +10,13 @@ Data handling steps: Existing data sources: - Synthetic data from sklearn - OpenML datasets + - Kaggle competition datasets - Custom loaders for named datasets - User-provided datasets in compatible format +Kaggle API keys and competition rules acceptance are required for next dataset: +- [Bosch Production Line Performance (`bosch`)](https://www.kaggle.com/c/bosch-production-line-performance/overview) + ## Data Caching There are two levels of caching with corresponding directories: `raw cache` for files downloaded from external sources, and just `cache` for files applicable for fast-loading in benchmarks. diff --git a/sklbench/datasets/__init__.py b/sklbench/datasets/__init__.py index 093875c4..81ecc737 100644 --- a/sklbench/datasets/__init__.py +++ b/sklbench/datasets/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. # =============================================================================== +import gc import os from typing import Dict, Tuple @@ -31,7 +32,11 @@ def load_data(bench_case: BenchCase) -> Tuple[Dict, Dict]: # get data name and cache dirs data_name = get_data_name(bench_case, shortened=False) - data_cache = get_bench_case_value(bench_case, "data:cache_directory", "data_cache") + data_cache = get_bench_case_value( + bench_case, + "data:cache_directory", + os.environ.get("SKLBENCH_DATA_CACHE", "data_cache"), + ) raw_data_cache = get_bench_case_value( bench_case, "data:raw_cache_directory", os.path.join(data_cache, "raw") ) @@ -84,3 +89,9 @@ def load_data(bench_case: BenchCase) -> Tuple[Dict, Dict]: "Unable to get data from bench_case:\n" f'{custom_format(get_bench_case_value(bench_case, "data"))}' ) + + +def load_data_with_cleanup(bench_case: BenchCase): + result = load_data(bench_case) + del result + gc.collect() diff --git a/sklbench/datasets/downloaders.py b/sklbench/datasets/downloaders.py index fc1fa5e6..c4bec2ff 100644 --- a/sklbench/datasets/downloaders.py +++ b/sklbench/datasets/downloaders.py @@ -101,7 +101,9 @@ def download_kaggle_files( kaggle_type: str, kaggle_name: str, filenames: List[str], raw_data_cache_dir: str ): if not kaggle_is_imported: - raise ValueError("Kaggle API is not available.") + raise ValueError( + "Kaggle API is not available. Please, check if 'kaggle' package and Kaggle API key are installed." 
+ ) api = kaggle.KaggleApi() api.authenticate() diff --git a/sklbench/datasets/loaders.py b/sklbench/datasets/loaders.py index 20df75b2..6866e052 100644 --- a/sklbench/datasets/loaders.py +++ b/sklbench/datasets/loaders.py @@ -25,7 +25,9 @@ load_digits, load_svmlight_file, make_blobs, + make_circles, make_classification, + make_moons, make_regression, ) @@ -64,6 +66,8 @@ def load_sklearn_synthetic_data( "make_classification": make_classification, "make_regression": make_regression, "make_blobs": make_blobs, + "make_moons": make_moons, + "make_circles": make_circles, } generation_kwargs = {"random_state": 42} generation_kwargs.update(input_kwargs) @@ -79,8 +83,11 @@ def load_sklearn_synthetic_data( data_desc["n_clusters_per_class"] = generation_kwargs.get( "n_clusters_per_class", 2 ) - if function_name == "make_blobs": + elif function_name == "make_blobs": data_desc["n_clusters"] = generation_kwargs["centers"] + elif function_name in ["make_circles", "make_moons"]: + data_desc["n_classes"] = 2 + data_desc["n_clusters"] = 2 return {"x": x, "y": y}, data_desc diff --git a/sklbench/datasets/transformer.py b/sklbench/datasets/transformer.py index d2e63e9e..9fe515b4 100644 --- a/sklbench/datasets/transformer.py +++ b/sklbench/datasets/transformer.py @@ -137,7 +137,7 @@ def split_and_transform_data(bench_case, data, data_description): device = get_bench_case_value(bench_case, "algorithm:device", None) common_data_format = get_bench_case_value(bench_case, "data:format", "pandas") common_data_order = get_bench_case_value(bench_case, "data:order", "F") - common_data_dtype = get_bench_case_value(bench_case, "data:dtype", "float64") + common_data_dtype = get_bench_case_value(bench_case, "data:dtype", "float32") data_dict = { "x_train": x_train, diff --git a/sklbench/emulators/svs/neighbors.py b/sklbench/emulators/svs/neighbors.py index 958438ea..b37c3ec6 100644 --- a/sklbench/emulators/svs/neighbors.py +++ b/sklbench/emulators/svs/neighbors.py @@ -14,7 +14,7 @@ # limitations under the License. 
# =============================================================================== -import pysvs +import svs from psutil import cpu_count from ..common.neighbors import NearestNeighborsBase @@ -42,15 +42,15 @@ def __init__( self.n_jobs = n_jobs def fit(self, X, y=None): - build_params = pysvs.VamanaBuildParameters( + build_params = svs.VamanaBuildParameters( graph_max_degree=self.graph_max_degree, window_size=self.window_size, - num_threads=self.n_jobs, + # num_threads=self.n_jobs, ) - self._index = pysvs.Vamana.build( + self._index = svs.Vamana.build( build_params, X, - pysvs.DistanceType.L2, + svs.DistanceType.L2, num_threads=self.n_jobs, ) return self diff --git a/sklbench/report/arguments.py b/sklbench/report/arguments.py index 166661f1..a42027a4 100644 --- a/sklbench/report/arguments.py +++ b/sklbench/report/arguments.py @@ -53,6 +53,14 @@ def add_report_generator_arguments( help="[EXPERIMENTAL] Compatibility mode drops and modifies results " "to make them comparable (for example, sklearn and cuML parameters).", ) + # included metrics arguments + parser.add_argument( + "--performance-stability-metrics", + "-psm", + default=False, + action="store_true", + help="Adds performance stability metrics in report.", + ) # 'separate-table' report type arguments parser.add_argument( "--drop-columns", @@ -90,14 +98,14 @@ def add_report_generator_arguments( "--perf-color-scale", type=float, nargs="+", - default=[0.8, 1.0, 10.0], + default=[0.5, 1.0, 2.0], help="Color scale for performance metric improvement in report.", ) parser.add_argument( "--quality-color-scale", type=float, nargs="+", - default=[0.99, 0.995, 1.01], + default=[0.98, 1.0, 1.02], help="Color scale for quality metric improvement in report.", ) return parser diff --git a/sklbench/report/compatibility.py b/sklbench/report/compatibility.py index d297b52c..1fdfdf40 100644 --- a/sklbench/report/compatibility.py +++ b/sklbench/report/compatibility.py @@ -34,6 +34,24 @@ def transform_results_to_compatible(results: pd.DataFrame): "min_bin_size", ], ) + if results["environment_name"].unique().size > 1: + # DBSCAN `eps` parameter drop for different CPUs + results.drop( + inplace=True, + errors="ignore", + columns=[ + "eps", + ], + ) + # auto-assigned `n_jobs` drop for different CPUs + if results["n_jobs"].unique().size > 1: + results.drop( + inplace=True, + errors="ignore", + columns=[ + "n_jobs", + ], + ) # cuML compatibility if ( (results["library"] == "cuml") @@ -117,83 +135,97 @@ def transform_results_to_compatible(results: pd.DataFrame): "graph_degree", ], ) - # DBSCAN parameters renaming - cuml_dbscan_index = (results["estimator"] == "DBSCAN") & ( - results["library"] == "cuml" - ) - if cuml_dbscan_index.any(): - results.loc[cuml_dbscan_index, "algorithm"] = "brute" - # KMeans parameters renaming - cuml_kmeans_index = (results["estimator"] == "KMeans") & ( - results["library"] == "cuml" - ) - if cuml_kmeans_index.any(): - results.loc[cuml_kmeans_index, "algorithm"] = "lloyd" - results.loc[ - cuml_kmeans_index & (results["init"] == "scalable-k-means++"), "init" - ] = "k-means++" - # Linear models parameters renaming - linear_index = ( - (results["estimator"] == "LinearRegression") - | (results["estimator"] == "Ridge") - | (results["estimator"] == "Lasso") - | (results["estimator"] == "ElasticNet") - ) & ( - (results["library"] == "cuml") - | (results["library"] == "sklearn") - | (results["library"] == "sklearnex") - ) - if linear_index.any(): - results.loc[linear_index, "algorithm"] = np.nan - results.loc[linear_index, "solver"] = 
np.nan + if "estimator" in results: + # DBSCAN parameters renaming + cuml_dbscan_index = (results["estimator"] == "DBSCAN") & ( + results["library"] == "cuml" + ) + if cuml_dbscan_index.any(): + results.loc[cuml_dbscan_index, "algorithm"] = "brute" + # KMeans parameters renaming + cuml_kmeans_index = (results["estimator"] == "KMeans") & ( + results["library"] == "cuml" + ) + if cuml_kmeans_index.any(): + results.loc[cuml_kmeans_index, "algorithm"] = "lloyd" + results.loc[ + cuml_kmeans_index & (results["init"] == "scalable-k-means++"), "init" + ] = "k-means++" + # Linear models parameters renaming + linear_index = ( + (results["estimator"] == "LinearRegression") + | (results["estimator"] == "Ridge") + | (results["estimator"] == "Lasso") + | (results["estimator"] == "ElasticNet") + ) & ( + (results["library"] == "cuml") + | (results["library"] == "sklearn") + | (results["library"] == "sklearnex") + ) + if linear_index.any(): + results.loc[linear_index, "algorithm"] = np.nan + results.loc[linear_index, "solver"] = np.nan + results.loc[linear_index, "iterations"] = np.nan - sklearn_ridge_index = (results["estimator"] == "Ridge") & ( - (results["library"] == "sklearn") | (results["library"] == "sklearnex") - ) - if sklearn_ridge_index.any(): - results.loc[sklearn_ridge_index, "tol"] = np.nan + sklearn_ridge_index = (results["estimator"] == "Ridge") & ( + (results["library"] == "sklearn") | (results["library"] == "sklearnex") + ) + if sklearn_ridge_index.any(): + results.loc[sklearn_ridge_index, "tol"] = np.nan - cuml_logreg_index = (results["estimator"] == "LogisticRegression") & ( - results["library"] == "cuml" - ) - if cuml_logreg_index.any(): - lbfgs_solver_index = ( - cuml_logreg_index - & (results["solver"] == "qn") - & ((results["penalty"] == "none") | (results["penalty"] == "l2")) + cuml_logreg_index = (results["estimator"] == "LogisticRegression") & ( + results["library"] == "cuml" ) - if lbfgs_solver_index.any(): - results.loc[lbfgs_solver_index, "solver"] = "lbfgs" - # TSNE parameters renaming - cuml_tsne_index = (results["estimator"] == "TSNE") & ( - results["library"] == "cuml" - ) - if cuml_tsne_index.any(): - results.loc[cuml_tsne_index, "n_neighbors"] = np.nan - # SVC parameters renaming - cuml_svc_index = (results["estimator"] == "SVC") & (results["library"] == "cuml") - if cuml_svc_index.any(): - results.loc[cuml_svc_index, "decision_function_shape"] = results.loc[ - cuml_svc_index, "multiclass_strategy" - ] - results.loc[cuml_svc_index, "multiclass_strategy"] = np.nan - # Ensemble parameters renaming - cuml_rf_index = ( - (results["estimator"] == "RandomForestClassifier") - | (results["estimator"] == "RandomForestRegressor") - ) & (results["library"] == "cuml") - if cuml_rf_index.any(): - gini_index = cuml_rf_index & (results["split_criterion"] == 0) - if gini_index.any(): - results.loc[gini_index, "criterion"] = "gini" - results.loc[gini_index, "split_criterion"] = np.nan - mse_index = cuml_rf_index & (results["split_criterion"] == 2) - if mse_index.any(): - results.loc[mse_index, "criterion"] = "squared_error" - results.loc[mse_index, "split_criterion"] = np.nan - inf_leaves_index = cuml_rf_index & (results["max_leaves"] == -1) - if inf_leaves_index.any(): - results.loc[inf_leaves_index, "max_leaf_nodes"] = None - results.loc[inf_leaves_index, "max_leaves"] = np.nan + if cuml_logreg_index.any(): + logreg_index = results["estimator"] == "LogisticRegression" + results.loc[logreg_index, "iterations"] = np.nan + lbfgs_solver_index = ( + cuml_logreg_index + & (results["solver"] 
== "qn") + & ((results["penalty"] == "none") | (results["penalty"] == "l2")) + ) + if lbfgs_solver_index.any(): + results.loc[lbfgs_solver_index, "solver"] = "lbfgs" + # TSNE parameters renaming + cuml_tsne_index = (results["estimator"] == "TSNE") & ( + results["library"] == "cuml" + ) + if cuml_tsne_index.any(): + results.loc[cuml_tsne_index, "n_neighbors"] = np.nan + # SVC parameters renaming + cuml_svc_index = (results["estimator"] == "SVC") & ( + results["library"] == "cuml" + ) + if cuml_svc_index.any(): + results.loc[cuml_svc_index, "decision_function_shape"] = results.loc[ + cuml_svc_index, "multiclass_strategy" + ] + results.loc[cuml_svc_index, "multiclass_strategy"] = np.nan + # Ensemble parameters renaming + cuml_rf_index = ( + (results["estimator"] == "RandomForestClassifier") + | (results["estimator"] == "RandomForestRegressor") + ) & (results["library"] == "cuml") + if cuml_rf_index.any(): + gini_index = cuml_rf_index & (results["split_criterion"] == 0) + if gini_index.any(): + results.loc[gini_index, "criterion"] = "gini" + results.loc[gini_index, "split_criterion"] = np.nan + mse_index = cuml_rf_index & (results["split_criterion"] == 2) + if mse_index.any(): + results.loc[mse_index, "criterion"] = "squared_error" + results.loc[mse_index, "split_criterion"] = np.nan + inf_leaves_index = cuml_rf_index & (results["max_leaves"] == -1) + if inf_leaves_index.any(): + results.loc[inf_leaves_index, "max_leaf_nodes"] = None + results.loc[inf_leaves_index, "max_leaves"] = np.nan + # PCA solver alignment between sklearn[ex] and cuml + pca_index = ( + (results["library"] == "sklearn") + | (results["library"] == "sklearnex") + | (results["library"] == "cuml") + ) & (results["estimator"] == "PCA") + if pca_index.any(): + results.loc[pca_index, "svd_solver"] = "full" return results diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index 1c9c01cd..b998bbab 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -18,6 +18,7 @@ import json from typing import Dict, List +import numpy as np import openpyxl as xl import pandas as pd from openpyxl.formatting.rule import ColorScaleRule @@ -25,13 +26,16 @@ from openpyxl.utils.dataframe import dataframe_to_rows from scipy.stats import gmean -from ..utils.common import custom_format, flatten_dict, flatten_list +from ..utils.common import custom_format, flatten_list from ..utils.logger import logger +from ..utils.measurement import enrich_metrics from .compatibility import transform_results_to_compatible METRICS = { "lower is better": [ + "1st run time[ms]", "time[ms]", + "cost[microdollar]", "iterations", # classification "logloss", @@ -40,8 +44,7 @@ # clustering "inertia", "Davies-Bouldin score", - # manifold - # - TSNE + # manifold - TSNE "Kullback-Leibler divergence", ], "higher is better": [ @@ -69,10 +72,18 @@ # 'clusters' is number of computer clusters by DBSCAN "clusters", ], - "incomparable": ["time std[ms]"], + "incomparable": [ + "1st-mean run ratio", + "time CV", + "cpu load[%]", + ], } +MEMORY_TYPES = ["RAM", "VRAM"] +for memory_type in MEMORY_TYPES: + METRICS["incomparable"].append(f"peak {memory_type} usage[MB]") + METRICS["incomparable"].append(f"{memory_type} usage-iteration correlation") METRIC_NAMES = flatten_list([list(METRICS[key]) for key in METRICS]) -PERF_METRICS = ["time[ms]", "throughput[samples/ms]"] +PERF_METRICS = ["time[ms]", "throughput[samples/ms]", "cost[microdollar]"] COLUMNS_ORDER = [ # algorithm @@ -97,6 +108,21 @@ "batch_size", ] +RED_COLOR, 
YELLOW_COLOR, GREEN_COLOR, WHITE_COLOR = "F85D5E", "FAF52E", "58C144", "FFFFFF" +COLUMN_COLOR_RULES = { + "time CV": ColorScaleRule( + start_type="num", + start_value=0.0, + start_color=GREEN_COLOR, + mid_type="num", + mid_value=0.1, + mid_color=YELLOW_COLOR, + end_type="num", + end_value=0.5, + end_color=RED_COLOR, + ) +} + DIFFBY_COLUMNS = ["environment_name", "library", "format", "device"] @@ -165,6 +191,10 @@ def select_comparison(i, j, diffs_selection): df = input_df.set_index(index_columns) unique_indices = df.index.unique() splitted_dfs = split_df_by_columns(input_df, diff_columns) + for key, df in splitted_dfs.items(): + for index_column in index_columns: + if index_column not in df.columns: + df[index_column] = np.nan splitted_dfs = {key: df.set_index(index_columns) for key, df in splitted_dfs.items()} # drop results with duplicated indices (keep first entry only) @@ -184,6 +214,8 @@ def select_comparison(i, j, diffs_selection): if select_comparison(i, j, diffs_selection): comparison_name = f"{key_jth} vs {key_ith}" for column in df_ith.columns: + if column not in df_jth.columns: + continue if column in METRICS["higher is better"]: df[f"{comparison_name}\n{column} relative improvement"] = ( df_jth[column] / df_ith[column] @@ -235,9 +267,13 @@ def get_result_tables_as_df( diffby_columns=DIFFBY_COLUMNS, splitby_columns=["estimator", "method", "function"], compatibility_mode=False, + include_performance_stability_metrics=False, ): bench_cases = pd.DataFrame( - [flatten_dict(bench_case) for bench_case in results["bench_cases"]] + [ + enrich_metrics(bench_case, include_performance_stability_metrics) + for bench_case in results["bench_cases"] + ] ) if compatibility_mode: @@ -263,32 +299,32 @@ def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame: return summary -def get_color_rule(scale): - red, yellow, green = "F85D5E", "FAF52E", "58C144" +def get_color_rule_for_comparison(scale): start_value, mid_value, end_value = scale return ColorScaleRule( start_type="num", start_value=start_value, - start_color=red, + start_color=RED_COLOR, mid_type="num", mid_value=mid_value, - mid_color=yellow, + mid_color=WHITE_COLOR, end_type="num", end_value=end_value, - end_color=green, + end_color=GREEN_COLOR, ) def apply_rules_for_sheet(sheet, perf_color_scale, quality_color_scale): for column in sheet.iter_cols(): column_idx = get_column_letter(column[0].column) + cell_range = f"${column_idx}1:${column_idx}{len(column)}" is_rel_impr = any( [ isinstance(cell.value, str) and "relative improvement" in cell.value for cell in column ] ) - is_time = any( + is_perf = any( [ isinstance(cell.value, str) and (any(map(lambda x: x in cell.value, PERF_METRICS))) @@ -296,11 +332,19 @@ def apply_rules_for_sheet(sheet, perf_color_scale, quality_color_scale): ] ) if is_rel_impr: - cell_range = f"${column_idx}1:${column_idx}{len(column)}" sheet.conditional_formatting.add( cell_range, - get_color_rule(perf_color_scale if is_time else quality_color_scale), + get_color_rule_for_comparison( + perf_color_scale if is_perf else quality_color_scale + ), ) + else: + column_name = {cell.value for cell in column} & set(COLUMN_COLOR_RULES.keys()) + if len(column_name) == 1: + column_name = column_name.pop() + sheet.conditional_formatting.add( + cell_range, COLUMN_COLOR_RULES[column_name] + ) def write_environment_info(results, workbook): @@ -332,7 +376,13 @@ def generate_report(args: argparse.Namespace): results = merge_result_files(args.result_files) diffby, splitby = args.diff_columns, args.split_columns - dfs = 
get_result_tables_as_df(results, diffby, splitby, args.compatibility_mode) + dfs = get_result_tables_as_df( + results, + diffby, + splitby, + args.compatibility_mode, + args.performance_stability_metrics, + ) wb = xl.Workbook() summary_dfs = list() diff --git a/sklbench/runner/commands_helper.py b/sklbench/runner/commands_helper.py index 09e61369..51379e4b 100644 --- a/sklbench/runner/commands_helper.py +++ b/sklbench/runner/commands_helper.py @@ -100,13 +100,13 @@ def run_benchmark_from_case( logger.debug(f"Benchmark wrapper call command:\n{command}") return_code, stdout, stderr = read_output_from_command(command) - # filter stdout warnings - prefixes_to_skip = ["[W]", "[I]"] + # filter cuML stdout verbosity + suffixes_to_skip = ["[W]", "[I]", "[CUML]"] stdout = "\n".join( [ line for line in stdout.split("\n") - if not any(map(lambda x: line.startswith(x), prefixes_to_skip)) + if not any(map(lambda x: x in line, suffixes_to_skip)) ] ) diff --git a/sklbench/runner/implementation.py b/sklbench/runner/implementation.py index 2375e4b7..cac0bba4 100644 --- a/sklbench/runner/implementation.py +++ b/sklbench/runner/implementation.py @@ -23,7 +23,7 @@ from psutil import cpu_count from tqdm import tqdm -from ..datasets import load_data +from ..datasets import load_data_with_cleanup from ..report import generate_report, get_result_tables_as_df from ..utils.bench_case import get_bench_case_name, get_data_name from ..utils.common import custom_format, hash_from_json_repr @@ -98,11 +98,12 @@ def run_benchmarks(args: argparse.Namespace) -> int: # trick: get unique dataset names only to avoid loading of same dataset # by different cases/processes dataset_cases = {get_data_name(case): case for case in bench_cases} + n_datasets = len(dataset_cases) logger.debug(f"Unique dataset names to load:\n{list(dataset_cases.keys())}") - n_proc = min([16, cpu_count(), len(dataset_cases)]) - logger.info(f"Prefetching datasets with {n_proc} processes") + n_proc = min([16, cpu_count(), n_datasets]) + logger.info(f"Prefetching {n_datasets} datasets with {n_proc} processes") with Pool(n_proc) as pool: - pool.map(load_data, dataset_cases.values()) + pool.map(load_data_with_cleanup, dataset_cases.values()) # run bench_cases return_code, result = call_benchmarks( @@ -113,21 +114,22 @@ def run_benchmarks(args: argparse.Namespace) -> int: args.exit_on_error, ) - # output as pandas dataframe - if len(result["bench_cases"]) != 0: - for key, df in get_result_tables_as_df(result).items(): - logger.info(f'{custom_format(key, bcolor="HEADER")}\n{df}') - # output raw result logger.debug(custom_format(result)) + # save result to file with open(args.result_file, "w") as fp: json.dump(result, fp, indent=4) + # output as pandas dataframe + if len(result["bench_cases"]) != 0: + for key, df in get_result_tables_as_df(result).items(): + logger.info(f'{custom_format(key, bcolor="HEADER")}\n{df}') + # generate report if args.report: if args.result_file not in args.result_files: - args.result_files += [args.result_file] + args.result_files.append(args.result_file) generate_report(args) return return_code diff --git a/sklbench/utils/bench_case.py b/sklbench/utils/bench_case.py index b63f36bb..532453ce 100644 --- a/sklbench/utils/bench_case.py +++ b/sklbench/utils/bench_case.py @@ -112,7 +112,7 @@ def get_data_name(bench_case: BenchCase, shortened: bool = False) -> str: openml_id = get_bench_case_value(bench_case, "data:id") return f"openml_{openml_id}" # make_* - if source in ["make_classification", "make_regression", "make_blobs"]: + if 
source.startswith("make_"): name = source if shortened: return name.replace("classification", "clsf").replace("regression", "regr") diff --git a/sklbench/utils/config.py b/sklbench/utils/config.py index 11de647d..1010b830 100644 --- a/sklbench/utils/config.py +++ b/sklbench/utils/config.py @@ -102,8 +102,10 @@ def parse_config_file(config_path: str) -> List[Dict]: include_content.update(json.load(include_file)["PARAMETERS_SETS"]) else: logger.warning(f"Include file '{include_path}' not found.") - include_content.update(config_content["PARAMETERS_SETS"]) - config_content["PARAMETERS_SETS"] = include_content + if "PARAMETERS_SETS" in config_content: + config_content["PARAMETERS_SETS"].update(include_content) + else: + config_content["PARAMETERS_SETS"] = include_content for template_name, template_content in config_content["TEMPLATES"].items(): new_templates = [{}] # 1st step: pop list of included param sets and add them to template diff --git a/sklbench/utils/custom_types.py b/sklbench/utils/custom_types.py index e30e7de7..887d17a8 100644 --- a/sklbench/utils/custom_types.py +++ b/sklbench/utils/custom_types.py @@ -31,4 +31,6 @@ # case is expected to be nested dict BenchCase = Dict[str, Dict[str, Any]] +BenchResult = Dict[str, Union[Scalar, List]] + Array = Union[pd.DataFrame, np.ndarray, csr_matrix] diff --git a/sklbench/utils/env.py b/sklbench/utils/env.py index 73b6d45e..8b0415b3 100644 --- a/sklbench/utils/env.py +++ b/sklbench/utils/env.py @@ -15,6 +15,8 @@ # =============================================================================== import json +import subprocess +import sys from typing import Dict import pandas as pd @@ -43,6 +45,21 @@ def get_numa_cpus_conf() -> Dict[int, str]: return dict() +def get_number_of_sockets(): + if sys.platform == "win32": + command = "wmic cpu get DeviceID" + result = subprocess.check_output(command, shell=True, text=True) + n_sockets = len(list(filter(lambda x: x.startswith("CPU"), result.split("\n")))) + elif sys.platform == "linux": + command = "lscpu | grep 'Socket(s):' | awk '{print $2}'" + result = subprocess.check_output(command, shell=True, text=True) + n_sockets = int(result.strip("\n")) + else: + logger.warning("Unable to get number of sockets due to unknown sys.platform") + n_sockets = 1 + return n_sockets + + def get_software_info() -> Dict: result = dict() # conda list diff --git a/sklbench/utils/logger.py b/sklbench/utils/logger.py index 90940630..5bd9eaf8 100644 --- a/sklbench/utils/logger.py +++ b/sklbench/utils/logger.py @@ -19,7 +19,7 @@ logger = logging.Logger("sklbench") logging_channel = logging.StreamHandler() -logging_formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s") +logging_formatter = logging.Formatter("%(levelname)s - %(name)s - %(message)s") logging_channel.setFormatter(logging_formatter) logger.addHandler(logging_channel) diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py index 989daefd..3b95f6ac 100644 --- a/sklbench/utils/measurement.py +++ b/sklbench/utils/measurement.py @@ -14,12 +14,22 @@ # limitations under the License. 
# =============================================================================== +import gc +import threading import timeit +import warnings +from math import ceil, sqrt +from time import sleep +from typing import Dict, List import numpy as np +import psutil +from cpuinfo import get_cpu_info +from scipy.stats import pearsonr from .bench_case import get_bench_case_value -from .custom_types import BenchCase +from .custom_types import BenchCase, BenchResult +from .env import get_number_of_sockets from .logger import logger try: @@ -29,24 +39,133 @@ except (ImportError, ModuleNotFoundError): itt_is_available = False +try: + import pynvml + + pynvml.nvmlInit() + + nvml_is_available = True +except (ImportError, ModuleNotFoundError): + nvml_is_available = False + + +def box_filter(array, left=0.2, right=0.8): + array.sort() + size = len(array) + if size == 1 or len(np.unique(array)) == 1: + return array[0], 0.0 + lower, upper = array[int(size * left)], array[int(size * right)] + result = np.array([item for item in array if lower < item < upper]) + return np.mean(result), np.std(result) + + +def enrich_metrics( + bench_result: BenchResult, include_performance_stability_metrics=False +): + """Transforms raw performance and other results into aggregated metrics""" + # time metrics + res = bench_result.copy() + mean, std = box_filter(res["time[ms]"]) + if include_performance_stability_metrics: + res.update( + { + "1st run time[ms]": res["time[ms]"][0], + "1st-mean run ratio": res["time[ms]"][0] / mean, + } + ) + res.update( + { + "time[ms]": mean, + "time CV": std / mean, # Coefficient of Variation + } + ) + cost = res.get("cost[microdollar]", None) + if cost: + res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0] + batch_size = res.get("batch_size", None) + if batch_size: + res["throughput[samples/ms]"] = ( + (res["samples"] // batch_size) * batch_size + ) / mean + # memory metrics + for memory_type in ["RAM", "VRAM"]: + if f"peak {memory_type} usage[MB]" in res: + if include_performance_stability_metrics: + with warnings.catch_warnings(): + # ignoring ConstantInputWarning + warnings.filterwarnings( + "ignore", + message="An input array is constant; the correlation coefficient is not defined", + ) + mem_iter_corr, _ = pearsonr( + res[f"peak {memory_type} usage[MB]"], + list(range(len(res[f"peak {memory_type} usage[MB]"]))), + ) + res[f"{memory_type} usage-iteration correlation"] = mem_iter_corr + res[f"peak {memory_type} usage[MB]"] = max( + res[f"peak {memory_type} usage[MB]"] + ) + # cpu metrics + if "cpu load[%]" in res: + res["cpu load[%]"] = np.median(res["cpu load[%]"]) + return res + + +def get_n_from_cache_size(): + """Gets `n` size of square matrix that fits into L3 cache""" + l3_size = get_cpu_info()["l3_cache_size"] + n_sockets = get_number_of_sockets() + return ceil(sqrt(n_sockets * l3_size / 8)) -def box_filter(timing, left=0.2, right=0.8): - timing.sort() - size = len(timing) - if size == 1: - return timing[0] * 1000, 0 - lower, upper = timing[int(size * left)], timing[int(size * right)] - result = np.array([item for item in timing if lower < item < upper]) - return np.mean(result) * 1000, np.std(result) * 1000 +def flush_cache(n: int = get_n_from_cache_size()): + np.matmul(np.random.rand(n, n), np.random.rand(n, n)) -def measure_time( + +def get_ram_usage(): + """Memory used by the current process in bytes""" + return psutil.Process().memory_info().rss + + +def get_vram_usage(): + """Memory used by the current process on all GPUs in bytes""" + pid = psutil.Process().pid 
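+    # Sums GPU memory that NVML attributes to this process across all visible
+    # devices; note that `usedGpuMemory` may be reported as None on drivers or
+    # platforms where NVML cannot attribute per-process memory.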
+ + device_count = pynvml.nvmlDeviceGetCount() + vram_usage = 0 + for i in range(device_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + process_info = pynvml.nvmlDeviceGetComputeRunningProcesses(handle) + for p in process_info: + if p.pid == pid: + vram_usage += p.usedGpuMemory + return vram_usage + + +def monitor_memory_usage( + interval: float, memory_profiles: Dict[str, List], stop_event, enable_nvml_profiling +): + while not stop_event.is_set(): + memory_profiles["RAM"].append(get_ram_usage()) + if enable_nvml_profiling: + memory_profiles["VRAM"].append(get_vram_usage()) + sleep(interval) + + +def measure_perf( func, *args, - n_runs=20, - time_limit=60 * 60, - std_mean_ratio=0.2, - enable_itt=False, + n_runs: int, + time_limit: float, + enable_itt: bool, + collect_return_values: bool = False, + enable_cache_flushing: bool, + enable_garbage_collection: bool, + enable_cpu_profiling: bool, + enable_memory_profiling: bool, + enable_nvml_profiling: bool = False, + memory_profiling_interval: float = 0.001, + cost_per_hour: float = 0.0, **kwargs, ): if enable_itt and not itt_is_available: @@ -54,17 +173,57 @@ def measure_time( "Intel(R) VTune(TM) profiling was requested " 'but "itt" python module is not available.' ) - times = [] - func_return_value = None + enable_itt = False + times = list() + if collect_return_values: + func_return_values = list() + if enable_cpu_profiling: + cpu_loads = list() + if enable_memory_profiling: + memory_peaks = {"RAM": list()} + if enable_nvml_profiling: + memory_peaks["VRAM"] = list() while len(times) < n_runs: - if enable_itt and itt_is_available: + if enable_cache_flushing: + flush_cache() + if enable_itt: itt.resume() + if enable_memory_profiling: + memory_profiles = {"RAM": list()} + if enable_nvml_profiling: + memory_profiles["VRAM"] = list() + profiling_stop_event = threading.Event() + profiling_thread = threading.Thread( + target=monitor_memory_usage, + args=( + memory_profiling_interval, + memory_profiles, + profiling_stop_event, + enable_nvml_profiling, + ), + ) + profiling_thread.start() + if enable_cpu_profiling: + # start cpu profiling interval by using `None` value + psutil.cpu_percent(interval=None) t0 = timeit.default_timer() func_return_value = func(*args, **kwargs) t1 = timeit.default_timer() - if enable_itt and itt_is_available: + if enable_cpu_profiling: + cpu_loads.append(psutil.cpu_percent(interval=None)) + if enable_memory_profiling: + profiling_stop_event.set() + profiling_thread.join() + memory_peaks["RAM"].append(max(memory_profiles["RAM"])) + if enable_nvml_profiling: + memory_peaks["VRAM"].append(max(memory_profiles["VRAM"])) + if collect_return_values: + func_return_values.append(func_return_value) + if enable_itt: itt.pause() - times.append(t1 - t0) + times.append((t1 - t0)) + if enable_garbage_collection: + gc.collect() if sum(times) > time_limit: logger.warning( f"'{func}' function measurement time " @@ -72,13 +231,25 @@ def measure_time( f"exceeded time limit ({time_limit} seconds)" ) break - mean, std = box_filter(times) - if std / mean > std_mean_ratio: - logger.warning( - f'Measured "std / mean" time ratio of "{str(func)}" function is higher ' - f"than threshold ({round(std / mean, 3)} vs. 
{std_mean_ratio})" + perf_metrics = {"time[ms]": list(map(lambda x: x * 1000, times))} + if enable_memory_profiling: + perf_metrics[f"peak RAM usage[MB]"] = list( + map(lambda x: x / 2**20, memory_peaks["RAM"]) + ) + if enable_nvml_profiling: + perf_metrics[f"peak VRAM usage[MB]"] = list( + map(lambda x: x / 2**20, memory_peaks["VRAM"]) + ) + if enable_cpu_profiling: + perf_metrics["cpu load[%]"] = cpu_loads + if cost_per_hour > 0.0: + perf_metrics["cost[microdollar]"] = list( + map(lambda x: x / 1000 / 3600 * cost_per_hour * 1e6, perf_metrics["time[ms]"]) ) - return mean, std, func_return_value + if collect_return_values: + return perf_metrics, func_return_values + else: + return perf_metrics # wrapper to get measurement params from benchmarking case @@ -90,11 +261,17 @@ def measure_case(case: BenchCase, func, *args, **kwargs): comm = MPI.COMM_WORLD comm.Barrier() - return measure_time( + return measure_perf( func, *args, **kwargs, n_runs=get_bench_case_value(case, "bench:n_runs", 10), time_limit=get_bench_case_value(case, "bench:time_limit", 3600), enable_itt=get_bench_case_value(case, "bench:vtune_profiling") is not None, + enable_cache_flushing=get_bench_case_value(case, "bench:flush_cache", False), + enable_garbage_collection=get_bench_case_value(case, "bench:gc_collect", False), + enable_cpu_profiling=get_bench_case_value(case, "bench:cpu_profile", False), + enable_memory_profiling=get_bench_case_value(case, "bench:memory_profile", False), + enable_nvml_profiling=get_bench_case_value(case, "algorithm:library") == "cuml", + cost_per_hour=get_bench_case_value(case, "bench:cost_per_hour", 0.0), ) diff --git a/sklbench/utils/special_params.py b/sklbench/utils/special_params.py index 49191023..42a8ce32 100644 --- a/sklbench/utils/special_params.py +++ b/sklbench/utils/special_params.py @@ -203,15 +203,15 @@ def assign_case_special_values_on_run( raise ValueError(f'Unknown special value {n_jobs} for "n_jobs"') n_jobs = int(n_cpus * get_ratio_from_n_jobs(n_jobs)) set_bench_case_value(bench_case, "algorithm:estimator_params:n_jobs", n_jobs) - # classes balance for XGBoost + # classes balance for GBT frameworks scale_pos_weight = get_bench_case_value( bench_case, "algorithm:estimator_params:scale_pos_weight", None ) if ( is_special_value(scale_pos_weight) and scale_pos_weight.replace(SP_VALUE_STR, "") == "auto" - and library == "xgboost" - and estimator == "XGBClassifier" + and (library.endswith("gbm") or library.endswith("boost")) + and estimator.endswith("Classifier") ): y_train = convert_to_numpy(data[1]) value_counts = pd.value_counts(y_train).sort_index() @@ -231,6 +231,16 @@ def assign_case_special_values_on_run( "algorithm:estimator_params:scale_pos_weight", scale_pos_weight, ) + # number of classes assignment for multiclass LightGBM + num_classes = get_bench_case_value( + bench_case, "algorithm:estimator_params:num_classes", None + ) + if is_special_value(num_classes) and num_classes.replace(SP_VALUE_STR, "") == "auto": + set_bench_case_value( + bench_case, + "algorithm:estimator_params:num_classes", + data_description.get("n_classes", None), + ) # "n_clusters" auto assignment from data description n_clusters = get_bench_case_value( bench_case, "algorithm:estimator_params:n_clusters", None From 251a00c9e2297badfd302ab16e713ca881a94909 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Mon, 28 Apr 2025 04:34:55 -0700 Subject: [PATCH 2/2] Fix cpuinfo usage on Windows --- sklbench/utils/measurement.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index 3b95f6ac..d8b994ad 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -113,9 +113,15 @@ def enrich_metrics(
 
 def get_n_from_cache_size():
     """Gets `n` size of square matrix that fits into L3 cache"""
-    l3_size = get_cpu_info()["l3_cache_size"]
+    cache_size = 0
+    cpu_info = get_cpu_info()
+    # cpuinfo's ability to report cache sizes is platform dependent
+    if "l3_cache_size" in cpu_info:
+        cache_size += cpu_info["l3_cache_size"]
+    if "l2_cache_size" in cpu_info:
+        cache_size += cpu_info["l2_cache_size"] * psutil.cpu_count(logical=False)
     n_sockets = get_number_of_sockets()
-    return ceil(sqrt(n_sockets * l3_size / 8))
+    return ceil(sqrt(n_sockets * cache_size / 8))
 
 
 def flush_cache(n: int = get_n_from_cache_size()):
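
For readers of the hunk above: `get_n_from_cache_size` sizes a square float64 matrix so that one dense matmul touches roughly the combined L2/L3 capacity reported by `cpuinfo`, which evicts benchmark data from cache between timed runs. A minimal standalone sketch of the same sizing idea follows; the function names and the `fallback_bytes` default are illustrative assumptions rather than part of the patch, and it presumes `cpuinfo` reports cache sizes as integer byte counts, as the code above does.

```python
from math import ceil, sqrt

import numpy as np
import psutil
from cpuinfo import get_cpu_info


def cache_flush_matrix_size(n_sockets: int = 1, fallback_bytes: int = 32 * 1024**2) -> int:
    """Edge length of a float64 square matrix roughly covering L2 + L3 capacity."""
    info = get_cpu_info()
    cache_bytes = 0
    if "l3_cache_size" in info:
        cache_bytes += info["l3_cache_size"]
    if "l2_cache_size" in info:
        # treat L2 as per-core, mirroring the patch, and scale by physical cores
        cache_bytes += info["l2_cache_size"] * (psutil.cpu_count(logical=False) or 1)
    if cache_bytes == 0:
        # cpuinfo does not expose cache sizes on every platform
        cache_bytes = fallback_bytes
    # n * n * 8 bytes of float64 data ~= aggregate cache size across sockets
    return ceil(sqrt(n_sockets * cache_bytes / 8))


def flush_cache(n: int = cache_flush_matrix_size()) -> None:
    # a dense n x n matmul streams enough data to push earlier arrays out of cache
    np.matmul(np.random.rand(n, n), np.random.rand(n, n))
```

Flushing before each timed repetition, as `measure_perf` does when `bench:flush_cache` is enabled, keeps later runs from benefiting from data left in cache by earlier ones.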