Merge pull request #291 from huangshiyu13/main

huangshiyu13 · web-flow · commit 7ccecd28a7c8 · 2024-01-03T11:32:54.000+08:00
update
diff --git a/openrl/supports/opengpu/manager.py b/openrl/supports/opengpu/manager.py
@@ -20,89 +20,91 @@
 import traceback
 from typing import List, Union
 
-from openrl.supports.opengpu.gpu_info import get_local_GPU_info, get_remote_GPU_info
-
-
-class RemoteGPUManager:
-    def __init__(self, pytorch_config=None, check: bool = False):
-        self.gpu_info_dict = get_remote_GPU_info()
-        self.pytorch_config = pytorch_config
-        self.server_list = []
-        if self.pytorch_config is not None:
-            for server_address in self.pytorch_config.GPU_usage_dict:
-                self.server_list.append(server_address)
-
-        if check:
-            self.check_gpus()
-
-        self.cal_learner_number()
-
-    def check_gpus(self):
-        assert self.pytorch_config is not None
-        assert len(self.server_list) > 0
-
-        bad_gpus = []
-        for server_address in self.server_list:
-            assert (
-                server_address in self.gpu_info_dict
-            ), "can not get gpu info from {}".format(server_address)
-            assert len(self.gpu_info_dict[server_address]["gpu_infos"]) > 0
-
-            for gpu_info in self.gpu_info_dict[server_address]["gpu_infos"]:
-                if (
-                    self.pytorch_config.GPU_usage_dict[server_address]["gpus"] == "all"
-                    or gpu_info["gpu"]
-                    in self.pytorch_config.GPU_usage_dict[server_address]["gpus"]
-                ):
-                    if (
-                        gpu_info["memory"]["total"] - gpu_info["memory"]["used"]
-                        < self.pytorch_config.min_memory_per_gpu
-                    ):
-                        bad_gpus.append(
-                            {
-                                "server": server_address,
-                                "gpu": gpu_info["gpu"],
-                                "free": (
-                                    gpu_info["memory"]["total"]
-                                    - gpu_info["memory"]["used"]
-                                ),
-                            }
-                        )
-        if len(bad_gpus) > 0:
-            for bad_gpu in bad_gpus:
-                print(
-                    "server:{} GPU:{}, minimal memory {}GB, but only get {}GB free"
-                    " memory.".format(
-                        bad_gpu["server"],
-                        bad_gpu["gpu"],
-                        self.pytorch_config.min_memory_per_gpu,
-                        bad_gpu["free"],
-                    )
-                )
-            assert False, "GPUs not satisfy."
-
-    def cal_learner_number(self):
-        self.server_gpu_mapping = {}
-        gpu_num = 0
-        for server_address in self.server_list:
-            gpu_mapping = {}
-            for gpu_info in self.gpu_info_dict[server_address]["gpu_infos"]:
-                if (
-                    self.pytorch_config.GPU_usage_dict[server_address]["gpus"] == "all"
-                    or gpu_info["gpu"]
-                    in self.pytorch_config.GPU_usage_dict[server_address]["gpus"]
-                ):
-                    gpu_mapping[gpu_info["gpu"]] = gpu_num
-                    gpu_num += 1
-            self.server_gpu_mapping[server_address] = gpu_mapping
-        self.learner_num = gpu_num
-
-    def get_gpu_info(self, server_list: list):
-        gpu_infos = {}
-        for server_address in server_list:
-            if server_address in self.gpu_info_dict:
-                gpu_infos[server_address] = self.gpu_info_dict[server_address]
-        return gpu_infos
+from openrl.supports.opengpu.gpu_info import get_local_GPU_info
+
+# from openrl.supports.opengpu.gpu_info import get_remote_GPU_info
+
+
+# class RemoteGPUManager:
+#     def __init__(self, pytorch_config=None, check: bool = False):
+#         self.gpu_info_dict = get_remote_GPU_info()
+#         self.pytorch_config = pytorch_config
+#         self.server_list = []
+#         if self.pytorch_config is not None:
+#             for server_address in self.pytorch_config.GPU_usage_dict:
+#                 self.server_list.append(server_address)
+#
+#         if check:
+#             self.check_gpus()
+#
+#         self.cal_learner_number()
+#
+#     def check_gpus(self):
+#         assert self.pytorch_config is not None
+#         assert len(self.server_list) > 0
+#
+#         bad_gpus = []
+#         for server_address in self.server_list:
+#             assert (
+#                 server_address in self.gpu_info_dict
+#             ), "can not get gpu info from {}".format(server_address)
+#             assert len(self.gpu_info_dict[server_address]["gpu_infos"]) > 0
+#
+#             for gpu_info in self.gpu_info_dict[server_address]["gpu_infos"]:
+#                 if (
+#                     self.pytorch_config.GPU_usage_dict[server_address]["gpus"] == "all"
+#                     or gpu_info["gpu"]
+#                     in self.pytorch_config.GPU_usage_dict[server_address]["gpus"]
+#                 ):
+#                     if (
+#                         gpu_info["memory"]["total"] - gpu_info["memory"]["used"]
+#                         < self.pytorch_config.min_memory_per_gpu
+#                     ):
+#                         bad_gpus.append(
+#                             {
+#                                 "server": server_address,
+#                                 "gpu": gpu_info["gpu"],
+#                                 "free": (
+#                                     gpu_info["memory"]["total"]
+#                                     - gpu_info["memory"]["used"]
+#                                 ),
+#                             }
+#                         )
+#         if len(bad_gpus) > 0:
+#             for bad_gpu in bad_gpus:
+#                 print(
+#                     "server:{} GPU:{}, minimal memory {}GB, but only get {}GB free"
+#                     " memory.".format(
+#                         bad_gpu["server"],
+#                         bad_gpu["gpu"],
+#                         self.pytorch_config.min_memory_per_gpu,
+#                         bad_gpu["free"],
+#                     )
+#                 )
+#             assert False, "GPUs not satisfy."
+#
+#     def cal_learner_number(self):
+#         self.server_gpu_mapping = {}
+#         gpu_num = 0
+#         for server_address in self.server_list:
+#             gpu_mapping = {}
+#             for gpu_info in self.gpu_info_dict[server_address]["gpu_infos"]:
+#                 if (
+#                     self.pytorch_config.GPU_usage_dict[server_address]["gpus"] == "all"
+#                     or gpu_info["gpu"]
+#                     in self.pytorch_config.GPU_usage_dict[server_address]["gpus"]
+#                 ):
+#                     gpu_mapping[gpu_info["gpu"]] = gpu_num
+#                     gpu_num += 1
+#             self.server_gpu_mapping[server_address] = gpu_mapping
+#         self.learner_num = gpu_num
+#
+#     def get_gpu_info(self, server_list: list):
+#         gpu_infos = {}
+#         for server_address in server_list:
+#             if server_address in self.gpu_info_dict:
+#                 gpu_infos[server_address] = self.gpu_info_dict[server_address]
+#         return gpu_infos
 
 
 class LocalGPUManager:
diff --git a/tests/test_supports/test_opendata/test_opendata.py b/tests/test_supports/test_opendata/test_opendata.py
@@ -18,17 +18,45 @@
 
 import os
 import sys
+from pathlib import Path
 
 import pytest
 
-from openrl.supports.opendata.utils.opendata_utils import data_abs_path
+from openrl.supports.opendata.utils.opendata_utils import data_abs_path, load_dataset
 
 
 @pytest.mark.unittest
-def test_data_abs_path():
+def test_data_abs_path(tmpdir):
     data_path = "./"
     assert data_abs_path(data_path) == data_path
 
+    data_server_dir = Path.home() / "data_server/"
+
+    new_create = False
+    if not data_server_dir.exists():
+        data_server_dir.mkdir()
+        new_create = True
+    data_abs_path("data_server://data_path")
+    if new_create:
+        data_server_dir.rmdir()
+    data_abs_path("data_server://data_path", str(tmpdir))
+
+
+@pytest.mark.unittest
+def test_load_dataset(tmpdir):  # smoke test: each call may raise, which is tolerated
+    try:  # case 1: the pytest-provided temporary directory itself
+        load_dataset(str(tmpdir), "train")
+    except Exception:
+        pass
+    try:  # case 2: a "data_server://"-prefixed path
+        load_dataset("data_server://data_path", "train")
+    except Exception:
+        pass
+    try:  # case 3: a "/test" sub-path of tmpdir that this test never creates
+        load_dataset(str(tmpdir) + "/test", "train")
+    except Exception:
+        pass
+
 
 if __name__ == "__main__":
     sys.exit(pytest.main(["-sv", os.path.basename(__file__)]))
diff --git a/tests/test_supports/test_opengpu/test_gpuinfo.py b/tests/test_supports/test_opengpu/test_gpuinfo.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2023 The OpenRL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""""""
+
+import os
+import sys
+
+import pytest
+
+from openrl.supports.opengpu.gpu_info import preserve_decimal
+
+
+@pytest.mark.unittest
+def test_preserve_decimal():  # smoke test: no assertions, only checks the calls run
+    preserve_decimal(1, 2)  # integer first argument
+    preserve_decimal(1.1, 0)  # second argument of zero
+    preserve_decimal(1.1, -1)  # negative second argument
+    preserve_decimal(1.1, 4)  # positive float, second argument of 4
+    preserve_decimal(-1.1, 4)  # negative float, second argument of 4
+    preserve_decimal(-0.1, 0)  # negative float, second argument of zero
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-sv", os.path.basename(__file__)]))
diff --git a/tests/test_supports/test_opengpu/test_manager.py b/tests/test_supports/test_opengpu/test_manager.py
@@ -16,6 +16,7 @@
 
 """"""
 
+import argparse
 import os
 import sys
 
@@ -24,12 +25,56 @@
 from openrl.supports.opengpu.manager import LocalGPUManager
 
 
+@pytest.fixture(
+    scope="module",
+    params=[
+        # Add different parameter combinations to test
+        0,
+        1,
+        2,
+        None,
+    ],
+)
+def learner_num(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def disable_cuda(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=["all", "single", "error_type"])
+def gpu_usage_type(request):
+    return request.param
+
+
+@pytest.fixture(
+    scope="module",
+)
+def args(learner_num, disable_cuda, gpu_usage_type):
+    if learner_num is None:
+        return None
+    current_dict = {}
+    current_dict["learner_num"] = learner_num
+    current_dict["disable_cuda"] = disable_cuda
+    current_dict["gpu_usage_type"] = gpu_usage_type
+
+    return argparse.Namespace(**current_dict)
+
+
 @pytest.mark.unittest
-def test_local_manager():
-    manager = LocalGPUManager()
+def test_local_manager(args):
+    manager = LocalGPUManager(args)
     manager.get_gpu()
-    manager.get_learner_gpu()
-    assert isinstance(manager.get_learner_gpus(), list)
+    try:
+        manager.get_learner_gpu()
+    except IndexError as e:
+        print("Caught an IndexError:", e)
+    try:
+        assert isinstance(manager.get_learner_gpus(), list)
+    except IndexError as e:
+        print("Caught an IndexError:", e)
     manager.get_worker_gpu()
     manager.log_info()