diff options
Diffstat (limited to 'dev-util/rocminfo/files/rocminfo-5.5.1-detect-builtin-amdgpu.patch')
-rw-r--r-- | dev-util/rocminfo/files/rocminfo-5.5.1-detect-builtin-amdgpu.patch | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/dev-util/rocminfo/files/rocminfo-5.5.1-detect-builtin-amdgpu.patch b/dev-util/rocminfo/files/rocminfo-5.5.1-detect-builtin-amdgpu.patch new file mode 100644 index 000000000000..dd1aefe4df4f --- /dev/null +++ b/dev-util/rocminfo/files/rocminfo-5.5.1-detect-builtin-amdgpu.patch @@ -0,0 +1,105 @@ +From 3a4d533a1e2a179ad873c480dc4a42ea23681263 Mon Sep 17 00:00:00 2001 +From: Mike Li <Tianxinmike.Li@amd.com> +Date: Wed, 17 Aug 2022 11:44:09 -0400 +Subject: [PATCH 1/2] Check permission and handle PermissionError exception + +Signed-off-by: Mike Li <Tianxinmike.Li@amd.com> +Change-Id: If7cb8464d0b761e4be45c85eb7147ceed609da61 +--- + rocm_agent_enumerator | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/rocm_agent_enumerator b/rocm_agent_enumerator +index 6264a5f..ceb9e11 100755 +--- a/rocm_agent_enumerator ++++ b/rocm_agent_enumerator +@@ -195,10 +195,15 @@ def readFromKFD(): + node_path = os.path.join(topology_dir, node) + if os.path.isdir(node_path): + prop_path = node_path + '/properties' +- if os.path.isfile(prop_path): ++ if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK): + target_search_term = re.compile("gfx_target_version.+") + with open(prop_path) as f: +- line = f.readline() ++ try: ++ line = f.readline() ++ except PermissionError: ++ # We may have a subsystem (e.g. scheduler) limiting device visibility which ++ # could cause a permission error. ++ line = '' + while line != '' : + search_result = target_search_term.search(line) + if search_result is not None: + +From 94b4b3f0a66eb70912177ca7076b4267f8b5449b Mon Sep 17 00:00:00 2001 +From: Johannes Dieterich <johannes.dieterich@amd.com> +Date: Mon, 21 Nov 2022 18:09:55 +0000 +Subject: [PATCH 2/2] Fix rocminfo when run within docker environments + +Currently, rocminfo will fail when executed inside a docker container +due to being unable to lsmod inside docker. This has impacts on +rocprofiler use. + +Fix this behavior by querying initstate of the amdgpu module from +/sys/module/amdgpu instead. If initstate is marked "live" everything if +fine - error out with either "not loaded" (initstate file does not +exist) or "not live" (initstate file does not contain "live" string). + +Change-Id: I6f2e9655942fd4cf840fd3f56b7d69e893fa84d7 +--- + rocminfo.cc | 30 ++++++++++++++++++++++++------ + 1 file changed, 24 insertions(+), 6 deletions(-) + +diff --git a/rocminfo.cc b/rocminfo.cc +index 0842d57..8ed9111 100755 +--- a/rocminfo.cc ++++ b/rocminfo.cc +@@ -51,6 +51,7 @@ + #include <unistd.h> + #include <pwd.h> + ++#include <fstream> + #include <vector> + #include <string> + #include <sstream> +@@ -1039,16 +1040,33 @@ AcquireAndDisplayAgentInfo(hsa_agent_t agent, void* data) { + + int CheckInitialState(void) { + // Check kernel module for ROCk is loaded +- FILE *fd = popen("lsmod | grep amdgpu", "r"); +- char buf[16]; +- if (fread (buf, 1, sizeof (buf), fd) == 0) { ++ ++ std::ifstream amdgpu_initstate("/sys/module/amdgpu/initstate"); ++ if (amdgpu_initstate){ ++ std::stringstream buffer; ++ buffer << amdgpu_initstate.rdbuf(); ++ amdgpu_initstate.close(); ++ ++ std::string line; ++ bool is_live = false; ++ while (std::getline(buffer, line)){ ++ if (line.find( "live" ) != std::string::npos){ ++ is_live = true; ++ break; ++ } ++ } ++ if (is_live){ ++ printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); ++ } else { ++ printf("%sROCk module is NOT live, possibly no GPU devices%s\n", ++ COL_RED, COL_RESET); ++ return -1; ++ } ++ } else { + printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n", + COL_RED, COL_RESET); + return -1; +- } else { +- printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); + } +- pclose(fd); + + // Check if user belongs to the group for /dev/kfd (e.g. "video" or + // "render") |