blob: 27266b0b717a9b4d005fb24a659ade4e72a80d45 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# Copyright 1999-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
# Ebuild metadata for hipBLASLt, built from the upstream tarball tagged rocm-${PV}.
EAPI=8
# Must be set before 'inherit' so that rocm.eclass sees it.
# NOTE(review): presumably tells rocm.eclass not to define its own global
# IUSE/dependency variables, because IUSE is assembled from SUPPORTED_GPUS
# below -- confirm against the eclass.
ROCM_SKIP_GLOBALS=1
# Python implementations accepted for the build-time interpreter (python-any-r1).
PYTHON_COMPAT=( python3_{10..13} )
# LLVM slots accepted by llvm-r1; only slot 19 is listed.
LLVM_COMPAT=( 19 )
inherit cmake flag-o-matic llvm-r1 python-any-r1 rocm
DESCRIPTION="General matrix-matrix operations library for AMD Instinct accelerators"
HOMEPAGE="https://github.com/ROCm/hipBLASLt"
SRC_URI="https://github.com/ROCm/hipBLASLt/archive/rocm-${PV}.tar.gz -> ${P}.tar.gz"
# The tarball unpacks to a directory named after the upstream tag, not ${P}.
S="${WORKDIR}/hipBLASLt-rocm-${PV}"
LICENSE="MIT"
# The subslot is the first two components of the package version.
SLOT="0/$(ver_cut 1-2)"
KEYWORDS="~amd64"
# GPU architectures this ebuild offers as amdgpu_targets_* USE flags.
SUPPORTED_GPUS=( gfx908 gfx90a gfx940 gfx941 gfx942 gfx1100 gfx1101 )
# Prefix every architecture with "amdgpu_targets_" to form the USE flag names.
IUSE_TARGETS=( "${SUPPORTED_GPUS[@]/#/amdgpu_targets_}" )
# The leading "+" marks every GPU target flag as enabled by default.
IUSE="${IUSE_TARGETS[@]/#/+} test benchmark"
# Tests are restricted unless USE=test is enabled.
RESTRICT="!test? ( test )"
# Runtime dependencies; hip is pinned to this package's own slot/subslot.
RDEPEND="
dev-util/hip:${SLOT}
dev-cpp/msgpack-cxx
"
DEPEND="${RDEPEND}"
# Build-time dependencies: the Python modules also verified by
# python_check_deps, clang from the selected LLVM slot, and extra packages
# that are only pulled in for the test and benchmark clients.
BDEPEND="
dev-build/rocm-cmake
sci-libs/hipBLAS-common:${SLOT}
$(python_gen_any_dep '
dev-python/msgpack[${PYTHON_USEDEP}]
dev-python/pyyaml[${PYTHON_USEDEP}]
dev-python/joblib[${PYTHON_USEDEP}]
')
$(llvm_gen_dep 'llvm-core/clang:${LLVM_SLOT}')
test? (
dev-cpp/gtest
virtual/blas
dev-util/rocm-smi:${SLOT}
)
benchmark? (
virtual/blas
llvm-runtimes/openmp
)
"
# Static patches shipped in FILESDIR. The templated gentoopath patch is not
# listed here; src_prepare instantiates and applies it separately.
PATCHES=(
"${FILESDIR}"/${PN}-6.1.1-fix-msgpack-dependency.patch
"${FILESDIR}"/${PN}-6.1.1-no-arch.patch
"${FILESDIR}"/${PN}-6.1.1-no-git.patch
"${FILESDIR}"/${PN}-6.1.1-clang-19.patch
"${FILESDIR}"/${PN}-6.1.1-fix-libcxx.patch
"${FILESDIR}"/${PN}-6.3.0-no-arch-extra.patch
"${FILESDIR}"/${PN}-6.3.0-min-pip-install.patch
)
# Succeed only if every Python module needed at build time is installed for
# the interpreter currently being probed. Packages are checked in order and
# probing stops at the first missing one, whose status is returned unchanged.
python_check_deps() {
	local module
	for module in msgpack pyyaml joblib; do
		python_has_version "dev-python/${module}[${PYTHON_USEDEP}]" || return
	done
}
# Delegate package setup to python-any-r1's pkg_setup phase. It is called
# explicitly by name here; several inherited eclasses may export their own
# pkg_setup (NOTE(review): confirm which ones do), and this picks one.
pkg_setup() {
python-any-r1_pkg_setup
}
# Warn when no GPU target is enabled: the package is still built, but only as
# a non-functional placeholder library.
pkg_pretend() {
	# [*] collapses the value to one word, so the test is well-formed whether
	# AMDGPU_TARGETS is unset, empty, a plain string or an array.
	if [[ -z ${AMDGPU_TARGETS[*]} ]]; then
		# [*] keeps the whole sentence in a single argument; [@] inside the
		# quotes would split it into one argument per GPU name.
		ewarn "hipBLASLt supports only few GPUs: ${SUPPORTED_GPUS[*]},"
		ewarn "but none of them were defined in AMDGPU_TARGETS USE_EXPAND variable."
		ewarn
		ewarn "Library will continue to be built in \"dummy\" mode,"
		ewarn "serving as a non-functional placeholder for end-user applications."
	fi
}
# Prepare the source tree: instantiate and apply the LLVM path patch, fix the
# Python shebangs in Tensile, point the kernel compile script at the selected
# clang++, and finally run the cmake eclass preparation.
src_prepare() {
	local llvm_prefix
	llvm_prefix="$(get_llvm_prefix)"

	# The gentoopath patch is a template: substitute the real LLVM prefix for
	# the @LLVM_PATH@ placeholder, then apply the generated copy.
	local generated_patch="${S}/gentoopath.patch"
	sed -e "s,\@LLVM_PATH\@,${llvm_prefix},g" \
		"${FILESDIR}/${PN}-6.1.1-gentoopath.patch" > "${generated_patch}" || die
	eapply "${generated_patch}"

	# Collect every file under tensilelite/Tensile that carries the env-based
	# python3 shebang and rewrite it for the selected interpreter.
	local py_scripts=($(grep -rl "#!/usr/bin/env python3" tensilelite/Tensile || die))
	python_fix_shebang -q ${py_scripts[*]}

	# Replace the amdclang++ reference with clang++ from the chosen LLVM slot.
	local compile_script=library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh
	sed -e "s:\${rocm_path}/bin/amdclang++:${llvm_prefix}/bin/clang++:" \
		-i "${compile_script}" || die

	cmake_src_prepare
}
# Configure the CMake build. Tensile (the real GPU kernel backend) is enabled
# only when at least one AMDGPU target is selected; test and benchmark clients
# follow their USE flags.
src_configure() {
	rocm_use_hipcc

	# too many warnings
	append-cxxflags -Wno-explicit-specialization-storage-class

	local targets
	targets="$(get_amdgpu_flags)"

	# Use [[ -z ... ]] with [*] rather than `[ "${AMDGPU_TARGETS[@]}" = "" ]`:
	# the latter expands to zero words when the variable is unset (or an empty
	# array), leaving the malformed test `[ = "" ]`, whose error status made
	# the old code fall through to ON even though no target was selected.
	local build_with_tensile=ON
	if [[ -z ${AMDGPU_TARGETS[*]} ]]; then
		build_with_tensile=OFF
	fi

	local mycmakeargs=(
		-DROCM_SYMLINK_LIBS=OFF
		-DBUILD_WITH_TENSILE="${build_with_tensile}"
		-DAMDGPU_TARGETS="${targets}"
		-DBUILD_CLIENTS_TESTS="$(usex test ON OFF)"
		-DBUILD_CLIENTS_BENCHMARKS="$(usex benchmark ON OFF)"
		-Wno-dev
	)

	cmake_src_configure
}
# Build with the environment Tensile expects: ROCm root, the build-tree
# virtualenv on PYTHONPATH, and clang++ from the selected LLVM slot as both
# assembler and C++ compiler. All variables are exported for this phase only.
src_compile() {
	local llvm_clangxx
	llvm_clangxx="$(get_llvm_prefix)/bin/clang++"

	# set PYTHONPATH to load Tensile from virtualenv, not the system-wide one
	local -x PYTHONPATH="${S}_build/virtualenv/lib/${EPYTHON}/site-packages"
	local -x ROCM_PATH="${EPREFIX}/usr"
	local -x TENSILE_ROCM_ASSEMBLER_PATH="${llvm_clangxx}"
	# TensileCreateLibrary reads CMAKE_CXX_COMPILER again
	local -x CMAKE_CXX_COMPILER="${llvm_clangxx}"

	cmake_src_compile
}
# Install through CMake, then exclude the kernel library directory from
# stripping.
src_install() {
	cmake_src_install

	# Stop llvm-strip from removing .strtab section from *.hsaco files,
	# otherwise rocclr/elf/elf.cpp complains with "failed: null sections(STRTAB)" and crashes
	local kernel_dir="/usr/$(get_libdir)/hipblaslt/library/"
	dostrip -x "${kernel_dir}"
}
# Run the test suite through the cmake eclass after calling check_amdgpu.
# NOTE(review): check_amdgpu presumably verifies that a usable AMD GPU device
# is accessible from the build environment -- confirm against rocm.eclass.
src_test() {
check_amdgpu
# Expected time for 7900 XTX: 340s (full) or 5s with GTEST_FILTER='*quick*'
cmake_src_test
}
|