summaryrefslogtreecommitdiff
path: root/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
diff options
context:
space:
mode:
authorV3n3RiX <venerix@koprulu.sector>2024-06-27 07:59:40 +0100
committerV3n3RiX <venerix@koprulu.sector>2024-06-27 07:59:40 +0100
commitd2ed973482fdd800013658e83a61709b29e0a80f (patch)
tree57ea7666a57b5a05a4c8866e4915e90b4a6e7c94 /dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
parent9f6a82a85d400d6ae7de04c43cee88dbc6bc4da0 (diff)
gentoo auto-resync : 27:06:2024 - 07:59:39
Diffstat (limited to 'dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch')
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch204
1 files changed, 204 insertions, 0 deletions
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
new file mode 100644
index 000000000000..e65400c792e4
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
@@ -0,0 +1,204 @@
+Load kernels when they are ISA-compatible, e.g. if AMDGPU_TARGETS is set
+to gfx1030 and an application is started on gfx1036, it loads the gfx1030 kernel.
+
+Based on Debian patch by Cordell Bloor <cgmb@slerp.xyz>
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch
+--- comgr.orig/src/comgr-metadata.cpp
++++ comgr/src/comgr-metadata.cpp
+@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC =
+ static constexpr size_t OffloadBundleMagicLen =
+ strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC);
+
+-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
++struct GfxPattern {
++ std::string root;
++ std::string suffixes;
++};
++
++static bool matches(const GfxPattern& p, StringRef s) {
++ if (p.root.size() + 1 != s.size()) {
++ return false;
++ }
++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) {
++ return false;
++ }
++ return p.suffixes.find(s[p.root.size()]) != std::string::npos;
++}
++
++static bool isGfx900EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx90", "029c"}, processor);
++}
++
++static bool isGfx900SupersetProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx90", "0269c"}, processor);
++}
++
++static bool isGfx1030EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx103", "0123456"}, processor);
++}
++
++static bool isGfx1010EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx101", "0"}, processor);
++}
++
++static bool isGfx1010SupersetProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx101", "0123"}, processor);
++}
++
++enum CompatibilityScore {
++ CS_EXACT_MATCH = 1 << 4,
++ CS_PROCESSOR_MATCH = 1 << 3,
++ CS_PROCESSOR_COMPATIBLE = 1 << 2,
++ CS_XNACK_SPECIALIZED = 1 << 1,
++ CS_SRAM_ECC_SPECIALIZED = 1 << 0,
++ CS_INCOMPATIBLE = 0,
++};
++
++static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor,
++ StringRef AgentProcessor) {
++ if (CodeObjectProcessor == AgentProcessor) {
++ return CS_PROCESSOR_MATCH;
++ }
++
++ bool compatible = false;
++ if (isGfx900SupersetProcessor(AgentProcessor)) {
++ compatible = isGfx900EquivalentProcessor(CodeObjectProcessor);
++ } else if (isGfx1010SupersetProcessor(AgentProcessor)) {
++ compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor);
++ } else if (isGfx1030EquivalentProcessor(AgentProcessor)) {
++ compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor);
++ }
++
++ return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++}
++
++static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) {
+ if (IsaName == CodeObjectIsaName) {
+- return true;
++ return CS_EXACT_MATCH;
+ }
+
+ TargetIdentifier CodeObjectIdent;
+ if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
+
+ TargetIdentifier IsaIdent;
+ if (parseTargetIdentifier(IsaName, IsaIdent)) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
+
+- if (CodeObjectIdent.Processor != IsaIdent.Processor) {
+- return false;
++ int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor);
++ if (ProcessorScore == CS_INCOMPATIBLE) {
++ return CS_INCOMPATIBLE;
+ }
+
+ char CodeObjectXnack = ' ', CodeObjectSramecc = ' ';
+@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
+ }
+ }
+
++ int XnackBonus = 0;
+ if (CodeObjectXnack != ' ') {
+ if (CodeObjectXnack != IsaXnack) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
++ XnackBonus = CS_XNACK_SPECIALIZED;
+ }
+
++ int SrameccBonus = 0;
+ if (CodeObjectSramecc != ' ') {
+ if (CodeObjectSramecc != IsaSramecc) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
++ SrameccBonus = CS_SRAM_ECC_SPECIALIZED;
+ }
+- return true;
++
++ return ProcessorScore + XnackBonus + SrameccBonus;
+ }
+
+ amd_comgr_status_t
+@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP,
+ return Status;
+ }
+
++ int MaxScore = 0;
++ unsigned MaxScoreItem;
+ for (unsigned J = 0; J < QueryListSize; J++) {
+- if (isCompatibleIsaName(QueryList[J].isa, IsaName)) {
+- QueryList[J].offset = 0;
+- QueryList[J].size = DataP->Size;
+- break;
++ int Score = getCompatiblityScore(QueryList[J].isa, IsaName);
++ if (Score > MaxScore) {
++ MaxScore = Score;
++ MaxScoreItem = J;
+ }
+ }
+
++ if (MaxScore) {
++ QueryList[MaxScoreItem].offset = 0;
++ QueryList[MaxScoreItem].size = DataP->Size;
++ }
++
+ return AMD_COMGR_STATUS_SUCCESS;
+ }
+
+@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize);
+ }
+
+- int Seen = 0;
+ BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size),
+ support::little);
+
+@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ QueryList[I].size = 0;
+ }
+
++ std::vector<int> QueryListScores(QueryListSize);
++
+ // For each code object, extract BundleEntryID information, and check that
+ // against each ISA in the QueryList
+ for (uint64_t I = 0; I < NumOfCodeObjects; I++) {
+@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ }
+
+ for (unsigned J = 0; J < QueryListSize; J++) {
+- // If this QueryList item has already been found to be compatible with
++ // If this QueryList item already has an exact match with
+ // another BundleEntryID, no need to check against the current
+ // BundleEntryID
+- if (QueryList[J].size != 0) {
++ if (QueryListScores[J] == CS_EXACT_MATCH) {
+ continue;
+ }
+
+ // If the QueryList Isa is compatible with the BundleEntryID, set the
+ // QueryList offset/size to this BundleEntryID
+- if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) {
++ int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second);
++ if (Score > QueryListScores[J]) {
++ QueryListScores[J] = Score;
+ QueryList[J].offset = BundleEntryCodeObjectOffset;
+ QueryList[J].size = BundleEntryCodeObjectSize;
+- Seen++;
+- break;
+ }
+ }
+-
+- // Stop iterating over BundleEntryIDs once we have populated the entire
+- // QueryList
+- if (Seen == (int) QueryListSize) {
+- break;
+- }
+ }
+
+ return AMD_COMGR_STATUS_SUCCESS;