diff options
author | V3n3RiX <venerix@koprulu.sector> | 2024-06-27 07:59:40 +0100 |
---|---|---|
committer | V3n3RiX <venerix@koprulu.sector> | 2024-06-27 07:59:40 +0100 |
commit | d2ed973482fdd800013658e83a61709b29e0a80f (patch) | |
tree | 57ea7666a57b5a05a4c8866e4915e90b4a6e7c94 /dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch | |
parent | 9f6a82a85d400d6ae7de04c43cee88dbc6bc4da0 (diff) |
gentoo auto-resync : 27:06:2024 - 07:59:39
Diffstat (limited to 'dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch')
-rw-r--r-- | dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch new file mode 100644 index 000000000000..e65400c792e4 --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch @@ -0,0 +1,204 @@ +Load kernels when compatible by ISA, e. g. if AMDGPU_TARGETS is set +to gfx1030 and some application was started on gfx1036, it loads gfx1030 kernel. + +Based on Debian patch by Cordell Bloor <cgmb@slerp.xyz> +https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch +--- comgr.orig/src/comgr-metadata.cpp ++++ comgr/src/comgr-metadata.cpp +@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC = + static constexpr size_t OffloadBundleMagicLen = + strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC); + +-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) { ++struct GfxPattern { ++ std::string root; ++ std::string suffixes; ++}; ++ ++static bool matches(const GfxPattern& p, StringRef s) { ++ if (p.root.size() + 1 != s.size()) { ++ return false; ++ } ++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) { ++ return false; ++ } ++ return p.suffixes.find(s[p.root.size()]) != std::string::npos; ++} ++ ++static bool isGfx900EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx90", "029c"}, processor); ++} ++ ++static bool isGfx900SupersetProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx90", "0269c"}, processor); ++} ++ ++static bool isGfx1030EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx103", "0123456"}, processor); ++} ++ ++static bool isGfx1010EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx101", "0"}, processor); ++} ++ ++static bool isGfx1010SupersetProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx101", "0123"}, processor); ++} ++ ++enum CompatibilityScore { ++ CS_EXACT_MATCH = 1 << 4, ++ CS_PROCESSOR_MATCH = 1 << 3, ++ CS_PROCESSOR_COMPATIBLE = 1 << 2, ++ CS_XNACK_SPECIALIZED = 1 << 1, ++ CS_SRAM_ECC_SPECIALIZED = 1 << 0, ++ CS_INCOMPATIBLE = 0, ++}; ++ ++static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor, ++ StringRef AgentProcessor) { ++ if (CodeObjectProcessor == AgentProcessor) { ++ return CS_PROCESSOR_MATCH; ++ } ++ ++ bool compatible = false; ++ if (isGfx900SupersetProcessor(AgentProcessor)) { ++ compatible = isGfx900EquivalentProcessor(CodeObjectProcessor); ++ } else if (isGfx1010SupersetProcessor(AgentProcessor)) { ++ compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor); ++ } else if (isGfx1030EquivalentProcessor(AgentProcessor)) { ++ compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor); ++ } ++ ++ return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; ++} ++ ++static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) { + if (IsaName == CodeObjectIsaName) { +- return true; ++ return CS_EXACT_MATCH; + } + + TargetIdentifier CodeObjectIdent; + if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) { +- return false; ++ return CS_INCOMPATIBLE; + } + + TargetIdentifier IsaIdent; + if (parseTargetIdentifier(IsaName, IsaIdent)) { +- return false; ++ return CS_INCOMPATIBLE; + } + +- if (CodeObjectIdent.Processor != IsaIdent.Processor) { +- return false; ++ int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor); ++ if (ProcessorScore == CS_INCOMPATIBLE) { ++ return CS_INCOMPATIBLE; + } + + char CodeObjectXnack = ' ', CodeObjectSramecc = ' '; +@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) { + } + } + ++ int XnackBonus = 0; + if (CodeObjectXnack != ' ') { + if (CodeObjectXnack != IsaXnack) { +- return false; ++ return CS_INCOMPATIBLE; + } ++ XnackBonus = CS_XNACK_SPECIALIZED; + } + ++ int SrameccBonus = 0; + if (CodeObjectSramecc != ' ') { + if (CodeObjectSramecc != IsaSramecc) { +- return false; ++ return CS_INCOMPATIBLE; + } ++ SrameccBonus = CS_SRAM_ECC_SPECIALIZED; + } +- return true; ++ ++ return ProcessorScore + XnackBonus + SrameccBonus; + } + + amd_comgr_status_t +@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP, + return Status; + } + ++ int MaxScore = 0; ++ unsigned MaxScoreItem; + for (unsigned J = 0; J < QueryListSize; J++) { +- if (isCompatibleIsaName(QueryList[J].isa, IsaName)) { +- QueryList[J].offset = 0; +- QueryList[J].size = DataP->Size; +- break; ++ int Score = getCompatiblityScore(QueryList[J].isa, IsaName); ++ if (Score > MaxScore) { ++ MaxScore = Score; ++ MaxScoreItem = J; + } + } + ++ if (MaxScore) { ++ QueryList[MaxScoreItem].offset = 0; ++ QueryList[MaxScoreItem].size = DataP->Size; ++ } ++ + return AMD_COMGR_STATUS_SUCCESS; + } + +@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize); + } + +- int Seen = 0; + BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size), + support::little); + +@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + QueryList[I].size = 0; + } + ++ std::vector<int> QueryListScores(QueryListSize); ++ + // For each code object, extract BundleEntryID information, and check that + // against each ISA in the QueryList + for (uint64_t I = 0; I < NumOfCodeObjects; I++) { +@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + } + + for (unsigned J = 0; J < QueryListSize; J++) { +- // If this QueryList item has already been found to be compatible with ++ // If this QueryList item has exact match with + // another BundleEntryID, no need to check against the current + // BundleEntryID +- if (QueryList[J].size != 0) { ++ if (QueryListScores[J] == CS_EXACT_MATCH) { + continue; + } + + // If the QueryList Isa is compatible with the BundleEntryID, set the + // QueryList offset/size to this BundleEntryID +- if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) { ++ int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second); ++ if (Score > QueryListScores[J]) { ++ QueryListScores[J] = Score; + QueryList[J].offset = BundleEntryCodeObjectOffset; + QueryList[J].size = BundleEntryCodeObjectSize; +- Seen++; +- break; + } + } +- +- // Stop iterating over BundleEntryIDs once we have populated the entire +- // QueryList +- if (Seen == (int) QueryListSize) { +- break; +- } + } + + return AMD_COMGR_STATUS_SUCCESS; |