summaryrefslogtreecommitdiff
path: root/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
blob: e65400c792e4ee29386fffacf7ea4aece181410e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
Load kernels when compatible by ISA, e.g. if AMDGPU_TARGETS is set to
gfx1030 and an application is started on gfx1036, it loads the gfx1030 kernel.

Based on Debian patch by Cordell Bloor <cgmb@slerp.xyz>
https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch
--- comgr.orig/src/comgr-metadata.cpp
+++ comgr/src/comgr-metadata.cpp
@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC =
 static constexpr size_t OffloadBundleMagicLen =
     strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC);
 
-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
+struct GfxPattern { // Processor-name pattern: a fixed prefix followed by exactly one character
+  std::string root; // Prefix the name must start with, e.g. "gfx90"
+  std::string suffixes; // Set of accepted final characters, e.g. "029c"
+};
+
+static bool matches(const GfxPattern& p, StringRef s) { // True iff s is p.root plus one character taken from p.suffixes
+  if (p.root.size() + 1 != s.size()) { // s must be exactly one character longer than the root
+    return false;
+  }
+  if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) { // leading bytes of s must equal the root
+    return false;
+  }
+  return p.suffixes.find(s[p.root.size()]) != std::string::npos; // final character must be listed in suffixes
+}
+
+static bool isGfx900EquivalentProcessor(StringRef processor) { // True for gfx900, gfx902, gfx909, gfx90c
+  return matches(GfxPattern{"gfx90", "029c"}, processor);
+}
+
+static bool isGfx900SupersetProcessor(StringRef processor) { // True for gfx900, gfx902, gfx906, gfx909, gfx90c
+  return matches(GfxPattern{"gfx90", "0269c"}, processor);
+}
+
+static bool isGfx1030EquivalentProcessor(StringRef processor) { // True for gfx1030 through gfx1036
+  return matches(GfxPattern{"gfx103", "0123456"}, processor);
+}
+
+static bool isGfx1010EquivalentProcessor(StringRef processor) { // True only for gfx1010
+  return matches(GfxPattern{"gfx101", "0"}, processor);
+}
+
+static bool isGfx1010SupersetProcessor(StringRef processor) { // True for gfx1010 through gfx1013
+  return matches(GfxPattern{"gfx101", "0123"}, processor);
+}
+
+enum CompatibilityScore { // Score components; callers keep the candidate with the largest score
+  CS_EXACT_MATCH           = 1 << 4, // the two full ISA name strings are equal
+  CS_PROCESSOR_MATCH       = 1 << 3, // processor names are equal
+  CS_PROCESSOR_COMPATIBLE  = 1 << 2, // processor names differ but pass the family checks
+  CS_XNACK_SPECIALIZED     = 1 << 1, // bonus: code object pins an xnack setting that matches
+  CS_SRAM_ECC_SPECIALIZED  = 1 << 0, // bonus: code object pins an sramecc setting that matches
+  CS_INCOMPATIBLE          = 0, // no match; never selected
+};
+
+static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor, // processor name parsed from the code object's ISA name
+                                          StringRef AgentProcessor) { // processor name parsed from the queried ISA name
+  if (CodeObjectProcessor == AgentProcessor) { // identical names score higher than any family-based match
+    return CS_PROCESSOR_MATCH;
+  }
+
+  bool compatible = false; // stays false when the agent is in none of the families below
+  if (isGfx900SupersetProcessor(AgentProcessor)) { // agent is gfx900/902/906/909/90c
+    compatible = isGfx900EquivalentProcessor(CodeObjectProcessor); // code object must be gfx900/902/909/90c
+  } else if (isGfx1010SupersetProcessor(AgentProcessor)) { // agent is gfx1010..gfx1013
+    compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor); // code object must be gfx1010
+  } else if (isGfx1030EquivalentProcessor(AgentProcessor)) { // agent is gfx1030..gfx1036
+    compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor); // code object must be gfx1030..gfx1036
+  }
+
+  return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
+}
+
+static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) {
   if (IsaName == CodeObjectIsaName) {
-    return true;
+    return CS_EXACT_MATCH;
   }
 
   TargetIdentifier CodeObjectIdent;
   if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) {
-    return false;
+    return CS_INCOMPATIBLE;
   }
 
   TargetIdentifier IsaIdent;
   if (parseTargetIdentifier(IsaName, IsaIdent)) {
-    return false;
+    return CS_INCOMPATIBLE;
   }
 
-  if (CodeObjectIdent.Processor != IsaIdent.Processor) {
-    return false;
+  int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor);
+  if (ProcessorScore == CS_INCOMPATIBLE) {
+    return CS_INCOMPATIBLE;
   }
 
   char CodeObjectXnack = ' ', CodeObjectSramecc = ' ';
@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
     }
   }
 
+  int XnackBonus = 0;
   if (CodeObjectXnack != ' ') {
     if (CodeObjectXnack != IsaXnack) {
-      return false;
+      return CS_INCOMPATIBLE;
     }
+    XnackBonus = CS_XNACK_SPECIALIZED;
   }
 
+  int SrameccBonus = 0;
   if (CodeObjectSramecc != ' ') {
     if (CodeObjectSramecc != IsaSramecc) {
-      return false;
+      return CS_INCOMPATIBLE;
     }
+    SrameccBonus = CS_SRAM_ECC_SPECIALIZED;
   }
-  return true;
+
+  return ProcessorScore + XnackBonus + SrameccBonus;
 }
 
 amd_comgr_status_t
@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP,
     return Status;
   }
 
+  int MaxScore = 0;
+  unsigned MaxScoreItem;
   for (unsigned J = 0; J < QueryListSize; J++) {
-    if (isCompatibleIsaName(QueryList[J].isa, IsaName)) {
-      QueryList[J].offset = 0;
-      QueryList[J].size = DataP->Size;
-      break;
+    int Score = getCompatiblityScore(QueryList[J].isa, IsaName);
+    if (Score > MaxScore) {
+      MaxScore = Score;
+      MaxScoreItem = J;
     }
   }
 
+  if (MaxScore) {
+    QueryList[MaxScoreItem].offset = 0;
+    QueryList[MaxScoreItem].size = DataP->Size;
+  }
+
   return AMD_COMGR_STATUS_SUCCESS;
 }
 
@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize);
   }
 
-  int Seen = 0;
   BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size),
                             support::little);
 
@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     QueryList[I].size = 0;
   }
 
+  std::vector<int> QueryListScores(QueryListSize);
+
   // For each code object, extract BundleEntryID information, and check that
   // against each ISA in the QueryList
   for (uint64_t I = 0; I < NumOfCodeObjects; I++) {
@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     }
 
     for (unsigned J = 0; J < QueryListSize; J++) {
-      // If this QueryList item has already been found to be compatible with
+      // If this QueryList item already has an exact match with
       // another BundleEntryID, no need to check against the current
       // BundleEntryID
-      if (QueryList[J].size != 0) {
+      if (QueryListScores[J] == CS_EXACT_MATCH) {
         continue;
       }
 
       // If the QueryList Isa is compatible with the BundleEntryID, set the
       // QueryList offset/size to this BundleEntryID
-      if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) {
+      int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second);
+      if (Score > QueryListScores[J]) {
+        QueryListScores[J] = Score;
         QueryList[J].offset = BundleEntryCodeObjectOffset;
         QueryList[J].size = BundleEntryCodeObjectSize;
-        Seen++;
-        break;
       }
     }
-
-    // Stop iterating over BundleEntryIDs once we have populated the entire
-    // QueryList
-    if (Seen == (int) QueryListSize) {
-      break;
-    }
   }
 
   return AMD_COMGR_STATUS_SUCCESS;