Konstantin Zhuravlyov committed on
Commit
f391d7a
·
unverified ·
1 Parent(s): 5d1dffc

ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (llama/4787)

Browse files
Files changed (1) hide show
  1. ggml-cuda.cu +1 -1
ggml-cuda.cu CHANGED
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
183
  static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
184
  #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
185
  c = __builtin_amdgcn_sdot4(a, b, c, false);
186
- #elif defined(__gfx1100__)
187
  c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
188
  #elif defined(__gfx1010__) || defined(__gfx900__)
189
  int tmp1;
 
183
  static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
184
  #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
185
  c = __builtin_amdgcn_sdot4(a, b, c, false);
186
+ #elif defined(RDNA3)
187
  c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
188
  #elif defined(__gfx1010__) || defined(__gfx900__)
189
  int tmp1;