ggerganov committed on
Commit
df03a10
·
unverified ·
1 Parent(s): 44164ac

ci : enable -Werror for CUDA builds (llama/5579)

Browse files

* cmake : pass -Werror through -Xcompiler

ggml-ci

* make, cmake : enable CUDA errors on warnings

ggml-ci

Files changed (1) hide show
  1. ggml-cuda.cu +26 -24
ggml-cuda.cu CHANGED
@@ -651,18 +651,18 @@ static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) {
651
  return a;
652
  }
653
 
654
- static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
655
- #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
656
- #pragma unroll
657
- for (int mask = 16; mask > 0; mask >>= 1) {
658
- a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
659
- }
660
- return a;
661
- #else
662
- (void) a;
663
- NO_DEVICE_CODE;
664
- #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
665
- }
666
 
667
  static __device__ __forceinline__ float warp_reduce_max(float x) {
668
  #pragma unroll
@@ -672,18 +672,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
672
  return x;
673
  }
674
 
675
- static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
676
- #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
677
- #pragma unroll
678
- for (int mask = 16; mask > 0; mask >>= 1) {
679
- x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
680
- }
681
- return x;
682
- #else
683
- (void) x;
684
- NO_DEVICE_CODE;
685
- #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
686
- }
687
 
688
  static __device__ __forceinline__ float op_repeat(const float a, const float b) {
689
  return b;
@@ -4641,10 +4641,12 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
4641
  const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
4642
  return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
4643
  #else
 
4644
  assert(false);
4645
  return 0.f;
4646
  #endif
4647
  #else
 
4648
  assert(false);
4649
  return 0.f;
4650
  #endif
 
651
  return a;
652
  }
653
 
654
+ //static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
655
+ //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
656
+ //#pragma unroll
657
+ // for (int mask = 16; mask > 0; mask >>= 1) {
658
+ // a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
659
+ // }
660
+ // return a;
661
+ //#else
662
+ // (void) a;
663
+ // NO_DEVICE_CODE;
664
+ //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
665
+ //}
666
 
667
  static __device__ __forceinline__ float warp_reduce_max(float x) {
668
  #pragma unroll
 
672
  return x;
673
  }
674
 
675
+ //static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
676
+ //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
677
+ //#pragma unroll
678
+ // for (int mask = 16; mask > 0; mask >>= 1) {
679
+ // x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
680
+ // }
681
+ // return x;
682
+ //#else
683
+ // (void) x;
684
+ // NO_DEVICE_CODE;
685
+ //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
686
+ //}
687
 
688
  static __device__ __forceinline__ float op_repeat(const float a, const float b) {
689
  return b;
 
4641
  const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
4642
  return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
4643
  #else
4644
+ (void) ksigns64;
4645
  assert(false);
4646
  return 0.f;
4647
  #endif
4648
  #else
4649
+ (void) ksigns64;
4650
  assert(false);
4651
  return 0.f;
4652
  #endif