Christian Kastner Diego Devesa committed on
Commit
0bcd751
·
1 Parent(s): 39c4fa5

Implement GGML_CPU_ALL_VARIANTS for PowerPC (llama/14286)

Browse files

* Add PowerPC feature detection and scoring

* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC

* ggml-cpu: Delay some initializations until function is called

When using GGML_BACKEND_DL=ON, these initializations might use
instructions that are not supported by the current CPU.

---------

Co-authored-by: Diego Devesa <[email protected]>

ggml/src/CMakeLists.txt CHANGED
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
286
  foreach (feat ${ARGN})
287
  set(GGML_INTERNAL_${feat} ON)
288
  endforeach()
 
 
 
 
289
  endif()
290
 
291
  ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
337
  else()
338
  message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
339
  endif()
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  else()
341
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
342
  endif()
 
286
  foreach (feat ${ARGN})
287
  set(GGML_INTERNAL_${feat} ON)
288
  endforeach()
289
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
290
+ foreach (feat ${ARGN})
291
+ set(GGML_INTERNAL_${feat} ON)
292
+ endforeach()
293
  endif()
294
 
295
  ggml_add_cpu_backend_variant_impl(${tag_name})
 
341
  else()
342
  message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
343
  endif()
344
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
345
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
346
+ ggml_add_cpu_backend_variant(power0)
347
+ ggml_add_cpu_backend_variant(power7_1 POWER7)
348
+ ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
349
+ ggml_add_cpu_backend_variant(power8_1 POWER8)
350
+ ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
351
+ ggml_add_cpu_backend_variant(power9 POWER9 VSX)
352
+ ggml_add_cpu_backend_variant(power10 POWER10 VSX)
353
+ ggml_add_cpu_backend_variant(power11 POWER11 VSX)
354
+ else()
355
+ message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
356
+ endif()
357
  else()
358
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
359
  endif()
ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
388
  else()
389
  list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
390
  endif()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  else()
392
  if (GGML_CPU_POWERPC_CPUTYPE)
393
  list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
 
388
  else()
389
  list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
390
  endif()
391
+ elseif(GGML_CPU_ALL_VARIANTS)
392
+ # Begin with the lowest baseline
393
+ set(ARCH_DEFINITIONS "")
394
+
395
+ # When a feature is selected, bump the MCPU to the first
396
+ # version that supported it
397
+ foreach(PVER RANGE 7 11)
398
+ if(DEFINED GGML_INTERNAL_POWER${PVER})
399
+ set(POWERPC_MCPU "power${PVER}")
400
+ list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
401
+ endif()
402
+ endforeach()
403
+ if (GGML_INTERNAL_VSX)
404
+ list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
405
+ list(APPEND ARCH_FLAGS -mvsx)
406
+ endif()
407
+
408
+ if (DEFINED POWERPC_MCPU)
409
+ list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
410
+ endif()
411
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
412
  else()
413
  if (GGML_CPU_POWERPC_CPUTYPE)
414
  list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # include "ggml-backend-impl.h"
2
+
3
+ #if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
4
+
5
+ #if defined(__linux__)
6
+ #include <sys/auxv.h>
7
+ #endif
8
+
9
+ #include <string>
10
+
11
// Runtime description of the PowerPC CPU the process is running on.
//
// On Linux the AT_PLATFORM auxiliary-vector entry (e.g. "power9") is read and
// the POWER generation number is parsed out of it. On other systems, or when
// the entry is missing or not recognised, `platform` stays empty or unparsed,
// `power_version` stays -1 and `has_vsx` stays false.
struct powerpc_features {
    std::string platform      = "";   // raw AT_PLATFORM string, empty if unavailable
    int         power_version = -1;   // POWER generation (7, 8, 9, ...), -1 if unknown

    bool has_vsx = false;

    // Extracts the generation number from a platform name of the form
    // "power<digits>[suffix]".
    //
    // Returns the number formed by the digits that directly follow the
    // "power" prefix, or -1 when the prefix is missing or no digit follows it.
    // Parsing the digits after the prefix (rather than a trailing numeric
    // suffix) keeps names with a non-numeric tail such as "power7+" or
    // "power6x" recognisable. The result saturates at 9999 so that a
    // malformed, over-long digit run can neither overflow nor throw.
    static int parse_power_version(const std::string & name) {
        // TBD: Do systems exist that return this in uppercase?
        static const char prefix[] = "power";
        const std::string::size_type prefix_len = sizeof(prefix) - 1;
        const int max_version = 9999;

        // compare() clips the length, so names shorter than the prefix are
        // simply reported as different.
        if (name.compare(0, prefix_len, prefix) != 0) {
            return -1;
        }

        int version = -1;
        for (std::string::size_type i = prefix_len; i < name.size(); ++i) {
            const char c = name[i];
            // Plain range check: avoids <cctype> and the undefined behaviour
            // of passing a negative char to std::isdigit.
            if (c < '0' || c > '9') {
                break;
            }
            const int digit = c - '0';
            version = (version < 0) ? digit : version * 10 + digit;
            if (version > max_version) {
                version = max_version;
                break;
            }
        }
        return version;
    }

    powerpc_features() {
#if defined(__linux__)
        const unsigned long auxval = getauxval(AT_PLATFORM);
        if (auxval) {
            platform      = reinterpret_cast<const char *>(auxval);
            power_version = parse_power_version(platform);
        }
#endif
        // NOTE(review): VSX is only reported from POWER9 upward here, so a
        // build that requires VSX is rejected on older generations - confirm
        // this is intended.
        has_vsx = power_version >= 9;
    }
};
44
+
45
+ static int ggml_backend_cpu_powerpc_score() {
46
+ int score = 1;
47
+ powerpc_features pf;
48
+
49
+ // Platform scores
50
+ #if defined(GGML_USE_POWER7)
51
+ if (pf.power_version < 7) { return 0; }
52
+ score += 1<<1;
53
+ #endif
54
+ #if defined(GGML_USE_POWER8)
55
+ if (pf.power_version < 8) { return 0; }
56
+ score += 1<<2;
57
+ #endif
58
+ #if defined(GGML_USE_POWER9)
59
+ if (pf.power_version < 9) { return 0; }
60
+ score += 1<<3;
61
+ #endif
62
+ #if defined(GGML_USE_POWER10)
63
+ if (pf.power_version < 10) { return 0; }
64
+ score += 1<<4;
65
+ #endif
66
+ #if defined(GGML_USE_POWER11)
67
+ if (pf.power_version < 11) { return 0; }
68
+ score += 1<<5;
69
+ #endif
70
+
71
+ // Feature scores
72
+ #if defined(GGML_USE_VSX)
73
+ if (!pf.has_vsx) { return 0; }
74
+ score += 1<<6;
75
+ #endif
76
+
77
+ return score;
78
+ }
79
+
80
+ GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
81
+
82
+ #endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
ggml/src/ggml-cpu/repack.cpp CHANGED
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
1411
  }
1412
  };
1413
 
1414
- // instance for Q4
1415
- static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1416
- static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1417
- static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1418
- static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1419
-
1420
- // instance for IQ4
1421
- static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1422
-
1423
  } // namespace ggml::cpu::repack
1424
 
1425
  static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
 
 
 
 
 
 
 
 
 
 
1426
  if (cur->type == GGML_TYPE_Q4_0) {
1427
  if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
1428
  if (cur->ne[1] % 8 == 0) {
1429
- return &ggml::cpu::repack::q4_0_8x8_q8_0;
1430
  }
1431
  }
1432
  if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
1433
  if (cur->ne[1] % 4 == 0) {
1434
- return &ggml::cpu::repack::q4_0_4x8_q8_0;
1435
  }
1436
  }
1437
  if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
1438
  if (cur->ne[1] % 4 == 0) {
1439
- return &ggml::cpu::repack::q4_0_4x4_q8_0;
1440
  }
1441
  }
1442
  } else if (cur->type == GGML_TYPE_Q4_K) {
1443
  if (ggml_cpu_has_avx2()) {
1444
  if (cur->ne[1] % 8 == 0) {
1445
- return &ggml::cpu::repack::q4_K_8x8_q8_K;
1446
  }
1447
  }
1448
  } else if (cur->type == GGML_TYPE_IQ4_NL) {
1449
  if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
1450
  if (cur->ne[1] % 4 == 0) {
1451
- return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
1452
  }
1453
  }
1454
  }
 
1411
  }
1412
  };
1413
 
 
 
 
 
 
 
 
 
 
1414
  } // namespace ggml::cpu::repack
1415
 
1416
  static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
1417
+
1418
+ // instance for Q4
1419
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1420
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1421
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1422
+ static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1423
+
1424
+ // instance for IQ4
1425
+ static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1426
+
1427
  if (cur->type == GGML_TYPE_Q4_0) {
1428
  if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
1429
  if (cur->ne[1] % 8 == 0) {
1430
+ return &q4_0_8x8_q8_0;
1431
  }
1432
  }
1433
  if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
1434
  if (cur->ne[1] % 4 == 0) {
1435
+ return &q4_0_4x8_q8_0;
1436
  }
1437
  }
1438
  if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
1439
  if (cur->ne[1] % 4 == 0) {
1440
+ return &q4_0_4x4_q8_0;
1441
  }
1442
  }
1443
  } else if (cur->type == GGML_TYPE_Q4_K) {
1444
  if (ggml_cpu_has_avx2()) {
1445
  if (cur->ne[1] % 8 == 0) {
1446
+ return &q4_K_8x8_q8_K;
1447
  }
1448
  }
1449
  } else if (cur->type == GGML_TYPE_IQ4_NL) {
1450
  if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
1451
  if (cur->ne[1] % 4 == 0) {
1452
+ return &iq4_nl_4x4_q8_0;
1453
  }
1454
  }
1455
  }