Spaces:
Running
Running
Christian Kastner
Diego Devesa
committed on
Commit
·
0bcd751
1
Parent(s):
39c4fa5
Implement GGML_CPU_ALL_VARIANTS for PowerPC (llama/14286)
Browse files
* Add PowerPC feature detection and scoring
* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC
* ggml-cpu: Delay some initializations until function is called
When using GGML_BACKEND_DL=ON, these initializations might use
instructions that are not supported by the current CPU.
---------
Co-authored-by: Diego Devesa <[email protected]>
- ggml/src/CMakeLists.txt +17 -0
- ggml/src/ggml-cpu/CMakeLists.txt +21 -0
- ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- ggml/src/ggml-cpu/repack.cpp +15 -14
ggml/src/CMakeLists.txt
CHANGED
|
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
| 286 |
foreach (feat ${ARGN})
|
| 287 |
set(GGML_INTERNAL_${feat} ON)
|
| 288 |
endforeach()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
endif()
|
| 290 |
|
| 291 |
ggml_add_cpu_backend_variant_impl(${tag_name})
|
|
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
| 337 |
else()
|
| 338 |
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
| 339 |
endif()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
else()
|
| 341 |
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
| 342 |
endif()
|
|
|
|
| 286 |
foreach (feat ${ARGN})
|
| 287 |
set(GGML_INTERNAL_${feat} ON)
|
| 288 |
endforeach()
|
| 289 |
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
| 290 |
+
foreach (feat ${ARGN})
|
| 291 |
+
set(GGML_INTERNAL_${feat} ON)
|
| 292 |
+
endforeach()
|
| 293 |
endif()
|
| 294 |
|
| 295 |
ggml_add_cpu_backend_variant_impl(${tag_name})
|
|
|
|
| 341 |
else()
|
| 342 |
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
| 343 |
endif()
|
| 344 |
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
| 345 |
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
| 346 |
+
ggml_add_cpu_backend_variant(power0)
|
| 347 |
+
ggml_add_cpu_backend_variant(power7_1 POWER7)
|
| 348 |
+
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
|
| 349 |
+
ggml_add_cpu_backend_variant(power8_1 POWER8)
|
| 350 |
+
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
|
| 351 |
+
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
|
| 352 |
+
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
|
| 353 |
+
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
|
| 354 |
+
else()
|
| 355 |
+
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
| 356 |
+
endif()
|
| 357 |
else()
|
| 358 |
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
| 359 |
endif()
|
ggml/src/ggml-cpu/CMakeLists.txt
CHANGED
|
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
| 388 |
else()
|
| 389 |
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
| 390 |
endif()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
else()
|
| 392 |
if (GGML_CPU_POWERPC_CPUTYPE)
|
| 393 |
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
|
|
|
| 388 |
else()
|
| 389 |
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
| 390 |
endif()
|
| 391 |
+
elseif(GGML_CPU_ALL_VARIANTS)
|
| 392 |
+
# Begin with the lowest baseline
|
| 393 |
+
set(ARCH_DEFINITIONS "")
|
| 394 |
+
|
| 395 |
+
# When a feature is selected, bump the MCPU to the first
|
| 396 |
+
# version that supported it
|
| 397 |
+
foreach(PVER RANGE 7 11)
|
| 398 |
+
if(DEFINED GGML_INTERNAL_POWER${PVER})
|
| 399 |
+
set(POWERPC_MCPU "power${PVER}")
|
| 400 |
+
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
|
| 401 |
+
endif()
|
| 402 |
+
endforeach()
|
| 403 |
+
if (GGML_INTERNAL_VSX)
|
| 404 |
+
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
|
| 405 |
+
list(APPEND ARCH_FLAGS -mvsx)
|
| 406 |
+
endif()
|
| 407 |
+
|
| 408 |
+
if (DEFINED POWERPC_MCPU)
|
| 409 |
+
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
|
| 410 |
+
endif()
|
| 411 |
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
|
| 412 |
else()
|
| 413 |
if (GGML_CPU_POWERPC_CPUTYPE)
|
| 414 |
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# include "ggml-backend-impl.h"
|
| 2 |
+
|
| 3 |
+
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
| 4 |
+
|
| 5 |
+
#if defined(__linux__)
|
| 6 |
+
#include <sys/auxv.h>
|
| 7 |
+
#endif
|
| 8 |
+
|
| 9 |
+
#include <string>
|
| 10 |
+
|
| 11 |
+
struct powerpc_features {
|
| 12 |
+
std::string platform = "";
|
| 13 |
+
int power_version = -1;
|
| 14 |
+
|
| 15 |
+
bool has_vsx = false;
|
| 16 |
+
|
| 17 |
+
powerpc_features() {
|
| 18 |
+
#if defined(__linux__)
|
| 19 |
+
unsigned long auxval = getauxval(AT_PLATFORM);
|
| 20 |
+
if (auxval) {
|
| 21 |
+
platform = std::string(reinterpret_cast<const char*>(auxval));
|
| 22 |
+
// TBD: Do systems exist that return this in uppercase?
|
| 23 |
+
if (platform.substr(0, 5) == "power") {
|
| 24 |
+
// Extract a numeric suffix, if one exists
|
| 25 |
+
int vpos = -1;
|
| 26 |
+
for (int i = platform.length() - 1; i >= 0; i--) {
|
| 27 |
+
if (std::isdigit(platform[i])) {
|
| 28 |
+
vpos = i;
|
| 29 |
+
} else {
|
| 30 |
+
break;
|
| 31 |
+
}
|
| 32 |
+
}
|
| 33 |
+
if (vpos > -1) {
|
| 34 |
+
power_version = std::stoi(platform.substr(vpos));
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
#endif
|
| 39 |
+
if (power_version >= 9) {
|
| 40 |
+
has_vsx = true;
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
};
|
| 44 |
+
|
| 45 |
+
static int ggml_backend_cpu_powerpc_score() {
|
| 46 |
+
int score = 1;
|
| 47 |
+
powerpc_features pf;
|
| 48 |
+
|
| 49 |
+
// Platform scores
|
| 50 |
+
#if defined(GGML_USE_POWER7)
|
| 51 |
+
if (pf.power_version < 7) { return 0; }
|
| 52 |
+
score += 1<<1;
|
| 53 |
+
#endif
|
| 54 |
+
#if defined(GGML_USE_POWER8)
|
| 55 |
+
if (pf.power_version < 8) { return 0; }
|
| 56 |
+
score += 1<<2;
|
| 57 |
+
#endif
|
| 58 |
+
#if defined(GGML_USE_POWER9)
|
| 59 |
+
if (pf.power_version < 9) { return 0; }
|
| 60 |
+
score += 1<<3;
|
| 61 |
+
#endif
|
| 62 |
+
#if defined(GGML_USE_POWER10)
|
| 63 |
+
if (pf.power_version < 10) { return 0; }
|
| 64 |
+
score += 1<<4;
|
| 65 |
+
#endif
|
| 66 |
+
#if defined(GGML_USE_POWER11)
|
| 67 |
+
if (pf.power_version < 11) { return 0; }
|
| 68 |
+
score += 1<<5;
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
// Feature scores
|
| 72 |
+
#if defined(GGML_USE_VSX)
|
| 73 |
+
if (!pf.has_vsx) { return 0; }
|
| 74 |
+
score += 1<<6;
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
+
return score;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
|
| 81 |
+
|
| 82 |
+
#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
|
ggml/src/ggml-cpu/repack.cpp
CHANGED
|
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|
| 1411 |
}
|
| 1412 |
};
|
| 1413 |
|
| 1414 |
-
// instance for Q4
|
| 1415 |
-
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
| 1416 |
-
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
| 1417 |
-
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
| 1418 |
-
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
| 1419 |
-
|
| 1420 |
-
// instance for IQ4
|
| 1421 |
-
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
| 1422 |
-
|
| 1423 |
} // namespace ggml::cpu::repack
|
| 1424 |
|
| 1425 |
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1426 |
if (cur->type == GGML_TYPE_Q4_0) {
|
| 1427 |
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
| 1428 |
if (cur->ne[1] % 8 == 0) {
|
| 1429 |
-
return &
|
| 1430 |
}
|
| 1431 |
}
|
| 1432 |
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
| 1433 |
if (cur->ne[1] % 4 == 0) {
|
| 1434 |
-
return &
|
| 1435 |
}
|
| 1436 |
}
|
| 1437 |
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
| 1438 |
if (cur->ne[1] % 4 == 0) {
|
| 1439 |
-
return &
|
| 1440 |
}
|
| 1441 |
}
|
| 1442 |
} else if (cur->type == GGML_TYPE_Q4_K) {
|
| 1443 |
if (ggml_cpu_has_avx2()) {
|
| 1444 |
if (cur->ne[1] % 8 == 0) {
|
| 1445 |
-
return &
|
| 1446 |
}
|
| 1447 |
}
|
| 1448 |
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
| 1449 |
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
| 1450 |
if (cur->ne[1] % 4 == 0) {
|
| 1451 |
-
return &
|
| 1452 |
}
|
| 1453 |
}
|
| 1454 |
}
|
|
|
|
| 1411 |
}
|
| 1412 |
};
|
| 1413 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1414 |
} // namespace ggml::cpu::repack
|
| 1415 |
|
| 1416 |
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
| 1417 |
+
|
| 1418 |
+
// instance for Q4
|
| 1419 |
+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
| 1420 |
+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
| 1421 |
+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
| 1422 |
+
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
| 1423 |
+
|
| 1424 |
+
// instance for IQ4
|
| 1425 |
+
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
| 1426 |
+
|
| 1427 |
if (cur->type == GGML_TYPE_Q4_0) {
|
| 1428 |
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
| 1429 |
if (cur->ne[1] % 8 == 0) {
|
| 1430 |
+
return &q4_0_8x8_q8_0;
|
| 1431 |
}
|
| 1432 |
}
|
| 1433 |
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
| 1434 |
if (cur->ne[1] % 4 == 0) {
|
| 1435 |
+
return &q4_0_4x8_q8_0;
|
| 1436 |
}
|
| 1437 |
}
|
| 1438 |
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
| 1439 |
if (cur->ne[1] % 4 == 0) {
|
| 1440 |
+
return &q4_0_4x4_q8_0;
|
| 1441 |
}
|
| 1442 |
}
|
| 1443 |
} else if (cur->type == GGML_TYPE_Q4_K) {
|
| 1444 |
if (ggml_cpu_has_avx2()) {
|
| 1445 |
if (cur->ne[1] % 8 == 0) {
|
| 1446 |
+
return &q4_K_8x8_q8_K;
|
| 1447 |
}
|
| 1448 |
}
|
| 1449 |
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
| 1450 |
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
| 1451 |
if (cur->ne[1] % 4 == 0) {
|
| 1452 |
+
return &iq4_nl_4x4_q8_0;
|
| 1453 |
}
|
| 1454 |
}
|
| 1455 |
}
|