Spaces:
Running
Running
Kevin Brothaler
committed on
Commit
·
1fff54f
1
Parent(s):
dbe19c7
Check for both __ARM_NEON and __ARM_FEATURE_FMA so that the project can be compiled for armv7a.
Browse files
Android armeabi-v7a's NEON support doesn't support FMA unless configured with `-mfpu=neon-fp-armv8`, which would need runtime checks.
* Also removed ABI filter from Android project.
- examples/whisper.android/.idea/gradle.xml +1 -0
- examples/whisper.android/app/build.gradle +0 -4
- ggml.c +12 -4
- ggml.h +1 -0
- whisper.cpp +1 -0
examples/whisper.android/.idea/gradle.xml
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
<project version="4">
|
|
|
|
| 3 |
<component name="GradleSettings">
|
| 4 |
<option name="linkedExternalProjectsSettings">
|
| 5 |
<GradleProjectSettings>
|
|
|
|
| 1 |
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
<project version="4">
|
| 3 |
+
<component name="GradleMigrationSettings" migrationVersion="1" />
|
| 4 |
<component name="GradleSettings">
|
| 5 |
<option name="linkedExternalProjectsSettings">
|
| 6 |
<GradleProjectSettings>
|
examples/whisper.android/app/build.gradle
CHANGED
|
@@ -14,10 +14,6 @@ android {
|
|
| 14 |
versionCode 1
|
| 15 |
versionName "1.0"
|
| 16 |
|
| 17 |
-
ndk {
|
| 18 |
-
abiFilters 'arm64-v8a', 'x86_64'
|
| 19 |
-
}
|
| 20 |
-
|
| 21 |
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
|
| 22 |
vectorDrawables {
|
| 23 |
useSupportLibrary true
|
|
|
|
| 14 |
versionCode 1
|
| 15 |
versionName "1.0"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
|
| 18 |
vectorDrawables {
|
| 19 |
useSupportLibrary true
|
ggml.c
CHANGED
|
@@ -333,7 +333,7 @@ inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, co
|
|
| 333 |
|
| 334 |
inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) {
|
| 335 |
ggml_float sumf = 0.0;
|
| 336 |
-
#ifdef __ARM_NEON
|
| 337 |
// NEON 128-bit
|
| 338 |
const int n16 = (n & ~15);
|
| 339 |
|
|
@@ -511,7 +511,7 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float
|
|
| 511 |
|
| 512 |
inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) {
|
| 513 |
ggml_float sumf = 0.0;
|
| 514 |
-
#ifdef __ARM_NEON
|
| 515 |
const int n32 = (n & ~31);
|
| 516 |
|
| 517 |
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
@@ -760,7 +760,7 @@ inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t
|
|
| 760 |
}
|
| 761 |
|
| 762 |
inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
|
| 763 |
-
#ifdef __ARM_NEON
|
| 764 |
// NEON 128-bit
|
| 765 |
const int n16 = (n & ~15);
|
| 766 |
|
|
@@ -909,7 +909,7 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
|
|
| 909 |
}
|
| 910 |
|
| 911 |
inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, ggml_fp16_t * restrict x, const float v) {
|
| 912 |
-
#ifdef __ARM_NEON
|
| 913 |
// NEON 128-bit
|
| 914 |
const int n32 = (n & ~31);
|
| 915 |
|
|
@@ -8432,6 +8432,14 @@ int ggml_cpu_has_neon(void) {
|
|
| 8432 |
#endif
|
| 8433 |
}
|
| 8434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8435 |
int ggml_cpu_has_f16c(void) {
|
| 8436 |
#if defined(__F16C__)
|
| 8437 |
return 1;
|
|
|
|
| 333 |
|
| 334 |
inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) {
|
| 335 |
ggml_float sumf = 0.0;
|
| 336 |
+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
|
| 337 |
// NEON 128-bit
|
| 338 |
const int n16 = (n & ~15);
|
| 339 |
|
|
|
|
| 511 |
|
| 512 |
inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) {
|
| 513 |
ggml_float sumf = 0.0;
|
| 514 |
+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
|
| 515 |
const int n32 = (n & ~31);
|
| 516 |
|
| 517 |
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
|
|
| 760 |
}
|
| 761 |
|
| 762 |
inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
|
| 763 |
+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
|
| 764 |
// NEON 128-bit
|
| 765 |
const int n16 = (n & ~15);
|
| 766 |
|
|
|
|
| 909 |
}
|
| 910 |
|
| 911 |
inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, ggml_fp16_t * restrict x, const float v) {
|
| 912 |
+
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
|
| 913 |
// NEON 128-bit
|
| 914 |
const int n32 = (n & ~31);
|
| 915 |
|
|
|
|
| 8432 |
#endif
|
| 8433 |
}
|
| 8434 |
|
| 8435 |
+
int ggml_cpu_has_arm_fma(void) {
|
| 8436 |
+
#if defined(__ARM_FEATURE_FMA)
|
| 8437 |
+
return 1;
|
| 8438 |
+
#else
|
| 8439 |
+
return 0;
|
| 8440 |
+
#endif
|
| 8441 |
+
}
|
| 8442 |
+
|
| 8443 |
int ggml_cpu_has_f16c(void) {
|
| 8444 |
#if defined(__F16C__)
|
| 8445 |
return 1;
|
ggml.h
CHANGED
|
@@ -725,6 +725,7 @@ int ggml_cpu_has_avx(void);
|
|
| 725 |
int ggml_cpu_has_avx2(void);
|
| 726 |
int ggml_cpu_has_avx512(void);
|
| 727 |
int ggml_cpu_has_neon(void);
|
|
|
|
| 728 |
int ggml_cpu_has_f16c(void);
|
| 729 |
int ggml_cpu_has_fp16_va(void);
|
| 730 |
int ggml_cpu_has_wasm_simd(void);
|
|
|
|
| 725 |
int ggml_cpu_has_avx2(void);
|
| 726 |
int ggml_cpu_has_avx512(void);
|
| 727 |
int ggml_cpu_has_neon(void);
|
| 728 |
+
int ggml_cpu_has_arm_fma(void);
|
| 729 |
int ggml_cpu_has_f16c(void);
|
| 730 |
int ggml_cpu_has_fp16_va(void);
|
| 731 |
int ggml_cpu_has_wasm_simd(void);
|
whisper.cpp
CHANGED
|
@@ -2555,6 +2555,7 @@ const char * whisper_print_system_info(void) {
|
|
| 2555 |
s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
|
| 2556 |
s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
|
| 2557 |
s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
|
|
|
|
| 2558 |
s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
|
| 2559 |
s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
|
| 2560 |
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
|
|
|
|
| 2555 |
s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
|
| 2556 |
s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
|
| 2557 |
s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
|
| 2558 |
+
s += "ARM FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
|
| 2559 |
s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
|
| 2560 |
s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
|
| 2561 |
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
|