ggerganov committed on
Commit
6284570
·
unverified ·
1 Parent(s): 451937f

ggml : remove old files (skip) (#0)

Browse files
ggml/include/ggml-amx.h DELETED
@@ -1,25 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
- #include "ggml-backend.h"
5
-
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- // buffer_type API
12
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
13
-
14
- GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);
15
-
16
- // backend API
17
- GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);
18
-
19
- GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
20
-
21
- GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);
22
-
23
- #ifdef __cplusplus
24
- }
25
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cuda/ggml/CMakeLists.txt DELETED
@@ -1,152 +0,0 @@
1
# Build script for the ggml-cuda backend library.
#
# Steps, in order:
#   1. locate the CUDA toolkit and choose CMAKE_CUDA_ARCHITECTURES if the user did not;
#   2. glob the kernel sources/headers and create the ggml-cuda target;
#   3. translate the GGML_CUDA_* options into compile definitions;
#   4. link the CUDA runtime, cuBLAS and (unless GGML_CUDA_NO_VMM) the driver library;
#   5. assemble the nvcc flags, including host-compiler warning flags passed via -Xcompiler.
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES

find_package(CUDAToolkit)

if (CUDAToolkit_FOUND)
    message(STATUS "CUDA Toolkit found")

    # A user-provided CMAKE_CUDA_ARCHITECTURES always wins; only choose a default when unset.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        # native == GPUs available at build time
        # 52     == Maxwell, lowest CUDA 12 standard
        # 60     == P100, FP16 CUDA intrinsics
        # 61     == Pascal, __dp4a instruction (per-byte integer dot product)
        # 70     == V100, FP16 tensor cores
        # 75     == Turing, int8 tensor cores
        if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
            set(CMAKE_CUDA_ARCHITECTURES "native")
        elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
            set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
        else()
            set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
        endif()
    endif()
    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

    enable_language(CUDA)

    file(GLOB   GGML_HEADERS_CUDA "*.cuh")
    list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")

    # Sources: every top-level .cu file plus selected template instantiations.
    file(GLOB   GGML_SOURCES_CUDA "*.cu")
    file(GLOB   SRCS "template-instances/fattn-wmma*.cu")
    list(APPEND GGML_SOURCES_CUDA ${SRCS})
    file(GLOB   SRCS "template-instances/mmq*.cu")
    list(APPEND GGML_SOURCES_CUDA ${SRCS})

    if (GGML_CUDA_FA_ALL_QUANTS)
        # all fattn-vec instantiations
        file(GLOB   SRCS "template-instances/fattn-vec*.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
    else()
        # only the q4_0/q4_0, q8_0/q8_0 and f16/f16 fattn-vec instantiations
        file(GLOB   SRCS "template-instances/fattn-vec*q4_0-q4_0.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        file(GLOB   SRCS "template-instances/fattn-vec*q8_0-q8_0.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        file(GLOB   SRCS "template-instances/fattn-vec*f16-f16.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
    endif()

    ggml_add_backend_library(ggml-cuda
                             ${GGML_HEADERS_CUDA}
                             ${GGML_SOURCES_CUDA}
                            )

    # Map the GGML_CUDA_* build options onto preprocessor definitions.
    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})

    if (GGML_CUDA_GRAPHS)
        add_compile_definitions(GGML_CUDA_USE_GRAPHS)
    endif()

    if (GGML_CUDA_FORCE_MMQ)
        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
    endif()

    if (GGML_CUDA_FORCE_CUBLAS)
        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
    endif()

    if (GGML_CUDA_NO_VMM)
        add_compile_definitions(GGML_CUDA_NO_VMM)
    endif()

    # GGML_CUDA_DMMV_F16 is accepted as another spelling that enables GGML_CUDA_F16
    if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
        add_compile_definitions(GGML_CUDA_F16)
    endif()

    if (GGML_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    if (GGML_STATIC)
        if (WIN32)
            # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
            target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
        else ()
            target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
        endif()
    else()
        target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
    endif()

    if (GGML_CUDA_NO_VMM)
        # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
    else()
        target_link_libraries(ggml-cuda PRIVATE CUDA::cuda_driver)
    endif()

    # CUDA_CXX_FLAGS collects host-compiler flags; CUDA_FLAGS collects flags for nvcc itself.
    set(CUDA_CXX_FLAGS "")

    set(CUDA_FLAGS -use_fast_math)

    if (GGML_FATAL_WARNINGS)
        list(APPEND CUDA_FLAGS -Werror all-warnings)
    endif()

    if (GGML_ALL_WARNINGS AND NOT MSVC)
        # Ask nvcc which host compiler it drives so matching warning flags can be selected.
        # NOTE(review): `.c` looks like a placeholder input file name for the query - confirm.
        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
        # Quoted so that an undefined variable compares as empty rather than as its own name.
        if (NOT "${CMAKE_CUDA_HOST_COMPILER}" STREQUAL "")
            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
        endif()

        execute_process(
            COMMAND ${NVCC_CMD} -Xcompiler --version
            OUTPUT_VARIABLE CUDA_CCFULLVER
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_QUIET
        )

        if (NOT CUDA_CCFULLVER MATCHES clang)
            set(CUDA_CCID "GNU")
            # Strip the trailing newline so the version is clean in the log line and in
            # the ggml_get_flags() call below.
            execute_process(
                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
                OUTPUT_VARIABLE CUDA_CCVER
                OUTPUT_STRIP_TRAILING_WHITESPACE
                ERROR_QUIET
            )
        else()
            if (CUDA_CCFULLVER MATCHES Apple)
                set(CUDA_CCID "AppleClang")
            else()
                set(CUDA_CCID "Clang")
            endif()
            # Quoted: the banner is free-form text and must stay one argument even if it contains ';'.
            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER "${CUDA_CCFULLVER}")
        endif()

        message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

        ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
    endif()

    if (NOT MSVC)
        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
    endif()

    list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument

    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
    endif()

    # Apply the collected flags to CUDA translation units only.
    target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
else()
    message(FATAL_ERROR "CUDA Toolkit not found")
endif()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cuda/rwkv-wkv.cu DELETED
@@ -1,89 +0,0 @@
1
- #include "common.cuh"
2
- #include "rwkv-wkv.cuh"
3
-
4
// RWKV WKV kernel (f32).
//
// Work split, as established by the indexing below: blockIdx.x selects a (sequence, head)
// pair (bid / H, bid % H) and threadIdx.x selects one column of that head's
// head_size x head_size state. Each thread keeps its state column in `col_state`, walks
// the tokens of its sequence with stride C, writes one output value per token to dst[t],
// and finally stores the updated state column at offset T * C of dst.
//
// k, v, r, td are indexed per token and channel; tf is indexed per head and channel;
// s holds the incoming state. head_size is fixed to CUDA_WKV_BLOCK_SIZE.
//
// NOTE(review): the float4 reinterpretations in the inner loop assume that the shared
// arrays and `col_state` are 16-byte aligned - confirm this holds for all toolchains.
static __global__ void rwkv_wkv_f32(const int B, const int T, const int C, const int H, const float * k, const float * v, const float * r, const float * tf, const float * td, const float * s, float * dst) {
    const int tid = threadIdx.x;
    const int bid = blockIdx.x;

    const int head_size    = CUDA_WKV_BLOCK_SIZE;
    const int batch_i      = bid / H;
    const int head_i       = bid % H;
    const int state_size   = C * head_size;
    const int n_seq_tokens = T / B;

    // start of this (sequence, head) state inside s, and inside the state area of dst
    const int state_base = batch_i * state_size + head_i * head_size * head_size;

    float col_state[head_size];
    __shared__ float _k[head_size], _r[head_size], _tf[head_size], _td[head_size];

    // load this thread's column of the state
    #pragma unroll
    for (int row = 0; row < head_size; row++) {
        col_state[row] = s[state_base + row * head_size + tid];
    }

    // tf depends only on the head, so it is staged in shared memory once
    __syncthreads();
    _tf[tid] = tf[head_i * head_size + tid];
    __syncthreads();

    const int t_first = batch_i       * n_seq_tokens * C + head_i * head_size + tid;
    const int t_limit = (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid;

    for (int t = t_first; t < t_limit; t += C) {
        // first barrier: nobody overwrites the staged values while another thread still reads
        // the previous token's; second barrier: all values are staged before they are read
        __syncthreads();
        _k[tid]  = k[t];
        _r[tid]  = r[t];
        _td[tid] = td[t];
        __syncthreads();

        const float _v = v[t];
        float y = 0;
        for (int j = 0; j < head_size; j += 4) {
            // process four channels per iteration through float4 views
            const float4 & k4  = (float4 &)(_k[j]);
            const float4 & r4  = (float4 &)(_r[j]);
            const float4 & tf4 = (float4 &)(_tf[j]);
            const float4 & td4 = (float4 &)(_td[j]);
            float4       & s4  = (float4 &)(col_state[j]);
            float4 kv;

            kv.x = k4.x * _v;
            kv.y = k4.y * _v;
            kv.z = k4.z * _v;
            kv.w = k4.w * _v;

            // output uses the state from before this token's update
            y += r4.x * (tf4.x * kv.x + s4.x);
            y += r4.y * (tf4.y * kv.y + s4.y);
            y += r4.z * (tf4.z * kv.z + s4.z);
            y += r4.w * (tf4.w * kv.w + s4.w);

            // decay the state by td and add the new k*v contribution
            s4.x = s4.x * td4.x + kv.x;
            s4.y = s4.y * td4.y + kv.y;
            s4.z = s4.z * td4.z + kv.z;
            s4.w = s4.w * td4.w + kv.w;
        }
        dst[t] = y;
    }

    // write the final state behind the T * C output values
    #pragma unroll
    for (int row = 0; row < head_size; row++) {
        dst[T * C + state_base + row * head_size + tid] = col_state[row];
    }
}
66
-
67
// Host-side launcher for the RWKV WKV op.
//
// Reads the six sources of `dst` as f32 buffers (src[0..5] -> k, v, r, tf, td, state),
// derives the launch geometry from their shapes and enqueues rwkv_wkv_f32 on the
// stream of `ctx`. The result is written to dst->data.
//
// Only the type of src[5] is asserted here; the other sources are cast to float
// without a check.
void ggml_cuda_op_rwkv_wkv(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    // kernel inputs, in the order the kernel expects them
    const float * k_ptr     = (const float *) dst->src[0]->data;
    const float * v_ptr     = (const float *) dst->src[1]->data;
    const float * r_ptr     = (const float *) dst->src[2]->data;
    const float * tf_ptr    = (const float *) dst->src[3]->data;
    const float * td_ptr    = (const float *) dst->src[4]->data;
    const float * state_ptr = (const float *) dst->src[5]->data;
    float       * out_ptr   = (float *) dst->data;

    // problem dimensions, named as in the kernel signature
    const int64_t B = dst->src[5]->ne[1];
    const int64_t T = dst->src[0]->ne[3];
    const int64_t C = dst->ne[0];
    const int64_t H = dst->src[0]->ne[2];

    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(dst->src[5]->type == GGML_TYPE_F32);
    GGML_ASSERT(C % H == 0);
    GGML_ASSERT(C / H == CUDA_WKV_BLOCK_SIZE); // the kernel hard-codes this head size

    // one block per (sequence, head) pair, one thread per channel of a head
    const int64_t n_blocks  = B * H;
    const int64_t n_threads = C / H;

    rwkv_wkv_f32<<<n_blocks, n_threads, 0, stream>>>(B, T, C, H, k_ptr, v_ptr, r_ptr, tf_ptr, td_ptr, state_ptr, out_ptr);
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cuda/rwkv-wkv.cuh DELETED
@@ -1,5 +0,0 @@
1
- #include "common.cuh"
2
-
3
// Head size assumed by the WKV kernel; the launcher asserts C / H equals this value
// and uses it as the number of threads per block.
#define CUDA_WKV_BLOCK_SIZE 64

// Launches the RWKV WKV op for `dst` on the stream of `ctx`; inputs are taken from dst->src[0..5].
void ggml_cuda_op_rwkv_wkv(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 
 
 
 
 
ggml/src/ggml-musa/ggml/CMakeLists.txt DELETED
@@ -1,107 +0,0 @@
1
# Build script for the ggml-musa backend library.
#
# The backend is built from the CUDA backend sources in ../ggml-cuda, compiled as CXX
# with the clang/clang++ found under MUSA_PATH and per-file MUSA flags.

# Locate the MUSA SDK: $MUSA_PATH is used when it names an existing path, otherwise
# /opt/musa if present, otherwise /usr/local/musa.
# The environment reference is quoted so that an unset or empty variable is still passed
# to EXISTS as an (empty) argument instead of disappearing from the argument list.
if (NOT EXISTS "$ENV{MUSA_PATH}")
    if (NOT EXISTS /opt/musa)
        set(MUSA_PATH /usr/local/musa)
    else()
        set(MUSA_PATH /opt/musa)
    endif()
else()
    set(MUSA_PATH "$ENV{MUSA_PATH}")
endif()

# Use the compilers shipped with the MUSA SDK.
set(CMAKE_C_COMPILER "${MUSA_PATH}/bin/clang")
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_COMPILER "${MUSA_PATH}/bin/clang++")
set(CMAKE_CXX_EXTENSIONS OFF)

# Extend the module search path so find_package(MUSAToolkit) can use the SDK's cmake directory.
list(APPEND CMAKE_MODULE_PATH "${MUSA_PATH}/cmake")

find_package(MUSAToolkit)

if (MUSAToolkit_FOUND)
    message(STATUS "MUSA Toolkit found")

    # A user-provided MUSA_ARCHITECTURES wins; default to "21;22" otherwise.
    if (NOT DEFINED MUSA_ARCHITECTURES)
        set(MUSA_ARCHITECTURES "21;22")
    endif()
    message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")

    file(GLOB   GGML_HEADERS_MUSA "../ggml-cuda/*.cuh")
    list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h")

    # Sources: every top-level CUDA .cu file plus selected template instantiations.
    file(GLOB   GGML_SOURCES_MUSA "../ggml-cuda/*.cu")
    file(GLOB   SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu")
    list(APPEND GGML_SOURCES_MUSA ${SRCS})
    file(GLOB   SRCS "../ggml-cuda/template-instances/mmq*.cu")
    list(APPEND GGML_SOURCES_MUSA ${SRCS})

    if (GGML_CUDA_FA_ALL_QUANTS)
        # all fattn-vec instantiations
        file(GLOB   SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
        list(APPEND GGML_SOURCES_MUSA ${SRCS})
        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
    else()
        # only the q4_0/q4_0, q8_0/q8_0 and f16/f16 fattn-vec instantiations
        file(GLOB   SRCS "../ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
        list(APPEND GGML_SOURCES_MUSA ${SRCS})
        file(GLOB   SRCS "../ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
        list(APPEND GGML_SOURCES_MUSA ${SRCS})
        file(GLOB   SRCS "../ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
        list(APPEND GGML_SOURCES_MUSA ${SRCS})
    endif()

    # The .cu files are treated as CXX sources and switched to MUSA mode via per-file flags.
    set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)

    # The flag string is identical for every source file, so it is assembled once:
    # "-x musa -mtgpu" followed by one --cuda-gpu-arch=mp_<arch> per requested architecture.
    set(COMPILE_FLAGS "-x musa -mtgpu")
    foreach(ARCH ${MUSA_ARCHITECTURES})
        string(APPEND COMPILE_FLAGS " --cuda-gpu-arch=mp_${ARCH}")
    endforeach()
    foreach(SOURCE ${GGML_SOURCES_MUSA})
        set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "${COMPILE_FLAGS}")
    endforeach()

    ggml_add_backend_library(ggml-musa
                             ${GGML_HEADERS_MUSA}
                             ${GGML_SOURCES_MUSA}
                            )

    # TODO: do not use CUDA definitions for MUSA
    target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)

    # Map the build options onto preprocessor definitions (the GGML_CUDA_* names are reused).
    add_compile_definitions(GGML_USE_MUSA)
    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})

    if (GGML_CUDA_GRAPHS)
        add_compile_definitions(GGML_CUDA_USE_GRAPHS)
    endif()

    if (GGML_CUDA_FORCE_MMQ)
        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
    endif()

    if (GGML_CUDA_FORCE_CUBLAS)
        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
    endif()

    if (GGML_CUDA_NO_VMM)
        add_compile_definitions(GGML_CUDA_NO_VMM)
    endif()

    # GGML_CUDA_DMMV_F16 is accepted as another spelling that enables GGML_CUDA_F16
    if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
        add_compile_definitions(GGML_CUDA_F16)
    endif()

    if (GGML_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    if (GGML_STATIC)
        target_link_libraries(ggml-musa PRIVATE MUSA::musart_static MUSA::mublas_static)
    else()
        target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas)
    endif()

    if (GGML_CUDA_NO_VMM)
        # No VMM requested, no need to link directly with the musa driver lib (libmusa.so)
    else()
        target_link_libraries(ggml-musa PRIVATE MUSA::musa_driver)
    endif()
else()
    message(FATAL_ERROR "MUSA Toolkit not found")
endif()