Spaces:
Running
Running
ref #40 : start working on the documentation
Browse files
ggml.h
CHANGED
|
@@ -1,5 +1,174 @@
|
|
| 1 |
#pragma once
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
#ifdef __cplusplus
|
| 4 |
extern "C" {
|
| 5 |
#endif
|
|
@@ -21,7 +190,8 @@ typedef __fp16 ggml_fp16_t;
|
|
| 21 |
typedef uint16_t ggml_fp16_t;
|
| 22 |
#endif
|
| 23 |
|
| 24 |
-
|
|
|
|
| 25 |
ggml_fp16_t ggml_fp32_to_fp16(float x);
|
| 26 |
|
| 27 |
struct ggml_object;
|
|
@@ -36,6 +206,7 @@ enum ggml_type {
|
|
| 36 |
GGML_TYPE_COUNT,
|
| 37 |
};
|
| 38 |
|
|
|
|
| 39 |
enum ggml_op {
|
| 40 |
GGML_OP_NONE = 0,
|
| 41 |
|
|
@@ -136,7 +307,7 @@ struct ggml_init_params {
|
|
| 136 |
void * mem_buffer; // if NULL, memory will be allocated internally
|
| 137 |
};
|
| 138 |
|
| 139 |
-
void
|
| 140 |
int64_t ggml_time_ms(void);
|
| 141 |
int64_t ggml_time_us(void);
|
| 142 |
int64_t ggml_cycles(void);
|
|
|
|
| 1 |
#pragma once
|
| 2 |
|
| 3 |
+
//
|
| 4 |
+
// GGML Tensor Library
|
| 5 |
+
//
|
| 6 |
+
// This documentation is still a work in progress.
|
| 7 |
+
// If you wish some specific topics to be covered, feel free to drop a comment:
|
| 8 |
+
//
|
| 9 |
+
// https://github.com/ggerganov/whisper.cpp/issues/40
|
| 10 |
+
//
|
| 11 |
+
// ## Overview
|
| 12 |
+
//
|
| 13 |
+
// This library implements:
|
| 14 |
+
//
|
| 15 |
+
// - a set of tensor operations
|
| 16 |
+
// - automatic differentiation
|
| 17 |
+
// - basic optimization algorithms
|
| 18 |
+
//
|
| 19 |
+
// The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes,
|
| 20 |
+
// but is not limited to, the following:
|
| 21 |
+
//
|
| 22 |
+
// - linear regression
|
| 23 |
+
// - support vector machines
|
| 24 |
+
// - neural networks
|
| 25 |
+
//
|
| 26 |
+
// The library allows the user to define a certain function using the available tensor operations. This function
|
| 27 |
+
// definition is represented internally via a computation graph. Each tensor operation in the function definition
|
| 28 |
+
// corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
|
| 29 |
+
// function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
|
| 30 |
+
// using one of the available optimization algorithms.
|
| 31 |
+
//
|
| 32 |
+
// For example, here we define the function: f(x) = a*x^2 + b
|
| 33 |
+
//
|
| 34 |
+
// {
|
| 35 |
+
// struct ggml_init_params params = {
|
| 36 |
+
// .mem_size = 16*1024*1024,
|
| 37 |
+
// .mem_buffer = NULL,
|
| 38 |
+
// };
|
| 39 |
+
//
|
| 40 |
+
// // memory allocation happens here
|
| 41 |
+
// struct ggml_context * ctx = ggml_init(params);
|
| 42 |
+
//
|
| 43 |
+
// struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
|
| 44 |
+
//
|
| 45 |
+
// ggml_set_param(ctx, x); // x is an input variable
|
| 46 |
+
//
|
| 47 |
+
// struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
|
| 48 |
+
// struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
|
| 49 |
+
// struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
|
| 50 |
+
// struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
|
| 51 |
+
//
|
| 52 |
+
// ...
|
| 53 |
+
// }
|
| 54 |
+
//
|
| 55 |
+
// Notice that the function definition above does not involve any actual computation. The computation is performed only
|
| 56 |
+
// when the user explicitly requests it. For example, to compute the function's value at x = 2.0:
|
| 57 |
+
//
|
| 58 |
+
// {
|
| 59 |
+
// ...
|
| 60 |
+
//
|
| 61 |
+
// struct ggml_cgraph gf = ggml_build_forward(f);
|
| 62 |
+
//
|
| 63 |
+
// // set the input variable and parameter values
|
| 64 |
+
// ggml_set_f32(x, 2.0f);
|
| 65 |
+
// ggml_set_f32(a, 3.0f);
|
| 66 |
+
// ggml_set_f32(b, 4.0f);
|
| 67 |
+
//
|
| 68 |
+
// ggml_graph_compute(ctx0, &gf);
|
| 69 |
+
//
|
| 70 |
+
// printf("f = %f\n", ggml_get_f32_1d(f, 0));
|
| 71 |
+
//
|
| 72 |
+
// ...
|
| 73 |
+
// }
|
| 74 |
+
//
|
| 75 |
+
// The actual computation is performed in the ggml_graph_compute() function.
|
| 76 |
+
//
|
| 77 |
+
// The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
|
| 78 |
+
// ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
|
| 79 |
+
// in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
|
| 80 |
+
// and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
|
| 81 |
+
// actually needed.
|
| 82 |
+
//
|
| 83 |
+
// The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
|
| 84 |
+
// differentiation and optimization algorithms.
|
| 85 |
+
//
|
| 86 |
+
// The described approach allows the user to define the function graph once and then compute its forward or backward graphs
|
| 87 |
+
// multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
|
| 88 |
+
// the user can avoid the memory allocation overhead at runtime.
|
| 89 |
+
//
|
| 90 |
+
// The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
|
| 91 |
+
// citizens, but in theory the library can be extended to support FP8 and integer data types.
|
| 92 |
+
//
|
| 93 |
+
// Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary
|
| 94 |
+
// and binary operations. Most of the available operations fall into one of these two categories. With time, it became
|
| 95 |
+
// clear that the library needs to support more complex operations. The way to support these operations is not clear
|
| 96 |
+
// yet, but a few examples are demonstrated in the following operations:
|
| 97 |
+
//
|
| 98 |
+
// - ggml_permute()
|
| 99 |
+
// - ggml_conv_1d_1s()
|
| 100 |
+
// - ggml_conv_1d_2s()
|
| 101 |
+
//
|
| 102 |
+
// For each tensor operator, the library implements a forward and backward computation function. The forward function
|
| 103 |
+
// computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
|
| 104 |
+
// input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a
|
| 105 |
+
// calculus class, or watch the following video:
|
| 106 |
+
//
|
| 107 |
+
// What is Automatic Differentiation?
|
| 108 |
+
// https://www.youtube.com/watch?v=wG_nF1awSSY
|
| 109 |
+
//
|
| 110 |
+
//
|
| 111 |
+
// ## Tensor data (struct ggml_tensor)
|
| 112 |
+
//
|
| 113 |
+
// The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
|
| 114 |
+
// the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
|
| 115 |
+
// pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
|
| 116 |
+
//
|
| 117 |
+
// {
|
| 118 |
+
// struct ggml_tensor * c = ggml_add(ctx, a, b);
|
| 119 |
+
//
|
| 120 |
+
// assert(c->src[0] == a);
|
| 121 |
+
// assert(c->src[1] == b);
|
| 122 |
+
// }
|
| 123 |
+
//
|
| 124 |
+
// The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
|
| 125 |
+
// number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This makes it possible
|
| 126 |
+
// to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
|
| 127 |
+
// permutation. All tensor operations have to take the stride into account and not assume that the tensor is
|
| 128 |
+
// contiguous in memory.
|
| 129 |
+
//
|
| 130 |
+
// The data of the tensor is accessed via the "data" pointer. For example:
|
| 131 |
+
//
|
| 132 |
+
// {
|
| 133 |
+
// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
|
| 134 |
+
//
|
| 135 |
+
// // a[1, 2] = 1.0f;
|
| 136 |
+
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
|
| 137 |
+
//
|
| 138 |
+
// // a[2, 0] = 2.0f;
|
| 139 |
+
// *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
|
| 140 |
+
//
|
| 141 |
+
// ...
|
| 142 |
+
// }
|
| 143 |
+
//
|
| 144 |
+
// Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d(), that can be used.
|
| 145 |
+
//
|
| 146 |
+
// ## The matrix multiplication operator (ggml_mul_mat)
|
| 147 |
+
//
|
| 148 |
+
// TODO
|
| 149 |
+
//
|
| 150 |
+
//
|
| 151 |
+
// ## Multi-threading
|
| 152 |
+
//
|
| 153 |
+
// TODO
|
| 154 |
+
//
|
| 155 |
+
//
|
| 156 |
+
// ## Overview of ggml.c
|
| 157 |
+
//
|
| 158 |
+
// TODO
|
| 159 |
+
//
|
| 160 |
+
//
|
| 161 |
+
// ## SIMD optimizations
|
| 162 |
+
//
|
| 163 |
+
// TODO
|
| 164 |
+
//
|
| 165 |
+
//
|
| 166 |
+
// ## Debugging ggml
|
| 167 |
+
//
|
| 168 |
+
// TODO
|
| 169 |
+
//
|
| 170 |
+
//
|
| 171 |
+
|
| 172 |
#ifdef __cplusplus
|
| 173 |
extern "C" {
|
| 174 |
#endif
|
|
|
|
| 190 |
typedef uint16_t ggml_fp16_t;
|
| 191 |
#endif
|
| 192 |
|
| 193 |
+
// convert FP16 <-> FP32
|
| 194 |
+
float ggml_fp16_to_fp32(ggml_fp16_t x);
|
| 195 |
ggml_fp16_t ggml_fp32_to_fp16(float x);
|
| 196 |
|
| 197 |
struct ggml_object;
|
|
|
|
| 206 |
GGML_TYPE_COUNT,
|
| 207 |
};
|
| 208 |
|
| 209 |
+
// available tensor operations:
|
| 210 |
enum ggml_op {
|
| 211 |
GGML_OP_NONE = 0,
|
| 212 |
|
|
|
|
| 307 |
void * mem_buffer; // if NULL, memory will be allocated internally
|
| 308 |
};
|
| 309 |
|
| 310 |
+
void ggml_time_init(void); // call this once at the beginning of the program
|
| 311 |
int64_t ggml_time_ms(void);
|
| 312 |
int64_t ggml_time_us(void);
|
| 313 |
int64_t ggml_cycles(void);
|