Commit 445c0f2

update ggml and propagate changes; embed and textgen works
1 parent 08a6d71 commit 445c0f2

9 files changed

Lines changed: 20 additions & 67 deletions


gadget/compute.py

Lines changed: 0 additions & 3 deletions
```diff
@@ -21,15 +21,13 @@
     ggml_backend_free,
     ggml_backend_alloc_ctx_tensors,
     ggml_backend_get_default_buffer_type,
-    ggml_backend_cpu_set_n_threads,
     ggml_backend_graph_compute,
     ggml_gallocr_new,
     ggml_gallocr_reserve,
     ggml_gallocr_alloc_graph,
     ggml_gallocr_free,
 )
 from .tensor import (
-    get_framework,
     get_tensor_name,
     get_tensor_info,
     create_tensor,
@@ -295,7 +293,6 @@ def test_torch(input_dim=256, output_dim=32, batch_size=16, qtype=T.F32, backend
 
     # define model function
     def test_model(ctx, par, ten):
-        n, m = par['input_dim'], par['output_dim']
         a, b, x = ten['a'], ten['b'], ten['x']
         x1 = ggml_mul_mat(ctx, a, x, name=f'x1')
         x2 = ggml_add(ctx, x1, b, name=f'x2')
```
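The deleted line only computed unused locals; the graph itself is unchanged. For orientation, here is a minimal numpy sketch of what `test_model`'s graph computes, a linear map over a batch. This is an analogy under `test_torch`'s default dimensions, not the actual ggml call path:

```python
import numpy as np

# dimensions follow test_torch's defaults: input_dim=256, output_dim=32, batch_size=16
n, m, k = 256, 32, 16
a = np.random.randn(m, n).astype(np.float32)   # weight tensor 'a'
b = np.random.randn(m).astype(np.float32)      # bias tensor 'b'
x = np.random.randn(k, n).astype(np.float32)   # input batch 'x'

x1 = x @ a.T     # ggml_mul_mat(ctx, a, x): contracts the shared inner dim
x2 = x1 + b      # ggml_add(ctx, x1, b): broadcast bias add
print(x2.shape)  # (16, 32)
```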

gadget/ggml.py

Lines changed: 1 addition & 5 deletions
```diff
@@ -50,7 +50,7 @@
     ggml_element_size,
     ggml_nelements,
     ggml_nbytes,
-    ggml_internal_get_type_traits,
+    ggml_get_type_traits,
     # tensor ops
     ggml_dup,
     ggml_dup_inplace,
@@ -153,17 +153,13 @@
     ggml_soft_max,
     ggml_soft_max_inplace,
     ggml_soft_max_ext,
-    ggml_soft_max_back,
-    ggml_soft_max_back_inplace,
     ggml_rope,
     ggml_rope_inplace,
     ggml_rope_ext,
     ggml_rope_ext_inplace,
     ggml_rope_yarn_corr_dims,
-    ggml_rope_back,
     ggml_clamp,
     ggml_im2col,
-    ggml_conv_depthwise_2d,
     ggml_conv_1d,
     ggml_conv_1d_ph,
     ggml_conv_transpose_1d,
```
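These import edits track symbols the vendored ggml no longer exports: the backward ops and `ggml_conv_depthwise_2d` were dropped upstream, and `ggml_internal_get_type_traits` was renamed. Since this module re-exports ctypes bindings, a stale name fails when the symbol is first resolved; a quick way to see what a built library actually exports is a probe like the following sketch (the shared-object path is hypothetical):

```python
import ctypes

# ctypes resolves symbols lazily on attribute access, so hasattr doubles
# as an export check against the loaded library
lib = ctypes.CDLL('gadget/libs/libggml.so')  # hypothetical build path
for name in ('ggml_get_type_traits', 'ggml_internal_get_type_traits'):
    print(name, hasattr(lib, name))  # the new name resolves; the old one should not
```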

gadget/libs/_libggml.py

Lines changed: 3 additions & 40 deletions
```diff
@@ -268,23 +268,18 @@ class ggml_backend_buffer_type_context(ctypes.Structure): ...
         ("op"       , ctypes.c_int   ),
         ("op_params", ctypes.c_int32 * GGML_MAX_OP_PARAMS_INT),
         ("flags"    , ctypes.c_int32 ),
-        ("grad"     , ggml_tensor_p  ),
         ("src"      , ggml_tensor_p * GGML_MAX_SRC),
         ("view_src" , ggml_tensor_p  ),
         ("view_offs", ctypes.c_size_t),
         ("data"     , ctypes.c_void_p),
         ("name"     , ctypes.c_char * GGML_MAX_NAME),
         ("extra"    , ctypes.c_void_p),
+        ("padding"  , ctypes.c_char * 8),
     ]
 
 # types and quantization
 ggml_to_float_p = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int64)
 ggml_from_float_p = ctypes.CFUNCTYPE(None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int64)
-ggml_from_float_to_mat_p = ctypes.CFUNCTYPE(None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int64, ctypes.c_int64, ctypes.c_int64)
-ggml_vec_dot_p = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_size_t, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int)
-ggml_vec_dot_p = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_size_t, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int)
-ggml_gemv_p = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_size_t, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
-ggml_gemm_p = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_size_t, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
 
 class ggml_type_traits(ctypes.Structure):
     _fields_ = [
@@ -294,15 +289,7 @@ class ggml_type_traits(ctypes.Structure):
         ("type_size"    , ctypes.c_size_t),
         ("is_quantized" , ctypes.c_bool  ),
         ("to_float"     , ggml_to_float_p),
-        ("from_float"   , ggml_from_float_p),
         ("from_float_ref", ggml_from_float_p),
-        ("from_float_to_mat", ggml_from_float_to_mat_p),
-        ("vec_dot"      , ggml_vec_dot_p ),
-        ("vec_dot_type" , ctypes.c_int   ),
-        ("nrows"        , ctypes.c_int64 ),
-        ("ncols"        , ctypes.c_int64 ),
-        ("gemv"         , ggml_gemv_p    ),
-        ("gemm"         , ggml_gemm_p    ),
     ]
 ggml_type_traits_p = ctypes.POINTER(ggml_type_traits)
 
@@ -660,9 +647,9 @@ def ggml_is_contiguous(tensor): ...
 
 @ctypes_function(_ggml,
     [ctypes.c_int],
-    ggml_type_traits
+    ggml_type_traits_p
 )
-def ggml_internal_get_type_traits(ttype): ...
+def ggml_get_type_traits(ttype): ...
 
 ## graphs
 
@@ -1306,18 +1293,6 @@ def ggml_soft_max_inplace(ctx, a): ...
 )
 def ggml_soft_max_ext(ctx, a, mask, scale, max_bias): ...
 
-@ctypes_function(_ggml,
-    [ggml_context_p, ggml_tensor_p, ggml_tensor_p],
-    ggml_tensor_p
-)
-def ggml_soft_max_back(ctx, a, b): ...
-
-@ctypes_function(_ggml,
-    [ggml_context_p, ggml_tensor_p, ggml_tensor_p],
-    ggml_tensor_p
-)
-def ggml_soft_max_back_inplace(ctx, a, b): ...
-
 @ctypes_function(_ggml,
     [ggml_context_p, ggml_tensor_p, ggml_tensor_p, ctypes.c_int, ctypes.c_int],
     ggml_tensor_p
@@ -1348,12 +1323,6 @@ def ggml_rope_ext_inplace(ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, fre
 )
 def ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, dims): ...
 
-@ctypes_function(_ggml,
-    [ggml_context_p, ggml_tensor_p, ggml_tensor_p, ggml_tensor_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float],
-    ggml_tensor_p
-)
-def ggml_rope_back(ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow): ...
-
 @ctypes_function(_ggml,
     [ggml_context_p, ggml_tensor_p, ctypes.c_float, ctypes.c_float],
     ggml_tensor_p
@@ -1366,12 +1335,6 @@ def ggml_clamp(ctx, a, min, max): ...
 )
 def ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, is_2D, dst_type): ...
 
-@ctypes_function(_ggml,
-    [ggml_context_p, ggml_tensor_p, ggml_tensor_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int],
-    ggml_tensor_p
-)
-def ggml_conv_depthwise_2d(ctx, a, b, s0, s1, p0, p1, d0, d1): ...
-
 @ctypes_function(_ggml,
     [ggml_context_p, ggml_tensor_p, ggml_tensor_p, ctypes.c_int, ctypes.c_int, ctypes.c_int],
     ggml_tensor_p
```
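Dropping `grad` and appending `padding` keeps the ctypes mirror of the tensor struct byte-for-byte aligned with the updated C definition; if the mirror drifts, every field after the mismatch is read from the wrong offset. A toy illustration of the failure mode (hypothetical structs, not the real `ggml_tensor`):

```python
import ctypes

# toy structs: removing one pointer field shifts the offset of everything after it
class Stale(ctypes.Structure):
    _fields_ = [('flags', ctypes.c_int32),
                ('grad',  ctypes.c_void_p),   # field the C side no longer has
                ('data',  ctypes.c_void_p)]

class Current(ctypes.Structure):
    _fields_ = [('flags', ctypes.c_int32),
                ('data',  ctypes.c_void_p)]

# on a typical 64-bit ABI: Stale.data sits at offset 16, Current.data at offset 8,
# so a stale mirror would read 'data' from the wrong bytes
print(Stale.data.offset, Current.data.offset)
```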

gadget/libs/constants.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -55,6 +55,10 @@ class GGMLQuantizationType(IntEnum):
     F64   = 28
     IQ1_M = 29
     BF16  = 30
+    TQ1_0 = 34
+    TQ2_0 = 35
+    MXFP4 = 39
+    COUNT = 40
 
 # embed pooling types
 class LlamaPoolingType(IntEnum):
```
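The new ids jump from 30 to 34 and then to 39, mirroring upstream ggml, where the skipped ids belong to quantization types that were since removed or are not mirrored here. Because this `IntEnum` maps on-disk GGUF type ids, the gaps must be preserved rather than renumbered; a minimal sketch of the consequence:

```python
from enum import IntEnum

# members map raw GGUF type ids, so values are fixed by the file format,
# not by declaration order
class Q(IntEnum):
    BF16  = 30
    TQ1_0 = 34

print(Q(34))   # Q.TQ1_0: round-trips a type id read from a GGUF header
try:
    Q(31)      # ids in the gap are rejected rather than silently remapped
except ValueError as e:
    print(e)   # "31 is not a valid Q"
```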

gadget/models/bert.py

Lines changed: 0 additions & 4 deletions
```diff
@@ -1,9 +1,6 @@
 # bert implementation
 
-import numpy as np
-
 from ..ggml import (
-    ggml_add,
     ggml_add_inplace,
     ggml_get_rows,
     ggml_view_1d,
@@ -13,7 +10,6 @@
 )
 from ..model import GgmlModel, Parameter, State, Tensor
 from .layers import (
-    linear_layer,
     norm_layer,
     attention_layer,
     feed_forward_layer,
```

gadget/models/llama.py

Lines changed: 2 additions & 4 deletions
```diff
@@ -1,17 +1,16 @@
-# llama implementation
+# llama implementation (llama-3.1)
 
 import numpy as np
 
 from ..ggml import (
     ggml_element_size,
     ggml_add_inplace,
     ggml_get_rows,
-    ggml_transpose,
     ggml_view_1d,
     ggml_view_2d,
     ggml_cont,
 )
-from ..tensor import get_tensor_shape, get_tensor_info
+from ..tensor import get_tensor_shape
 from ..model import GgmlModel, Parameter, State, Tensor
 from .cache import KVCache
 from .layers import (
@@ -107,7 +106,6 @@ def forward(self):
         ctx = self.ctx_graph
 
         # get runtime state
-        batch_size, context_length = self.params['batch_size', 'context_length']
         n_past, n_tokens = self.state['n_past', 'n_tokens']
 
         # get params
```
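The deleted line used the same tuple-indexing idiom as the surviving `n_past, n_tokens` lookup, where one subscript fetches several values at once. A sketch of how such a lookup can be implemented (a hypothetical stand-in, not the repo's actual `Parameter`/`State` classes):

```python
class MultiDict(dict):
    # indexing with a tuple of keys returns a tuple of values
    def __getitem__(self, keys):
        if isinstance(keys, tuple):
            return tuple(dict.__getitem__(self, k) for k in keys)
        return dict.__getitem__(self, keys)

state = MultiDict(n_past=0, n_tokens=12)
n_past, n_tokens = state['n_past', 'n_tokens']
print(n_past, n_tokens)  # 0 12
```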

gadget/tensor.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -17,7 +17,7 @@
     ggml_new_tensor_4d,
     ggml_backend_tensor_set,
     ggml_backend_tensor_get,
-    ggml_internal_get_type_traits,
+    ggml_get_type_traits,
     ggml_backend_buffer_is_host,
 )
 
@@ -120,8 +120,8 @@ def trim_nelem(shape):
     return shape[:dims]
 
 def get_type_traits(ttype):
-    traits = ggml_internal_get_type_traits(ttype)
-    return traits.blck_size, traits.type_size
+    traits = ggml_get_type_traits(ttype)
+    return traits.contents.blck_size, traits.contents.type_size
 
 def get_tensor_name(tensor):
     value = tensor.contents
@@ -274,7 +274,7 @@ def array_to_tensor(array, tensor, offset=0, strict=True):
         src_p = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
         dst_p = ctypes.cast(dst, ctypes.c_void_p)
         size = ggml_nelements(tensor)
-        traits = ggml_internal_get_type_traits(ttype)
+        traits = ggml_get_type_traits(ttype)
         traits.from_float(src_p, dst_p, size)
     else:
         src_p = ctypes.cast(src, ctypes.c_void_p)
@@ -306,7 +306,7 @@ def tensor_to_array(tensor, framework='numpy', device='cpu'):
         src_p = ctypes.cast(src, ctypes.c_void_p)
         dst_p = ctypes.cast(dst, ctypes.POINTER(ctypes.c_float))
         size = ggml_nelements(tensor)
-        traits = ggml_internal_get_type_traits(ttype)
+        traits = ggml_get_type_traits(ttype)
         traits.to_float(src_p, dst_p, size)
     else:
         dst_p = ctypes.cast(dst, ctypes.c_void_p)
```
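These call sites follow the upstream rename, under which the traits function now returns a pointer to the traits struct rather than the struct by value, hence the `.contents` dereference in `get_type_traits`. A usage sketch of the wrapper, assuming the bundled CPU build of ggml loads and that Q8_0 keeps its upstream id of 8 with 32-element, 34-byte blocks:

```python
from gadget.tensor import get_type_traits

# Q8_0 packs 32 elements into a 34-byte block (f16 scale + 32 int8 values)
blck_size, type_size = get_type_traits(8)  # 8 = GGML_TYPE_Q8_0 upstream
nelem = 4096
nbytes = (nelem // blck_size) * type_size  # 4096 / 32 * 34 = 4352 bytes
print(blck_size, type_size, nbytes)
```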

gadget/textgen.py

Lines changed: 4 additions & 5 deletions
```diff
@@ -1,6 +1,5 @@
 # text generation
 
-import numpy as np
 from transformers import AutoTokenizer
 
 from .loader import GgufFile
@@ -100,10 +99,10 @@ def stream_chat(self, message, prefill=None, add_generation_prompt=True, **kwarg
         self.history.append({'role': 'assistant', 'content': reply})
 
     def generate_chat(self, message, **kwargs):
-        tokens = []
-        for tok in self.stream_chat(message, **kwargs):
-            tokens += [tok]
-        return self.detokenize(tokens)
+        reply = ''
+        for chunk in self.stream_chat(message, **kwargs):
+            reply += chunk
+        return reply
 
 def test_logits(gguf_path, model_id, model_class=LlamaModel, batch_size=128, **kwargs):
     model = TextGen(gguf_path, model_id, model_class=model_class, batch_size=batch_size, **kwargs)
```
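The rewrite reflects that `stream_chat` yields already-detokenized text chunks, so `generate_chat` can concatenate them directly instead of collecting token ids and detokenizing at the end. A minimal usage sketch of both entry points (the GGUF path and tokenizer id below are hypothetical):

```python
from gadget.textgen import TextGen

model = TextGen('llama-3.1-8b-instruct.gguf', 'meta-llama/Llama-3.1-8B-Instruct')

# streaming: print chunks as they arrive
for chunk in model.stream_chat('What does this commit change?'):
    print(chunk, end='', flush=True)

# blocking: generate_chat concatenates the same chunks into one reply
print(model.generate_chat('Summarize ggml type traits.'))
```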

ggml

Submodule ggml updated from 2327bda to 83835ff
