@@ -595,7 +595,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }
 
 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return t->data != NULL                 // tensor data already set externally
+        || t->buffer                       // tensor on external buffer (but not yet allocated)
+        || ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
 }
 
 // free the extra space at the end if the new tensor is smaller
@@ -813,7 +815,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static bool ggml_gallocr_reserve_n_impl(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -915,21 +918,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         if (realloc) {
 #ifndef NDEBUG
             size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
-            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
+                __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
 
             ggml_vbuffer_free(galloc->buffers[i]);
-            galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
-            if (galloc->buffers[i] == NULL) {
-                GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
-                return false;
+            if (no_alloc) {
+                galloc->buffers[i] = NULL;
+            } else {
+                galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+                if (galloc->buffers[i] == NULL) {
+                    GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
+                    return false;
+                }
             }
         }
     }
 
     return true;
 }
 
+void ggml_gallocr_reserve_n_size(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+    GGML_ASSERT(ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc=*/ true));
+    for (int i = 0; i < galloc->n_buffers; i++) {
+        sizes[i] = 0;
+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+            sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+        }
+    }
+}
+
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+    return ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc=*/ false);
+}
+
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
     return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
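For orientation between hunks: `ggml_gallocr_reserve_n_size` runs the same planning pass as `ggml_gallocr_reserve_n`, but with `no_alloc` set it skips the backend allocation and reports the planned per-buffer sizes (the sum of `max_size` over each buffer's chunks) instead. A minimal usage sketch, assuming `graph` is an already-built `struct ggml_cgraph *`; the variable names here are illustrative, not from this PR:

    // plan the compute graph and query the worst-case buffer size
    // without allocating any backend memory
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    size_t size = 0; // one entry per buffer type; ggml_gallocr_new() uses a single one
    ggml_gallocr_reserve_n_size(galloc, graph, /*node_buffer_ids=*/ NULL, /*leaf_buffer_ids=*/ NULL, &size);
    fprintf(stderr, "compute buffer size: %.2f MiB\n", size / 1024.0 / 1024.0);
    ggml_gallocr_free(galloc);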
@@ -1132,14 +1155,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
     return true;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
+        struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
     size_t max_size = ggml_backend_buft_get_max_size(buft);
 
     ggml_backend_buffer_t * buffers = NULL;
     size_t n_buffers = 0;
+    *nbytes_total = 0;
 
     size_t cur_buf_size = 0;
     struct ggml_tensor * first = ggml_get_first_tensor(ctx);
@@ -1151,10 +1176,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
         if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
-            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+            if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;
             }
             first = t;
+            *nbytes_total += cur_buf_size;
             cur_buf_size = this_size;
         } else {
             cur_buf_size += this_size;
@@ -1163,15 +1189,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
     // allocate remaining tensors
     if (cur_buf_size > 0) {
-        if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+        *nbytes_total += cur_buf_size;
+        if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
             return NULL;
         }
     }
 
+    if (no_alloc) {
+        return NULL;
+    }
+
     if (n_buffers == 0) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
+        GGML_ASSERT(!buffers);
         return NULL;
     }
 
@@ -1181,10 +1213,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     } else {
         buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
     }
-    free(buffers);
+    if (buffers) {
+        free(buffers); // can be NULL if the context is empty or no_alloc is set
+    }
     return buffer;
 }
 
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
+    GGML_ASSERT(!buf);
+    return nbytes_total;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ false);
+}
+
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
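Taken together with the galloc change above, this gives a size-only pre-flight path for tensor buffers as well: `ggml_backend_alloc_ctx_tensors_from_buft_size` walks the context exactly like the allocating variant but only accumulates `nbytes_total`. A short usage sketch, assuming `ctx` is a `struct ggml_context *` created with `no_alloc = true` whose tensors have not been allocated yet; the variable names are illustrative:

    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
    // measure how much backend memory the tensors would need ...
    size_t needed = ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    fprintf(stderr, "tensors need %.2f MiB\n", needed / 1024.0 / 1024.0);
    // ... then commit to the real allocation
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
    if (buf == NULL) {
        // allocation failed, or every tensor was already allocated
    }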