Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions soteria-c/lib/ctree_block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ let uninit ~range = Tree.make ~node:(Owned (Uninit Totally)) ~range ()
let mk_fix_typed ofs ty () =
let* len = Layout.size_of_s ty in
let+ fixes = mk_fix_typed ty () in
List.map (fun v -> [ MemVal { offset = ofs; len; v } ]) fixes
List.map (fun v -> [ TB.MemVal { offset = ofs; len; v } ]) fixes

let mk_fix_any ofs len () = [ [ MemVal { offset = ofs; len; v = SAny } ] ]
let mk_fix_any ofs len () = [ [ TB.MemVal { offset = ofs; len; v = SAny } ] ]

let decode ~ty ~ofs node =
match node with
Expand Down
6 changes: 5 additions & 1 deletion soteria-c/lib/interp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,11 @@ module Make (State : State_intf.S) = struct
With_origin.
{
node =
Freeable.Alive [ Ctree_block.MemVal { offset; len; v = SZeros } ];
Freeable.Alive
[
Soteria.Sym_states.Tree_block.MemVal
{ offset; len; v = Ctree_block.MemVal.SZeros };
];
info = None;
}
in
Expand Down
33 changes: 18 additions & 15 deletions soteria-rust/lib/rtree_block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@ open Rustsymex
open Rustsymex.Result
open Charon

type 'a serialized_val =
| SInit of ('a Rust_val.t * Types.ty)
| SUninit
| SZeros
| SAny
[@@deriving show { with_path = false }]

let iter_vars_serialized_val iter_vars_ptr v f =
match v with SInit (v, _) -> Rust_val.iter_vars iter_vars_ptr v f | _ -> ()

let subst_serialized_val subst_ptr f = function
| SInit (v, ty) -> SInit (Rust_val.subst subst_ptr f v, ty)
| v -> v

module Make (Sptr : Sptr.S) = struct
module Encoder = Encoder.Make (Sptr)

Expand Down Expand Up @@ -80,22 +94,11 @@ module Make (Sptr : Sptr.S) = struct
| Lazy | Uninit Partially ->
failwith "Should never split an intermediate node"

type serialized =
| SInit of (rust_val * Types.ty)
[@printer Fmt.(pair ~sep:comma pp_rust_val Charon_util.pp_ty)]
| SUninit
| SZeros
| SAny
type serialized = Sptr.t serialized_val
[@@deriving show { with_path = false }]

let subst_serialized f = function
| SInit (v, ty) -> SInit (Rust_val.subst Sptr.subst f v, ty)
| v -> v

let iter_vars_serialized v f =
match v with
| SInit (v, _) -> Rust_val.iter_vars Sptr.iter_vars v f
| _ -> ()
let iter_vars_serialized v f = iter_vars_serialized_val Sptr.iter_vars v f
let subst_serialized f = subst_serialized_val Sptr.subst f

(* TODO: serialize tree borrow information! *)
let serialize ((t, _) : t) : serialized Seq.t option =
Expand Down Expand Up @@ -222,7 +225,7 @@ module Make (Sptr : Sptr.S) = struct
let ((_, bound) as range) = Range.of_low_and_size ofs size in
let mk_fixes () =
let+ v = Layout.nondet ty in
[ [ MemVal { offset = ofs; len = size; v = SInit (v, ty) } ] ]
[ [ TB.MemVal { offset = ofs; len = size; v = SInit (v, ty) } ] ]
in
let@ t = with_bound_and_owned_check ~mk_fixes t bound in
let replace_node t =
Expand Down
65 changes: 62 additions & 3 deletions soteria-rust/lib/state.ml
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,60 @@ and t = {
}
[@@deriving show { with_path = false }]

type serialized = Tree_block.serialized Freeable.serialized SPmap.serialized
type serialized =
(serialized_atom list, serialized_globals) State_intf.Template.t
Comment on lines +113 to +114
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than State_intf.Template.t, with { heap; globals } i would much rather have

type serialized = 
| Heap of serialized_atom
| Global of serialized_global

We'll probably need more information later on: for treeborrows maybe, for pointer decays (mapping a location to it's exposed integer value), for functions (mapping a function reference to a pointer? though i have other plans there anyways), and for errors (if the pre/post condition imply unwinding, what the error is)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this also corresponds much more closely to work on CSE with core predicates, so I'd prefer we stick to what we know works :P

Copy link
Contributor

@giltho giltho Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this change is directly inspired by a similar thing in Coteria. A serialized heap need not be decomposed in disjunctions of things? As long as I can produce/consume it it's fine

But, now that we're discussing this, maybe Opale is correct here, in that we have discovered with Pedro's work that we want atoms of the assertion to be as small as possible as to enable feasible matching plans

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to me it never made sense to have more than an atom is serialized -- it makes using them harder; it should just be Missing of serialized list list, where serialized is just atoms and is built to be kept small :P

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my intuition comes mostly from using core predicates within Gillian but i don't see a reason to complexify it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Efficiency. It might be that knowing at one time what you are going to produce/consume is cheaper than consuming atom by atom.

E.g. if I know that I'm going to produce a whole sub-tree, or maybe a whole object into memory, it's cheaper to do it in one step rather than doing it bit by bit and re-going through pmap and treeblock operations for each bit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough :) but in that case id rather this only be done where it's relevant (PMap and PList only) and nowhere else; e.g. there is no advantage in producing globals and heap data "together" vs splitting it into two

Copy link
Contributor

@N1ark N1ark Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also to ease processing i wonder if as a utility function in e.g. the matching plan one can have some flatten : serialized -> serialized list and merge : serialized list -> serialized list that e.g. converts (loc, [ ser1; ser2; ser3 ])to [(loc, [ ser1 ]); (loc, [ ser2 ]); (loc [ ser3 ])] and inversely (purely syntactically for the merge) -- maybe Pedro already has this

[@@deriving show { with_path = false }]

and serialized_atom = T.sloc Typed.t * Tree_block.serialized Freeable.serialized
[@@deriving show { with_path = false }]

and serialized_globals = T.sloc Typed.t list
[@@deriving show { with_path = false }]

let serialize_globals globals : serialized_globals =
ListLabels.fold_left (GlobMap.bindings globals) ~init:[]
~f:(fun acc (_, ((ptr : Sptr.t), _)) -> Typed.Ptr.loc ptr.ptr :: acc)
Comment on lines +120 to +125
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't make sense to me -- just knowing "a global" is in a pointer isn't enough; what you should have is probably serialized_global = Charon.Types.global_decl_id * T.sloc Typed.t.
Sidenote, I'm not sure why you have serialized_globals as a list rather than serialized_global and then a list of those, like with serialized_atom

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should glob_map be an instance of Pure_fun and then serialize comes for free?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm that'd require changing the state to be PMap (Product (Pure_fun (GlobalThing)) (Freeable (TreeBlock))) i think, which could be done but id rather avoid it, given i have plans of putting Freeable (TreeBlock) into a With_info thing that tracks what kind of allocation it is like here
https://github.com/rust-lang/rust/blob/be0ade2b602bdfe37a3cc259fcc79e8624dcba94/compiler/rustc_middle/src/mir/interpret/mod.rs#L260-L276


let lift_fix_globals globals res =
let+? heap = res in
State_intf.Template.{ heap; globals = serialize_globals globals }
Comment on lines +127 to +129
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also here youre adding all globals to all fixes raised -- i don't think this is normal? i wouldve expected globals = [], since you're lifting the fixes, not adding globlals which already exist (ie. producing them would be a no-op)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead (with the change im recommending), youd just have let+? heap_fixes = res in List.map (fun f -> Heap f) heap_fixes, i think


let serialize st : serialized =
let heap =
match st.state with
| None -> []
| Some st ->
let serialize_freeable (b, _) = Tree_block.serialize b in
SPmap.serialize (Freeable.serialize serialize_freeable) st
in
let globals = serialize_globals st.globals in
{ heap; globals }

let subst_serialized (subst_var : Svalue.Var.t -> Svalue.Var.t)
(serialized : serialized) : serialized =
let heap =
let subst_serialized_block subst_var f =
Freeable.subst_serialized Tree_block.subst_serialized subst_var f
in
SPmap.subst_serialized subst_serialized_block subst_var serialized.heap
in
let globals = List.map (Typed.subst subst_var) serialized.globals in
{ heap; globals }

let iter_vars_serialized (s : serialized) :
(Svalue.Var.t * [< Typed.T.cval ] Typed.ty -> unit) -> unit =
let iter_heap =
let iter_vars_serialized_block s =
Freeable.iter_vars_serialized Tree_block.iter_vars_serialized s
in
SPmap.iter_vars_serialized iter_vars_serialized_block s.heap
in
let iter_globals =
let append acc x = Iter.append acc (Typed.iter_vars x) in
List.fold_left append Iter.empty s.globals
in
Comment on lines +161 to +164
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of using list and calling Iter.append a bunch of times (bad), do Iter.of_list globals |> Iter.map ...

Iter.append iter_heap iter_globals

let pp_pretty ~ignore_freed ft { state; _ } =
let ignore =
if ignore_freed then function _, Freeable.Freed -> true | _ -> false
Expand Down Expand Up @@ -190,7 +241,10 @@ let with_ptr (ptr : Sptr.t) (st : t)
in
let loc, ofs = Typed.Ptr.decompose ptr.ptr in
let@ state = with_state st in
let* res = (SPmap.wrap (Freeable.wrap (fun st -> f (ofs, st)))) loc state in
let* res =
(SPmap.wrap (Freeable.wrap (fun st -> f (ofs, st)))) loc state
|> lift_fix_globals st.globals
in
match res with
| Missing _ as miss ->
if%sat Sptr.is_at_null_loc ptr then Result.error `UBDanglingPointer
Expand Down Expand Up @@ -365,7 +419,9 @@ let alloc ?zeroed size align st =
let tb = Tree_borrow.init ~state:Unique () in
let block = Tree_block.alloc ?zeroed size in
let block = Freeable.Alive (block, tb) in
let** loc, state = SPmap.alloc ~new_codom:block state in
let** loc, state =
SPmap.alloc ~new_codom:block state |> lift_fix_globals st.globals
in
let ptr = Typed.Ptr.mk loc Usize.(0s) in
let ptr : Sptr.t Rust_val.full_ptr =
({ ptr; tag = tb.tag; align; size }, None)
Expand Down Expand Up @@ -404,6 +460,7 @@ let alloc_tys tys st =
}
in
(block, (ptr, None)))
|> lift_fix_globals st.globals

let free (({ ptr; _ } : Sptr.t), _) ({ state; _ } as st) =
let** () = assert_ (Typed.Ptr.ofs ptr ==@ Usize.(0s)) `InvalidFree st in
Expand All @@ -415,6 +472,7 @@ let free (({ ptr; _ } : Sptr.t), _) ({ state; _ } as st) =
(Freeable.free ~assert_exclusively_owned:(fun t ->
Tree_block.assert_exclusively_owned @@ Option.map fst t))
(Typed.Ptr.loc ptr) state
|> lift_fix_globals st.globals
in
((), { st with state })

Expand Down Expand Up @@ -559,6 +617,7 @@ let leak_check st =
Result.ok (k :: leaks)
else Result.ok leaks)
[] state
|> lift_fix_globals st.globals
in
if List.is_empty leaks then Result.ok ((), state)
else (
Expand Down
24 changes: 23 additions & 1 deletion soteria-rust/lib/state_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ open T
open Rustsymex
open Charon
open Rust_val
open Soteria.Sym_states

module Template = struct
type ('a, 'b) t = { heap : 'a; globals : 'b }
[@@deriving show { with_path = false }]
end
Comment on lines +8 to +11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also this doesn't necessarily apply to all states either, so define it within state.ml and expose it in state.mli if needed


module type S = sig
module Sptr : Sptr.S
Expand All @@ -11,7 +17,14 @@ module type S = sig

(* state *)
type t
type serialized

type serialized = (serialized_atom list, sloc Typed.t list) Template.t

and serialized_atom =
sloc Typed.t
* (Sptr.t Rtree_block.serialized_val, sint Value.t) Tree_block.serialized
Freeable.serialized
Comment on lines +21 to +26
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to extract the types from the functors because otherwise I can't write this in the include line in the .mli file. Freeable.serialized, Tree_block.serialized and Rtree_block.MemVal.serialized are only accessible after their respective functor application, which is done internally in state.ml. Extracting them from the functors makes it possible to access the serialized structures when I don't have knowledge of how the functor was applied (as is the case in the .mli). Maybe an alternative could be to provide each of those modules in the .mli file where the signature includes those serialized structures. That way the general interface stays the same.

Unfortunately, that is still not enough because Sptr.t is only accessible after the include. I can't access Sptr in the .mli, which is why I need the type to be defined in the interface. Again, an alternative here would be to define an Sptr module in the .mli, but in this case I think I would have to redefine the interface that Sptr has, if I'm thinking correctly.

Basically, as is, I can't expose the serialized structure in the .mli in a clean way. I'm honestly alright with adding the modules Freeable, Tree_block and MemVal to the .mli (along with their serialized types), but the Sptr module is the main issue.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this not work ? 2fef10c

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(the example doesn't compile bc i was lazy but supposedly the interface works fine -- maybe i missed sth)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get the idea. My main issue with this is that ArithPtr is being hardcoded, which I was trying to avoid as I should probably only rely on the Sptr interface. But if that just sounds like overthinking of my part, then that should do the work, thanks!

Copy link
Contributor

@N1ark N1ark Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i mean you're hardcoding the entire serialised structure anyways so adding Sptr doesn't matter much -- this is a particular module that is made for ArithPtr, not State_intf, which is the general one

it's ok to specialise State because all the engine relies on State_intf, it just allows to expose information for your particular tool

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, thanks! I'll make things work like this for RUXt then, so I guess this PR can be ignored for now

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure! if you want to edit this / re-open a PR to expose serialized in State directly I'm happy to merge, there's no harm in that


Comment on lines +20 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't do this -- if we want to add a new state model for polymorphic analysis the state shape will be different. Instead, in State.mli add with type serialized = ..., to expose the signature for that one module, and then your implementation will just rely on the fact that's the state model used, which is ok

type 'a err

val add_to_call_trace :
Expand All @@ -22,6 +35,15 @@ module type S = sig
(** Prettier but expensive printing. *)
val pp_pretty : ignore_freed:bool -> t Fmt.t

val serialize : t -> serialized
val pp_serialized : Format.formatter -> serialized -> unit

val iter_vars_serialized :
serialized -> (Svalue.Var.t * [< cval ] Typed.ty -> unit) -> unit

val subst_serialized :
(Svalue.Var.t -> Svalue.Var.t) -> serialized -> serialized

val empty : t

val load :
Expand Down
24 changes: 13 additions & 11 deletions soteria/lib/sym_states/freeable.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
(* TODO: Build this with the Sum transformer and `Excl(Freed)` *)

type 'a t = Freed | Alive of 'a
type 'a serialized = 'a t

let iter_vars_serialized iter_inner serialized f =
match serialized with Freed -> () | Alive a -> iter_inner a f

let subst_serialized subst_inner subst_var = function
| Freed -> Freed
| Alive a -> Alive (subst_inner subst_var a)

let pp ?(alive_prefix = "") pp_alive ft = function
| Freed -> Fmt.pf ft "Freed"
Expand All @@ -10,25 +18,19 @@ module Make (Symex : Symex.S) = struct
open Symex.Syntax

type nonrec 'a t = 'a t = Freed | Alive of 'a
type 'a serialized = 'a t
type nonrec 'a serialized = 'a serialized

let lift_fix fix = Alive fix
let lift_fix_s r = Symex.Result.map_missing r lift_fix
let subst_serialized = subst_serialized
let iter_vars_serialized = iter_vars_serialized
let pp = pp
let pp_serialized = pp

let serialize serialize_inner = function
| Freed -> Freed
| Alive a -> Alive (serialize_inner a)

let subst_serialized subst_inner subst_var = function
| Freed -> Freed
| Alive a -> Alive (subst_inner subst_var a)

let iter_vars_serialized iter_inner serialized f =
match serialized with Freed -> () | Alive a -> iter_inner a f

let pp = pp
let pp_serialized = pp

Comment on lines -22 to -31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

another thing :p i'm not sure these should be moved out of the functor; they could be but really im not sure why you'd need it; instead you need to e.g. use State.Freeable.subst_serialized, which will be the correct instantiated module

let unwrap_alive = function
| None -> Symex.Result.ok None
| Some (Alive s) -> Symex.Result.ok (Some s)
Expand Down
75 changes: 47 additions & 28 deletions soteria/lib/sym_states/tree_block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,33 @@ and 'a node = NotOwned of node_qty | Owned of 'a
and node_qty = Partially | Totally
[@@deriving show { with_path = false }, make]

type ('a, 'sint) serialized_atom =
| MemVal of { offset : 'sint; len : 'sint; v : 'a }
| Bound of 'sint
[@@deriving show { with_path = false }]

type ('a, 'sint) serialized = ('a, 'sint) serialized_atom list

let iter_vars_serialized iter_vars_serialized_val iter_vars_sint serialized f =
List.iter
(function
| MemVal { offset; len; v } ->
iter_vars_sint offset f;
iter_vars_sint len f;
iter_vars_serialized_val v f
| Bound v -> iter_vars_sint v f)
serialized

let subst_serialized subst_serialized_val subst_sint subst_var serialized =
let v_subst = subst_sint subst_var in
let subst_atom = function
| MemVal { offset; len; v } ->
let v = subst_serialized_val subst_var v in
MemVal { offset = v_subst offset; len = v_subst len; v }
| Bound v -> Bound (v_subst v)
in
List.map subst_atom serialized

(** The input module of [Tree_block]. A memory value [t] represents an owned
part of the tree block, with the property that it can be split or merged as
needed.
Expand Down Expand Up @@ -534,16 +561,7 @@ struct

(** Logic *)

type serialized_atom =
| MemVal of {
offset : sint; [@printer Symex.Value.ppa]
len : sint; [@printer Symex.Value.ppa]
v : MemVal.serialized;
}
| Bound of sint [@printer fun f v -> Fmt.pf f "Bound(%a)" Symex.Value.ppa v]
[@@deriving show { with_path = false }]

type serialized = serialized_atom list
type nonrec serialized = (MemVal.serialized, sint) serialized

let lift_miss ~offset ~len symex =
let+? fix = symex in
Expand Down Expand Up @@ -613,27 +631,28 @@ struct

(** Logic *)

let iter_vars_serialized serialized f =
let iter_vars_serialized =
iter_vars_serialized MemVal.iter_vars_serialized Symex.Value.iter_vars
in
iter_vars_serialized serialized f

let subst_serialized subst_var (serialized : serialized) =
let v_subst v = Symex.Value.subst subst_var v in
let subst_atom = function
| MemVal { offset; len; v } ->
let v = MemVal.subst_serialized subst_var v in
MemVal { offset = v_subst offset; len = v_subst len; v }
| Bound v -> Bound (v_subst v)
let subst_serialized =
subst_serialized MemVal.subst_serialized Symex.Value.subst
in
List.map subst_atom serialized
subst_serialized subst_var serialized

let iter_vars_serialized serialized f =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment here -- i dont think any of these definitions should be moved out of the functor, just use the right module instead

List.iter
(function
| MemVal { offset; len; v } ->
Symex.Value.iter_vars offset f;
Symex.Value.iter_vars len f;
MemVal.iter_vars_serialized v f
| Bound v -> Symex.Value.iter_vars v f)
serialized

let pp_serialized = Fmt.Dump.list pp_serialized_atom
let pp_serialized_atom ft serialized =
let pp_serialized_atom =
pp_serialized_atom MemVal.pp_serialized Symex.Value.ppa
in
match serialized with
| Bound bound -> Fmt.pf ft "Bound(%a)" Symex.Value.ppa bound
| _ -> pp_serialized_atom ft serialized

let pp_serialized ft serialized =
Fmt.Dump.list pp_serialized_atom ft serialized

let serialize t =
let bound =
Expand Down