UQ-PAC · ailrst · Jun 17, 2025 · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025
diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml
@@ -249,6 +249,7 @@ jobs:
           AnalysisSystemTest2,
           AnalysisSystemTest3,
           AnalysisSystemTest4,
+          TVSystemTest,
         ]
       fail-fast: false
 

diff --git a/build.mill b/build.mill
@@ -20,6 +20,7 @@ import basilmill.BasilDocs
 import basilmill.BasilVersion
 import basilmill.ProfileModule
 import basilmill.Z3Module
+//import basilmill.CVC5Module
 
 import os.Path
 
@@ -49,8 +50,12 @@ object `package` extends ScalaModule with BasilDocs with BasilVersion with Scala
   val aslpOffline = mvn"io.github.uq-pac::lifter:0.1.0"
   val javaSmt = mvn"org.sosy-lab:java-smt:5.0.0"
   val javaSmtZ3 = mvn"org.sosy-lab:javasmt-solver-z3:4.14.0"
+  val javaSmtCVC5 = mvn"org.sosy-lab:javasmt-solver-cvc5:1.2.1-g8594a8e4dc"
+  val cats_collections = mvn"org.typelevel::cats-collections-core:0.9.10"
+  val cats_core = mvn"org.typelevel::cats-core:2.13.0"
+  val cats_kernel = mvn"org.typelevel::cats-kernel:2.13.0"
 
-  override def mvnDeps = Seq(scalactic, sourceCode, mainArgs, upickle, aslpOffline, javaSmt, javaSmtZ3)
+  override def mvnDeps = Seq(scalactic, sourceCode, mainArgs, upickle, aslpOffline, javaSmt, javaSmtZ3, javaSmtCVC5, cats_kernel, cats_core, cats_collections)
 
   override def repositoriesTask = Task.Anon {
     super.repositoriesTask() :+ MavenRepository(
@@ -67,6 +72,7 @@ object `package` extends ScalaModule with BasilDocs with BasilVersion with Scala
 
   override def moduleDir = BuildCtx.workspaceRoot / "src"
 
+
   override def sources = Task.Sources("main/scala")
 
   override def forkArgs = Task {
@@ -223,6 +229,7 @@ object `package` extends ScalaModule with BasilDocs with BasilVersion with Scala
   }
 
   object z3 extends Z3Module
+//  object cvc5 extends CVC5Module
 
   def ctagsConfig = Task.Source {
     BuildCtx.workspaceRoot / "basilmill" / "scala.ctags"
@@ -255,7 +262,9 @@ object `package` extends ScalaModule with BasilDocs with BasilVersion with Scala
   def runProfile(profileDest: String, args: String*) = Task.Command {
     println(s"Profiling: you may want to set\n  sudo sysctl kernel.perf_event_paranoid=1\n  sudo sysctl kernel.kptr_restrict=0\n")
     val prof = asyncProf.path()
-    os.call(("java", s"-agentpath:${prof}=start,event=cpu,file=${profileDest}",  "-jar", assembly().path.toString, args), stdout = os.Inherit, cwd = BuildCtx.workspaceRoot)
+    val oargs : Seq[String] = forkArgs() 
+    val realArgs : Seq[String] = oargs ++ Seq(s"-agentpath:${prof}=start,event=cpu,file=${profileDest}")
+    os.call(Seq("java") ++ realArgs ++ Seq( "-jar", assembly().path.toString) ++ args, stdout = os.Inherit, cwd = BuildCtx.workspaceRoot)
   }
 
 }
diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
@@ -20,6 +20,10 @@
 # Analyses
 
 - [Data Structure Analysis](development/dsa.md) Basil Memory Analysis
+- [Translation Validation](development/tv/index.md)
+  - [Public API](development/tv/tv-api.md)
+  - [Implementation](development/tv/implementation.md)
+  - [Assumptions](development/tv/assumptions.md)
 
 # Development
 

diff --git a/docs/src/development/profiling.md b/docs/src/development/profiling.md
@@ -34,4 +34,20 @@ sudo sysctl kernel.perf_event_paranoid=1
 sudo sysctl kernel.kptr_restrict=0
 ```
 
+## Identifying memory leaks
+
+Memory leaks can be identified using async profiler.
+
+E.g. build an assembly `./mill assembly` to get `out/assembly.dest/out.jar`
+
+Then use asprof with flags `-e alloc --total --live`.
+
+```
+./async-profiler-4.0-linux-x64/bin/asprof -e alloc -o flamegraph --live -f alloc.html --total -d 60 out.jar
+```
+
+This will show the total allocation count only for references that are still live, i.e. allocations with a reference still retained
+somewhere.
+
+[related blog article](https://web.archive.org/web/20240228031308/https://krzysztofslusarski.github.io/2022/11/27/async-live.html).
 
diff --git a/docs/src/development/tv/assumptions.md b/docs/src/development/tv/assumptions.md
@@ -0,0 +1,113 @@
+# Lifter Assumptions
+
+A transform pass may want to drive a code transform with a reasonable assumption
+about the program's behaviour that it cannot technically prove with analysis.
+
+One such example is the `AssumeCallPreserved` pass which modifies
+procedure parameter lists on the assumption that the program
+respects the arm64 calling convention.
+
+To allow this, we introduce an assertion to the program, encoding the
+assumption that was made by the transform. When the program
+is eventually verified in Boogie, we discharge this assumption,
+proving that our assumption was sound.
+
+The translation validation pipeline proves that as long as 
+the assumption holds, the programs have the same behaviour.
+Because the introduction of an assert statement changes the
+program behaviour, we need to add the specification that
+traces on which the assertion fails can be ignored.
+
+The procedure `_start` is an interesting example.
+
+We can use the script in `scripts/soundnesslitmuscvc5wrapper.sh` to get the number of
+program statements contributing to the unsat core of the program:
+
+
+```
+
+tvsmt/simplifyCFG-_start_4213248.smt2
+66% contributed  	171/259 asserts
+
+tvsmt/Parameters-_start_4213248.smt2
+66% contributed  	145/219 asserts
+
+tvsmt/AssumeCallPreserved-_start_4213248.smt2
+68% contributed  	177/260 asserts
+
+tvsmt/DSA-_start_4213248.smt2
+53% contributed  	169/316 asserts
+
+tvsmt/CopyProp-_start_4213248.smt2
+4% contributed  	11/257 asserts
+
+tvsmt/GuardCleanup-_start_4213248.smt2
+7% contributed  	10/138 asserts
+
+```
+
+The procedure `_start` contains an unresolved indirect tail call,
+so the pass AssumeCallPreserved injects assertions that don't hold (TV still passes, as it's not TV's responsibility to prove them).
+Subsequent analyses that leverage this assumption are then not sound, and we get TV passing trivially due to
+`assert eq(0x404a34:bv64, 0x404b64:bv64) { .comment = "R30 = R30_in" }; ~> assert false`. This is the expected behavior: a
+program with an assert false is obviously a program that doesn't verify.
+
+This unsat core is showing roughly that the translation validation is passing vacuously because
+the copyprop transform has derived false from the assertion we introduced.
+
+```
+cvc5 --dump-unsat-cores tvsmt/CopyProp-_start_4213248.smt2
+unsat
+(
+source5
+source47
+source54
+source56
+source51
+source57
+source53
+source58
+source69
+source70
+source48
+)
+```
+
+<details>
+    <summary>full _start IL after copyprop pass</summary>
+
+```
+
+  (R0_in:bv64, R10_in:bv64, R11_in:bv64, R12_in:bv64, R13_in:bv64, R14_in:bv64, R15_in:bv64, R16_in:bv64, R17_in:bv64, R18_in:bv64, R1_in:bv64, R29_in:bv64, R2_in:bv64, R30_in:bv64, R31_in:bv64, R3_in:bv64, R4_in:bv64, R5_in:bv64, R6_in:bv64, R7_in:bv64, R8_in:bv64, R9_in:bv64, _PC_in:bv64)
+    -> (R0_out:bv64, R1_out:bv64, R2_out:bv64, R3_out:bv64, R4_out:bv64, R5_out:bv64, R6_out:bv64, R7_out:bv64, _PC_out:bv64)
+  { .name = "_start"; .address = 0x404a00 }
+[
+  block %_start_entry {.address = 0x404a00; .originalLabel = "ufjL9zmpTde18uF80OwPVQ=="} [
+    var R5_2: bv64 := bvor(0x0:bv64, bvshl(R0_in:bv64, 0x0:bv64));
+    var var1_4213376_bv64_1: bv64 := load le $mem R31_in:bv64 64;
+    var var2_4202816_bv64_1: bv64 := load le $mem bvadd(0x430000:bv64, 0x40:bv64) 64;
+    assert eq(0x404b64:bv64, 0x404b64:bv64) { .comment = "R30 = R30_in" };
+    var (R0_4:bv64, R10_2:bv64, R11_2:bv64, R12_2:bv64, R13_2:bv64, R14_2:bv64, R15_2:bv64, R16_4:bv64, R17_3:bv64, R18_2:bv64, R1_3:bv64, R29_3:bv64, R2_3:bv64, R3_3:bv64, R4_3:bv64, R5_3:bv64, R6_3:bv64, R7_2:bv64, R8_2:bv64, R9_2:bv64)
+        := call @__libc_start_main (0x404a34:bv64, R10_in:bv64, R11_in:bv64, R12_in:bv64, R13_in:bv64, R14_in:bv64, R15_in:bv64, 0x430040:bv64, var2_4202816_bv64_1:bv64, R18_in:bv64, var1_4213376_bv64_1:bv64, 0x0:bv64, bvadd(R31_in:bv64, 0x8:bv64), 0x404b64:bv64, 0x0:bv64, 0x0:bv64, R5_2:bv64, R31_in:bv64, R7_in:bv64, R8_in:bv64, R9_in:bv64);
+    goto(%phi_5);
+  ];
+  block %_start_10 {.address = 0x404a30; .originalLabel = "eH7LoljnQS6XhgPVv4Qipg=="} [
+    assert eq(0x404a34:bv64, 0x404b64:bv64) { .comment = "R30 = R30_in" };
+    var var2_4203488_bv64_1: bv64 := load le $mem bvadd(0x430000:bv64, 0x190:bv64) 64;
+    assert eq(0x404a34:bv64, 0x404a34:bv64) { .comment = "R30 = R30_in" };
+    var (R0_6:bv64, R10_4:bv64, R11_4:bv64, R12_4:bv64, R13_4:bv64, R14_4:bv64, R15_4:bv64, R16_7:bv64, R17_5:bv64, R18_4:bv64, R1_5:bv64, R29_5:bv64, R2_5:bv64, R3_5:bv64, R4_5:bv64, R5_5:bv64, R6_5:bv64, R7_4:bv64, R8_4:bv64, R9_4:bv64)
+        := call @abort (R0_4:bv64, R10_2:bv64, R11_2:bv64, R12_2:bv64, R13_2:bv64, R14_2:bv64, R15_2:bv64, 0x430190:bv64, var2_4203488_bv64_1:bv64, R18_2:bv64, R1_3:bv64, R29_3:bv64, R2_3:bv64, 0x404a34:bv64, R3_3:bv64, R4_3:bv64, R5_3:bv64, R6_3:bv64, R7_2:bv64, R8_2:bv64, R9_2:bv64);
+    goto(%phi_6);
+  ];
+  block %phi_5 {.originalLabel = "eH7LoljnQS6XhgPVv4Qipg==, ufjL9zmpTde18uF80OwPVQ=="} [
+    goto(%_start_10);
+  ];
+  block %phi_6 {.originalLabel = "eH7LoljnQS6XhgPVv4Qipg=="} [
+    goto(%_start_return);
+  ];
+  block %_start_return [
+    return (R0_6:bv64, R1_5:bv64, R2_5:bv64, R3_5:bv64, R4_5:bv64, R5_5:bv64, R6_5:bv64, R7_4:bv64, _PC_in:bv64);
+  ]
+];
+```
+</details>
diff --git a/docs/src/development/tv/implementation.md b/docs/src/development/tv/implementation.md
@@ -0,0 +1,170 @@
+# Translation Validation Implementation
+
+At the highest level the translation validation takes two programs, an invariant linking them
+and produces, constructs product program describing the simultaneous execution of the two programs, 
+and verifies this program satisfies the invariant.
-At the highest level the translation validation takes two programs, an invariant linking them
-and produces, constructs product program describing the simultaneous execution of the two programs, 
-and verifies this program satisfies the invariant.
+At the highest level, the translation validation takes two programs and an invariant linking them
+and produces a product program describing the simultaneous execution of the two programs.
+Then, it verifies the product program satisfies the invariant.
-At the highest level the translation validation takes two programs, an invariant linking them
-and produces, constructs product program describing the simultaneous execution of the two programs, 
-and verifies this program satisfies the invariant.
+At the highest level, the translation validation takes two programs and an invariant linking them
+and produces a product program describing the simultaneous execution of the two programs.
+Then, it verifies the product program satisfies the invariant.
+
+We initially describe the translation pipeline and structures used throughout this process.
+
+## Phases
+
+### Cut-Transition System
+
+- `TransitionSystem.scala`
+- Transforms a Basil IR program to an equivalent acyclic Basil IR program
+
+A transition system describes one acyclic aggregate program step. This step breaks a single 
+Basil IR program into a program which represents a single acyclic step at a time.
+This effectively fans out every loop in the program into a single loop which is equivalent
+to the original program.
+
+#### Cut transform:
+
+  1. Create a new entry and exit
+  2. Use the program entry as a cut: link it to the new entry and guard it with a specific PC value `ENTRY`
+  3. Use the program exit as a cut: link it to the new exit and set a specific PC value `RETURN`
+  4. Identify each loop header as a cut: set `PC := Loop$i` and redirect through the exit, then add an edge
+     from the entry to the header guarded by the PC value `Loop$i`
+
+### Monadic Local Side-Effect Form
+
+- `SSADAG.scala` and `Ackermann.scala`
+
+This translates the Basil IR program to a program containing three statement types:
+
+1. (Simultaneous) assignment `(a, b, c) := (d, e, f)` (Scala type `SimulAssign`)
+2. Side effect calls : `(TRACE, a, b, c) := EffName (TRACE, d, e, f)` (Scala type `SideEffectStatement`). 
+  This is an uninterpreted function call with multiple/tuple return.
+3. Assumes / Guards
+
+Note the `TRACE` boolean-typed variable here which represents the state passed through the program.
+This is where the "monadic" terminology comes from. A boolean type is sufficient here as it only 
+needs to represent the truth value of the equivalence between the source and target trace.
+
+Think of this `TRACE` as an oracle representing the entire universe, i.e. we assume
+the precondition `TRACE_source == TRACE_target`. This is assuming the programs execute at the
+same time in the same universe state; thus it captures external IO, assuming
+both programs will always receive identical external inputs if they are invariantly 
+in the same state.
+
+A frame analysis is used to identify the interprocedural effects of calls. This transform
+pulls these side effects (memory access, global variable access) into the parameter list
+of the side-effect statement.
+
+### SSA Form
+
+This performs a naive SSA transform (not considering loops) on the Monadic program.
+
+It introduces reachability predicates (`${blockName}_done`) for the end of every block. 
+This predicate is the conjunction of 
+
+1. the disjunction of the reachability of its predecessors and
+2. conjunction of all assume statements in the block.
+
+Note the phi nodes have a slightly odd structure so they fit in the existing Basil IR.
+In the below code, the assume at the start of block `l3` represents the phi node
+joining `l1` and `l2`.
+
+
+```c
+block l1  [
+  r0_0 := 1; // was r0 := 1
+  goto  l3;
+];
+block l2  [
+  r0_1 := 2; // was r0 := 2
+  goto  l3;
+];
+block l3  [
+  assume ((l1_done ==> r0_3 == r0_0) && (l2_done ==> r0_3 == r0_1));
+  ret; 
+];
+```
+
+This transform returns a function which renames an un-indexed expression 
+to one in terms of the ssa-indexed variables defined at a given block.
+
+### Ackermannisation
+
+- This is an invariant inference pass performed on the SSA-form program
+
+This is a transform which soundly performs the reasoning about the correspondence of
+side effects in the product program ahead of verification-time. 
+
+At a high level, assume we have side-effect statements in the source and target program below:
+
+```
+// source program:
+(source_TRACE_1, source_R0_1) := source_Load_mem_64 (source_TRACE, source_R1);
+// target program:
+(target_TRACE_1, target_R0_1) := target_Load_mem_64 (target_TRACE, target_R1);
+```
+
+Analogous to the congruence rule of uninterpreted functions, we have the axiom:
+
+```
+\forall ts, tt, r0s, r0t :: tt == ts && r0s == r0t 
+  ==> source_Load_mem_64(ts, r0s) == target_Load_mem_64(tt, r0t)
+```
+
+I.e. these loads have the same effect as long as they are the same address and occur
+in the same state.
+
+We would want to instantiate this axiom whenever we have two corresponding
+source and target loads, but really we only care about those that
+are already likely to line up. Instead of letting the SMT solver
+decide when to instantiate this axiom we use the control-flow graph, 
+and the requirement that transforms must preserve the order and number
+of side-effects to instantiate exactly only the instances of the axiom that
+the verification will need.
-are already likely to line up. Instead of letting the SMT solver
-decide when to instantiate this axiom we use the control-flow graph, 
-and the requirement that transforms must preserve the order and number
-of side-effects to instantiate exactly only the instances of the axiom that
-the verification will need.
+are already likely to line up. Instead of letting the SMT solver
+decide when to instantiate this axiom, we use the control-flow graph,
+along with the requirement that transforms must preserve the order and number
+of side-effects, to instantiate only the instances of the axiom that
+the verification will need.
-are already likely to line up. Instead of letting the SMT solver
-decide when to instantiate this axiom we use the control-flow graph, 
-and the requirement that transforms must preserve the order and number
-of side-effects to instantiate exactly only the instances of the axiom that
-the verification will need.
+are already likely to line up. Instead of letting the SMT solver
+decide when to instantiate this axiom, we use the control-flow graph,
+along with the requirement that transforms must preserve the order and number
+of side-effects, to instantiate only the instances of the axiom that
+the verification will need.
+
+This is done by walking the source and target CFGs in lockstep,
+identifying matching side-effects and adding the
+body of the axiom as an assertion to the verification condition.
+
+- After this is performed all `SideEffectStatement` are removed from the program.
+
+### Passified Form
+
+Since we have SSA form, the semantics of assignment are unnecessary, so we replace
+every assignment with an `Assume` stating the equality of assignees.
+
+We now have a program consisting only of `Assume` statements.
+
+### SMT
+
+- `TranslationValidate.scala`
+
+- Infer invariant component at each cut and rename for the SSA renaming at the corresponding cut
+  - Rename free variables for ssa indexes for synth entry precondition and emit assertion
+  - Rename free variables for ssa indexes for synth exit and emit negated assertion
+- Add every assume from the passified program to the SMT query
+- Add the initial invariant to the SMT query, add the negated exit-invariant to the SMT query.
+
+This is built with `JavaSMT` and Basil's internal SMT builder.
+
+
+## Debugging
+
+### Validation Failure
+
+When immediate verification is enabled and `sat` is returned, the validator emits an `.il` file and a
+CFG containing a fake representation of the passified product
+program. It attempts to annotate the CFG with the model; however, note that it often
+incorrectly relates source variables to target variables (due to mis-alignment of blocks, assigns, SSA-indexing),
+so this cannot be taken as given.
+
+### Unsoundness
+
+A litmus-test for the soundness of the verification is to generate the unsat core for the dumped SMT query.
+If the verification is substantive, the unsat core should contain the entire transition system:
+assertions named `source$number` and `tgt$number`.
+
+
+# Split Optimisation
+
+For large procedures we break down the proof based on the entry cut. Because we always ahve the precondition
-For large procedures we break down the proof based on the entry cut. Because we always ahve the precondition
+For large procedures, we break down the proof based on the entry cut. Because we always have the precondition
-For large procedures we break down the proof based on the entry cut. Because we always ahve the precondition
+For large procedures, we break down the proof based on the entry cut. Because we always have the precondition
+that we start in the same entry cut, for each possible entry we select the edge corresponding to that
+entry and remove all other outgoing edges from the entry point. This assumption is then fully propagated
+through by a dead code elimination. (In fact we remove the edge before the SSA pass so the flow is removed from the program).
+