diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index b93493388..000000000 --- a/.gitattributes +++ /dev/null @@ -1,3 +0,0 @@ -*.js linguist-vendored=true -*.css linguist-vendored=true -*.html linguist-vendored=true \ No newline at end of file diff --git a/.github/workflows/deploy-branch-snapshot.yml b/.github/workflows/deploy-branch-snapshot.yml deleted file mode 100644 index 4f745a6f2..000000000 --- a/.github/workflows/deploy-branch-snapshot.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Branch Documentation Snapshot - -permissions: - pages: write - deployments: write - contents: write - -on: - push: - branches: - - main - - develop - - paths: - - 'mkdocs.yml' - - 'docs/**' - - '.github/workflows/deploy-branch-snapshot.yml' - -concurrency: pages - -jobs: - deploy-snapshot: - uses: secure-software-engineering/actions/documentation/pin-version/action.yml@develop - with: - latest_branch: develop - version: "maven" \ No newline at end of file diff --git a/.github/workflows/deploy-pr-preview.yml b/.github/workflows/deploy-pr-preview.yml deleted file mode 100644 index a0d1cdf28..000000000 --- a/.github/workflows/deploy-pr-preview.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: PR Documentation Preview - -permissions: - pages: write - deployments: write - contents: write - pull-requests: write - -on: - pull_request: - types: - - opened - - closed - - synchronize - - reopened - paths: - - 'mkdocs.yml' - - 'docs/**' - - '.github/workflows/deploy-pr-preview.yml' - -concurrency: pages - -jobs: - deploy-preview: - uses: secure-software-engineering/actions/documentation/handle-pr-preview/action.yml@develop - with: - enable_comment: true - title_prefix: "Boomerang PR Preview: " \ No newline at end of file diff --git a/.github/workflows/doc_deployment.yml b/.github/workflows/doc_deployment.yml new file mode 100644 index 000000000..963aabf64 --- /dev/null +++ b/.github/workflows/doc_deployment.yml @@ -0,0 +1,60 @@ +name: Documentation Deployment + +on: + push: + 
branches: + - develop + tags: + - '*' + +concurrency: + group: gh-pages + +jobs: + # On push/merge to develop: Deploy the current doc as default/latest + deploy-doc-snapshots: + name: Deploy Snapshot Documentation + if: ${{ github.event_name == 'push' }} + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Extract Maven Version + id: version + run: | + VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Deploy Snapshot Documentation + uses: secure-software-engineering/actions/documentation/handle-deployment@develop + with: + name: ${{ steps.version.outputs.version }} + title: ${{ steps.version.outputs.version }} + + # On tag creation (i.e. new release): Deploy a stable version to directory with tag + deploy-doc-stable: + name: Deploy Stable Documentation + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ref: ${{ github.ref }} + fetch-depth: 0 + + - name: Deploy Stable Documentation + uses: secure-software-engineering/actions/documentation/handle-deployment@develop + with: + name: ${{ github.ref_name }} + title: ${{ github.ref_name }} + stable: true diff --git a/.github/workflows/doc_preview.yml b/.github/workflows/doc_preview.yml new file mode 100644 index 000000000..3db2dbdc0 --- /dev/null +++ b/.github/workflows/doc_preview.yml @@ -0,0 +1,36 @@ +name: Documentation Preview + +on: + pull_request: + types: + - opened + - closed + - synchronize + - reopened + paths: + - mkdocs.yml + - docs/** + - .github/workflows/doc_preview.yml + +concurrency: + group: gh-pages + +jobs: + deploy-preview: + name: Preview documentation + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout Repository + uses: 
actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Create Documentation Preview + uses: secure-software-engineering/actions/documentation/handle-pr-preview@develop + with: + preview-name: pr-${{ github.event.pull_request.number }} + preview-title: Preview for PR-${{ github.event.pull_request.number }} diff --git a/.gitignore b/.gitignore index efb7ca307..209c89908 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ shippable/ *.prefs *.xml **/target +**/site diff --git a/docs/boomerang/allocation_sites.md b/docs/boomerang/allocation_sites.md new file mode 100644 index 000000000..f457b0bbf --- /dev/null +++ b/docs/boomerang/allocation_sites.md @@ -0,0 +1,169 @@ +# Defining Allocation Sites + +Boomerang provides an interface that allows the definition of individual allocation sites. An allocation site is a value that should be considered as a points-to object. + + +## Allocation Site Interface + +To define an individual allocation site, we have to implement the `IAllocationSite` interface and override its method `getAllocationSite(...)` that returns an optional `AllocVal`. +An `AllocVal` represents an allocation site and acts as a wrapper for the allocation site statement and value. +If the optional is present, the `AllocVal` is added to the resulting allocation sites. + +When performing a backward analysis, Boomerang calls this method on each statement on each data-flow path. +It provides three parameters to the method `getAllocationSite`: + +- Method: The current method +- Statement: The current statement that may contain an allocation site +- Val: The current propagated data-flow fact + +These parameters necessitate two checks that should be part of each allocation site implementation: + +- Check whether the statement is an assignment +- Check whether the left operand of the assignment is equal to the propagated data-flow fact + +The first point is relevant because an allocation site is defined as an assignment. 
+The second aspect is relevant to avoid returning statements that are not relevant to the points-to analysis. +Boomerang propagates only data-flow facts that are relevant to or alias with the query variable. +Therefore, one can exclude irrelevant assignments with the second check. + +To this end, a self-defined allocation site should have at least the following code: + +```java +public class ExtendedAllocationSite implements IAllocationSite { + + @Override + public Optional getAllocationSite(Method method, Statement statement, Val fact) { + // Check for assignments + if (!statement.isAssignStmt()) { + return Optional.empty(); + } + + Val leftOp = statement.getLeftOp(); + Val rightOp = statement.getRightOp(); + // Check for correct data-flow fact + if (!leftOp.equals(fact)) { + return Optional.empty(); + } + + // rightOp is a potential allocation site + ... + } +} +``` + +Last, to use our self-defined allocation site, we need to add it to the options: + +```java +BoomerangOptions options = + BoomerangOptions.builder() + .withAllocationSite(new ExtendedAllocationSite()) + ... + .build(); +``` + +## Simple Allocation Site + +To show how an implementation of the `IAllocationSite` interface may look like, we consider the following simple example: + +Assume our program requires *constants* and *new expressions* as allocation sites. 
+Then, the interface implementation may look like this: + +```java +public class SimpleAllocationSite implements IAllocationSite { + + @Override + public Optional getAllocationSite(Method method, Statement statement, Val fact) { + // Check for assignments + if (!statement.isAssignStmt()) { + return Optional.empty(); + } + + Val leftOp = statement.getLeftOp(); + Val rightOp = statement.getRightOp(); + // Check for correct data-flow fact + if (!leftOp.equals(fact)) { + return Optional.empty(); + } + + // Constant allocation sites: var = + if (rightOp.isConstant()) { + AllocVal allocVal = new AllocVal(leftOp, statement, rightOp); + return Optional.of(allocVal); + } + + // New expressions: var = new java.lang.Object + if (rightOp.isNewExpr()) { + AllocVal allocVal = new AllocVal(leftOp, statement, rightOp); + return Optional.of(allocVal); + } + + return Optional.empty(); + } +} +``` + +Using this allocation site implementation, Boomerang returns values that are either *new expressions* (e.g. `new java.lang.Object`) or *constants* (e.g. int, String etc.). + +## Allocation Site with DataFlowScope + +In many cases, we are interested in finding an allocation site to analyze it. +However, a common scenario where Boomerang cannot find an allocation site occurs when a data-flow path ends because we have a function call that is not part of the application. +For example, using the `SimpleAllocationSite` from the previous section, Boomerang would not find an allocation site in the following program: + +```java +String s = System.getProperty("property"); // Most precise allocation site +... +queryFor(s); +``` + +Boomerang does not compute an allocation site because `System.getProperty("property")` is not a *constant* or a *new expression*. +Additionally, we may be interested in analyzing only our own application, that is, we do not load the JDK class `java.lang.System` and exclude it in the `DataFlowScope`. 
+In this case, Boomerang returns an empty results set because the data-flow path ends at the call `System.getProperty("property")`. + +To cover these scenarios, we can include the `DataFlowScope` in the allocation site implementation. +For example, we can extend the [DefaultAllocationSite](https://github.com/secure-software-engineering/Boomerang/blob/develop/boomerangPDS/src/main/java/boomerang/options/DefaultAllocationSite.java) as follows: + +```java +public class ExtendedDataFlowScope extends DefaultAllocationSite { + + private final DataFlowScope dataFlowScope; + + public ExtendedDataFlowScope(DataFlowScope dataFlowScope) { + this.dataFlowScope = dataFlowScope; + } + + @Override + public Optional getAllocationSite(Method method, Statement statement, Val fact) { + // Check for assignments + if (!statement.isAssignStmt()) { + return Optional.empty(); + } + + Val leftOp = statement.getLeftOp(); + Val rightOp = statement.getRightOp(); + // Check for correct data-flow fact + if (!leftOp.equals(fact)) { + return Optional.empty(); + } + + // Check for function calls that would end the data-flow path + // If the function call is not excluded, Boomerang can continue with the analysis + if (statement.containsInvokeExpr()) { + InvokeExpr invokeExpr = statement.getInvokeExpr(); + DeclaredMethod declaredMethod = invokeExpr.getDeclaredMethod(); + + if (dataFlowScope.isExcluded(declaredMethod)) { + // rightOp is the invoke expression + AllocVal allocVal = new AllocVal(leftOp, statement, rightOp); + return Optional.of(allocVal); + } + } + + // If the statement does not contain a function call, we continue with the default behavior + return super.getAllocationSite(method, statement, fact); + } +} +``` + +With this implementation, we cover function calls that would end the analysis, and we can conclude that the allocation site cannot be computed precisely. 
+For example, having `System.getProperty("property")` as an allocation site indicates that the query variable points to some object that depends on some system variables at runtime. diff --git a/docs/boomerang/boomerang_setup.md b/docs/boomerang/boomerang_setup.md new file mode 100644 index 000000000..e3873b56d --- /dev/null +++ b/docs/boomerang/boomerang_setup.md @@ -0,0 +1,103 @@ +# Boomerang Setup + +Boomerang's purpose is the computation of points-to information for a variable on-demand. +Starting at a specific statement, it traverses the program and its data-flow paths backwards until it finds an allocation site for the desired variable. +While doing that, it computes relevant alias information. + +In the following sections, we give an overview of relevant constructs and API calls. +We highly recommend taking a look at the [Examples](./../boomerang/examples.md) to see the best way to combine these constructs. + +## Backward Queries + +Boomerang uses *backward queries* to compute relevant points-to information. +A **BackwardQuery** consists of a statement `s` and a variable `v`. `s` is the starting statement where the backwards analysis starts and `v` is the data-flow fact to solve for. + +Backward queries can be easily constructed. +However, due to Boomerang's scope implementation, we need to specify the corresponding control-flow graph edge with the starting statement `s` as target (see the [Boomerang Scopes](./../general/boomerang_scope.md)). +With that, we can construct a backward query as follows: + +```java +public void createBackwardQuery(ControlFlowGraph.Edge edge, Val fact) { + BackwardQuery query = BackwardQuery.make(edge, fact); +} +``` + +## Running Boomerang + +Boomerang requires a [FrameworkScope](./../general/framework_scopes.md) and a set of [Options](./../boomerang/options.md). 
With that, we can solve a backward query as follows: + +```java +public void solveQuery( + BackwardQuery query, + FrameworkScope scope, + BoomerangOptions options) { + Boomerang solver = new Boomerang(scope, options); + BackwardBoomerangResults results = solver.solve(query); +} +``` + +The call to `solve` solves the query and returns a wrapper for the results. + +!!! Important + A `Boomerang` instance can be used to solve exactly one query. + If you want to solve multiple queries with the same instance, you have to set [allowMultipleQueries](./../boomerang/options.md) in the options to `true` and you have to call `unregisterAllListeners()` after each call to `solve`. + This may look like this: + + ```java + public void solveQueries( + Collection<BackwardQuery> queries, + FrameworkScope scope, + BoomerangOptions options) { + Boomerang solver = new Boomerang(scope, options); + + for (BackwardQuery query : queries) { + BackwardBoomerangResults results = solver.solve(query); + // Process the results before solving the next query + solver.unregisterAllListeners(); + } + } + ``` + +## Extracting Allocation Sites + +After running Boomerang, we can use the results to compute the allocation sites, i.e. the objects the query variable points to. An allocation site `AllocVal` is wrapped into a `ForwardQuery` object. Note that the computed allocation sites heavily depend on the used [AllocationSite](./../boomerang/allocation_sites.md) definition. 
We can extract the corresponding `AllocVal` objects as follows: + +```java +public void extractAllocationSites(BackwardBoomerangResults results) { + // Compute the allocation sites + Collection<ForwardQuery> allocationSites = results.getAllocationSites().keySet(); + + for (ForwardQuery query : allocationSites) { + // This is a single allocation site + AllocVal allocVal = query.getAllocVal(); + System.out.println( + "Query variable points to " + + allocVal.getAllocVal() + + " @ statement " + + allocVal.getAllocStatement() + + " @ line " + + allocVal.getAllocStatement().getLineNumber() + + " in method " + + allocVal.getAllocStatement().getMethod()); + } +} +``` + +## Extracting Aliases + +Besides the allocation sites, we can use the results to compute the aliases for the query variable. An alias is represented by an `AccessPath` that holds the base variable and the field chain. For example, an alias `x.f.g` is represented by an `AccessPath` with the base `x` and the field chain `[f, g]`. We can compute the access paths as follows: + +```java +public void extractAliases(BackwardBoomerangResults results) { + Collection<AccessPath> aliases = results.getAllAliases(); + + System.out.println("Found the following aliases:"); + for (AccessPath alias : aliases) { + // 'toCompactString()' transforms the access path into a basic String, e.g. x.f.g + System.out.println(alias.toCompactString()); + } + +} +``` + +// TODO Aliases at specific statement diff --git a/docs/boomerang/examples.md b/docs/boomerang/examples.md new file mode 100644 index 000000000..727874ed0 --- /dev/null +++ b/docs/boomerang/examples.md @@ -0,0 +1,19 @@ +# Examples + +## Taint Analysis +A **Taint Analysis** is a common use case for Boomerang. Our goal is to decide whether a variable points to a specific object (*source*) (e.g. a password) that is unintentionally used as a parameter in a method call (*sink*) (e.g. a print statement). 
+ +Assume we have the following program: + +```java +A a1 = new A(); // Object o +A a2 = a1; // Create an alias, i.e. a1 and a2 point to o + +Object s = source(); // Read some tainted value +a1.f = s; // Store tainted value in field of o + +Object z = a2.f; // Read the field from o +sink(z); // Is the tainted value used in the sink? +``` + +In this program, the variable `s` points to some tainted value that should not be used in a sink. Although `s` aliases with the field `f` of `a1` and we read the field `f` of `a2`, the tainted value `s` is still used in the sink because `a1` and `a2` alias. diff --git a/docs/boomerang/options.md b/docs/boomerang/options.md new file mode 100644 index 000000000..3b416915b --- /dev/null +++ b/docs/boomerang/options.md @@ -0,0 +1 @@ +# Boomerang Options \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md deleted file mode 100644 index 14ac9e2ca..000000000 --- a/docs/examples.md +++ /dev/null @@ -1 +0,0 @@ -# Example Analyses diff --git a/docs/general/boomerang_scope.md b/docs/general/boomerang_scope.md new file mode 100644 index 000000000..0bae3882b --- /dev/null +++ b/docs/general/boomerang_scope.md @@ -0,0 +1,235 @@ +# Boomerang Scope + +Boomerang defines its own scope that is not related to any static analysis framework. +The scope consists of a set of interfaces and classes that specify relevant information required by Boomerang to perform its analyses. +Currently, we provide a scope implementations for the static analysis frameworks [Soot](https://github.com/soot-oss/soot), [SootUp](https://github.com/soot-oss/sootup) and [Opal](https://github.com/opalj/opal) (see the [FrameworkScopes](framework_scopes.md)). +The scopes contain implementations for all relevant interfaces and objects s.t. Boomerang can be used with those frameworks without the need of additional implementation. + +## Dealing with Framework Objects + +The Boomerang scope is designed to be as similar as possible to the analysis frameworks. 
+That is, for most Boomerang scope object there is a corresponding object in the analysis framework that is used to implement the required interface methods. +For example, the Boomerang scope has an abstract class `Method` that expects implementations to override certain methods: + +```java +public abstract class Method { + + public abstract boolean isStaticInitializer(); +} +``` + +The concrete scope implementations extend this class by delegating the corresponding objects and using them to override the required methods. +For example, for the class `Method`, we have the following (shortened) implementations: + +=== "Soot" + ```java + public class JimpleMethod extends Method { + // SootMethod is the corresponding method object in Soot + private final SootMethod delegate; + + @Override + public boolean isStaticInitializer() { + return delegate.isStaticInitializer(); + } + } + ``` + +=== "SootUp" + ```java + public class JimpleUpMethod extends Method { + // JavaSootMethod is the corresponding method object in SootUp + private final JavaSootMethod delegate; + + @Override + public boolean isStaticInitializer() { + return delegate.isStaticInitializer(); + } + ``` + +=== "Opal" + ```scala + // The method object from the br package is the corresponding object in Opal + class OpalMethod(val delegate: org.opalj.br.Method) extends Method { + + override def isStaticInitializer: Boolean = delegate.isStaticInitializer + } + ``` + +With this setup, one can easily instantiate scope objects from the analysis framework objects and access delegated objects. +The objects from each framework scope can be identified by their name: + +- The scope objects for Soot are denoted with the prefix **Jimple** (e.g. `JimpleMethod`, `JimpleStatement` etc.) +- The scope objects for SootUp are denoted with the prefix **JimpleUp** (e.g. `JimpleUpMethod`, `JimpleUpStatement` etc.) +- The scope objects for Opal are denoted with the prefix **Opal** (e.g. `OpalMethod`, `OpalStatement` etc.) 
+ +To simplify the process, we provide a `ScopeConverter` for each framework scope. +These utility classes have basic methods to construct scope objects and extract delegated objects. +This concept may be relevant when working with Boomerang's results because Boomerang returns the general Boomerang scope object. +For example, we can work with a `Method` and a `Statement` as follows: + +=== "Soot" + ```java + // Assumption: 'stmt' is a statement in 'sootMethod' + public void scopeObjects(Scene scene, SootMethod sootMethod, Stmt stmt) { + // Create a method and statement from the Boomerang scope for Soot + Method jimpleMethod = SootScopeConverter.createJimpleMethod(sootMethod, scene); + Statement jimpleStatement = SootScopeConverter.createJimpleStatement(stmt, jimpleMethod); + + // Extract the delegated objects + SootMethod extractedSootMethod = SootScopeConverter.extractSootMethod(jimpleMethod); + Stmt extractedStmt = SootScopeConverter.extractSootStatement(jimpleStatement); + } + ``` + +=== "SootUp" + ```java + // Assumption: 'stmt' is a statement in 'sootMethod' + public void scopeObjects(JavaView view, JavaSootMethod sootMethod, Stmt stmt) { + // Create a method and statement from the Boomerang scope for SootUp + Method jimpleUpMethod = SootUpScopeConverter.createJimpleUpMethod(sootMethod, view); + Statement jimpleUpStatement = SootUpScopeConverter.createJimpleUpStatement(stmt, jimpleUpMethod); + + // Extract the delegated objects + JavaSootMethod extractedSootUpMethod = SootUpScopeConverter.extractSootUpMethod(jimpleUpMethod); + Stmt extractedStmt = SootUpScopeConverter.extractSootUpStatement(jimpleUpStatement); + } + ``` + +=== "Opal" + ```scala + // Assumption: 'stmt' is a statement in 'method' + def scopeObjects(project: Project[_], method: Method, stmt: Stmt[TacLocal]): Unit = { + // Create a method and statement from the Boomerang scope for Opal + val opalMethod = OpalScopeConverter.createOpalMethod(method, project) + val opalStatement = 
OpalScopeConverter.createOpalStatement(stmt, opalMethod) + + // Extract the delegated objects + val extractedMethod = OpalScopeConverter.extractOpalMethod(opalMethod); + val extractedStmt = OpalScopeConverter.extractOpalStatement(opalStatement); + } + ``` + +## CallGraph + +The Boomerang scope contains its own call graph representation that is used to compute data-flows during the analysis. +Each framework scope provides a parser that transforms a generated call graph into the corresponding Boomerang scope representation that is applied when instantiating a [FrameworkScope](framework_scopes.md). +The corresponding call graphs are accessible from the scope instances as follows: + +=== "Soot" + ```java + SootFrameworkScope scope = new SootFrameworkScope(...); + SootCallGraph callGraph = scope.getCallGraph(); + System.out.println("CallGraph has " + callGraph.size() + " edges"); + ``` + +=== "SootUp" + ```java + SootUpFrameworkScope scope = new SootUpFrameworkScope(...); + SootUpCallGraph callGraph = scope.getCallGraph(); + System.out.println("CallGraph has " + callGraph.size() + " edges"); + ``` + +=== "Opal" + ```scala + val scope = new OpalFrameworkScope(...) + val callGraph = scope.getCallGraph + println(s"CallGraph has ${callGraph.size} edges"); + ``` + +## DataFlowScope + +The data-flow scope determines the program's scope that is analyzed. +By default, Boomerang computes data-flows along the complete reachable program. +However, in many scenarios, only a subset of the target program is of interest during the analysis. +For example, we are only interested in data-flow paths that belong to the application. +To this end, the data-flow scope allows the exclusion of methods to reduce the data-flows. + +A `DataFlowScope` defines two methods `isExcluded(...)` that evaluate whether a method should be excluded from the analysis. +Boomerang calls these methods at each call site and when entering a new method. 
+If the methods evaluate to `true`, Boomerang skips the methods and steps over the corresponding call site. +For example, we can define a `DataFlowScope` that excludes non-application classes and methods with the name `callSite` as follows: + +```java +public class ExtendedDataFlowScope implements DataFlowScope { + + @Override + public boolean isExcluded(Method method) { + // Exclude methods from non-application classes + if (!method.getDeclaringClass().isApplicationClass()) return true; + // Exclude methods with the name 'callSite' + if (method.getName().equals("callSite")) return true; + + // All other methods should be analyzed + return false; + } + + @Override + public boolean isExcluded(DeclaredMethod declaredMethod) { + // Exclude call sites from non-application classes + if (!declaredMethod.getDeclaringClass().isApplicationClass()) return true; + // Exclude call sites with the name 'callSite' + if (declaredMethod.getName().equals("callSite")) return true; + + // All other call sites should be analyzed + return false; + } +} +``` + +// TODO Example + +!!! Important + You should always make sure that potential allocation sites are considered when excluding call sites. + In the example above, we exclude the call to `System.getProperty` because the class `java.lang.System` is not an application class. + At this point, the data-flow stops because Boomerang cannot compute the returned value. + To deal with such cases, we provide a solution when defining the [AllocationSite](./../boomerang/allocation_sites.md#allocation-site-with-dataflowscope). + +## Queries + +## AnalysisScope + +Boomerang provides an `AnalysisScope` to compute initial queries along the complete reachable program. +It traverses the call graph starting at the entry points that are defined in the [FrameworkScopes](framework_scopes.md) while respecting their `DataFlowScopes`. 
+ +The `AnalysisScope` calls a method `generate` on each reachable control-flow graph edge where we can decide whether a query should be generated. +For example, we may be interested in the backward analysis of the first parameter of calls to a method `sink`. +Then, we can implement an `AnalysisScope` as follows: + +```java +public class SinkAnalysisScope extends AnalysisScope { + + public SinkAnalysisScope(FrameworkScope scope) { + super(scope); + } + + @Override + protected Collection generate(ControlFlowGraph.Edge edge) { + // Backward solve means that the current statement is the target + Statement stmt = edge.getTarget(); + if (stmt.containsInvokeExpr()) { + InvokeExpr invokeExpr = stmt.getInvokeExpr(); + DeclaredMethod declaredMethod = invokeExpr.getDeclaredMethod(); + + if (declaredMethod.getName().equals("sink") && invokeExpr.getArgs().size() > 0) { + Val arg = invokeExpr.getArg(0); + + // Create the query to analyze the first parameter from the call to 'sink' + BackwardQuery query = BackwardQuery.make(edge, arg); + return Collections.singleton(query); + } + } + + return Collections.emptySet(); + } +} +``` + +We can use this implementation to compute relevant queries across the complete program as follows (see [FrameworkScopes](framework_scopes.md) on how to initialize a framework scope): + +```java +FrameworkScope scope = ...; +AnalysisScope analysisScope = new SinkAnalysisScope(scope); +Collection queries = analysisScope.computeSeeds(); +``` + +The collection `queries` contains all queries for a statement `sink(v, ...)` that can be solved with the [Boomerang Solver](./../boomerang/boomerang_setup.md). 
diff --git a/docs/general/framework_scopes.md b/docs/general/framework_scopes.md new file mode 100644 index 000000000..85d4aae5a --- /dev/null +++ b/docs/general/framework_scopes.md @@ -0,0 +1,171 @@ +# Framework Scopes + +We provide an implementation of the BoomerangScope for the static analysis frameworks [Soot](https://github.com/soot-oss/soot), [SootUp](https://github.com/soot-oss/sootup) and [Opal](https://github.com/opalj/opal). +Depending on the framework that you plan to use, include the following dependencies in your project (replace `x.y.z` with the most recent version): + +=== "Soot" + ``` + <dependency> + <groupId>de.fraunhofer.iem</groupId> + <artifactId>boomerangScope-Soot</artifactId> + <version>x.y.z</version> + </dependency> + ``` + +=== "SootUp" + ``` + <dependency> + <groupId>de.fraunhofer.iem</groupId> + <artifactId>boomerangScope-SootUp</artifactId> + <version>x.y.z</version> + </dependency> + ``` + +=== "Opal" + ``` + <dependency> + <groupId>de.fraunhofer.iem</groupId> + <artifactId>boomerangScope-Opal</artifactId> + <version>x.y.z</version> + </dependency> + ``` + +## Setting up a Framework Scope + +Each framework scope consists of the following objects: + +- The static analysis framework's main instance: + - Soot: `Scene` + - SootUp: `JavaView` + - Opal: `Project` +- A call graph computed from the main instance +- A data-flow scope +- A set of entry point methods + +Boomerang uses the framework scope to access the main instance, call graph and data-flow scope during the analysis. +Additionally, you may specify a set of entry point methods that define the starting points in the call graph when using the [AnalysisScope](boomerang_scope.md#analysisscope). + +## Transformations + +To perform the analysis correctly, Boomerang requires an initial transformation step before the actual analysis. +This step consists of initializing fields that are not initialized in the original program. +If a field is not initialized, Java assigns a default value (e.g. `null` for objects) that is not part of the actual program. +Hence, Boomerang is not able to find corresponding allocation sites, so an explicit assignment with corresponding default values is required. 
+ +Additionally, Boomerang provides a transformation that extracts constant parameters and creates new assignments. For example, the following statement + +```java +queryFor(10); +``` + +may be transformed to + +```java +varReplacer = 10; +queryFor(varReplacer); +``` + +The intention of this step is to transform the intermediate code representation into a Boomerang compatible form. +By definition, Boomerang can only solve for query variables (not constants). +This transformation step is optional (enabled by default) and corresponding local variables can be identified by the *varReplacer* name. + +The following snippets show the transformations for Soot and SootUp. Boomerang applies the transformation in Opal automatically, that is, there is no transformer/interceptor. + +=== "Soot" + ```java + // Set to false if constants should not be extracted + BoomerangPreTransformer.TRANSFORM_CONSTANTS = true; + + // Call this after the call graph construction + BoomerangPreTransformer.v().reset(); + BoomerangPreTransformer.v().apply(); + ``` + +=== "SootUp" + ```java + // Set to false if constants should not be extracted + BoomerangPreInterceptor interceptor = new BoomerangPreInterceptor(true); + + // Add the interceptor to the other interceptors when creating a View + List interceptors = List.of(, interceptor); + ``` + +## Example Setup + +The following snippets show an example of the instantiation of the framework scope for each static analysis framework. +Thereby, we construct the call graphs using the CHA algorithm, and we use a data-flow scope that excludes all methods from classes that are not loaded (*phantom* classes). 
+ +=== "Soot" + ```java + // Soot setup + G.reset(); + Options.v().set_whole_program(true); + Options.v().set_output_format(Options.output_format_none); + Options.v().set_no_bodies_for_excluded(true); + Options.v().set_allow_phantom_refs(true); + Options.v().set_keep_line_number(true); + Options.v().set_soot_classpath("VIRTUAL_FS_FOR_JDK" + File.pathSeparator + "path/to/app"); + Options.v().setPhaseOption("jb.sils", "enabled:false"); + Options.v().setPhaseOption("jb", "use-original-names:true"); + + Scene.v().loadNecessaryClasses(); + + // Compute call graph + Options.v().setPhaseOption("cg.cha", "on"); + PackManager.v().getPack("cg").apply(); + + // Do not forget the PreTransformer + BoomerangPretransformer.v().reset(); + BoomerangPretransformer.v().apply(); + + // Framework scope setup + DataFlowScope dataFlowScope = DataFlowScope.EXCLUDE_PHANTOM_CLASSES; + CallGraph callGraph = Scene.v().getCallGraph(); + Collection entryPoints = EntryPoints.v().mainsOfApplicationClasses(); + + FrameworkScope scope = new SootFrameworkScope(Scene.v(), callGraph, entryPoints, dataFlowScope); + + // Read the Boomerang scope call graph + boomerang.scope.CallGraph cg = scope.getCallGraph(); + ``` + +=== "SootUp" + ```java + // SootUp setup (Do not forget the PreInterceptor) + AnalysisInputLocation inputLocation = new JavaClassPathAnalysisInputLocation("path/to/project", SourceType.Application, List.of(new BoomerangPreInterceptor())); + JavaView view = new JavaView(inputLocation); + + // Construct call graph + ClassHierarchyAnalysisAlgorithm cha = new ClassHierarchyAnalysisAlgorithm(view); + MethodSignature mainMethod = cha.findMainMethod(); + CallGraph callGraph = cha.initialize(List.of(mainMethod)); + + // Framework scope setup + DataFlowScope dataFlowScope = DataFlowScope.EXCLUDE_PHANTOM_CLASSES; + Optional entryPoint = view.getMethod(mainMethod); + if (entryPoint.isEmpty()) { + throw new RuntimeException("No main method present"); + } + + FrameworkScope scope = new 
SootUpFrameworkScope(view, callGraph, Collections.singleton(entryPoint.get()), dataFlowScope); + + // Read the Boomerang scope call graph + boomerang.scope.CallGraph cg = scope.getCallGraph(); + ``` + +=== "Opal" + ```scala + // Opal setup + val project = Project(new File("path/to/project")) + + // Compute call graph + val callGraph = project.get(CHACallGraphKey) + + // Framework scope setup + val dataFlowScope = DataFlowScope.EXCLUDE_PHANTOM_CLASSES + val entryPoints = project.allMethodsWithBody.toSet + val scope = new OpalFrameworkScope(project, callGraph, entryPoints, dataFlowScope) + + // Read the Boomerang scope call graph + val opalCallGraph = scope.getCallGraph + ``` diff --git a/docs/installation.md b/docs/general/installation.md similarity index 100% rename from docs/installation.md rename to docs/general/installation.md diff --git a/docs/getting-started.md b/docs/getting-started.md deleted file mode 100644 index 82bdaa534..000000000 --- a/docs/getting-started.md +++ /dev/null @@ -1 +0,0 @@ -# Getting Started with Boomerang diff --git a/docs/index.md b/docs/index.md index d42134da4..9f21f2d0f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1 @@ -# Welcome to Boomerang Documentation +# Welcome to the Documentation of Boomerang diff --git a/mkdocs.yml b/mkdocs.yml index 3052ec30d..d918d1482 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,14 +5,19 @@ edit_uri: edit/develop/docs/ nav: - Home: index.md - - Getting Started: - - Installation: installation.md - - First Steps: getting-started.md - - Example Analyses: examples.md + - General: + - Installation: general/installation.md + - Boomerang Scope: general/boomerang_scope.md + - Framework Scopes: general/framework_scopes.md + - Boomerang: + - Boomerang Setup: boomerang/boomerang_setup.md + - Options: boomerang/options.md + - Allocation Sites: boomerang/allocation_sites.md + - Examples: boomerang/examples.md theme: name: material - logo: ./img/SparseBoomerangLogo.png + logo: ./img/BoomerangLogo.png # 
favicon: ./img/icon.png palette: primary: blue