Skip to content
Open
Show file tree
Hide file tree
Changes from 82 commits
Commits
Show all changes
95 commits
Select commit Hold shift + click to select a range
c28ceb6
Add BfsWithLoggingPathSelector
sergeyrid Jul 7, 2023
26be358
Add features to BfsWithLoggingPathSelector
sergeyrid Jul 10, 2023
7df7145
Add jsonAggregator
sergeyrid Jul 11, 2023
c8f0dfa
Add InferencePathSelector
sergeyrid Jul 11, 2023
a93a138
Fix jsonAggregator and InferencePathSelector
sergeyrid Jul 12, 2023
11ee239
Change jsonAggregator and fix path selectors
sergeyrid Jul 13, 2023
8987aa7
Add graph visualization and alternative reward
sergeyrid Jul 14, 2023
5a33d7a
Fix bugs and paths, add hashes to final json
sergeyrid Jul 18, 2023
5310c7c
Add BlockGraph to BfsWithLoggingPathSelector
sergeyrid Jul 20, 2023
da8e9dd
Add BlockGraph logging
sergeyrid Jul 20, 2023
fb8f93b
Add concurrency and fix bugs
sergeyrid Jul 20, 2023
3523c44
Add features, fix bugs and refactor
sergeyrid Jul 24, 2023
575a266
Add features, fix bugs and refactor
sergeyrid Jul 25, 2023
f85016c
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Jul 26, 2023
4a4bcd0
Add PPO support
sergeyrid Jul 26, 2023
1932d37
Fix path selectors
sergeyrid Jul 31, 2023
c731f02
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Aug 2, 2023
f21c385
Change jsonAggregator and add options
sergeyrid Aug 3, 2023
2093db0
Add options and fix block graph
sergeyrid Aug 8, 2023
03b93a5
Add graph features and refactor
sergeyrid Aug 8, 2023
cfacb55
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Aug 8, 2023
211ae65
Support string and class constants
Saloed Aug 9, 2023
d3fc3a7
Update tests ignore reasons
Saloed Aug 9, 2023
2f5219c
Add gnn inference and refactor main
sergeyrid Aug 10, 2023
52bdecf
Close machine (solver) after test
Saloed Aug 10, 2023
337c494
Mock native methods + few approximations
Saloed Aug 10, 2023
df92f73
Update test ignore reasons
Saloed Aug 10, 2023
db0ce16
Add graph features and probabilities logging
sergeyrid Aug 11, 2023
7860242
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Aug 11, 2023
d6dd569
Merge remote-tracking branch 'origin/saloed/native-mocks' into ml-pat…
sergeyrid Aug 11, 2023
93270eb
Add features, jar processing and other
sergeyrid Aug 16, 2023
71ea504
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Aug 16, 2023
f8491f5
Update tests ignore reasons
Saloed Aug 9, 2023
b69df4f
Mock native methods + few approximations
Saloed Aug 10, 2023
9f22c2e
Update test ignore reasons
Saloed Aug 10, 2023
ed2579b
Fix mocks type constraints
Saloed Aug 11, 2023
f965c98
Update test ignore reasons
Saloed Aug 11, 2023
cb2ce74
Safe test resolver
Saloed Aug 11, 2023
f7e5106
Change test disabling method
Saloed Aug 11, 2023
e71e824
More approximations
Saloed Aug 11, 2023
925a7c9
Scoring in type selector
Saloed Aug 14, 2023
b0da1a4
Timeouts
Saloed Aug 14, 2023
943c7e3
Fix ignore reasons
Saloed Aug 14, 2023
d526ed1
Fix sc test
Saloed Aug 14, 2023
e64a46f
Disable more tests
Saloed Aug 14, 2023
38905e4
Use yices by default
Saloed Aug 14, 2023
3565713
Ensure thread stopped after timeout
Saloed Aug 15, 2023
13823e6
Support IOB in arraycopy approximation
Saloed Aug 15, 2023
b7aa3f9
Enable few tests
Saloed Aug 15, 2023
739223f
Safe logging
Saloed Aug 15, 2023
01b2245
Better timeout handling
Saloed Aug 15, 2023
5da1c9b
tmp
Saloed Aug 17, 2023
a7b4153
Change graphs and refactor
sergeyrid Aug 21, 2023
cfd157b
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Aug 21, 2023
d844878
Merge remote-tracking branch 'origin/saloed/native-mocks' into ml-pat…
sergeyrid Aug 21, 2023
2f93c79
Disable unnecessary logging
sergeyrid Aug 21, 2023
723d705
Add RNN
sergeyrid Aug 22, 2023
b83d742
Make main take jars as input
sergeyrid Aug 23, 2023
ac012a7
Fix, refactor and add options
sergeyrid Aug 23, 2023
acc5780
Add statistics calculation
sergeyrid Aug 25, 2023
f6d4257
Change queue to lru and fix statistics
sergeyrid Aug 25, 2023
cfe7cd1
Fix reward and statistics
sergeyrid Aug 28, 2023
2470cfa
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Aug 31, 2023
652f180
Revert native-mocks changes
sergeyrid Aug 31, 2023
effd951
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Aug 31, 2023
b2615f7
Move ml path selector to separate module
sergeyrid Sep 3, 2023
e4f3704
Separate ml from other modules
sergeyrid Sep 5, 2023
b50f08f
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Sep 5, 2023
8a477ee
Add OtherUMachine
sergeyrid Sep 5, 2023
acfab1b
Refactor
sergeyrid Sep 5, 2023
d7ff815
Add ml environment
sergeyrid Sep 5, 2023
fdc3d7a
Refactor
sergeyrid Sep 6, 2023
4084382
Add README.md
sergeyrid Sep 6, 2023
58c1e16
Change README.md
sergeyrid Sep 6, 2023
736485c
Change README.md
sergeyrid Sep 6, 2023
eb21924
Change README.md
sergeyrid Sep 7, 2023
fe2edde
Uncomment code and fix gradle
sergeyrid Sep 7, 2023
fb5791f
Refactor
sergeyrid Sep 7, 2023
5baa20a
Add test dependencies
sergeyrid Sep 7, 2023
01a795b
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Sep 7, 2023
8278b6e
Fix ModifiedUMachine
sergeyrid Sep 7, 2023
52e2385
Fix ModifiedUMachine
sergeyrid Sep 8, 2023
9625b22
Refactor
sergeyrid Sep 13, 2023
389c5a3
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Sep 13, 2023
587321f
Fix merge
sergeyrid Sep 17, 2023
e7cd950
Merge branch 'main' of https://github.com/UnitTestBot/usvm into ml-pa…
sergeyrid Sep 20, 2023
65a2876
Fix merge
sergeyrid Sep 25, 2023
50f6f1c
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Oct 16, 2023
fe38cea
Fix merge
sergeyrid Oct 18, 2023
629f26b
Refactor BlockGraph
sergeyrid Oct 18, 2023
c689de1
Refactor CoverageCounter
sergeyrid Oct 18, 2023
f837ef3
Refactor CoverageCounterStatistics
sergeyrid Oct 18, 2023
cac5201
Refactor
sergeyrid Oct 30, 2023
542c11e
Merge remote-tracking branch 'origin/main' into ml-path-selection
sergeyrid Oct 30, 2023
72e1a72
Fix merge
sergeyrid Oct 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ include("usvm-core")
include("usvm-jvm")
include("usvm-util")
include("usvm-sample-language")
include("usvm-ml-path-selection")
44 changes: 44 additions & 0 deletions usvm-ml-path-selection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## Machine Learning Path Selector

### Entry point

To run tests with this path selector use `jarRunner.kt`. You can pass a path to a configuration json as the first argument. Gathered statistics will be put in a folder according to your configuration.

### Config

A config object is declared inside `MLConfig.kt`. A detailed description of all the options is listed below:

- `gameEnvPath` - a path to a folder that contains trained models (`rnn_cell.onnx`, `gnn_model.onnx`, `actor_model.onnx`) and a blacklist of tests to be skipped (`blacklist.txt`), also some logs are saved to this folder
- `dataPath` - a path to a folder to save all statistics into
- `defaultAlgorithm` - an algorithm to use if a trained model is not found, must be one of: `BFS`, `ForkDepthRandom`
- `postprocessing` - how actor model's outputs should be processed, must be one of: `Argmax` (choose an id of the maximum value), `Softmax` (sample from a distribution derived from the outputs via the softmax), `None` (sample from the outputs — only when they form a distribution)
- `mode` - a mode for `jarRunner.kt`, must be one of: `Calculation` (to calculate statistics used to train models), `Aggregation` (to aggregate statistics for different tests into one file), `Both` (to both calculate statistics and aggregate them), `Test` (to test this path selector with different time limits and compare it to other path selectors)
- `logFeatures` - whether to save statistics used to train models
- `shuffleTests` - whether to shuffle tests before running (affects the tests being run if the `dataConsumption` option is less than 100)
- `discounts` - time discounts used when testing path selectors
- `inputShape` - an input shape of an actor model
- `maxAttentionLength` - a maximum attention length of a PPO actor model
- `useGnn` - whether to use a GNN model
- `dataConsumption` - a percentage of tests to run
- `hardTimeLimit` - a time limit for one test
- `solverTimeLimit` - a time limit for one solver call
- `maxConcurrency` - a maximum number of threads running different tests concurrently
- `graphUpdate` - when to update block graph data, must be one of: `Once` (at the beginning of a test), `TestGeneration` (every time a new test is generated)
- `logGraphFeatuers` - whether to save graph statistics used to train a GNN model to a dataset file
- `gnnFeaturesCount` - a number of features that a GNN model returns
- `useRnn` - whether to use an RNN model
- `rnnStateShape` - a shape of an RNN state
- `rnnFeaturesCount` - a number of features that an RNN model returns
- `inputJars` - jars and their packages to run tests on

### How to modify the metric

To modify the metric you may change values of the `reward` property of the `ActionData` objects. They are written inside the property `path` of the `FeaturesLoggingPathSelector`. Currently, the metric is calculated in the `remove` method of the `FeaturesLoggingPathSelector`.

### Training environment

The training environment and its description are inside `environment.zip`.

### "Modified" files

Source files which names start with "Modified" are modified copies of files from other modules. They were modified to support this path selector.
22 changes: 22 additions & 0 deletions usvm-ml-path-selection/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
object MLVersions {
const val serialization = "1.5.1"
const val onnxruntime = "1.15.1"
const val dotlin = "1.0.2"
}

plugins {
id("usvm.kotlin-conventions")
kotlin("plugin.serialization") version "1.8.21"
}

dependencies {
implementation(project(":usvm-jvm"))
implementation(project(":usvm-core"))

implementation("org.jacodb:jacodb-analysis:${Versions.jcdb}")
implementation("ch.qos.logback:logback-classic:${Versions.logback}")

implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:${MLVersions.serialization}")
implementation("io.github.rchowell:dotlin:${MLVersions.dotlin}")
implementation("com.microsoft.onnxruntime:onnxruntime:${MLVersions.onnxruntime}")
}
Binary file added usvm-ml-path-selection/environment.zip
Binary file not shown.
74 changes: 74 additions & 0 deletions usvm-ml-path-selection/src/main/kotlin/org/usvm/CoverageCounter.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package org.usvm

import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonObject
import java.util.concurrent.ConcurrentHashMap

class CoverageCounter {
private val testCoverages = ConcurrentHashMap<String, List<Float>>()
private val testStatementsCounts = ConcurrentHashMap<String, Float>()
private val testDiscounts = ConcurrentHashMap<String, List<Float>>()
private val testFinished = ConcurrentHashMap<String, Boolean>()

fun addTest(testName: String, statementsCount: Float) {
testCoverages[testName] = List(MLConfig.discounts.size) { 0.0f }
testStatementsCounts[testName] = statementsCount
testDiscounts[testName] = List(MLConfig.discounts.size) { 1.0f }
testFinished[testName] = false
}

fun updateDiscounts(testName: String) {
testDiscounts[testName] = testDiscounts.getValue(testName)
.mapIndexed { id, currentDiscount -> MLConfig.discounts[id] * currentDiscount }
}

fun updateResults(testName: String, newCoverage: Float) {
val currentDiscounts = testDiscounts.getValue(testName)
testCoverages[testName] = testCoverages.getValue(testName)
.mapIndexed { id, currentCoverage -> currentCoverage + currentDiscounts[id] * newCoverage }
}

fun finishTest(testName: String) {
testFinished[testName] = true
}

fun reset() {
testCoverages.clear()
testStatementsCounts.clear()
testDiscounts.clear()
testFinished.clear()
}

private fun getTotalCoverages(): List<Float> {
return testCoverages.values.reduce { acc, floats ->
acc.zip(floats).map { (total, value) -> total + value }
}
}

fun getStatistics(): JsonObject {
return buildJsonObject {
Comment thread
Saloed marked this conversation as resolved.
Outdated
putJsonObject("Tests") {
testCoverages.forEach { (test, coverages) ->
putJsonObject(test) {
putJsonObject("discounts") {
MLConfig.discounts.zip(coverages).forEach { (discount, coverage) ->
put(discount.toString(), coverage)
}
}
put("statementsCount", testStatementsCounts[test])
put("finished", testFinished[test])
}
}
}
putJsonObject("totalDiscounts") {
MLConfig.discounts.zip(getTotalCoverages()).forEach { (discount, coverage) ->
put(discount.toString(), coverage)
}
}
put("totalStatementsCount", testStatementsCounts.values.sum())
put("finishedTestsCount", testFinished.values.sumOf { if (it) 1.0 else 0.0 })
}
}
}
51 changes: 51 additions & 0 deletions usvm-ml-path-selection/src/main/kotlin/org/usvm/MLConfig.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.usvm

enum class Postprocessing {
Argmax,
Softmax,
None,
}

enum class Mode {
Calculation,
Aggregation,
Both,
Test,
}

enum class Algorithm {
BFS,
ForkDepthRandom,
}

enum class GraphUpdate {
Once,
TestGeneration,
}

object MLConfig {
Comment thread
Saloed marked this conversation as resolved.
Outdated
var gameEnvPath = "../Game_env"
var dataPath = "../Data"
var defaultAlgorithm = Algorithm.BFS
var postprocessing = Postprocessing.Argmax
var mode = Mode.Both
var logFeatures = true
var shuffleTests = true
var discounts = listOf(1.0f, 0.998f, 0.99f)
var inputShape = listOf<Long>(1, -1, 77)
var maxAttentionLength = -1
var useGnn = true
var dataConsumption = 100.0f
var hardTimeLimit = 30000 // in ms
var solverTimeLimit = 10000 // in ms
var maxConcurrency = 64
var graphUpdate = GraphUpdate.Once
var logGraphFeatures = false
var gnnFeaturesCount = 8
var useRnn = true
var rnnStateShape = listOf<Long>(4, 1, 512)
var rnnFeaturesCount = 33
var inputJars = mapOf(
Pair("../Game_env/jars/usvm-jvm-new.jar", listOf("org.usvm.samples", "com.thealgorithms"))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like hardcoded path. Maybe we should pass this as environment variable or via configuration?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a default value inside of a configuration object, it can be changed with a configuration file.

) // path to jar file -> list of package names
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.usvm

enum class ModifiedPathSelectionStrategy {
/**
* Collects features according to states selected by any other path selector.
*/
FEATURES_LOGGING,
/**
* Collects features and feeds them to the ML model to select states.
* Extends FEATURE_LOGGING path selector.
*/
MACHINE_LEARNING,
}

data class ModifiedUMachineOptions(
Comment thread
Saloed marked this conversation as resolved.
/**
* State selection heuristics.
* If multiple heuristics are specified, they are combined according to [pathSelectorCombinationStrategy].
*
* @see PathSelectionStrategy
*/
val pathSelectionStrategies: List<ModifiedPathSelectionStrategy> = listOf(ModifiedPathSelectionStrategy.MACHINE_LEARNING),
/**
* Strategy to combine multiple [pathSelectionStrategies].
*
* @see PathSelectorCombinationStrategy
*/
val pathSelectorCombinationStrategy: PathSelectorCombinationStrategy = PathSelectorCombinationStrategy.INTERLEAVED,
/**
* Seed used for random operations.
*/
val randomSeed: Long = 0,
/**
* Code coverage percent to stop execution on. Considered only if in range [1..100].
*/
val stopOnCoverage: Int = 100,
/**
* Optional limit of symbolic execution steps to stop execution on.
*/
val stepLimit: ULong? = 1500u,
/**
* Optional limit of collected states to stop execution on.
*/
val collectedStatesLimit: Int? = null,
/**
* Optional timeout in milliseconds to stop execution on.
*/
val timeoutMs: Long? = 20_000,
/**
* A number of steps from the last terminated state.
*/
val stepsFromLastCovered: Long? = null,
/**
* Scope of methods which coverage is considered.
*
* @see CoverageZone
*/
val coverageZone: CoverageZone = CoverageZone.METHOD,
/**
* Whether we should prefer exceptional state in the queue to the regular ones.
*/
val exceptionsPropagation: Boolean = true,
/**
* SMT solver type used for path constraint solving.
*/
val solverType: SolverType = SolverType.Z3
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package org.usvm.machine

import mu.KLogging
import org.jacodb.api.JcClasspath
import org.jacodb.api.JcMethod
import org.jacodb.api.cfg.JcInst
import org.jacodb.api.ext.methods
import org.usvm.*
import org.usvm.machine.interpreter.JcInterpreter
import org.usvm.machine.state.JcMethodResult
import org.usvm.machine.state.JcState
import org.usvm.machine.state.lastStmt
import org.usvm.ps.FeaturesLoggingPathSelector
import org.usvm.ps.modifiedCreatePathSelector
import org.usvm.statistics.*
import org.usvm.stopstrategies.createStopStrategy

val logger = object : KLogging() {}.logger

class ModifiedJcMachine(
cp: JcClasspath,
private val options: ModifiedUMachineOptions
) : UMachine<JcState>() {
private val applicationGraph = JcApplicationGraph(cp)

private val typeSystem = JcTypeSystem(cp)
private val components = JcComponents(typeSystem, options.solverType)
private val ctx = JcContext(cp, components)

private val interpreter = JcInterpreter(ctx, applicationGraph)

private val distanceStatistics = DistanceStatistics(applicationGraph)

fun analyze(
method: JcMethod,
coverageCounter: CoverageCounter? = null
): List<JcState> {
logger.debug("$this.analyze($method)")
val initialState = interpreter.getInitialState(method)

val methodsToTrackCoverage =
when (options.coverageZone) {
CoverageZone.METHOD -> setOf(method)
CoverageZone.TRANSITIVE -> setOf(method)
// TODO: more adequate method filtering. !it.isConstructor is used to exclude default constructor which is often not covered
CoverageZone.CLASS -> method.enclosingClass.methods.filter {
it.enclosingClass == method.enclosingClass && !it.isConstructor
}.toSet()
}

val coverageStatistics: CoverageStatistics<JcMethod, JcInst, JcState> = CoverageStatistics(
methodsToTrackCoverage,
applicationGraph
)

val pathSelector = modifiedCreatePathSelector(
initialState,
options,
{ coverageStatistics },
{ distanceStatistics },
{ applicationGraph }
)

val statesCollector = CoveredNewStatesCollector<JcState>(coverageStatistics) {
it.methodResult is JcMethodResult.JcException
}
val stopStrategy = createStopStrategy(
UMachineOptions(
pathSelectorCombinationStrategy = options.pathSelectorCombinationStrategy,
randomSeed = options.randomSeed,
stopOnCoverage = options.stopOnCoverage,
stepLimit = options.stepLimit,
collectedStatesLimit = options.collectedStatesLimit,
timeoutMs = options.timeoutMs,
stepsFromLastCovered = options.stepsFromLastCovered,
coverageZone = options.coverageZone,
exceptionsPropagation = options.exceptionsPropagation,
solverType = options.solverType
),
coverageStatistics = { coverageStatistics },
getCollectedStatesCount = { statesCollector.collectedStates.size }
)

val observers = mutableListOf<UMachineObserver<JcState>>(coverageStatistics)

observers.add(TerminatedStateRemover())

if (options.coverageZone == CoverageZone.TRANSITIVE) {
observers.add(
TransitiveCoverageZoneObserver(
initialMethod = method,
methodExtractor = { state -> state.lastStmt.location.method },
addCoverageZone = { coverageStatistics.addCoverageZone(it) },
ignoreMethod = { false } // TODO replace with a configurable setting
)
)
}
observers.add(statesCollector)

val methodName = method.toString().dropWhile { it != ')' }.drop(1)
Comment thread
Saloed marked this conversation as resolved.
Outdated
if (coverageCounter != null) {
observers.add(CoverageCounterStatistics(coverageStatistics, coverageCounter, methodName))
}

run(
interpreter,
pathSelector,
observer = CompositeUMachineObserver(observers),
isStateTerminated = ::isStateTerminated,
stopStrategy = stopStrategy,
)

coverageCounter?.finishTest(methodName)
if (pathSelector is FeaturesLoggingPathSelector<*, *, *>) {
if (MLConfig.logFeatures && MLConfig.mode != Mode.Test) {
pathSelector.savePath()
}
}

return statesCollector.collectedStates
}

private fun isStateTerminated(state: JcState): Boolean {
return state.callStack.isEmpty()
}

override fun close() {
components.close()
}
}
Loading