Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
03c3375
PR1 (M1+M2): Add passive SCIP shard infrastructure and opt-in dual em…
jupblb May 27, 2026
7b9b9da
PR1 (M3): Fork SemanticdbVisitor into native ScipVisitor
jupblb May 27, 2026
7b5725a
PR1 (M4): Add in-plugin ScipSignatureFormatter for Java
jupblb May 27, 2026
e32667c
PR1 (M5): Add ScipShardAggregator for .scip shards
jupblb May 27, 2026
7a3faa7
PR1 (M6): Opt-in --use-scip-shards flag wires CLI + build tools
jupblb May 27, 2026
e8d4473
PR1 (M7): Stabilize ScipVisitor output and regenerate minimized snaps…
jupblb May 27, 2026
12001af
PR1 (M8): Make compiler-side SCIP shard emission the default
jupblb May 27, 2026
72794e9
PR1 (cleanup): Simplify newly-added SCIP shard code
jupblb May 27, 2026
19b0f86
PR2 (K1): passive Kotlin SCIP shard infrastructure
jupblb May 28, 2026
0c52c98
PR2 (K2): dual-emit SemanticDB + SCIP shards from semanticdb-kotlinc
jupblb May 28, 2026
26a34c2
PR2 (K3): make scip-semanticdb aggregator shard-walk safe
jupblb May 28, 2026
0ed5d46
PR2 (K4): default index-semanticdb to shard consumption
jupblb May 28, 2026
b1c5681
PR2 (K5): regenerate Kotlin minimized snapshots
jupblb May 28, 2026
65309f0
PR3 (D1): decouple ScipVisitor from Semanticdb protobuf types
jupblb May 28, 2026
fa52631
PR3 (D2): semanticdb-javac emits only SCIP shards
jupblb May 28, 2026
6a01dd5
PR3 (D3): semanticdb-kotlinc emits only SCIP shards
jupblb May 28, 2026
0e152d3
PR3 (D4): aggregator drops the legacy SemanticDB fallback
jupblb May 28, 2026
5b9897e
PR3 (D5): drop SemanticDB protobuf generation and dead builders
jupblb May 28, 2026
6ae2659
PR3 (D6): scrub legacy SemanticDB wording from docs and help text
jupblb May 28, 2026
900a2a6
docs: remove per-file SCIP shards section
jupblb May 29, 2026
f6ce610
docs: remove dangling pipeline.svg reference
jupblb May 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ If you'd rather install tools manually, you'll need at least:

These are the main components of the project.

- `semanticdb-javac/src/main/java`: the Java compiler plugin that creates
SemanticDB files.
- `semanticdb-javac/src/main/java`: the Java compiler plugin that emits
SCIP shard files.
- `tests/minimized`: minimized Java source files that reproduce interesting test
cases.
- `tests/unit`: fast running unit tests that are helpful for local edit-and-test
Expand Down
28 changes: 18 additions & 10 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ lazy val semanticdb = project
.in(file("semanticdb-java"))
.settings(
moduleName := "semanticdb-java",
javaOnlySettings,
(Compile / PB.targets) :=
Seq(PB.gens.java(V.protobuf) -> (Compile / sourceManaged).value)
javaOnlySettings
)

lazy val agent = project
Expand Down Expand Up @@ -169,7 +167,7 @@ lazy val javacPlugin = project
.inAll
)
)
.dependsOn(semanticdb)
.dependsOn(semanticdb, scipProto)

lazy val scipProto = project
.in(file("scip-java-proto"))
Expand Down Expand Up @@ -440,13 +438,15 @@ lazy val semanticdbKotlinc = project
Attributed.blank(dir)
}
)
.dependsOn(scipProto)

// `semanticdbKotlincMinimized` mirrors the (still-present) Gradle build at
// semanticdb-kotlinc/minimized/build.gradle.kts. It compiles a small set of
// Kotlin and Java fixtures with the assembled `semanticdbKotlinc` plugin
// attached to kotlinc/javac, producing *.semanticdb files under
// target/semanticdb-targetroot/ which are then converted to SCIP and rendered
// as the human-readable golden snapshots by the `snapshots` task.
// attached to kotlinc/javac, producing *.scip shard files under
// target/semanticdb-targetroot/ which are then aggregated into a single SCIP
// index and rendered as the human-readable golden snapshots by the
// `snapshots` task.
lazy val semanticdbKotlincMinimized = project
.in(file("semanticdb-kotlinc/minimized"))
.enablePlugins(KotlinPlugin)
Expand Down Expand Up @@ -510,7 +510,7 @@ lazy val semanticdbKotlincMinimized = project
// ----- snapshots regeneration task -----
// Invokes `com.sourcegraph.scip_java.ScipJava.main` twice in the cli JVM
// (forked — ScipJava.main calls System.exit on failure). First pass
// converts the *.semanticdb files under target/semanticdb-targetroot/
// aggregates the *.scip shard files under target/semanticdb-targetroot/
// into an index.scip; second pass renders that index as the human-readable
// golden snapshots.
//
Expand All @@ -524,15 +524,18 @@ lazy val semanticdbKotlincMinimized = project
val snapDir =
(baseDirectory.value / "src" / "generatedSnapshots" / "resources")
.getAbsolutePath
val scipOut = s"$tgtRoot/index.scip"
// Place the aggregated `index.scip` outside the shard-scanned
// targetroot so a subsequent run doesn't re-ingest it as a shard.
val scipOut = (target.value / "scip-index" / "index.scip")
.getAbsolutePath
val mainCls = "com.sourcegraph.scip_java.ScipJava"
Def.sequential(
Compile / compile,
(cli / Compile / runMain).toTask(
s" $mainCls index-semanticdb --no-emit-inverse-relationships --cwd $srcRoot --output $scipOut $tgtRoot"
),
(cli / Compile / runMain).toTask(
s" $mainCls snapshot --cwd $srcRoot --output $snapDir $tgtRoot"
s" $mainCls snapshot --cwd $srcRoot --output $snapDir ${file(scipOut).getParentFile.getAbsolutePath}"
)
)
}
Expand Down Expand Up @@ -695,6 +698,11 @@ lazy val fatjarPackageSettings = List[Def.Setting[_]](
MergeStrategy.discard
case PathList("META-INF", "versions", "9", "module-info.class") =>
MergeStrategy.discard
// Bazel BUILD files live next to *.proto sources in our subprojects; they are
// not needed at runtime and would conflict when multiple proto modules are
// merged into the same fat jar.
case PathList("BUILD") =>
MergeStrategy.discard
case x =>
val oldStrategy = (assembly / assemblyMergeStrategy).value
oldStrategy(x)
Expand Down
28 changes: 3 additions & 25 deletions docs/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ title: Design

This project is implemented as a
[Java compiler plugin](https://docs.oracle.com/en/java/javase/11/docs/api/jdk.compiler/com/sun/source/util/Plugin.html)
that generates one
[SemanticDB](https://scalameta.org/docs/semanticdb/specification.html) file for
every `*.java` source file. After compilation completes, the SemanticDB files
are processed to produce SCIP.
that emits one [SCIP](https://github.com/sourcegraph/scip) shard file for every
`*.java` source file. After compilation completes, the per-file SCIP shards are
aggregated into a single SCIP index.

### Why Java compiler plugin?

Expand All @@ -24,24 +23,3 @@ There are several benefits to implementing scip-java as a compiler plugin:
tool, we minimize the risk of diverging from the CI build environment such as
installed system dependencies, custom compiler options and custom annotation
processors.

### Why SemanticDB?

SemanticDB is a Protobuf schema for information about symbols and types in Java
programs and other languages. There are several benefits to using SemanticDB as
an intermediary representation for SCIP:

- **Simplicity**: It's easy to translate a single Java source file into a single
SemanticDB file inside a compiler plugin. It's more complicated to produce
SCIP because compiler plugins do not have access to a project-wide context,
which is necessary to produce accurate definitions and hovers in multi-module
projects with external library dependencies.
- **Performance**: SemanticDB is fast to write and read. Each compilation unit
can be processed independently to keep memory usage low. The final conversion
from SemanticDB to SCIP can be safely parallelized.
- **Cross-repository**: Compiler plugins have access to both source code and the
classpath (compiled bytecode of upstream dependencies). SemanticDB has been
designed so that it's also possible to generate spec-compliant symbols from
the classpath alone (no source code) and from the syntax tree of an individual
source file (no classpath). This flexibility will be helpful for scip-java in
the future to unblock cross-repository navigation.
6 changes: 3 additions & 3 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,10 @@ Next, run the following command to generate the SCIP index (`index.scip`).
```
bazel run @scip_java//scip-semanticdb:bazel -- --sourceroot $PWD

# (optional) Validate that SemanticDB files were generated.
# (optional) Validate that SCIP shard files were generated.
# The command below works for the `examples/bazel-example` directory in the sourcegraph/scip-java repository.
❯ jar tf bazel-bin/src/main/java/example/libexample.jar | grep semanticdb$
META-INF/semanticdb/src/main/java/example/Example.java.semanticdb
❯ jar tf bazel-bin/src/main/java/example/libexample.jar | grep scip$
META-INF/scip/src/main/java/example/Example.java.scip
```

Finally, run the following commands to upload the SCIP index to Sourcegraph.
Expand Down
21 changes: 10 additions & 11 deletions docs/manual-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ fails.

Indexing a codebase consists of two independent phases:

- Compile the codebase with the SemanticDB compiler plugin.
- Generate SCIP index from SemanticDB files.

![A three stage pipeline that starts with a list of Java sources, creates a list of SemanticDB files that then become a single SCIP index.](assets/semanticdb-javac-pipeline.svg)
- Compile the codebase with the SemanticDB compiler plugin, which writes one
SCIP shard per Java source file.
- Aggregate the SCIP shards into a single SCIP index.

The first phase can be complicated to configure and it can take a while to run.
The second phase is quite simple to configure and it usually runs very fast.
Expand Down Expand Up @@ -63,7 +62,7 @@ compiler plugin. To do this you need to explicitly configure two directories:
It's important that all of the source files that should be indexed live under
this directory.
- `-targetroot:PATH`: the absolute path to the directory where to generate
SemanticDB file. This directory can be anywhere on your file system.
SCIP shard files. This directory can be anywhere on your file system.
Alternatively, pass in `-targetroot:javac-classes-directory` for the plugin to
automatically use the `javac` output directory.

Expand Down Expand Up @@ -112,13 +111,13 @@ examples:
- Maven: `mvn clean verify -DskipTests`
- Bazel: `bazel build //...`

If everything went well, you should have a lot of `*.semanticdb` files in the
If everything went well, you should have a lot of `*.scip` shard files in the
targetroot directory.

```
❯ find $TARGETROOT -type f
build/semanticdb-targetroot/META-INF/semanticdb/j11/src/test/java/example/ExampleTest.java.semanticdb
build/semanticdb-targetroot/META-INF/semanticdb/j11/src/main/java/example/Example.java.semanticdb
build/semanticdb-targetroot/META-INF/scip/j11/src/test/java/example/ExampleTest.java.scip
build/semanticdb-targetroot/META-INF/scip/j11/src/main/java/example/Example.java.scip
...
```

Expand Down Expand Up @@ -198,13 +197,13 @@ Which allows you to invoke it by simply running `mvn sourcegraph:sourcegraphDepe
Cross-repository navigation is a feature that allows "goto definition" and "find
references" to show results from multiple repositories.

## Step 5: Generate SCIP index from SemanticDB files
## Step 5: Aggregate SCIP shards into a single SCIP index

First, install the `scip-java` command-line tool according to the instructions
in the [getting started guide](getting-started.md).

Next, run the `scip-java index-semanticdb` command to convert SemanticDB files
into SCIP.
Next, run the `scip-java index-semanticdb` command to aggregate the per-file
SCIP shards into a single SCIP index.

```sh
❯ scip-java index-semanticdb $TARGETROOT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ class ScipBuildTool(index: IndexCommand) extends BuildTool("SCIP", index) {
.app
.reporter
.info(
"Some SemanticDB files got generated even if there were compile errors. " +
"Some SCIP shard files got generated even if there were compile errors. " +
"In most cases, this means that scip-java managed to index everything " +
"except the locations that had compile errors and you can ignore the compile errors."
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ case class IndexCommand(
@Description("The path where to generate the SCIP index.")
output: Path = Paths.get("index.scip"),
@Description(
"The directory where to generate SemanticDB files. " +
"The directory where to generate SCIP shard files. " +
"Defaults to a build-specific path. " +
"For example, the default value for Gradle is 'build/semanticdb-targetroot' and for Maven it's 'target/semanticdb-targetroot'"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ import com.sourcegraph.scip_java.BuildInfo
import com.sourcegraph.scip_java.buildtools.ClasspathEntry
import com.sourcegraph.scip_semanticdb.ConsoleScipSemanticdbReporter
import com.sourcegraph.scip_semanticdb.ScipOutputFormat
import com.sourcegraph.scip_semanticdb.ScipSemanticdb
import com.sourcegraph.scip_semanticdb.ScipSemanticdbOptions
import com.sourcegraph.scip_semanticdb.ScipShardAggregator
import moped.annotations._
import moped.cli.Application
import moped.cli.Command
import moped.cli.CommandParser
import ujson.Arr
import ujson.Obj

@Description("Converts SemanticDB files into a single SCIP index file.")
@Description("Aggregates SCIP shard files into a single SCIP index file.")
@Usage("scip-java index-semanticdb [OPTIONS ...] [POSITIONAL ARGUMENTS ...]")
@ExampleUsage(
"scip-java index-semanticdb --out=myindex.scip my/targetroot1 my/targetroot2"
Expand All @@ -30,10 +30,10 @@ import ujson.Obj
final case class IndexSemanticdbCommand(
@Description("The name of the output file.")
output: Path = Paths.get("index.scip"),
@Description("Whether to process the SemanticDB files in parallel")
@Description("Whether to process the SCIP shard files in parallel")
parallel: Boolean = true,
@Description(
"Whether to infer the location of SemanticDB files based as produced by Bazel"
"Whether to infer the location of SCIP shard files based as produced by Bazel"
)
bazel: Boolean = true,
@Description(
Expand All @@ -44,7 +44,7 @@ final case class IndexSemanticdbCommand(
@Description("URL to a PackageHub instance")
@Hidden
packagehub: Option[String] = None,
@Description("Directories that contain SemanticDB files.")
@Description("Directories that contain SCIP shard files.")
@PositionalArguments()
targetroot: List[Path] = Nil,
@Description(
Expand Down Expand Up @@ -108,7 +108,7 @@ final case class IndexSemanticdbCommand(
allowEmptyIndex,
allowExportingGlobalSymbolsFromDirectoryEntries
)
ScipSemanticdb.run(options)
ScipShardAggregator.run(options)
postPackages(packages)
if (!app.reporter.hasErrors()) {
app.info(options.output.toString)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,31 +51,37 @@ case class SnapshotCommand(
attrs: BasicFileAttributes
): FileVisitResult = {
if (scipPattern.matches(file)) {
foundScipFile = true
val index = Scip.Index.parseFrom(Files.readAllBytes(file))
val root = URI.create(index.getMetadata.getProjectRoot)
index
.getDocumentsList
.asScala
.foreach { doc =>
val sourcepath = Paths.get(root.resolve(doc.getRelativePath))
val source =
new String(
Files.readAllBytes(sourcepath),
StandardCharsets.UTF_8
// Skip per-source shards emitted by the compiler plugin (those don't have a
// project_root). The aggregator produces a single top-level index file that
// carries the project_root and is the canonical input for snapshot rendering.
val rawProjectRoot = index.getMetadata.getProjectRoot
if (rawProjectRoot.nonEmpty) {
foundScipFile = true
val projectRoot = URI.create(rawProjectRoot)
index
.getDocumentsList
.asScala
.foreach { doc =>
val sourcepath = Paths.get(projectRoot.resolve(doc.getRelativePath))
val source =
new String(
Files.readAllBytes(sourcepath),
StandardCharsets.UTF_8
)
val document = ScipPrinters.printTextDocument(
doc,
source,
CommentSyntax.default
)
val document = ScipPrinters.printTextDocument(
doc,
source,
CommentSyntax.default
)
val snapshotOutput = output.resolve(doc.getRelativePath)
Files.createDirectories(snapshotOutput.getParent)
Files.write(
snapshotOutput,
document.getBytes(StandardCharsets.UTF_8)
)
}
val snapshotOutput = output.resolve(doc.getRelativePath)
Files.createDirectories(snapshotOutput.getParent)
Files.write(
snapshotOutput,
document.getBytes(StandardCharsets.UTF_8)
)
}
}
}
super.visitFile(file, attrs)
}
Expand Down
1 change: 0 additions & 1 deletion scip-semanticdb/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ java_library(
":all_java_proto",
"//scip-java-proto/src/main/protobuf:scip_java_proto",
"//semanticdb-java",
"//semanticdb-java/src/main/protobuf:semanticdb_java_proto",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_protobuf_protobuf_java",
"@maven//:com_google_protobuf_protobuf_java_util",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public boolean hasErrors() {
/* allowEmptyIndex */ true,
/* indexDirectoryEntries */ false // because Bazel only compiles to jar files.
);
ScipSemanticdb.run(scipOptions);
ScipShardAggregator.run(scipOptions);

if (!scipOptions.reporter.hasErrors()) {
System.out.println("done: " + scipOptions.output);
Expand Down

This file was deleted.

Loading
Loading