Skip to content

Commit 6e81441

Browse files
brandonros and codex committed
feat(llvm): add llvm19 support
Add the initial llvm19 cargo/build.rs plumbing while preserving the llvm7 check path. Assemble a v19 libintrinsics bitcode at build time and route nvvm.rs through the build-script-provided path. Document the validated baseline on the current host and the first Layer 1 blocker: the existing C++ shim no longer builds unchanged against LLVM 19 because rustllvm.h still expects headers like llvm/ADT/Triple.h. RUST_CUDA_ALLOW_LEGACY_ARCH_WITH_LLVM19 compute_100 target working through compilation errors working through sigsegv on vecadd nix flake libintrinsics libintrinsics chore(llvm19): close out Layer 3 pre-smoke work Finalize the Layer 3 plan, add env-driven final-module and LLVM IR capture hooks to vecadd, and validate the harness locally so the next phase can move straight to CUDA 12.9+ smoke testing. refactor(llvm19): close out Layer 2 containment Add named Rust-side containment helpers for debug info and target machine creation, make the current ThinLTO behavior explicit, and update LLVM19_PLAN.md to mark Layers 2c and 2d complete. refactor(llvm19): start Layer 2 helper containment Add a small Rust-side helper surface in src/llvm.rs for call-building, symbol insertion, and debug-location setting, then migrate the obvious callers without introducing LLVM-version cfg branching. Update LLVM19_PLAN.md to reflect the real Layer 2 state: 2a is complete, 2b is complete, 2c is partially landed, and 2d is still pending. Include the current .gitignore change in this checkpoint as requested. feat(llvm19): complete Layer 1 C++ shim bridge Bridge the wrapper headers and C++ shims so rustc_codegen_nvvm now builds against both LLVM 7 and LLVM 19. This adds the LLVM 19 wrapper headers, ports RustWrapper.cpp and PassWrapper.cpp through the current checkpoint, and records the completed Layer 1 progress and remaining Layer 2 caveats in the plan. ptxjitcompiler.so load_ptx_with_log unified? Co-Authored-By: OpenAI Codex <codex@openai.com>
1 parent 0bc607f commit 6e81441

39 files changed

Lines changed: 2053 additions & 255 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@ book
22
/target
33
**/.vscode
44
.devcontainer
5+
.codex
6+
rustc-ice-*.txt
7+
.nix-driver-libs
8+
.claude

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/cuda_builder/src/lib.rs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,21 @@ pub struct CudaBuilder {
196196
pub final_module_path: Option<PathBuf>,
197197
}
198198

199+
/// Default arch for new `CudaBuilder`s.
200+
///
201+
/// When the backend is being built with LLVM 19 support (detected via the `LLVM_CONFIG_19`
202+
/// env var — the same signal `rustc_codegen_nvvm`'s build script uses), default to the
203+
/// lowest Blackwell compute capability (`Compute100`). Pre-Blackwell archs use the legacy
204+
/// LLVM 7 NVVM dialect, so pairing them with an LLVM 19 backend is never the right choice.
205+
/// Callers can still override via [`CudaBuilder::arch`].
206+
fn default_arch() -> NvvmArch {
207+
if env::var_os("LLVM_CONFIG_19").is_some() {
208+
NvvmArch::Compute100
209+
} else {
210+
NvvmArch::default()
211+
}
212+
}
213+
199214
impl CudaBuilder {
200215
pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self {
201216
Self {
@@ -204,7 +219,7 @@ impl CudaBuilder {
204219
ptx_file_copy_path: None,
205220
generate_line_info: true,
206221
nvvm_opts: true,
207-
arch: NvvmArch::default(),
222+
arch: default_arch(),
208223
ftz: false,
209224
fast_sqrt: false,
210225
fast_div: false,
@@ -355,6 +370,7 @@ impl CudaBuilder {
355370
/// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
356371
pub fn build(self) -> Result<PathBuf, CudaBuilderError> {
357372
println!("cargo:rerun-if-changed={}", self.path_to_crate.display());
373+
println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19");
358374
let path = invoke_rustc(&self)?;
359375
if let Some(copy_path) = self.ptx_file_copy_path {
360376
std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?;
@@ -550,13 +566,21 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {
550566

551567
let target_dir = workspace_dir.join("target").join("cuda-builder-codegen");
552568

553-
let status = Command::new("cargo")
554-
.args(["build", "-p", "rustc_codegen_nvvm"])
569+
let mut cmd = Command::new("cargo");
570+
cmd.args(["build", "-p", "rustc_codegen_nvvm"])
555571
.arg("--target-dir")
556572
.arg(&target_dir)
557-
.current_dir(&workspace_dir)
558-
.status()
559-
.ok()?;
573+
.current_dir(&workspace_dir);
574+
575+
// Propagate the llvm19 cargo feature to the nested build when the surrounding
576+
// shell is configured for LLVM 19 (signalled by LLVM_CONFIG_19). Without this
577+
// rustc_codegen_nvvm's build.rs defaults to the LLVM 7 path and falls through
578+
// to the prebuilt LLVM 7 download, which fails on Linux.
579+
if env::var_os("LLVM_CONFIG_19").is_some() {
580+
cmd.args(["--features", "llvm19"]);
581+
}
582+
583+
let status = cmd.status().ok()?;
560584

561585
if !status.success() {
562586
return None;

crates/cust/build.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,12 @@ fn main() {
4040
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
4141
println!("cargo::rustc-cfg=cuCtxCreate_v4");
4242
}
43+
44+
// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
45+
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
46+
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
47+
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
48+
if driver_version >= 13020 {
49+
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
50+
}
4351
}

crates/cust/src/memory/unified.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
647647
#[cfg(cuMemPrefetchAsync_v2)]
648648
driver_sys::CUmemLocation {
649649
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
650+
#[cfg(cuMemLocation_anon_id)]
651+
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
652+
#[cfg(not(cuMemLocation_anon_id))]
650653
id,
651654
},
652655
#[cfg(not(cuMemPrefetchAsync_v2))]
@@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
693696
#[cfg(cuMemPrefetchAsync_v2)]
694697
driver_sys::CUmemLocation {
695698
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
699+
#[cfg(cuMemLocation_anon_id)]
700+
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
701+
#[cfg(not(cuMemLocation_anon_id))]
696702
id,
697703
},
698704
#[cfg(not(cuMemPrefetchAsync_v2))]
@@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
735741
#[cfg(cuMemAdvise_v2)]
736742
driver_sys::CUmemLocation {
737743
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
744+
#[cfg(cuMemLocation_anon_id)]
745+
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
746+
#[cfg(not(cuMemLocation_anon_id))]
738747
id,
739748
},
740749
#[cfg(not(cuMemAdvise_v2))]
@@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
777786
#[cfg(cuMemAdvise_v2)]
778787
driver_sys::CUmemLocation {
779788
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
789+
#[cfg(cuMemLocation_anon_id)]
790+
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
791+
#[cfg(not(cuMemLocation_anon_id))]
780792
id,
781793
},
782794
#[cfg(not(cuMemAdvise_v2))]
@@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
801813
#[cfg(cuMemAdvise_v2)]
802814
driver_sys::CUmemLocation {
803815
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
816+
#[cfg(cuMemLocation_anon_id)]
817+
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
818+
#[cfg(not(cuMemLocation_anon_id))]
804819
id,
805820
},
806821
#[cfg(not(cuMemAdvise_v2))]

crates/cust_raw/build/cuda_sdk.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ impl CudaSdk {
215215
vec![
216216
cuda_root.join("nvvm").join("bin"),
217217
cuda_root.join("nvvm").join("lib64"),
218+
cuda_root.join("nvvm").join("lib"),
218219
]
219220
};
220221
let library_dirs = Self::normalize_dirpaths(search_dirs);

crates/cust_raw/build/main.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,12 @@ fn main() {
9999
println!("cargo::rustc-link-search=native={}", libdir.display());
100100
}
101101
println!("cargo::rustc-link-lib=dylib=nvvm");
102-
// Handle libdevice support.
103-
fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc"))
102+
// `fs::copy` preserves source mode. When libdevice.10.bc comes from
103+
// the Nix store (0444), re-running this build can't overwrite the
104+
// previous copy in OUT_DIR. Drop it first.
105+
let dest = outdir.join("libdevice.bc");
106+
let _ = fs::remove_file(&dest);
107+
fs::copy(sdk.libdevice_bitcode_path(), &dest)
104108
.expect("Cannot copy libdevice bitcode file.");
105109
}
106110
}

crates/nvvm/src/lib.rs

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ use std::{
44
ffi::{CStr, CString},
55
fmt::Display,
66
mem::MaybeUninit,
7-
ptr::null_mut,
87
str::FromStr,
98
};
109

@@ -325,6 +324,10 @@ pub enum NvvmArch {
325324
Compute89,
326325
Compute90,
327326
Compute90a,
327+
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
328+
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
329+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. This is also the default arch
330+
/// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set.
328331
Compute100,
329332
Compute100f,
330333
Compute100a,
@@ -448,6 +451,14 @@ impl NvvmArch {
448451
self.capability_value() % 10
449452
}
450453

454+
/// Whether this target uses NVVM's modern IR dialect rather than the legacy LLVM 7 dialect.
455+
///
456+
/// CUDA 13.2 documents the modern dialect as Blackwell-and-later only, which begins at
457+
/// `compute_100`.
458+
pub fn uses_modern_ir_dialect(&self) -> bool {
459+
self.capability_value() >= 100
460+
}
461+
451462
/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
452463
/// `Compute90a`).
453464
pub fn target_feature(&self) -> &'static str {
@@ -739,7 +750,24 @@ impl NvvmProgram {
739750
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
740751
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
741752
pub fn verify(&self) -> Result<(), NvvmError> {
742-
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
753+
self.verify_with_options(&[])
754+
}
755+
756+
/// Like [`verify`](Self::verify), but runs the verifier with the same `NvvmOption`s that will
757+
/// be passed to [`compile`](Self::compile). Passing the user-selected `-arch=compute_XXX` in
758+
/// particular matters for CUDA 12.9+ / LLVM 19 bitcode: without it the verifier can fall back
759+
/// to the legacy LLVM 7 parser and reject modern-dialect bitcode that would otherwise compile
760+
/// fine.
761+
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
762+
unsafe {
763+
let options = options.iter().map(|x| format!("{x}\0")).collect::<Vec<_>>();
764+
let mut options_ptr = options
765+
.iter()
766+
.map(|x| x.as_ptr().cast())
767+
.collect::<Vec<_>>();
768+
nvvm_sys::nvvmVerifyProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr())
769+
.to_result()
770+
}
743771
}
744772
}
745773

crates/rustc_codegen_nvvm/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ readme = "../../README.md"
1414
[lib]
1515
crate-type = ["dylib"]
1616

17+
[features]
18+
default = []
19+
llvm19 = []
20+
1721
[dependencies]
1822
nvvm = { version = "0.1", path = "../nvvm" }
1923
rustc-demangle = "0.1.24"

0 commit comments

Comments
 (0)