Skip to content

Commit 0367d60

Browse files
committed
add RangeCompatBytesKey, derived from BytesKey
1 parent fca10ae commit 0367d60

File tree

3 files changed

+483
-67
lines changed

3 files changed

+483
-67
lines changed

crates/table/src/table_index/bytes_key.rs

Lines changed: 207 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use spacetimedb_memory_usage::MemoryUsage;
55
use spacetimedb_primitives::ColList;
66
use spacetimedb_sats::bsatn::{DecodeError, Deserializer, Serializer};
77
use spacetimedb_sats::de::{DeserializeSeed, Error as _};
8-
use spacetimedb_sats::{u256, AlgebraicType, AlgebraicValue, ProductTypeElement, Serialize as _, WithTypespace};
8+
use spacetimedb_sats::{i256, u256, AlgebraicType, AlgebraicValue, ProductTypeElement, Serialize as _, WithTypespace};
99

1010
/// A key for an all-primitive multi-column index
1111
/// serialized to a byte array.
@@ -43,8 +43,9 @@ pub(super) const fn size_sub_row_pointer(n: usize) -> usize {
4343
///
4444
/// If keys at `ty` are incompatible with fixed byte keys,
4545
/// e.g., because they are of unbounded length,
46+
/// or because `is_ranged_idx` is set and `ty` contains a float,
4647
/// then `None` is returned.
47-
pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
48+
pub(super) fn required_bytes_key_size(ty: &AlgebraicType, is_ranged_idx: bool) -> Option<usize> {
4849
use AlgebraicType::*;
4950

5051
match ty {
@@ -55,10 +56,18 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
5556

5657
// For sum, we report the greatest possible fixed size.
5758
// A key may be of variable size, as long as it fits within an upper bound.
59+
//
60+
// It's valid to use `RangeCompatBytesKey`-ified sums in a range index,
61+
// i.e., when `is_ranged_idx`,
62+
// as `Ord for AlgebraicValue` delegates to `Ord for SumValue`
63+
// which compares the `tag` first and the payload (`value`) second.
64+
// The `RangeCompatBytesKey` encoding of sums places the `tag` first and the payload second.
65+
// When comparing two `[u8]` slices with encoded sums,
66+
// this produces an ordering that also compares the `tag` first and the payload second.
5867
Sum(ty) => {
5968
let mut max_size = 0;
6069
for var in &ty.variants {
61-
let variant_size = required_bytes_key_size(&var.algebraic_type)?;
70+
let variant_size = required_bytes_key_size(&var.algebraic_type, is_ranged_idx)?;
6271
max_size = max_size.max(variant_size);
6372
}
6473
// The sum tag is represented as a u8 in BSATN,
@@ -70,11 +79,15 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
7079
Product(ty) => {
7180
let mut total_size = 0;
7281
for elem in &ty.elements {
73-
total_size += required_bytes_key_size(&elem.algebraic_type)?;
82+
total_size += required_bytes_key_size(&elem.algebraic_type, is_ranged_idx)?;
7483
}
7584
Some(total_size)
7685
}
7786

87+
// Floats are stored in IEEE 754 format,
88+
// so their byte representation is not order-preserving.
89+
F32 | F64 if is_ranged_idx => None,
90+
7891
// Primitives:
7992
Bool | U8 | I8 => Some(mem::size_of::<u8>()),
8093
U16 | I16 => Some(mem::size_of::<u16>()),
@@ -130,11 +143,12 @@ impl<const N: usize> BytesKey<N> {
130143
// Check that the `prefix` and the `endpoint` together fit into the key.
131144
let prefix_len = prefix.len();
132145
let endpoint_len = endpoint.len();
133-
Self::ensure_key_fits(prefix_len + endpoint_len)?;
146+
let total_len = prefix_len + endpoint_len;
147+
Self::ensure_key_fits(total_len)?;
134148
// Copy the `prefix` and the `endpoint` over.
135149
let mut arr = [0; N];
136150
arr[..prefix_len].copy_from_slice(prefix);
137-
arr[prefix_len..prefix_len + endpoint_len].copy_from_slice(endpoint);
151+
arr[prefix_len..total_len].copy_from_slice(endpoint);
138152
Ok(Self(arr))
139153
}
140154

@@ -183,16 +197,195 @@ impl<const N: usize> BytesKey<N> {
183197
}
184198
}
185199

200+
/// A key for an all-primitive multi-column index
201+
/// serialized to a byte array.
202+
///
203+
/// These keys are derived from [`BytesKey`]
204+
/// but are post-processed to work with ranges,
205+
/// unlike the former type,
206+
/// which only works with point indices (e.g., hash indices).
207+
///
208+
/// The post-processing converts how some types are stored in the encoding:
209+
/// - unsigned integer types `uN`, where `N > 8`, are converted from little-endian to big-endian.
210+
/// - signed integers are shifted such that `iN::MIN` is stored as `0`
211+
/// and `iN::MAX` is stored as `uN::MAX`.
212+
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
213+
pub(super) struct RangeCompatBytesKey<const N: usize>([u8; N]);
214+
215+
impl<const N: usize> MemoryUsage for RangeCompatBytesKey<N> {}
216+
217+
/// Splits `slice` into the first `N` bytes converting the former via `map_bytes`
218+
/// and returning the rest. Panics if `slice` has fewer than `N` bytes.
219+
fn split_map_write_back<const N: usize>(slice: &mut [u8], map_bytes: impl FnOnce([u8; N]) -> [u8; N]) -> &mut [u8] {
220+
let (bytes, rest) = slice.split_first_chunk_mut().unwrap();
221+
*bytes = map_bytes(*bytes);
222+
rest
223+
}
224+
225+
impl<const N: usize> RangeCompatBytesKey<N> {
226+
/// Decodes `self` as an [`AlgebraicValue`] at `key_type`.
227+
///
228+
/// An incorrect `key_type`,
229+
/// i.e., one other than what was used when the index was created,
230+
/// may lead to a panic, but this is not guaranteed.
231+
/// The method could also silently succeed
232+
/// if the passed `key_type` incidentally happens to be compatible with the stored bytes in `self`.
233+
pub(super) fn decode_algebraic_value(&self, key_type: &AlgebraicType) -> AlgebraicValue {
234+
Self::to_bytes_key(*self, key_type).decode_algebraic_value(key_type)
235+
}
236+
237+
/// Decodes `prefix` and `endpoint` in BSATN to a [`RangeCompatBytesKey<N>`]
238+
/// by copying over both and massaging if they fit into the key.
239+
pub(super) fn from_bsatn_prefix_and_endpoint(
240+
prefix: &[u8],
241+
prefix_types: &[ProductTypeElement],
242+
endpoint: &[u8],
243+
range_type: &AlgebraicType,
244+
) -> DecodeResult<Self> {
245+
let BytesKey(mut array) = BytesKey::from_bsatn_prefix_and_endpoint(prefix, prefix_types, endpoint, range_type)?;
246+
247+
// Massage the bytes in `array`.
248+
let mut slice = array.as_mut_slice();
249+
for ty in prefix_types {
250+
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
251+
}
252+
Self::process_from_bytes_key(slice, range_type);
253+
254+
Ok(Self(array))
255+
}
256+
257+
/// Decodes `bytes` in BSATN to a [`RangeCompatBytesKey<N>`]
258+
/// by copying over the bytes if they fit into the key.
259+
pub(super) fn from_bsatn(ty: &AlgebraicType, bytes: &[u8]) -> DecodeResult<Self> {
260+
let key = BytesKey::from_bsatn(ty, bytes)?;
261+
Ok(Self::from_bytes_key(key, ty))
262+
}
263+
264+
/// Serializes the columns `cols` in `row_ref` to a [`RangeCompatBytesKey<N>`].
265+
///
266+
/// It's assumed that `row_ref` projected to `cols`
267+
/// will fit into `N` bytes when serialized into BSATN.
268+
/// The method panics otherwise.
269+
///
270+
/// SAFETY: Any `col` in `cols` is in-bounds of `row_ref`'s layout.
271+
pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>, ty: &AlgebraicType) -> Self {
272+
// SAFETY: same as caller requirements.
273+
let key = unsafe { BytesKey::from_row_ref(cols, row_ref) };
274+
Self::from_bytes_key(key, ty)
275+
}
276+
277+
/// Serializes `av` to a [`RangeCompatBytesKey<N>`].
278+
///
279+
/// It's assumed that `av`
280+
/// will fit into `N` bytes when serialized into BSATN.
281+
/// The method panics otherwise.
282+
pub(super) fn from_algebraic_value(av: &AlgebraicValue, ty: &AlgebraicType) -> Self {
283+
let key = BytesKey::from_algebraic_value(av);
284+
Self::from_bytes_key(key, ty)
285+
}
286+
287+
fn from_bytes_key(key: BytesKey<N>, ty: &AlgebraicType) -> Self {
288+
let BytesKey(mut array) = key;
289+
Self::process_from_bytes_key(array.as_mut_slice(), ty);
290+
Self(array)
291+
}
292+
293+
fn process_from_bytes_key<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] {
294+
use AlgebraicType::*;
295+
match ty {
296+
// For sums, read the tag and process the active variant.
297+
Sum(ty) => {
298+
let (&mut tag, rest) = slice.split_first_mut().unwrap();
299+
let ty = &ty.variants[tag as usize].algebraic_type;
300+
Self::process_from_bytes_key(rest, ty)
301+
}
302+
// For products, just process each field in sequence.
303+
Product(ty) => {
304+
for ty in &ty.elements {
305+
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
306+
}
307+
slice
308+
}
309+
// No need to do anything as these are only a single byte long.
310+
Bool | U8 => &mut slice[1..],
311+
// For unsigned integers, read them as LE and write back as BE.
312+
U16 => split_map_write_back(slice, |b| u16::from_le_bytes(b).to_be_bytes()),
313+
U32 => split_map_write_back(slice, |b| u32::from_le_bytes(b).to_be_bytes()),
314+
U64 => split_map_write_back(slice, |b| u64::from_le_bytes(b).to_be_bytes()),
315+
U128 => split_map_write_back(slice, |b| u128::from_le_bytes(b).to_be_bytes()),
316+
U256 => split_map_write_back(slice, |b| u256::from_le_bytes(b).to_be_bytes()),
317+
// For signed integers, read them as LE, make them unsigned, and write back as BE.
318+
I8 => split_map_write_back(slice, |b| i8::from_le_bytes(b).wrapping_sub(i8::MIN).to_be_bytes()),
319+
I16 => split_map_write_back(slice, |b| i16::from_le_bytes(b).wrapping_sub(i16::MIN).to_be_bytes()),
320+
I32 => split_map_write_back(slice, |b| i32::from_le_bytes(b).wrapping_sub(i32::MIN).to_be_bytes()),
321+
I64 => split_map_write_back(slice, |b| i64::from_le_bytes(b).wrapping_sub(i64::MIN).to_be_bytes()),
322+
I128 => split_map_write_back(slice, |b| i128::from_le_bytes(b).wrapping_sub(i128::MIN).to_be_bytes()),
323+
I256 => split_map_write_back(slice, |b| i256::from_le_bytes(b).wrapping_sub(i256::MIN).to_be_bytes()),
324+
// Refs don't exist here and
325+
// arrays and strings are of unbounded length.
326+
// For floats, we haven't considered them yet.
327+
Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
328+
}
329+
}
330+
331+
fn to_bytes_key(key: Self, ty: &AlgebraicType) -> BytesKey<N> {
332+
fn process<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] {
333+
use AlgebraicType::*;
334+
match ty {
335+
// For sums, read the tag and process the active variant.
336+
Sum(ty) => {
337+
let (&mut tag, rest) = slice.split_first_mut().unwrap();
338+
let ty = &ty.variants[tag as usize].algebraic_type;
339+
process(rest, ty)
340+
}
341+
// For products, just process each field in sequence.
342+
Product(ty) => {
343+
for ty in &ty.elements {
344+
slice = process(slice, &ty.algebraic_type);
345+
}
346+
slice
347+
}
348+
// No need to do anything as these are only a single byte long.
349+
Bool | U8 => &mut slice[1..],
350+
// For unsigned integers, read them as BE and write back as LE.
351+
U16 => split_map_write_back(slice, |b| u16::from_be_bytes(b).to_le_bytes()),
352+
U32 => split_map_write_back(slice, |b| u32::from_be_bytes(b).to_le_bytes()),
353+
U64 => split_map_write_back(slice, |b| u64::from_be_bytes(b).to_le_bytes()),
354+
U128 => split_map_write_back(slice, |b| u128::from_be_bytes(b).to_le_bytes()),
355+
U256 => split_map_write_back(slice, |b| u256::from_be_bytes(b).to_le_bytes()),
356+
// For signed integers, read them as BE, shift them back to signed, and write back as LE.
357+
I8 => split_map_write_back(slice, |b| i8::from_be_bytes(b).wrapping_add(i8::MIN).to_le_bytes()),
358+
I16 => split_map_write_back(slice, |b| i16::from_be_bytes(b).wrapping_add(i16::MIN).to_le_bytes()),
359+
I32 => split_map_write_back(slice, |b| i32::from_be_bytes(b).wrapping_add(i32::MIN).to_le_bytes()),
360+
I64 => split_map_write_back(slice, |b| i64::from_be_bytes(b).wrapping_add(i64::MIN).to_le_bytes()),
361+
I128 => split_map_write_back(slice, |b| i128::from_be_bytes(b).wrapping_add(i128::MIN).to_le_bytes()),
362+
I256 => split_map_write_back(slice, |b| i256::from_be_bytes(b).wrapping_add(i256::MIN).to_le_bytes()),
363+
// Refs don't exist here and
364+
// arrays and strings are of unbounded length.
365+
// For floats, we haven't considered them yet.
366+
Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
367+
}
368+
}
369+
370+
let Self(mut array) = key;
371+
process(array.as_mut_slice(), ty);
372+
BytesKey(array)
373+
}
374+
}
375+
186376
#[cfg(test)]
187377
mod test {
188378
use super::*;
379+
use proptest::array::uniform;
189380
use proptest::prelude::*;
190381
use spacetimedb_sats::bsatn::to_len;
191-
use spacetimedb_sats::proptest::generate_typed_row;
382+
use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, generate_typed_row, SIZE};
192383

193384
const N: usize = 4096;
194385

195386
proptest! {
387+
#![proptest_config(ProptestConfig { max_global_rejects: 65536, ..<_>::default() })]
388+
196389
#[test]
197390
fn test_bytes_key_round_trip((ty, av) in generate_typed_row()) {
198391
let len = to_len(&av).unwrap();
@@ -205,16 +398,11 @@ mod test {
205398
assert_eq!(av, decoded_av);
206399
}
207400

208-
/*
209-
// This test turned out not to hold for integers larger than u8,
401+
// This test does not hold for `BytesKey`
210402
// as BSATN stores integers little-endian,
211403
// but `Ord for AlgebraicValue` compares them as big-endian.
212-
// It's included here for posterity and in case we'd like to
213-
// massage the BSATN before storing it in the `BytesKey`
214-
// to make it order-preserving.
215-
216-
use proptest::array::uniform;
217-
use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, SIZE};
404+
// It does however hold for `RangeCompatBytesKey` which
405+
// massages the BSATN to make it order-preserving.
218406

219407
#[test]
220408
fn order_in_bsatn_is_preserved((ty, [r1, r2]) in gen_with(generate_row_type(0..=SIZE), |ty| uniform(generate_product_value(ty)))) {
@@ -223,17 +411,17 @@ mod test {
223411
let r2: AlgebraicValue = r2.into();
224412

225413
let Some(required) = required_bytes_key_size(&ty, true) else {
226-
//dbg!(&ty);
227414
return Err(TestCaseError::reject("type is incompatible with fixed byte keys in range indices"));
228415
};
229416
prop_assume!(required <= N);
230417

231418
let k1 = BytesKey::<N>::from_algebraic_value(&r1);
419+
let kr1 = RangeCompatBytesKey::from_bytes_key(k1, &ty);
232420
let k2 = BytesKey::<N>::from_algebraic_value(&r2);
233-
let ord_k = k1.cmp(&k2);
421+
let kr2 = RangeCompatBytesKey::from_bytes_key(k2, &ty);
422+
let ord_kr = kr1.cmp(&kr2);
234423
let ord_r = r1.cmp(&r2);
235-
prop_assert_eq!(ord_k, ord_r);
424+
prop_assert_eq!(ord_kr, ord_r);
236425
}
237-
*/
238426
}
239427
}

crates/table/src/table_index/key_size.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use crate::table_index::BytesKey;
2-
1+
use super::bytes_key::{BytesKey, RangeCompatBytesKey};
32
use super::Index;
43
use core::mem;
54
use spacetimedb_memory_usage::MemoryUsage;
@@ -222,3 +221,7 @@ impl KeySize for ArrayValue {
222221
impl<const N: usize> KeySize for BytesKey<N> {
223222
type MemoStorage = ();
224223
}
224+
225+
impl<const N: usize> KeySize for RangeCompatBytesKey<N> {
226+
type MemoStorage = ();
227+
}

0 commit comments

Comments
 (0)