Skip to content

Commit 52b897c

Browse files
authored
feat(type): use arrow type to support vectorization (#22)
1 parent 906ad8e commit 52b897c

24 files changed

Lines changed: 1406 additions & 358 deletions

File tree

.github/workflows/ci.yml

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,47 +8,40 @@ env:
88
CARGO_TERM_COLOR: always
99

1010
jobs:
11-
check:
12-
runs-on: ubuntu-20.04
11+
fmt:
12+
runs-on: ubuntu-latest
1313
steps:
14-
- uses: actions/checkout@v2
14+
- uses: actions/checkout@v3
1515
- uses: actions-rs/toolchain@v1
1616
with:
1717
profile: minimal
18-
toolchain: nightly-2023-04-07
1918
components: rustfmt, clippy
20-
- name: Check code format
21-
uses: actions-rs/cargo@v1
19+
- uses: actions/cache@v3
2220
with:
23-
command: fmt
24-
args: --all -- --check
21+
path: |
22+
~/.cargo/registry/index/
23+
~/.cargo/registry/cache/
24+
~/.cargo/git/db/
25+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
26+
- name: Check code format
27+
run: cargo fmt --all -- --check
2528

2629

27-
build:
28-
runs-on: ubuntu-20.04
29-
steps:
30-
- uses: actions/checkout@v2
31-
- uses: actions-rs/toolchain@v1
32-
with:
33-
profile: minimal
34-
toolchain: nightly-2023-04-07
35-
- uses: actions/checkout@v2
36-
- name: Build
37-
uses: actions-rs/cargo@v1
38-
with:
39-
command: build
4030

4131
test:
42-
runs-on: ubuntu-20.04
32+
runs-on: ubuntu-latest
4333
steps:
44-
- uses: actions/checkout@v2
34+
- uses: actions/checkout@v3
4535
- uses: actions-rs/toolchain@v1
4636
with:
4737
profile: minimal
48-
toolchain: nightly-2023-04-07
49-
- uses: actions/checkout@v2
50-
- name: Test
51-
uses: actions-rs/cargo@v1
38+
- uses: actions/cache@v3
5239
with:
53-
command: test
54-
args: --release --no-fail-fast
40+
path: |
41+
~/.cargo/registry/index/
42+
~/.cargo/registry/cache/
43+
~/.cargo/git/db/
44+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
45+
- uses: taiki-e/install-action@nextest
46+
- name: Test
47+
run: cargo nextest run --no-fail-fast --all-features

.github/workflows/cr.yml

Lines changed: 0 additions & 28 deletions
This file was deleted.

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,17 @@ serde = { version = "1", features = ["derive", "rc"] }
3131
serde_json = "1"
3232
async-trait = "0.1.68"
3333
integer-encoding = "3.0.4"
34+
arrow = { version = "28", features = ["prettyprint", "simd"] }
35+
strum_macros = "0.24"
36+
ordered-float = "3.0"
3437
petgraph = "0.6.3"
3538
futures-async-stream = "0.2.6"
3639
async-channel = "1.8.0"
3740
async-backtrace = "0.2.6"
3841
futures = "0.3.25"
3942
futures-lite = "1.12.0"
4043

44+
4145
[dev-dependencies]
4246
ctor = "0.2.0"
4347
env_logger = "0.10"

rust-toolchain

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nightly-2023-04-07

src/binder/create.rs

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1+
use std::collections::HashSet;
2+
3+
use anyhow::Result;
4+
use sqlparser::ast::{ColumnDef, ObjectName};
5+
16
use super::Binder;
27
use crate::binder::{lower_case_name, split_name};
3-
use crate::catalog::{Column, ColumnDesc};
8+
use crate::catalog::ColumnCatalog;
49
use crate::planner::logical_create_table_plan::LogicalCreateTablePlan;
5-
use crate::planner::LogicalPlan;
6-
use crate::types::{ColumnId, TableId};
7-
use anyhow::Result;
8-
use sqlparser::ast::{ColumnDef, ObjectName};
9-
use std::collections::HashSet;
1010

1111
impl Binder {
1212
pub(crate) fn bind_create_table(
@@ -29,10 +29,10 @@ impl Binder {
2929
}
3030
}
3131

32-
let mut columns: Vec<Column> = columns
32+
let columns: Vec<ColumnCatalog> = columns
3333
.iter()
3434
.enumerate()
35-
.map(|(_, col)| Column::from(col))
35+
.map(|(_, col)| ColumnCatalog::from(col.clone()))
3636
.collect();
3737

3838
let plan = LogicalCreateTablePlan {
@@ -48,36 +48,31 @@ impl Binder {
4848

4949
#[cfg(test)]
5050
mod tests {
51+
use sqlparser::ast::CharacterLength;
52+
5153
use super::*;
5254
use crate::binder::BinderContext;
53-
use crate::catalog::Root;
54-
use crate::types::{DataTypeExt, DataTypeKind};
55-
use sqlparser::ast::CharacterLength;
56-
use std::sync::Arc;
55+
use crate::catalog::{ColumnDesc, RootCatalog};
56+
use crate::planner::LogicalPlan;
57+
use crate::types::LogicalType;
5758

5859
#[test]
5960
fn test_create_bind() {
6061
let sql = "create table t1 (id int , name varchar(10))";
61-
let mut binder = Binder::new(BinderContext::new(Root::new()));
62+
let binder = Binder::new(BinderContext::new(RootCatalog::new()));
6263
let stmt = crate::parser::parse_sql(sql).unwrap();
6364
let plan1 = binder.bind(&stmt[0]).unwrap();
6465

65-
let character_length = CharacterLength {
66-
length: 10,
67-
unit: None,
68-
};
6966
let plan2 = LogicalPlan::CreateTable(LogicalCreateTablePlan {
7067
table_name: "t1".to_string(),
7168
columns: vec![
7269
(
7370
"id".to_string(),
74-
DataTypeKind::Int(None).nullable().to_column(),
71+
ColumnDesc::new(LogicalType::Integer, false),
7572
),
7673
(
7774
"name".to_string(),
78-
DataTypeKind::Varchar(Option::from(character_length))
79-
.nullable()
80-
.to_column(),
75+
ColumnDesc::new(LogicalType::Varchar, false),
8176
),
8277
],
8378
});

src/binder/expr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
use anyhow::Result;
12
use sqlparser::ast::Expr;
23

34
use super::Binder;
45
use crate::expression::ScalarExpression;
5-
use anyhow::Result;
66

77
impl Binder {
88
pub(crate) fn bind_expr(&mut self, expr: &Expr) -> Result<ScalarExpression> {

src/binder/mod.rs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@ mod select;
55

66
use std::collections::HashMap;
77

8-
use crate::{catalog::CatalogRef, expression::ScalarExpression, planner::LogicalPlan};
9-
10-
use crate::catalog::{Root, DEFAULT_SCHEMA_NAME};
11-
use crate::types::TableId;
128
use anyhow::Result;
139
use sqlparser::ast::{Ident, ObjectName, Statement};
10+
11+
use crate::catalog::{RootCatalog, DEFAULT_SCHEMA_NAME};
12+
use crate::expression::ScalarExpression;
13+
use crate::planner::LogicalPlan;
14+
use crate::types::TableId;
1415
#[derive(Clone)]
1516
pub struct BinderContext {
16-
catalog: Root,
17+
catalog: RootCatalog,
1718
bind_table: HashMap<String, TableId>,
1819
aliases: HashMap<String, ScalarExpression>,
1920
group_by_exprs: Vec<ScalarExpression>,
@@ -22,7 +23,7 @@ pub struct BinderContext {
2223
}
2324

2425
impl BinderContext {
25-
pub fn new(catalog: Root) -> Self {
26+
pub fn new(catalog: RootCatalog) -> Self {
2627
BinderContext {
2728
catalog,
2829
bind_table: Default::default(),
@@ -91,3 +92,21 @@ fn split_name(name: &ObjectName) -> Result<(&str, &str)> {
9192
_ => return Err(anyhow::anyhow!("Invalid table name: {:?}", name)),
9293
})
9394
}
95+
96+
/// Error enum declared in the binder module.
///
/// `std::error::Error` and `Display` are derived through `thiserror`; the
/// text of each `#[error("...")]` attribute is the variant's `Display`
/// message, with `{0}` / `{1}` filled from the variant's positional fields.
///
/// NOTE(review): the functions visible in this change still return
/// `anyhow::Result`; confirm where (or whether) this type is constructed.
#[derive(thiserror::Error, Debug)]
97+
pub enum BindError {
98+
    /// Statement kind that is not supported; the payload string is echoed in the message.
#[error("unsupported statement {0}")]
99+
UnsupportedStmt(String),
100+
    /// Invalid table; the payload is presumably the table name (TODO confirm against callers).
#[error("invalid table {0}")]
101+
InvalidTable(String),
102+
    /// Invalid table name; carries the identifier parts, rendered with `{:?}` (Debug) formatting.
#[error("invalid table name: {0:?}")]
103+
InvalidTableName(Vec<Ident>),
104+
    /// Invalid column; the payload is presumably the column name (TODO confirm against callers).
#[error("invalid column {0}")]
105+
InvalidColumn(String),
106+
    /// Ambiguous column reference; the payload string is echoed in the message.
#[error("ambiguous column {0}")]
107+
AmbiguousColumn(String),
108+
    /// Mismatched operand types of a binary operator; the two payload strings are
    /// printed as `{0} != {1}` (presumably left and right type names — verify).
#[error("binary operator types mismatch: {0} != {1}")]
109+
BinaryOpTypeMismatch(String, String),
110+
    /// A subquery in a `FROM` clause has no alias; carries no payload.
#[error("subquery in FROM must have an alias")]
111+
SubqueryMustHaveAlias,
112+
}

src/binder/select.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,7 @@ impl Binder {
166166
///
167167
/// - Qualified name, e.g. `SELECT t.a FROM t`
168168
/// - Qualified name with wildcard, e.g. `SELECT t.* FROM t,t1`
169-
/// - Scalar expression or aggregate expression, e.g. `SELECT COUNT(*) + 1
170-
/// AS count FROM t`
169+
/// - Scalar expression or aggregate expression, e.g. `SELECT COUNT(*) + 1 AS count FROM t`
171170
///
172171
fn normalize_select_item(&mut self, items: &[SelectItem]) -> Result<Vec<ScalarExpression>> {
173172
let mut select_items = vec![];
@@ -293,8 +292,8 @@ impl Binder {
293292
let expr = self.bind_expr(expr)?;
294293
match expr {
295294
ScalarExpression::Constant(dv) => match dv {
296-
DataValue::Int32(v) if v > 0 => limit = v as usize,
297-
DataValue::Int64(v) if v > 0 => limit = v as usize,
295+
DataValue::Int32(Some(v)) if v > 0 => limit = v as usize,
296+
DataValue::Int64(Some(v)) if v > 0 => limit = v as usize,
298297
_ => return Err(anyhow::Error::msg("invalid limit expression.".to_owned())),
299298
},
300299
_ => return Err(anyhow::Error::msg("invalid limit expression.".to_owned())),
@@ -305,8 +304,8 @@ impl Binder {
305304
let expr = self.bind_expr(&expr.value)?;
306305
match expr {
307306
ScalarExpression::Constant(dv) => match dv {
308-
DataValue::Int32(v) if v > 0 => offset = v as usize,
309-
DataValue::Int64(v) if v > 0 => offset = v as usize,
307+
DataValue::Int32(Some(v)) if v > 0 => offset = v as usize,
308+
DataValue::Int64(Some(v)) if v > 0 => offset = v as usize,
310309
_ => return Err(anyhow::Error::msg("invalid limit expression.".to_owned())),
311310
},
312311
_ => return Err(anyhow::Error::msg("invalid offset expression.".to_owned())),

0 commit comments

Comments
 (0)