|
| 1 | +use arrow::record_batch::RecordBatch; |
| 2 | +use futures_async_stream::try_stream; |
| 3 | +use crate::execution_v1::ExecutorError; |
| 4 | +use crate::execution_v1::volcano_executor::BoxedExecutor; |
| 5 | + |
| 6 | +pub struct Limit {} |
| 7 | + |
| 8 | +impl Limit { |
| 9 | + #[try_stream(boxed, ok = RecordBatch, error = ExecutorError)] |
| 10 | + pub async fn execute(offset: Option<usize>, limit: Option<usize>, input: BoxedExecutor) { |
| 11 | + let offset_val = offset.unwrap_or(0); |
| 12 | + if limit.is_some() && limit.unwrap() == 0 { |
| 13 | + return Ok(()); |
| 14 | + } |
| 15 | + |
| 16 | + let mut returned_count = 0; |
| 17 | + |
| 18 | + #[for_await] |
| 19 | + for batch in input { |
| 20 | + let batch = batch?; |
| 21 | + |
| 22 | + let cardinality = batch.num_rows() as usize; |
| 23 | + let limit_val = limit.unwrap_or(cardinality); |
| 24 | + |
| 25 | + let start = returned_count.max(offset_val) - returned_count; |
| 26 | + let end = { |
| 27 | + // from total returned rows level, the total_end is end index of whole returned |
| 28 | + // rows level. |
| 29 | + let total_end = offset_val + limit_val; |
| 30 | + let current_batch_end = returned_count + cardinality; |
| 31 | + // we choose the min of total_end and current_batch_end as the end index of to |
| 32 | + // match limit semantics. |
| 33 | + let real_end = total_end.min(current_batch_end); |
| 34 | + // to calculate the end index of current batch |
| 35 | + real_end - returned_count |
| 36 | + }; |
| 37 | + returned_count += cardinality; |
| 38 | + |
| 39 | + // example: offset=1000, limit=2, cardinality=100 |
| 40 | + // when first loop: |
| 41 | + // start = 0.max(1000)-0 = 1000 |
| 42 | + // end = (1000+2).min(0+100)-0 = 100 |
| 43 | + // so, start(1000) > end(100), we skip this loop batch. |
| 44 | + if start >= end { |
| 45 | + continue; |
| 46 | + } |
| 47 | + |
| 48 | + if (start..end) == (0..cardinality) { |
| 49 | + yield batch; |
| 50 | + } else { |
| 51 | + let length = end - start; |
| 52 | + yield batch.slice(start as usize, length as usize); |
| 53 | + } |
| 54 | + |
| 55 | + // dut to returned_count is always += cardinality, and returned_batch maybe slsliced, |
| 56 | + // so it will larger than real total_end. |
| 57 | + // example: offset=1, limit=4, cardinality=6, data=[(0..6)] |
| 58 | + // returned_count=6 > 1+4, meanwhile returned_batch size is 4 ([0..5]) |
| 59 | + if returned_count >= offset_val + limit_val { |
| 60 | + break; |
| 61 | + } |
| 62 | + } |
| 63 | + } |
| 64 | +} |
0 commit comments