Skip to content

Commit f04d995

Browse files
author
Leo Pourcelot
committed
Uses &str instead of &[u8] in parsing
This removes a lot of ambiguity and allows us to ensure that `ClojureRS` will still work with non-ASCII characters. Additionally, a bug lurking in `consume_clojure_whitespaces` was fixed: the function returned an error when there was no whitespace to consume, when it should have succeeded.
1 parent 6f56dd2 commit f04d995

4 files changed

Lines changed: 110 additions & 105 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ pest = "*"
1212
pest_derive = "*"
1313
nom = "*"
1414
text_io = "*"
15-
rand = "*"
15+
rand = "*"

src/main.rs

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use nom::Needed::Size;
4242
fn main()
4343
{
4444
println!("Clojure RS 0.0.1");
45-
45+
4646
// Register our macros / functions ahead of time
4747
let add_fn = rust_core::AddFn{};
4848
let str_fn = rust_core::StrFn{};
@@ -59,11 +59,11 @@ fn main()
5959
let def_macro = Value::DefMacro{};
6060
let fn_macro = Value::FnMacro{};
6161
let defmacro_macro = Value::DefmacroMacro{};
62-
62+
6363
let environment = Rc::new(Environment::new_main_environment());
64-
64+
6565
let eval_fn = rust_core::EvalFn::new(Rc::clone(&environment));
66-
66+
6767
environment.insert(Symbol::intern("+"),add_fn.to_rc_value());
6868
environment.insert(Symbol::intern("let"),let_macro.to_rc_value());
6969
environment.insert(Symbol::intern("str"),str_fn.to_rc_value());
@@ -87,33 +87,32 @@ fn main()
8787
//
8888
let stdin = io::stdin();
8989
print!("user=> ");
90-
let mut remaining_input_buffer = String::from("");
90+
let mut input_buffer = String::new();
9191
for line in stdin.lock().lines() {
92-
let line = line.unwrap();
93-
remaining_input_buffer.push_str(&line);
94-
let mut remaining_input_bytes = remaining_input_buffer.as_bytes();
95-
loop {
96-
let next_read_parse = reader::try_read(remaining_input_bytes);
97-
match next_read_parse {
98-
Ok((_remaining_input_bytes,value)) => {
99-
print!("{} ",value.eval(Rc::clone(&environment)).to_string_explicit());
100-
remaining_input_bytes = _remaining_input_bytes;
101-
},
102-
Err(Incomplete(_)) => {
103-
remaining_input_buffer = String::from_utf8(remaining_input_bytes.to_vec()).unwrap();
104-
break;
105-
},
106-
err => {
107-
print!("{}",Value::Condition(format!("Reader Error: {:?}",err)));
108-
remaining_input_buffer = String::from("");
109-
break;
110-
}
111-
}
112-
}
113-
println!();
114-
print!("user=> ");
92+
let line = line.unwrap();
93+
input_buffer.push_str(&line);
94+
let mut remaining_input = input_buffer.as_str();
95+
loop {
96+
let next_read_parse = reader::try_read(remaining_input);
97+
match next_read_parse {
98+
Ok((_remaining_input,value)) => {
99+
print!("{} ",value.eval(Rc::clone(&environment)).to_string_explicit());
100+
remaining_input = _remaining_input;
101+
},
102+
Err(Incomplete(_)) => {
103+
break;
104+
},
105+
err => {
106+
print!("{}",Value::Condition(format!("Reader Error: {:?}",err)));
107+
input_buffer.clear();
108+
break;
109+
}
110+
}
111+
}
112+
println!();
113+
print!("user=> ");
115114
}
116-
115+
117116
}
118117

119118

src/reader.rs

Lines changed: 55 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,16 @@
77
//! or even reader macros, although the latter will likely be reserved for our interpreter here (but perhaps
88
//! not; since this is about being a 'free-er' Clojure, especially since it can't compete with it in raw
99
//! power, neither speed nor ecosystem, it might be worth it to leave in reader macros).
10-
extern crate nom;
1110
1211
use nom::{
1312
IResult,
1413
branch::alt,
1514
error::convert_error,
16-
character::{is_alphabetic,is_alphanumeric},
1715
character::complete::multispace0,
18-
character::is_digit,
19-
bytes::complete::{take_while1,take_until,tag},
16+
bytes::complete::{take_while1,tag},
17+
take_until,
18+
terminated,
19+
map,
2020
combinator::map_res,
2121
sequence::{preceded,terminated}};
2222

@@ -26,27 +26,30 @@ use crate::persistent_vector::{ToPersistentVector};
2626
use crate::persistent_list_map::{PersistentListMap,ToPersistentListMap};
2727
use crate::maps::MapEntry;
2828
use crate::symbol::Symbol;
29-
use std::rc::Rc;
29+
use std::{
30+
iter::FromIterator,
31+
rc::Rc,
32+
};
3033

3134
use std::fs::File;
3235

3336
/// Parses valid Clojure identifiers
3437
/// Example Successes: ab, cat, -12+3, |blah|, <well>
3538
/// Example Failures: 'a, 12b, ,cat
36-
pub fn identifier_parser(input:&[u8]) -> IResult<&[u8], String> {
37-
named!( non_numeric_identifier_char<&[u8],u8>,
38-
alt!( map!(one_of!("|?<>+-_=^%&$*!"), |x| x as u8 ) |
39-
map!(take_while_m_n!(1,1,is_alphabetic),|ls| ls[0] as u8)));
40-
named!( identifier_char<&[u8],u8>,
41-
alt!( map!(one_of!("|?<>+-_=^%&$*!"), |x| x as u8 ) |
42-
map!(take_while_m_n!(1,1,is_alphanumeric),|ls| ls[0] as u8)));
43-
named!( identifier_ <&[u8],String> ,
39+
pub fn identifier_parser(input: &str) -> IResult<&str, String> {
40+
named!( non_numeric_identifier_char<&str, char>,
41+
alt!( one_of!("|?<>+-_=^%&$*!") |
42+
map!(take_while_m_n!(1,1,char::is_alphabetic),|ls| ls.chars().next().unwrap())));
43+
named!( identifier_char<&str, char>,
44+
alt!( one_of!("|?<>+-_=^%&$*!") |
45+
map!(take_while_m_n!(1,1,char::is_alphanumeric),|ls| ls.chars().next().unwrap())));
46+
named!( identifier_ <&str, String> ,
4447
do_parse!(
4548
head: non_numeric_identifier_char >>
4649
rest_input:
47-
map_res!(
50+
map!(
4851
many0!(complete!(identifier_char)),
49-
String::from_utf8) >>
52+
String::from_iter) >>
5053
(format!("{}{}",head as char,rest_input))
5154
));
5255

@@ -55,7 +58,7 @@ pub fn identifier_parser(input:&[u8]) -> IResult<&[u8], String> {
5558
}
5659

5760
/// Parses valid Clojure symbols, whose name is a valid identifier
58-
pub fn symbol_parser(input: &[u8]) -> IResult<&[u8], Symbol> {
61+
pub fn symbol_parser(input: &str) -> IResult<&str, Symbol> {
5962
identifier_parser(input).map(|(rest_input,name)| {
6063
(rest_input, Symbol::intern(&name))
6164
})
@@ -64,12 +67,10 @@ pub fn symbol_parser(input: &[u8]) -> IResult<&[u8], Symbol> {
6467
// @TODO add negatives
6568
/// Parses valid integers
6669
/// Example Successes: 1, 2, 4153, -12421
67-
pub fn integer(input: &[u8]) -> IResult<&[u8],i32> {
68-
map_res(take_while1(is_digit),|digits: &[u8]| {
69-
String::from_utf8(digits.to_vec()).map(|digit_string| {
70-
digit_string.parse::<i32>().unwrap()
71-
})
72-
})(input)
70+
pub fn integer(input: &str) -> IResult<&str, i32> {
71+
named!(integer_lexer<&str, &str>, take_while1!(|c: char| c.is_digit(10)));
72+
73+
integer_lexer(input).map(|(rest, digits)| (rest, digits.parse().unwrap()))
7374
}
7475
// Currently used to create 'try_readers', which are readers (or
7576
// reader functions, at least) that are basically composable InputType
@@ -93,7 +94,7 @@ pub fn to_value_parser<I,O: ToValue>(parser: impl Fn(I) -> IResult<I,O>) -> impl
9394
/// 1231415 => Value::I32(1231415)
9495
/// Example Failures:
9596
/// 1.5, 7.1321 , 1423152621625226126431525
96-
pub fn try_read_i32(input: &[u8]) -> IResult<&[u8],Value> {
97+
pub fn try_read_i32(input: &str) -> IResult<&str, Value> {
9798
to_value_parser(integer)(input)
9899
}
99100

@@ -104,32 +105,36 @@ pub fn try_read_i32(input: &[u8]) -> IResult<&[u8],Value> {
104105
/// +common-lisp-global+ => Value::Symbol(Symbol { name: "+common-lisp-global+" })
105106
/// Example Failures:
106107
/// 12cat, 'quoted, @at-is-for-references
107-
pub fn try_read_symbol(input: &[u8]) -> IResult<&[u8],Value> {
108+
pub fn try_read_symbol(input: &str) -> IResult<&str, Value> {
108109
to_value_parser(symbol_parser)(input)
109110
}
110111

111112
// @TODO allow escaped strings
112113
/// Tries to parse &str into Value::String
113114
/// Example Successes:
114115
/// "this is pretty straightforward" => Value::String("this is pretty straightforward")
115-
pub fn try_read_string(input: &[u8]) -> IResult<&[u8],Value> {
116-
named!(quotation, preceded!(consume_clojure_whitespaces, tag!("\"")));
116+
pub fn try_read_string(input: &str) -> IResult<&str, Value> {
117+
named!(quotation<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("\"")));
118+
117119
let (rest_input,_) = quotation(input)?;
118-
to_value_parser(
119-
map_res(
120-
terminated(
121-
take_until("\""),
122-
tag("\"")),
123-
|bytes: &[u8]| String::from_utf8(bytes.to_vec())))(rest_input)
120+
named!(
121+
string_parser<&str, String>,
122+
map!(
123+
terminated!(take_until!("\""), tag("\"")),
124+
|v| String::from(v)
125+
)
126+
);
127+
128+
to_value_parser(string_parser)(input)
124129
}
125130

126131
// @TODO Perhaps generalize this, or even generalize it as a reader macro
127132
/// Tries to parse &str into Value::PersistentListMap, or some other Value::..Map
128133
/// Example Successes:
129134
/// {:a 1} => Value::PersistentListMap {PersistentListMap { MapEntry { :a, 1} .. ]})
130-
pub fn try_read_map(input: &[u8]) -> IResult<&[u8],Value> {
131-
named!(lbracep, preceded!(consume_clojure_whitespaces, tag!("{")));
132-
named!(rbracep, preceded!(consume_clojure_whitespaces, tag!("}")));
135+
pub fn try_read_map(input: &str) -> IResult<&str, Value> {
136+
named!(lbracep<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("{")));
137+
named!(rbracep<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("}")));
133138
let (map_inner_input,_) = lbracep(input)?;
134139
let mut map_as_vec : Vec<MapEntry> = vec![];
135140
let mut rest_input = map_inner_input;
@@ -155,9 +160,9 @@ pub fn try_read_map(input: &[u8]) -> IResult<&[u8],Value> {
155160
/// [1 2 3] => Value::PersistentVector(PersistentVector { vals: [Rc(Value::I32(1) ... ]})
156161
/// [1 2 [5 10 15] 3]
157162
/// => Value::PersistentVector(PersistentVector { vals: [Rc(Value::I32(1) .. Rc(Value::PersistentVector..)]})
158-
pub fn try_read_vector(input: &[u8]) -> IResult<&[u8],Value> {
159-
named!(lbracketp, preceded!(consume_clojure_whitespaces, tag!("[")));
160-
named!(rbracketp, preceded!(consume_clojure_whitespaces, tag!("]")));
163+
pub fn try_read_vector(input: &str) -> IResult<&str, Value> {
164+
named!(lbracketp<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("[")));
165+
named!(rbracketp<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("]")));
161166
let (vector_inner_input,_) = lbracketp(input)?;
162167
let mut vector_as_vec = vec![];
163168
// What's left of our input as we read more of our PersistentVector
@@ -190,9 +195,9 @@ pub fn try_read_vector(input: &[u8]) -> IResult<&[u8],Value> {
190195
}
191196
}
192197

193-
pub fn try_read_list(input: &[u8]) -> IResult<&[u8],Value> {
194-
named!(lparenp, preceded!(consume_clojure_whitespaces, tag!("(")));
195-
named!(rparenp, preceded!(consume_clojure_whitespaces, tag!(")")));
198+
pub fn try_read_list(input: &str) -> IResult<&str, Value> {
199+
named!(lparenp<&str, &str>, preceded!(consume_clojure_whitespaces, tag!("(")));
200+
named!(rparenp<&str, &str>, preceded!(consume_clojure_whitespaces, tag!(")")));
196201

197202
let (list_inner_input,_) = lparenp(input)?;
198203
let mut list_as_vec = vec![];
@@ -221,17 +226,18 @@ pub fn try_read_list(input: &[u8]) -> IResult<&[u8],Value> {
221226
}
222227
}
223228

224-
pub fn try_read(input: &[u8]) -> IResult<&[u8], Value> {
229+
pub fn try_read(input: &str) -> IResult<&str, Value> {
225230
preceded(multispace0,alt(
226231
(try_read_map,
227232
try_read_string,
228233
try_read_symbol,
229234
try_read_i32,
230235
try_read_list,
231-
try_read_vector)))(input)
236+
try_read_vector
237+
)))(input)
232238
}
233239

234-
pub fn debug_try_read(input: &[u8]) -> IResult<&[u8], Value> {
240+
pub fn debug_try_read(input: &str) -> IResult<&str, Value> {
235241

236242
let reading = try_read(input);
237243
match &reading {
@@ -244,15 +250,14 @@ pub fn debug_try_read(input: &[u8]) -> IResult<&[u8], Value> {
244250
/// Consumes zero or more whitespaces from the input.
245251
///
246252
/// A whitespace is either a Unicode whitespace or a comma.
247-
fn consume_clojure_whitespaces(input: &[u8]) -> IResult<&[u8], ()> {
248-
named!(parser, take_while1!(is_clojure_whitespace));
253+
fn consume_clojure_whitespaces(input: &str) -> IResult<&str, ()> {
254+
named!(parser<&str, &str>, take_while!(is_clojure_whitespace));
249255
parser(input).map(|(rest, _)| (rest, ()))
250256
}
251257

252258
/// Returns whether a given character is a whitespace.
253259
///
254-
/// Clojure defines a whitespace as either a comma or an ASCII whitespace.
255-
fn is_clojure_whitespace(c: u8) -> bool {
256-
// ASCII symbol of `,` is 44.
257-
c.is_ascii_whitespace() || c == 44
260+
/// Clojure defines a whitespace as either a comma or a Unicode whitespace.
261+
fn is_clojure_whitespace(c: char) -> bool {
262+
c.is_whitespace() || c == ','
258263
}

src/repl.rs

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,35 +21,36 @@ use nom::Needed::Size;
2121
pub fn try_eval_file(environment: &Rc<Environment>,filepath: &str) -> Result<(),io::Error>{
2222
let core = File::open(filepath)?;
2323

24-
24+
2525
let reader = BufReader::new(core);
2626

27-
let mut remaining_input_buffer = String::from("");
27+
let mut input_buffer = String::new();
28+
2829
for line in reader.lines() {
29-
let line = line?;
30-
remaining_input_buffer.push_str(&line);
31-
let mut remaining_input_bytes = remaining_input_buffer.as_bytes();
32-
loop {
33-
let next_read_parse = reader::try_read(remaining_input_bytes);
34-
match next_read_parse {
35-
Ok((_remaining_input,value)) => {
36-
//print!("{} ",value.eval(Rc::clone(&environment)).to_string_explicit());
37-
value.eval(Rc::clone(&environment));
38-
remaining_input_bytes = _remaining_input;
39-
},
40-
Err(Incomplete(Size(1))) => {
41-
remaining_input_buffer = String::from_utf8(remaining_input_bytes.to_vec()).unwrap();
42-
break;
43-
},
44-
err => {
45-
println!("Error evaluating file {}; {}",filepath,Value::Condition(format!("Reader Error: {:?}",err)));
46-
remaining_input_buffer = String::from("");
47-
break;
48-
}
49-
}
50-
}
30+
let line = line?;
31+
input_buffer.push_str(&line);
32+
let mut remaining_input = input_buffer.as_str();
33+
loop {
34+
let next_read_parse = reader::try_read(remaining_input);
35+
match next_read_parse {
36+
Ok((_remaining_input,value)) => {
37+
//print!("{} ",value.eval(Rc::clone(&environment)).to_string_explicit());
38+
value.eval(Rc::clone(&environment));
39+
remaining_input = _remaining_input;
40+
},
41+
Err(Incomplete(Size(1))) => {
42+
break;
43+
},
44+
err => {
45+
println!("Error evaluating file {}; {}",filepath,Value::Condition(format!("Reader Error: {:?}",err)));
46+
input_buffer.clear();
47+
remaining_input = "";
48+
break;
49+
}
50+
}
51+
}
5152
}
5253

5354
Ok(())
54-
55+
5556
}

0 commit comments

Comments
 (0)