77//! or even reader macros, although the latter will likely be reserved for our interpreter here (but perhaps
88//! not; since this is about being a 'free-er' Clojure, especially since it can't compete with it in raw
99//! power, neither speed nor ecosystem, it might be worth it to leave in reader macros.
10- extern crate nom;
1110
1211use nom:: {
1312 IResult ,
1413 branch:: alt,
1514 error:: convert_error,
16- character:: { is_alphabetic, is_alphanumeric} ,
1715 character:: complete:: multispace0,
18- character:: is_digit,
19- bytes:: complete:: { take_while1, take_until, tag} ,
16+ bytes:: complete:: { take_while1, tag} ,
17+ take_until,
18+ terminated,
19+ map,
2020 combinator:: map_res,
2121 sequence:: { preceded, terminated} } ;
2222
@@ -26,27 +26,30 @@ use crate::persistent_vector::{ToPersistentVector};
2626use crate :: persistent_list_map:: { PersistentListMap , ToPersistentListMap } ;
2727use crate :: maps:: MapEntry ;
2828use crate :: symbol:: Symbol ;
29- use std:: rc:: Rc ;
29+ use std:: {
30+ iter:: FromIterator ,
31+ rc:: Rc ,
32+ } ;
3033
3134use std:: fs:: File ;
3235
3336/// Parses valid Clojure identifiers
3437/// Example Successes: ab, cat, -12+3, |blah|, <well>
3538/// Example Failures: 'a, 12b, ,cat
36- pub fn identifier_parser ( input : & [ u8 ] ) -> IResult < & [ u8 ] , String > {
37- named ! ( non_numeric_identifier_char<& [ u8 ] , u8 >,
38- alt!( map! ( one_of!( "|?<>+-_=^%&$*!" ) , |x| x as u8 ) |
39- map!( take_while_m_n!( 1 , 1 , is_alphabetic) , |ls| ls[ 0 ] as u8 ) ) ) ;
40- named ! ( identifier_char<& [ u8 ] , u8 >,
41- alt!( map! ( one_of!( "|?<>+-_=^%&$*!" ) , |x| x as u8 ) |
42- map!( take_while_m_n!( 1 , 1 , is_alphanumeric) , |ls| ls[ 0 ] as u8 ) ) ) ;
43- named ! ( identifier_ <& [ u8 ] , String > ,
39+ pub fn identifier_parser ( input : & str ) -> IResult < & str , String > {
40+ named ! ( non_numeric_identifier_char<& str , char >,
41+ alt!( one_of!( "|?<>+-_=^%&$*!" ) |
42+ map!( take_while_m_n!( 1 , 1 , char :: is_alphabetic) , |ls| ls. chars ( ) . next ( ) . unwrap ( ) ) ) ) ;
43+ named ! ( identifier_char<& str , char >,
44+ alt!( one_of!( "|?<>+-_=^%&$*!" ) |
45+ map!( take_while_m_n!( 1 , 1 , char :: is_alphanumeric) , |ls| ls. chars ( ) . next ( ) . unwrap ( ) ) ) ) ;
46+ named ! ( identifier_ <& str , String > ,
4447 do_parse!(
4548 head: non_numeric_identifier_char >>
4649 rest_input:
47- map_res !(
50+ map !(
4851 many0!( complete!( identifier_char) ) ,
49- String :: from_utf8 ) >>
52+ String :: from_iter ) >>
5053 ( format!( "{}{}" , head as char , rest_input) )
5154 ) ) ;
5255
@@ -55,7 +58,7 @@ pub fn identifier_parser(input:&[u8]) -> IResult<&[u8], String> {
5558}
5659
5760/// Parses valid Clojure symbols, whose name is a valid identifier
58- pub fn symbol_parser ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Symbol > {
61+ pub fn symbol_parser ( input : & str ) -> IResult < & str , Symbol > {
5962 identifier_parser ( input) . map ( |( rest_input, name) | {
6063 ( rest_input, Symbol :: intern ( & name) )
6164 } )
@@ -64,12 +67,10 @@ pub fn symbol_parser(input: &[u8]) -> IResult<&[u8], Symbol> {
6467// @TODO add negatives
6568/// Parses valid integers
6669/// Example Successes: 1, 2, 4153 (negatives such as -12421 are not parsed yet; see the TODO above)
67- pub fn integer ( input : & [ u8 ] ) -> IResult < & [ u8 ] , i32 > {
68- map_res ( take_while1 ( is_digit) , |digits : & [ u8 ] | {
69- String :: from_utf8 ( digits. to_vec ( ) ) . map ( |digit_string| {
70- digit_string. parse :: < i32 > ( ) . unwrap ( )
71- } )
72- } ) ( input)
70+ pub fn integer ( input : & str ) -> IResult < & str , i32 > {
71+ named ! ( integer_lexer<& str , & str >, take_while1!( |c: char | c. is_digit( 10 ) ) ) ;
72+
73+ integer_lexer ( input) . map ( |( rest, digits) | ( rest, digits. parse ( ) . unwrap ( ) ) )
7374}
7475// Currently used to create 'try_readers', which are readers (or
7576// reader functions, at least) that are basically composable InputType
@@ -93,7 +94,7 @@ pub fn to_value_parser<I,O: ToValue>(parser: impl Fn(I) -> IResult<I,O>) -> impl
9394/// 1231415 => Value::I32(1231415)
9495/// Example Failures:
9596/// 1.5, 7.1321 , 1423152621625226126431525
96- pub fn try_read_i32 ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
97+ pub fn try_read_i32 ( input : & str ) -> IResult < & str , Value > {
9798 to_value_parser ( integer) ( input)
9899}
99100
@@ -104,32 +105,36 @@ pub fn try_read_i32(input: &[u8]) -> IResult<&[u8],Value> {
104105/// +common-lisp-global+ => Value::Symbol(Symbol { name: "+common-lisp-global+" })
105106/// Example Failures:
106107/// 12cat, 'quoted, @at-is-for-references
107- pub fn try_read_symbol ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
108+ pub fn try_read_symbol ( input : & str ) -> IResult < & str , Value > {
108109 to_value_parser ( symbol_parser) ( input)
109110}
110111
111112// @TODO allow escaped strings
112113/// Tries to parse &str into Value::String
113114/// Example Successes:
114115/// "this is pretty straightforward" => Value::String("this is pretty straightforward")
115- pub fn try_read_string ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
116- named ! ( quotation, preceded!( consume_clojure_whitespaces, tag!( "\" " ) ) ) ;
116+ pub fn try_read_string ( input : & str ) -> IResult < & str , Value > {
117+ named ! ( quotation<& str , & str >, preceded!( consume_clojure_whitespaces, tag!( "\" " ) ) ) ;
118+
117119 let ( rest_input, _) = quotation ( input) ?;
118- to_value_parser (
119- map_res (
120- terminated (
121- take_until ( "\" " ) ,
122- tag ( "\" " ) ) ,
123- |bytes : & [ u8 ] | String :: from_utf8 ( bytes. to_vec ( ) ) ) ) ( rest_input)
120+ named ! (
121+ string_parser<& str , String >,
122+ map!(
123+ terminated!( take_until!( "\" " ) , tag( "\" " ) ) ,
124+ |v| String :: from( v)
125+ )
126+ ) ;
127+
128+ to_value_parser ( string_parser) ( input)
124129}
125130
126131// @TODO Perhaps generalize this, or even generalize it as a reader macro
127132/// Tries to parse &str into Value::PersistentListMap, or some other Value::..Map
128133/// Example Successes:
129134/// {:a 1} => Value::PersistentListMap(PersistentListMap { MapEntry { :a, 1 } .. })
130- pub fn try_read_map ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
131- named ! ( lbracep, preceded!( consume_clojure_whitespaces, tag!( "{" ) ) ) ;
132- named ! ( rbracep, preceded!( consume_clojure_whitespaces, tag!( "}" ) ) ) ;
135+ pub fn try_read_map ( input : & str ) -> IResult < & str , Value > {
136+ named ! ( lbracep< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( "{" ) ) ) ;
137+ named ! ( rbracep< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( "}" ) ) ) ;
133138 let ( map_inner_input, _) = lbracep ( input) ?;
134139 let mut map_as_vec : Vec < MapEntry > = vec ! [ ] ;
135140 let mut rest_input = map_inner_input;
@@ -155,9 +160,9 @@ pub fn try_read_map(input: &[u8]) -> IResult<&[u8],Value> {
155160/// [1 2 3] => Value::PersistentVector(PersistentVector { vals: [Rc(Value::I32(1) ... ]})
156161/// [1 2 [5 10 15] 3]
157162/// => Value::PersistentVector(PersistentVector { vals: [Rc(Value::I32(1) .. Rc(Value::PersistentVector..)]})
158- pub fn try_read_vector ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
159- named ! ( lbracketp, preceded!( consume_clojure_whitespaces, tag!( "[" ) ) ) ;
160- named ! ( rbracketp, preceded!( consume_clojure_whitespaces, tag!( "]" ) ) ) ;
163+ pub fn try_read_vector ( input : & str ) -> IResult < & str , Value > {
164+ named ! ( lbracketp< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( "[" ) ) ) ;
165+ named ! ( rbracketp< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( "]" ) ) ) ;
161166 let ( vector_inner_input, _) = lbracketp ( input) ?;
162167 let mut vector_as_vec = vec ! [ ] ;
163168 // What's left of our input as we read more of our PersistentVector
@@ -190,9 +195,9 @@ pub fn try_read_vector(input: &[u8]) -> IResult<&[u8],Value> {
190195 }
191196}
192197
193- pub fn try_read_list ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
194- named ! ( lparenp, preceded!( consume_clojure_whitespaces, tag!( "(" ) ) ) ;
195- named ! ( rparenp, preceded!( consume_clojure_whitespaces, tag!( ")" ) ) ) ;
198+ pub fn try_read_list ( input : & str ) -> IResult < & str , Value > {
199+ named ! ( lparenp< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( "(" ) ) ) ;
200+ named ! ( rparenp< & str , & str > , preceded!( consume_clojure_whitespaces, tag!( ")" ) ) ) ;
196201
197202 let ( list_inner_input, _) = lparenp ( input) ?;
198203 let mut list_as_vec = vec ! [ ] ;
@@ -221,17 +226,18 @@ pub fn try_read_list(input: &[u8]) -> IResult<&[u8],Value> {
221226 }
222227}
223228
224- pub fn try_read ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
229+ pub fn try_read ( input : & str ) -> IResult < & str , Value > {
225230 preceded ( multispace0, alt (
226231 ( try_read_map,
227232 try_read_string,
228233 try_read_symbol,
229234 try_read_i32,
230235 try_read_list,
231- try_read_vector) ) ) ( input)
236+ try_read_vector
237+ ) ) ) ( input)
232238}
233239
234- pub fn debug_try_read ( input : & [ u8 ] ) -> IResult < & [ u8 ] , Value > {
240+ pub fn debug_try_read ( input : & str ) -> IResult < & str , Value > {
235241
236242 let reading = try_read ( input) ;
237243 match & reading {
@@ -244,15 +250,14 @@ pub fn debug_try_read(input: &[u8]) -> IResult<&[u8], Value> {
244250/// Consumes zero or more whitespaces from the input.
245251///
246252/// A whitespace is either an ASCII whitespace or a comma.
247- fn consume_clojure_whitespaces ( input : & [ u8 ] ) -> IResult < & [ u8 ] , ( ) > {
248- named ! ( parser, take_while1 !( is_clojure_whitespace) ) ;
253+ fn consume_clojure_whitespaces ( input : & str ) -> IResult < & str , ( ) > {
254+ named ! ( parser< & str , & str > , take_while !( is_clojure_whitespace) ) ;
249255 parser ( input) . map ( |( rest, _) | ( rest, ( ) ) )
250256}
251257
252258/// Returns whether a given character is a whitespace.
253259///
254- /// Clojure defines a whitespace as either a comma or an ASCII whitespace.
255- fn is_clojure_whitespace ( c : u8 ) -> bool {
256- // ASCII symbol of `,` is 44.
257- c. is_ascii_whitespace ( ) || c == 44
260+ /// Clojure defines a whitespace as either a comma or a Unicode whitespace.
261+ fn is_clojure_whitespace ( c : char ) -> bool {
262+ c. is_whitespace ( ) || c == ','
258263}
0 commit comments