@@ -104,14 +104,16 @@ fn cons_str(head: char, tail: &str) -> String {
104104/// - `$`,
105105/// - `*`,
106106/// - `!`,
107- fn is_identifier_char ( chr : char ) -> bool {
108- chr . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( chr )
107+ fn is_identifier_char ( ch : char ) -> bool {
108+ ch . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( ch )
109109}
110110
111- /// Returns whether if a character can be in the head of an identifier.
111+ /// Returns true if a character is an acceptable (non numeric) identifier char
112112///
113- /// An identifier is composed of a head (its first char) and a tail (the other
114- /// chars).
113+ /// An identifier is either a non numeric identifier char, followed by any number
114+ /// of identifier chars, or is a '/' and nothing else.
115+ ///
116+ /// A separate function will be used to detect if an identifier is possibly just '/'
115117///
116118/// A character is an identifier char if it is alphabetic or if it is one of:
117119/// - `|`,
@@ -128,20 +130,55 @@ fn is_identifier_char(chr: char) -> bool {
128130/// - `$`,
129131/// - `*`,
130132/// - `!`,
131- fn is_non_numeric_identifier_char ( chr : char ) -> bool {
132- chr. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( chr)
133+ fn is_non_numeric_identifier_char ( ch : char ) -> bool {
134+ ch. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( ch)
135+ }
136+
137+ /// Returns true if a character is an acceptable (non numeric) identifier char, or '/'
138+ ///
139+ /// An identifier is either a non numeric identifier char, followed by any number
140+ /// of identifier chars, or is a '/' and nothing else.
141+ ///
142+ /// The reason we check if this is *either* a non numeric identifier char, or a '/',
143+ /// is because we will want to use it to parse either
144+ /// 1.a normal identifier
145+ /// 2.'/',
146+ /// 3. something like '/blah'
147+ /// And then, if we have '/blah', we will proactively make the read fail
148+ ///
149+ /// We need to explicitly look for this '/blah' case is otherwise, if we just check for 1 and 2,
150+ /// then in the case where someone types in '/blah' it will count as two valid separate reads --
151+ /// first the symbol '/' and then the symbol 'blah'.
152+ ///
153+ /// This function passes if the char is alphabetic, a '/', or one of:
154+ /// - `|`,
155+ /// - `?`,
156+ /// - `<`,
157+ /// - `>`,
158+ /// - `+`,
159+ /// - `-`,
160+ /// - `_`,
161+ /// - `=`,
162+ /// - `^`,
163+ /// - `%`,
164+ /// - `&`,
165+ /// - `$`,
166+ /// - `*`,
167+ /// - `!`,
168+ fn is_non_numeric_identifier_char_or_slash ( ch : char ) -> bool {
169+ ch == '/' || is_non_numeric_identifier_char ( ch)
133170}
134171
135172/// Returns true if given character is a minus character
136173/// - `-`,
137- fn is_minus_char ( chr : char ) -> bool {
138- chr == '-'
174+ fn is_minus_char ( ch : char ) -> bool {
175+ ch == '-'
139176}
140177
141178/// Returns true if given character is a period character
142179/// - `-`,
143- fn is_period_char ( chr : char ) -> bool {
144- chr == '.'
180+ fn is_period_char ( ch : char ) -> bool {
181+ ch == '.'
145182}
146183
147184/// Returns whether if a given character is a whitespace.
@@ -207,27 +244,40 @@ fn identifier_tail(input: &str) -> IResult<&str, &str> {
207244}
208245
209246/// Parses valid Clojure identifiers
210- /// Example Successes: ab, cat, -12+3, |blah|, <well>
211- /// Example Failures: 'a, 12b, ,cat
247+ /// Example Successes: ab, cat, -12+3, |blah|, <well>, / (edge case)
248+ /// Example Failures: 'a, 12b, ,cat , /ab
212249pub fn identifier_parser ( input : & str ) -> IResult < & str , String > {
213- named ! ( identifier_head<& str , char >,
250+ // We will try to parse either a valid identifier, *or* the invalid identifier
251+ // '/slashwithmorecharacters'
252+ // Because if we do get the '/blah', we want to know and actively fail, otherwise '/blah'
253+ // will just count as two valid reads; one for '/' and one for 'blah'
254+ // So, we call these parsers 'maybe_valid_identifier_..', as they are also trying to catch
255+ // this one invalid case
256+ named ! ( maybe_invalid_identifier_head_parser<& str , char >,
214257 map!(
215- take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char ) ,
258+ take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char_or_slash ) ,
216259 first_char
217260 )
218261 ) ;
219262
220- // identifier_tail<&str,&str> defined above to have magic 'complete' powers
263+ // identifier_tail<&str,&str> defined above so it can be a 'completion' parser instead of a
264+ // 'streaming' parser -- look into nom's documentation for more info
221265
222- named ! ( identifier <& str , String >,
266+ named ! ( maybe_invalid_identifier_parser <& str , String >,
223267 do_parse!(
224- head: identifier_head >>
268+ head: maybe_invalid_identifier_head_parser >>
225269 rest_input: identifier_tail >>
226270 ( cons_str( head, & rest_input) )
227271 )
228272 ) ;
229273
230- identifier ( input)
274+ named ! ( valid_identifier_parser <& str , String >,
275+ verify!( maybe_invalid_identifier_parser, |identifier| {
276+ first_char( & identifier) != '/' ||
277+ identifier == "/"
278+ } ) ) ;
279+
280+ valid_identifier_parser ( input)
231281}
232282
233283/// Parses valid Clojure symbol
@@ -432,16 +482,6 @@ pub fn try_read_pattern(input: &str) -> IResult<&str, Value> {
432482 Ok ( ( rest_input, regex) )
433483}
434484
435- /// This reader is needed for parsing the division sign /
436- pub fn try_read_division_forward_slash ( input : & str ) -> IResult < & str , Value > {
437- named ! ( slash_parser<& str , & str >, preceded!( consume_clojure_whitespaces_parser, tag!( "/" ) ) ) ;
438-
439- let ( rest_input, slash) = slash_parser ( input) ?;
440-
441- // If an error is thrown, this will be coerced into a condition
442- Ok ( ( rest_input, Value :: Symbol ( Symbol :: intern ( slash) ) ) )
443- }
444-
445485// @TODO Perhaps generalize this, or even generalize it as a reader macro
446486/// Tries to parse &str into Value::PersistentListMap, or some other Value::..Map
447487/// Example Successes:
@@ -543,7 +583,6 @@ pub fn try_read(input: &str) -> IResult<&str, Value> {
543583 try_read_bool,
544584 try_read_nil,
545585 try_read_symbol,
546- try_read_division_forward_slash,
547586 try_read_keyword,
548587 try_read_list,
549588 try_read_vector,
@@ -788,6 +827,7 @@ mod tests {
788827 use crate :: persistent_vector;
789828 use crate :: reader:: try_read;
790829 use crate :: symbol:: Symbol ;
830+ use crate :: keyword:: Keyword ;
791831 use crate :: value:: Value :: { PersistentList , PersistentListMap , PersistentVector } ;
792832 use crate :: value:: { ToValue , Value } ;
793833
@@ -903,6 +943,36 @@ mod tests {
903943 try_read( "/ " ) . ok( ) . unwrap( ) . 1
904944 ) ;
905945 }
946+ #[ test]
947+ fn try_read_forward_slash_with_letters_and_fails_test ( ) {
948+ assert ! ( try_read( "/ab " ) . ok( ) . is_none( ) ) ;
949+ }
950+
951+ #[ test]
952+ fn try_read_forward_slash_keyword_test ( ) {
953+ assert_eq ! (
954+ Value :: Keyword ( Keyword :: intern( & "/" ) ) ,
955+ try_read( ":/ " ) . ok( ) . unwrap( ) . 1
956+ ) ;
957+ }
958+
959+ #[ test]
960+ fn try_read_forward_slash_keyword_with_letters_and_fails_test ( ) {
961+ assert ! ( try_read( ":/ab " ) . ok( ) . is_none( ) ) ;
962+ }
963+
964+ #[ test]
965+ fn try_read_forward_slash_keyword_with_ns_test ( ) {
966+ assert_eq ! (
967+ Value :: Keyword ( Keyword :: intern_with_ns( "core" , "/" ) ) ,
968+ try_read( ":core// " ) . ok( ) . unwrap( ) . 1
969+ ) ;
970+ }
971+
972+ #[ test]
973+ fn try_read_forward_slash_keyword_with_ns_with_letters_and_fails_test ( ) {
974+ assert ! ( try_read( ":core//ab " ) . ok( ) . is_none( ) ) ;
975+ }
906976 }
907977
908978 mod regex_tests {
0 commit comments