@@ -107,14 +107,16 @@ fn cons_str(head: char, tail: &str) -> String {
107107/// - `$`,
108108/// - `*`,
109109/// - `!`,
110- fn is_identifier_char ( chr : char ) -> bool {
111- chr . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( chr )
110+ fn is_identifier_char ( ch : char ) -> bool {
111+ ch . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( ch )
112112}
113113
114- /// Returns whether if a character can be in the head of an identifier.
114+ /// Returns true if a character is an acceptable (non numeric) identifier char
115115///
116- /// An identifier is composed of a head (its first char) and a tail (the other
117- /// chars).
116+ /// An identifier is either a non numeric identifier char, followed by any number
117+ /// of identifier chars, or is a '/' and nothing else.
118+ ///
119+ /// A separate function will be used to detect if an identifier is possibly just '/'
118120///
119121/// A character is an identifier char if it is alphabetic or if it is one of:
120122/// - `|`,
@@ -131,20 +133,55 @@ fn is_identifier_char(chr: char) -> bool {
131133/// - `$`,
132134/// - `*`,
133135/// - `!`,
134- fn is_non_numeric_identifier_char ( chr : char ) -> bool {
135- chr. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( chr)
136+ fn is_non_numeric_identifier_char ( ch : char ) -> bool {
137+ ch. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( ch)
138+ }
139+
140+ /// Returns true if a character is an acceptable (non numeric) identifier char, or '/'
141+ ///
142+ /// An identifier is either a non numeric identifier char, followed by any number
143+ /// of identifier chars, or is a '/' and nothing else.
144+ ///
145+ /// The reason we check if this is *either* a non numeric identifier char, or a '/',
146+ /// is because we will want to use it to parse either
147+ /// 1. a normal identifier,
148+ /// 2. '/',
149+ /// 3. something like '/blah'
150+ /// And then, if we have '/blah', we will proactively make the read fail
151+ ///
152+ /// We need to explicitly look for this '/blah' case because otherwise, if we just check for 1 and 2,
153+ /// then in the case where someone types in '/blah' it will count as two valid separate reads --
154+ /// first the symbol '/' and then the symbol 'blah'.
155+ ///
156+ /// This function passes if the char is alphabetic, a '/', or one of:
157+ /// - `|`,
158+ /// - `?`,
159+ /// - `<`,
160+ /// - `>`,
161+ /// - `+`,
162+ /// - `-`,
163+ /// - `_`,
164+ /// - `=`,
165+ /// - `^`,
166+ /// - `%`,
167+ /// - `&`,
168+ /// - `$`,
169+ /// - `*`,
170+ /// - `!`,
171+ fn is_non_numeric_identifier_char_or_slash ( ch : char ) -> bool {
172+ ch == '/' || is_non_numeric_identifier_char ( ch)
136173}
137174
138175/// Returns true if given character is a minus character
139176/// - `-`,
140- fn is_minus_char ( chr : char ) -> bool {
141- chr == '-'
177+ fn is_minus_char ( ch : char ) -> bool {
178+ ch == '-'
142179}
143180
144181/// Returns true if given character is a period character
145182/// - `.`,
146- fn is_period_char ( chr : char ) -> bool {
147- chr == '.'
183+ fn is_period_char ( ch : char ) -> bool {
184+ ch == '.'
148185}
149186
150187/// Returns whether a given character is whitespace.
@@ -210,27 +247,40 @@ fn identifier_tail(input: &str) -> IResult<&str, &str> {
210247}
211248
212249/// Parses valid Clojure identifiers
213- /// Example Successes: ab, cat, -12+3, |blah|, <well>
214- /// Example Failures: 'a, 12b, ,cat
250+ /// Example Successes: ab, cat, -12+3, |blah|, <well>, / (edge case)
251+ /// Example Failures: 'a, 12b, ,cat , /ab
215252pub fn identifier_parser ( input : & str ) -> IResult < & str , String > {
216- named ! ( identifier_head<& str , char >,
253+ // We will try to parse either a valid identifier, *or* the invalid identifier
254+ // '/slashwithmorecharacters'
255+ // Because if we do get the '/blah', we want to know and actively fail, otherwise '/blah'
256+ // will just count as two valid reads; one for '/' and one for 'blah'
257+ // So, we call these parsers 'maybe_invalid_identifier_..', as they are also trying to catch
258+ // this one invalid case
259+ named ! ( maybe_invalid_identifier_head_parser<& str , char >,
217260 map!(
218- take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char ) ,
261+ take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char_or_slash ) ,
219262 first_char
220263 )
221264 ) ;
222265
223- // identifier_tail<&str,&str> defined above to have magic 'complete' powers
266+ // identifier_tail<&str,&str> defined above so it can be a 'completion' parser instead of a
267+ // 'streaming' parser -- look into nom's documentation for more info
224268
225- named ! ( identifier <& str , String >,
269+ named ! ( maybe_invalid_identifier_parser <& str , String >,
226270 do_parse!(
227- head: identifier_head >>
271+ head: maybe_invalid_identifier_head_parser >>
228272 rest_input: identifier_tail >>
229273 ( cons_str( head, & rest_input) )
230274 )
231275 ) ;
232276
233- identifier ( input)
277+ named ! ( valid_identifier_parser <& str , String >,
278+ verify!( maybe_invalid_identifier_parser, |identifier| {
279+ first_char( & identifier) != '/' ||
280+ identifier == "/"
281+ } ) ) ;
282+
283+ valid_identifier_parser ( input)
234284}
235285
236286/// Parses valid Clojure symbol
@@ -1012,6 +1062,43 @@ mod tests {
10121062 _ => panic ! ( "try_read_meta \" ^:cat a\" should return a symbol" )
10131063 }
10141064 }
1065+ #[ test]
1066+ fn try_read_forward_slash_test ( ) {
1067+ assert_eq ! (
1068+ Value :: Symbol ( Symbol :: intern( & "/" ) ) ,
1069+ try_read( "/ " ) . ok( ) . unwrap( ) . 1
1070+ ) ;
1071+ }
1072+ #[ test]
1073+ fn try_read_forward_slash_with_letters_and_fails_test ( ) {
1074+ assert ! ( try_read( "/ab " ) . ok( ) . is_none( ) ) ;
1075+ }
1076+
1077+ #[ test]
1078+ fn try_read_forward_slash_keyword_test ( ) {
1079+ assert_eq ! (
1080+ Value :: Keyword ( Keyword :: intern( & "/" ) ) ,
1081+ try_read( ":/ " ) . ok( ) . unwrap( ) . 1
1082+ ) ;
1083+ }
1084+
1085+ #[ test]
1086+ fn try_read_forward_slash_keyword_with_letters_and_fails_test ( ) {
1087+ assert ! ( try_read( ":/ab " ) . ok( ) . is_none( ) ) ;
1088+ }
1089+
1090+ #[ test]
1091+ fn try_read_forward_slash_keyword_with_ns_test ( ) {
1092+ assert_eq ! (
1093+ Value :: Keyword ( Keyword :: intern_with_ns( "core" , "/" ) ) ,
1094+ try_read( ":core// " ) . ok( ) . unwrap( ) . 1
1095+ ) ;
1096+ }
1097+
1098+ #[ test]
1099+ fn try_read_forward_slash_keyword_with_ns_with_letters_and_fails_test ( ) {
1100+ assert ! ( try_read( ":core//ab " ) . ok( ) . is_none( ) ) ;
1101+ }
10151102 }
10161103
10171104 mod regex_tests {
0 commit comments