@@ -164,7 +164,7 @@ void __init riscv_fill_hwcap(void)
164164
165165 switch (* ext ) {
166166 case 's' :
167- /**
167+ /*
168168 * Workaround for invalid single-letter 's' & 'u'(QEMU).
169169 * No need to set the bit in riscv_isa as 's' & 'u' are
170170 * not valid ISA extensions. It works until multi-letter
@@ -181,53 +181,101 @@ void __init riscv_fill_hwcap(void)
181181 case 'X' :
182182 case 'z' :
183183 case 'Z' :
184+ /*
185+ * Before attempting to parse the extension itself, we find its end.
186+ * As multi-letter extensions must be split from other multi-letter
187+ * extensions with an "_", the end of a multi-letter extension will
188+ * either be the null character or the "_" at the start of the next
189+ * multi-letter extension.
190+ *
191+ * Next, as the extensions version is currently ignored, we
192+ * eliminate that portion. This is done by parsing backwards from
193+ * the end of the extension, removing any numbers. This may be a
194+ * major or minor number however, so the process is repeated if a
195+ * minor number was found.
196+ *
197+ * ext_end is intended to represent the first character *after* the
198+ * name portion of an extension, but will be decremented to the last
199+ * character itself while eliminating the extensions version number.
200+ * A simple re-increment solves this problem.
201+ */
184202 ext_long = true;
185- /* Multi-letter extension must be delimited */
186203 for (; * isa && * isa != '_' ; ++ isa )
187204 if (unlikely (!isalnum (* isa )))
188205 ext_err = true;
189- /* Parse backwards */
206+
190207 ext_end = isa ;
191208 if (unlikely (ext_err ))
192209 break ;
210+
193211 if (!isdigit (ext_end [-1 ]))
194212 break ;
195- /* Skip the minor version */
213+
196214 while (isdigit (* -- ext_end ))
197215 ;
198- if (tolower (ext_end [0 ]) != 'p'
199- || !isdigit (ext_end [-1 ])) {
200- /* Advance it to offset the pre-decrement */
216+
217+ if (tolower (ext_end [0 ]) != 'p' || !isdigit (ext_end [-1 ])) {
201218 ++ ext_end ;
202219 break ;
203220 }
204- /* Skip the major version */
221+
205222 while (isdigit (* -- ext_end ))
206223 ;
224+
207225 ++ ext_end ;
208226 break ;
209227 default :
228+ /*
229+ * Things are a little easier for single-letter extensions, as they
230+ * are parsed forwards.
231+ *
232+ * After checking that our starting position is valid, we need to
233+ * ensure that, when isa was incremented at the start of the loop,
234+ * that it arrived at the start of the next extension.
235+ *
236+ * If we are already on a non-digit, there is nothing to do. Either
237+ * we have a multi-letter extension's _, or the start of an
238+ * extension.
239+ *
240+ * Otherwise we have found the current extension's major version
241+ * number. Parse past it, and a subsequent p/minor version number
242+ * if present. The `p` extension must not appear immediately after
243+ * a number, so there is no fear of missing it.
244+ *
245+ */
210246 if (unlikely (!isalpha (* ext ))) {
211247 ext_err = true;
212248 break ;
213249 }
214- /* Find next extension */
250+
215251 if (!isdigit (* isa ))
216252 break ;
217- /* Skip the minor version */
253+
218254 while (isdigit (* ++ isa ))
219255 ;
256+
220257 if (tolower (* isa ) != 'p' )
221258 break ;
259+
222260 if (!isdigit (* ++ isa )) {
223261 -- isa ;
224262 break ;
225263 }
226- /* Skip the major version */
264+
227265 while (isdigit (* ++ isa ))
228266 ;
267+
229268 break ;
230269 }
270+
271+ /*
272+ * The parser expects that at the start of an iteration isa points to the
273+ * character before the start of the next extension. This will not be the
274+ * case if we have just parsed a single-letter extension and the next
275+ * extension is not a multi-letter extension prefixed with an "_". It is
276+ * also not the case at the end of the string, where it will point to the
277+ * terminating null character.
278+ */
231279 if (* isa != '_' )
232280 -- isa ;
233281
0 commit comments