|
17 | 17 | #include <string.h> |
18 | 18 |
|
19 | 19 | /** |
20 | | - * This callback is called by pm_regexp_parse() when a named capture group is found. |
| 20 | + * Accumulation state for named capture groups found during regexp parsing. |
| 21 | + * The caller initializes this with the call node and passes it to |
| 22 | + * pm_regexp_parse() or pm_regexp_parse_named_captures(). The regexp parser |
| 23 | + * populates match and names as groups are found. |
21 | 24 | */ |
22 | | -typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data); |
| 25 | +typedef struct { |
| 26 | + /** The call node (for =~) that wraps the regular expression node. */ |
| 27 | + pm_call_node_t *call; |
| 28 | + |
| 29 | + /** The match write node being built, or NULL if no named captures have been found yet. */ |
| 30 | + pm_match_write_node_t *match; |
| 31 | + |
| 32 | + /** The constant IDs of the capture names found so far (used for deduplication). */ |
| 33 | + pm_constant_id_list_t names; |
| 34 | +} pm_regexp_name_data_t; |
23 | 35 |
|
24 | 36 | /** |
25 | | - * This callback is called by pm_regexp_parse() when a parse error is found. |
| 37 | + * Callback invoked by pm_regexp_parse() and pm_regexp_parse_named_captures() for each named capture group found. |
| 38 | + * |
| 39 | + * @param parser The main parser. |
| 40 | + * @param name The name of the capture group. |
| 41 | + * @param shared Whether the source content is shared (impacts constant storage). NOTE(review): presumably means the content points into the parser's source buffer - confirm against callers. |
| 42 | + * @param data The accumulation state for named captures (see pm_regexp_name_data_t). |
26 | 43 | */ |
27 | | -typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data); |
| 44 | +typedef void (*pm_regexp_name_callback_t)(pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data); |
28 | 45 |
|
29 | 46 | /** |
30 | | - * Parse a regular expression. |
| 47 | + * Parse a regular expression, validate its encoding, and optionally extract |
| 48 | + * named capture groups. Returns the encoding flags to set on the node. |
31 | 49 | * |
32 | 50 | * @param parser The parser that is currently being used. |
33 | | - * @param source The source code to parse. |
34 | | - * @param size The size of the source code. |
35 | | - * @param extended_mode Whether to parse the regular expression in extended mode. |
| 51 | + * @param node The regular expression node to parse and validate. |
36 | 52 | * @param name_callback The optional callback to call when a named capture group is found. |
37 | | - * @param name_data The optional data to pass to the name callback. |
38 | | - * @param error_callback The callback to call when a parse error is found. |
39 | | - * @param error_data The data to pass to the error callback. |
| 53 | + * @param name_data The optional accumulation state for named captures, passed to name_callback. |
| 54 | + * @return The encoding flags to set on the node (e.g., FORCED_UTF8_ENCODING). |
| 55 | + */ |
| 56 | +PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data); |
| 57 | + |
| 58 | +/** |
| 59 | + * Parse an interpolated regular expression for named capture groups only. |
| 60 | + * Unlike pm_regexp_parse(), no encoding validation is performed. |
| 61 | + * |
| 62 | + * @param parser The parser that is currently being used. |
| 63 | + * @param source The source content to parse. |
| 64 | + * @param size The length of the source content, in bytes. |
| 65 | + * @param shared Whether the source points into the parser's source buffer. |
| 66 | + * @param extended_mode Whether or not the regular expression is in extended mode. |
| 67 | + * @param name_callback The callback to call when a named capture group is found. |
| 68 | + * @param name_data The accumulation state for named captures, passed to name_callback. |
40 | 69 | */ |
41 | | -PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data); |
| 70 | +void pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data); |
42 | 71 |
|
43 | 72 | #endif |
0 commit comments