-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxs_regex.h
150 lines (113 loc) · 3.79 KB
/
xs_regex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
#ifndef _XS_REGEX_H
#define _XS_REGEX_H
/* returns non-zero if str matches the regex rx at least once */
int xs_regex_match(const char *str, const char *rx);

/* splits str using the regex rx as a separator, at most count times.
   The returned list is empty if rx does not compile; otherwise it
   alternates plain text (even positions) and matched separators
   (odd positions) */
xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
/* same as xs_regex_split_n() with XS_ALL as the count
   (XS_ALL is defined elsewhere; presumably it means 'no limit') */
#define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)

/* returns a list with only the matched texts, i.e. the odd
   positions of the list built by xs_regex_split_n() */
xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
/* same as xs_regex_select_n() with XS_ALL as the count */
#define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)

/* replaces at most count matches of rx in str with rep (an unescaped &
   in rep stands for the matched text). The str argument is freed and
   the resulting string is returned, so it must not be used afterwards */
xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
/* 'in place' replace of all matches: str itself is consumed */
#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
/* replace variants that hand xs_dup(str) to xs_regex_replace_in(), so
   it is that value (presumably a copy) and not str that gets freed */
#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
#ifdef XS_IMPLEMENTATION
#ifdef __TINYC__
/* fix a compilation error in tcc */
#define _REGEX_NELTS(n)
#endif
#include <regex.h>
xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
/* splits str using the POSIX extended regex rx as a separator,
   at most count times.
   Always returns a list:
    len == 0: regcomp error
    len == 1: full string (no matches, or count <= 0)
    len == odd: first part [ separator / next part ]...
   The last element is always the not yet split remainder of str */
{
    regex_t re;
    regmatch_t rm;
    int offset = 0;
    xs_list *list = xs_list_new();

    if (regcomp(&re, rx, REG_EXTENDED))
        return list;

    while (count > 0) {
        const char *p = str + offset;

        /* after the first match, p is no longer the beginning of the
           line, so ^ must not match there */
        if (regexec(&re, p, 1, &rm, offset > 0 ? REG_NOTBOL : 0))
            break;

        /* add first the leading part of the string */
        xs *s1 = xs_str_new_sz(p, rm.rm_so);
        list = xs_list_append(list, s1);

        /* add now the matched text as the separator */
        xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
        list = xs_list_append(list, s2);

        /* an empty match at the very start consumes nothing: stop here,
           or the same match would be found again on every iteration
           until count is exhausted */
        if (rm.rm_eo == 0)
            break;

        /* move forward */
        offset += rm.rm_eo;
        count--;
    }

    /* add the rest of the string; it's computed from offset so that it
       is also correct when the loop ended because count ran out
       (or never ran at all) */
    list = xs_list_append(list, str + offset);

    regfree(&re);

    return list;
}
xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
/* returns a list with the text of the matches of rx found in str
   (splitting at most count times), in order of appearance */
{
    /* the split list alternates plain text and matched text */
    xs *pieces = xs_regex_split_n(str, rx, count);
    xs_list *matches = xs_list_new();
    const xs_val *piece;
    int ctx = 0;

    for (int idx = 0; xs_list_next(pieces, &piece, &ctx); idx++) {
        /* the matched texts sit at the odd positions */
        if (idx % 2 == 1)
            matches = xs_list_append(matches, piece);
    }

    return matches;
}
xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
/* builds a new string where the matches of rx in str (at most count)
   are substituted by rep. If rep contains any &, every unescaped &
   expands to the matched text and a backslash makes the next char
   literal; otherwise rep is copied verbatim. str is freed and the
   new string is returned (declared as xs_list *, but it is a string) */
{
    xs *parts = xs_regex_split_n(str, rx, count);
    xs_str *out = xs_str_new(NULL);
    const int has_amp = strchr(rep, '&') != NULL;
    const xs_val *item;
    int ctx = 0;

    for (int idx = 0; xs_list_next(parts, &item, &ctx); idx++) {
        if ((idx & 0x1) == 0) {
            /* even position: plain text between matches, copied as is */
            out = xs_str_cat(out, item);
            continue;
        }

        if (!has_amp) {
            /* no placeholder: the replacement goes in untouched */
            out = xs_str_cat(out, rep);
            continue;
        }

        /* placeholder present: expand rep one char at a time */
        for (const char *q = rep; *q; q++) {
            if (*q == '&') {
                /* item is the matched text here */
                out = xs_str_cat(out, item);
                continue;
            }

            /* a backslash escapes the next char; a trailing one is dropped */
            if (*q == '\\' && *++q == '\0')
                break;

            out = xs_append_m(out, q, 1);
        }
    }

    xs_free(str);

    return out;
}
int xs_regex_match(const char *str, const char *rx)
/* returns 1 if str matches the POSIX extended regex rx at least once,
   0 if it does not match or if rx fails to compile */
{
    regex_t re;
    int matched;

    /* only a yes/no answer is needed, so no match offsets are asked
       for (REG_NOSUB) and no intermediate lists are built */
    if (regcomp(&re, rx, REG_EXTENDED | REG_NOSUB))
        return 0;

    matched = regexec(&re, str, 0, NULL, 0) == 0;

    regfree(&re);

    return matched;
}
#endif /* XS_IMPLEMENTATION */
#endif /* _XS_REGEX_H */