// slre.c
  1. // Copyright (c) 2004-2012 Sergey Lyubka <valenok@gmail.com>
  2. // All rights reserved
  3. //
  4. // Modifications and enhancements by <beoran@gmail.com>, 2013.
  5. //
  6. // Permission is hereby granted, free of charge, to any person obtaining a copy
  7. // of this software and associated documentation files (the "Software"), to deal
  8. // in the Software without restriction, including without limitation the rights
  9. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. // copies of the Software, and to permit persons to whom the Software is
  11. // furnished to do so, subject to the following conditions:
  12. //
  13. // The above copyright notice and this permission notice shall be included in
  14. // all copies or substantial portions of the Software.
  15. //
  16. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. // THE SOFTWARE.
  23. #include <stdio.h>
  24. #include <assert.h>
  25. #include <ctype.h>
  26. #include <stdarg.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. #include <errno.h>
  30. #include "slre.h"
  31. #ifdef _WIN32
  32. #define snprintf _snprintf
  33. #endif
  34. // Compiled regular expression
  35. struct slre {
  36. unsigned char code[256];
  37. unsigned char data[256];
  38. int code_size;
  39. int data_size;
  40. int num_caps; // Number of bracket pairs
  41. int anchored; // Must match from string start
  42. enum slre_option options;
  43. int error; // Error code
  44. };
  45. /* BDM: extended to support more isXXX functionality. */
  46. enum {
  47. END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, STAR, PLUS,
  48. STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT,
  49. NONDIGIT, ALPHA, NONALPHA, ALNUM, NONALNUM, BLANK, NONBLANK, XDIGIT, NONXDIGIT
  50. };
  51. // Commands and operands are all unsigned char (1 byte long). All code offsets
  52. // are relative to current address, and positive (always point forward). Data
  53. // offsets are absolute. Commands with operands:
  54. //
  55. // BRANCH offset1 offset2
  56. // Try to match the code block that follows the BRANCH instruction
  57. // (code block ends with END). If no match, try to match code block that
  58. // starts at offset1. If either of these match, jump to offset2.
  59. //
  60. // EXACT data_offset data_length
  61. // Try to match exact string. String is recorded in data section from
  62. // data_offset, and has length data_length.
  63. //
  64. // OPEN capture_number
  65. // CLOSE capture_number
  66. // If the user has passed a 'struct slre_captured' array for captures, OPEN
  67. // records the beginning of the matched substring (cap->ptr), CLOSE
  68. // sets the length (cap->len) for respective capture_number.
  69. //
  70. // STAR code_offset
  71. // PLUS code_offset
  72. // QUEST code_offset
  73. // *, +, ?, respectively. Try to gobble as much as possible from the
  74. // matched buffer while code block that follows these instructions
  75. // matches. When the longest possible string is matched,
  76. // jump to code_offset
  77. //
  78. // STARQ, PLUSQ are non-greedy versions of STAR and PLUS.
  79. static const char *meta_characters = "|.^$*+?()[\\";
  80. static const char *message_match = "Match";
  81. static const char *error_no_match = "No match";
  82. static const char *error_jump_offset = "Jump offset is too big" ;
  83. static const char *error_code_too_long = "RE is too long (code overflow)";
  84. static const char *error_data_too_long = "RE is too long (data overflow)";
  85. static const char *error_text_too_long = "RE is too long (text overflow)";
  86. static const char *error_no_paren = "No closing parenthesis";
  87. static const char *error_bad_paren = "Unbalanced parenthesis";
  88. static const char *error_no_bracket = "No closing ']' bracket";
  89. static const char *error_too_many_paren = "Too many parenthesis";
  90. static const char *error_int_failed = "SLRE_INT: capture failed";
  91. static const char *error_int_size = "SLRE_INT: unsupported size";
  92. static const char *error_float_size = "SLRE_FLOAT: unsupported size";
  93. static const char *error_float_failed = "SLRE_FLOAT: capture failed";
  94. static const char *error_string_size = "SLRE_STRING: buffer size too small";
  95. static const char *error_unknown_type =
  96. "Unknown type, expected SLRE_(INT|FLOAT|STRING|CALLBACK|CAPTURED)";
  97. static const char *error_null_captured = "SLRE_CAPTURED: null captured struct";
  98. /* Converts an SLRE error code to a string. Returns NULL if unknown error is used. */
  99. const char * slre_error(int code) {
  100. switch(code) {
  101. case SLRE_OK : return message_match;
  102. case SLRE_ERROR_NO_MATCH : return error_no_match;
  103. case SLRE_ERROR_BAD_PAREN : return error_bad_paren;
  104. case SLRE_ERROR_CODE_TOO_LONG : return error_code_too_long;
  105. case SLRE_ERROR_DATA_TOO_LONG : return error_data_too_long;
  106. case SLRE_ERROR_TEXT_TOO_LONG : return error_text_too_long;
  107. case SLRE_ERROR_FLOAT_FAILED : return error_float_failed;
  108. case SLRE_ERROR_FLOAT_SIZE : return error_float_size;
  109. case SLRE_ERROR_INT_FAILED : return error_int_failed;
  110. case SLRE_ERROR_INT_SIZE : return error_int_size;
  111. case SLRE_ERROR_JUMP_OFFSET : return error_jump_offset;
  112. case SLRE_ERROR_NO_BRACKET : return error_no_bracket;
  113. case SLRE_ERROR_NO_PAREN : return error_no_paren;
  114. case SLRE_ERROR_STRING_SIZE : return error_string_size;
  115. case SLRE_ERROR_TOO_MANY_PAREN : return error_too_many_paren;
  116. case SLRE_ERROR_UNKNOWN_TYPE : return error_unknown_type;
  117. case SLRE_ERROR_NULL_CAPTURED : return error_null_captured;
  118. default : return NULL;
  119. }
  120. }
  121. /*
  122. */
  123. static void set_jump_offset(struct slre *r, int pc, int offset) {
  124. assert(offset < r->code_size);
  125. if (r->code_size - offset > 0xff) {
  126. r->error = SLRE_ERROR_JUMP_OFFSET;
  127. } else {
  128. r->code[pc] = (unsigned char) (r->code_size - offset);
  129. }
  130. }
  131. static void emit(struct slre *r, int code) {
  132. if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0]))) {
  133. r->error = SLRE_ERROR_CODE_TOO_LONG;
  134. } else {
  135. r->code[r->code_size++] = (unsigned char) code;
  136. }
  137. }
  138. static void store_char_in_data(struct slre *r, int ch) {
  139. if (r->data_size >= (int) sizeof(r->data)) {
  140. r->error = SLRE_ERROR_DATA_TOO_LONG;
  141. } else {
  142. r->data[r->data_size++] = ch;
  143. }
  144. }
  145. static void exact(struct slre *r, const char **re) {
  146. int old_data_size = r->data_size;
  147. while (**re != '\0' && (strchr(meta_characters, **re)) == NULL) {
  148. store_char_in_data(r, *(*re)++);
  149. }
  150. emit(r, EXACT);
  151. emit(r, old_data_size);
  152. emit(r, r->data_size - old_data_size);
  153. }
  154. static int get_escape_char(const char **re) {
  155. int res;
  156. switch (*(*re)++) {
  157. case 'n': res = '\n'; break;
  158. case 'r': res = '\r'; break;
  159. case 't': res = '\t'; break;
  160. case '0': res = 0; break;
  161. case 'S': res = NONSPACE << 8; break;
  162. case 's': res = SPACE << 8; break;
  163. case 'D': res = NONDIGIT << 8; break;
  164. case 'd': res = DIGIT << 8; break;
  165. case 'x': res = XDIGIT << 8; break;
  166. case 'X': res = NONXDIGIT << 8; break;
  167. case 'a': res = ALPHA << 8; break;
  168. case 'A': res = NONALPHA << 8; break;
  169. case 'w': res = ALNUM << 8; break;
  170. case 'W': res = NONALNUM << 8; break;
  171. case 'b': res = BLANK << 8; break;
  172. case 'B': res = NONBLANK << 8; break;
  173. default: res = (*re)[-1]; break;
  174. }
  175. return res;
  176. }
  177. static void anyof(struct slre *r, const char **re) {
  178. int esc, old_data_size = r->data_size, op = ANYOF;
  179. if (**re == '^') {
  180. op = ANYBUT;
  181. (*re)++;
  182. }
  183. while (**re != '\0')
  184. switch (*(*re)++) {
  185. case ']':
  186. emit(r, op);
  187. emit(r, old_data_size);
  188. emit(r, r->data_size - old_data_size);
  189. return;
  190. // NOTREACHED
  191. break;
  192. case '\\':
  193. esc = get_escape_char(re);
  194. if ((esc & 0xff) == 0) {
  195. store_char_in_data(r, 0);
  196. store_char_in_data(r, esc >> 8);
  197. } else {
  198. store_char_in_data(r, esc);
  199. }
  200. break;
  201. default:
  202. store_char_in_data(r, (*re)[-1]);
  203. break;
  204. }
  205. r->error = SLRE_ERROR_NO_BRACKET;
  206. }
  207. static void relocate(struct slre *r, int begin, int shift) {
  208. emit(r, END);
  209. memmove(r->code + begin + shift, r->code + begin, r->code_size - begin);
  210. r->code_size += shift;
  211. }
  212. static void quantifier(struct slre *r, int prev, int op) {
  213. if (r->code[prev] == EXACT && r->code[prev + 2] > 1) {
  214. r->code[prev + 2]--;
  215. emit(r, EXACT);
  216. emit(r, r->code[prev + 1] + r->code[prev + 2]);
  217. emit(r, 1);
  218. prev = r->code_size - 3;
  219. }
  220. relocate(r, prev, 2);
  221. r->code[prev] = op;
  222. set_jump_offset(r, prev + 1, prev);
  223. }
  224. static void exact_one_char(struct slre *r, int ch) {
  225. emit(r, EXACT);
  226. emit(r, r->data_size);
  227. emit(r, 1);
  228. store_char_in_data(r, ch);
  229. }
  230. static void fixup_branch(struct slre *r, int fixup) {
  231. if (fixup > 0) {
  232. emit(r, END);
  233. set_jump_offset(r, fixup, fixup - 2);
  234. }
  235. }
  236. static void compile(struct slre *r, const char **re) {
  237. int op, esc, branch_start, last_op, fixup, cap_no, level;
  238. fixup = 0;
  239. level = r->num_caps;
  240. branch_start = last_op = r->code_size;
  241. for (;;)
  242. switch (*(*re)++) {
  243. case '\0':
  244. (*re)--;
  245. return;
  246. // NOTREACHED
  247. break;
  248. case '^':
  249. emit(r, BOL);
  250. break;
  251. case '$':
  252. emit(r, EOL);
  253. break;
  254. case '.':
  255. last_op = r->code_size;
  256. emit(r, ANY);
  257. break;
  258. case '[':
  259. last_op = r->code_size;
  260. anyof(r, re);
  261. break;
  262. case '\\':
  263. last_op = r->code_size;
  264. esc = get_escape_char(re);
  265. if (esc & 0xff00) {
  266. emit(r, esc >> 8);
  267. } else {
  268. exact_one_char(r, esc);
  269. }
  270. break;
  271. case '(':
  272. last_op = r->code_size;
  273. cap_no = ++r->num_caps;
  274. emit(r, OPEN);
  275. emit(r, cap_no);
  276. compile(r, re);
  277. if (*(*re)++ != ')') {
  278. r->error = SLRE_ERROR_NO_PAREN;
  279. return;
  280. }
  281. emit(r, CLOSE);
  282. emit(r, cap_no);
  283. break;
  284. case ')':
  285. (*re)--;
  286. fixup_branch(r, fixup);
  287. if (level == 0) {
  288. r->error = SLRE_ERROR_BAD_PAREN;
  289. return;
  290. }
  291. return;
  292. // NOTREACHED
  293. break;
  294. case '+':
  295. case '*':
  296. op = (*re)[-1] == '*' ? STAR: PLUS;
  297. if (**re == '?') {
  298. (*re)++;
  299. op = op == STAR ? STARQ : PLUSQ;
  300. }
  301. quantifier(r, last_op, op);
  302. break;
  303. case '?':
  304. quantifier(r, last_op, QUEST);
  305. break;
  306. case '|':
  307. fixup_branch(r, fixup);
  308. relocate(r, branch_start, 3);
  309. r->code[branch_start] = BRANCH;
  310. set_jump_offset(r, branch_start + 1, branch_start);
  311. fixup = branch_start + 2;
  312. r->code[fixup] = 0xff;
  313. break;
  314. default:
  315. (*re)--;
  316. last_op = r->code_size;
  317. exact(r, re);
  318. break;
  319. }
  320. }
  321. // Compile regular expression. If success, 1 is returned.
  322. // If error, 0 is returned and slre.error_string points to the error message.
  323. static int compile2(struct slre *r, const char *re) {
  324. r->error = 0;
  325. r->code_size = r->data_size = r->num_caps = r->anchored = 0;
  326. if (*re == '^') {
  327. r->anchored++;
  328. }
  329. emit(r, OPEN); // This will capture what matches full RE
  330. emit(r, 0);
  331. while (*re != '\0') {
  332. compile(r, &re);
  333. }
  334. if (r->code[2] == BRANCH) {
  335. fixup_branch(r, 4);
  336. }
  337. emit(r, CLOSE);
  338. emit(r, 0);
  339. emit(r, END);
  340. #if 0
  341. static void dump(const struct slre *, FILE *);
  342. dump(r, stdout);
  343. #endif
  344. return r->error;
  345. }
  346. static int match(const struct slre *, int, const char *, int, int *,
  347. struct slre_captured *, int caps_size);
  348. static void loop_greedy(const struct slre *r, int pc, const char *s, int len,
  349. int *ofs) {
  350. int saved_offset, matched_offset;
  351. saved_offset = matched_offset = *ofs;
  352. while (!match(r, pc + 2, s, len, ofs, NULL, 0)) {
  353. saved_offset = *ofs;
  354. if (!match(r, pc + r->code[pc + 1], s, len, ofs, NULL, 0)) {
  355. matched_offset = saved_offset;
  356. }
  357. *ofs = saved_offset;
  358. }
  359. *ofs = matched_offset;
  360. }
  361. static void loop_non_greedy(const struct slre *r, int pc, const char *s,
  362. int len, int *ofs) {
  363. int saved_offset = *ofs;
  364. while (!match(r, pc + 2, s, len, ofs, NULL, 0)) {
  365. saved_offset = *ofs;
  366. if (!match(r, pc + r->code[pc + 1], s, len, ofs, NULL, 0))
  367. break;
  368. }
  369. *ofs = saved_offset;
  370. }
  371. static int is_any_of(const unsigned char *p, int len, const char *s, int *ofs) {
  372. int i, ch;
  373. ch = s[*ofs];
  374. for (i = 0; i < len; i++)
  375. if (p[i] == ch) {
  376. (*ofs)++;
  377. return 1;
  378. }
  379. return 0;
  380. }
  381. static int is_any_but(const unsigned char *p, int len, const char *s,
  382. int *ofs) {
  383. int i, ch;
  384. ch = s[*ofs];
  385. for (i = 0; i < len; i++)
  386. if (p[i] == ch) {
  387. return 0;
  388. }
  389. (*ofs)++;
  390. return 1;
  391. }
  392. static int lowercase(const char *s) {
  393. return tolower(* (const unsigned char *) s);
  394. }
  395. static int casecmp(const void *p1, const void *p2, size_t len) {
  396. const char *s1 = p1, *s2 = p2;
  397. int diff = 0;
  398. if (len > 0)
  399. do {
  400. diff = lowercase(s1++) - lowercase(s2++);
  401. } while (diff == 0 && s1[-1] != '\0' && --len > 0);
  402. return diff;
  403. }
  404. /* Macro to easily implement match cases */
  405. #define MATCH_WITH_FUNCTION(FUNC) \
  406. error = SLRE_ERROR_NO_MATCH; \
  407. if (*ofs < len && FUNC(((unsigned char *)s)[*ofs])) { \
  408. (*ofs)++; \
  409. error = SLRE_OK; \
  410. } \
  411. pc++; \
  412. /* Macro to easily implement negative match cases */
  413. #define MATCH_WITH_NEGATE_FUNCTION(FUNC) \
  414. error = SLRE_ERROR_NO_MATCH; \
  415. if (*ofs < len && (!(FUNC(((unsigned char *)s)[*ofs])))) { \
  416. (*ofs)++; \
  417. error = SLRE_OK; \
  418. } \
  419. pc++; \
  420. static int match(const struct slre *r, int pc, const char *s, int len,
  421. int *ofs, struct slre_captured *caps, int caps_size) {
  422. int n, saved_offset;
  423. int error = SLRE_OK;
  424. int (*cmp)(const void *string1, const void *string2, size_t len);
  425. while (error == SLRE_OK && r->code[pc] != END) {
  426. assert(pc < r->code_size);
  427. assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0])));
  428. switch (r->code[pc]) {
  429. case BRANCH:
  430. saved_offset = *ofs;
  431. error = match(r, pc + 3, s, len, ofs, caps, caps_size);
  432. if (error != SLRE_OK) {
  433. *ofs = saved_offset;
  434. error = match(r, pc + r->code[pc + 1], s, len, ofs, caps,
  435. caps_size);
  436. }
  437. pc += r->code[pc + 2];
  438. break;
  439. case EXACT:
  440. error = SLRE_ERROR_NO_MATCH;
  441. n = r->code[pc + 2]; // String length
  442. cmp = r->options & SLRE_CASE_INSENSITIVE ? casecmp : memcmp;
  443. if (n <= len - *ofs && !cmp(s + *ofs, r->data + r->code[pc + 1], n)) {
  444. (*ofs) += n;
  445. error = SLRE_OK;
  446. }
  447. pc += 3;
  448. break;
  449. case QUEST:
  450. error = SLRE_OK;
  451. saved_offset = *ofs;
  452. if (match(r, pc + 2, s, len, ofs, caps, caps_size) != SLRE_OK) {
  453. *ofs = saved_offset;
  454. }
  455. pc += r->code[pc + 1];
  456. break;
  457. case STAR:
  458. error = SLRE_OK;
  459. loop_greedy(r, pc, s, len, ofs);
  460. pc += r->code[pc + 1];
  461. break;
  462. case STARQ:
  463. error = SLRE_OK;
  464. loop_non_greedy(r, pc, s, len, ofs);
  465. pc += r->code[pc + 1];
  466. break;
  467. case PLUS:
  468. if ((error = match(r, pc + 2, s, len, ofs,
  469. caps, caps_size)) != SLRE_OK) {
  470. break;
  471. }
  472. loop_greedy(r, pc, s, len, ofs);
  473. pc += r->code[pc + 1];
  474. break;
  475. case PLUSQ:
  476. if ((error = match(r, pc + 2, s, len, ofs,
  477. caps, caps_size)) != SLRE_OK) {
  478. break;
  479. }
  480. loop_non_greedy(r, pc, s, len, ofs);
  481. pc += r->code[pc + 1];
  482. break;
  483. case SPACE:
  484. MATCH_WITH_FUNCTION(isspace)
  485. break;
  486. case NONSPACE:
  487. MATCH_WITH_NEGATE_FUNCTION(isspace)
  488. break;
  489. case DIGIT:
  490. MATCH_WITH_FUNCTION(isdigit)
  491. break;
  492. case NONDIGIT:
  493. MATCH_WITH_NEGATE_FUNCTION(isdigit)
  494. break;
  495. case ALPHA:
  496. MATCH_WITH_FUNCTION(isalpha)
  497. break;
  498. case ALNUM:
  499. MATCH_WITH_FUNCTION(isalnum)
  500. break;
  501. case BLANK:
  502. MATCH_WITH_FUNCTION(isblank)
  503. break;
  504. case XDIGIT:
  505. MATCH_WITH_FUNCTION(isxdigit)
  506. break;
  507. case NONALPHA:
  508. MATCH_WITH_NEGATE_FUNCTION(isalpha)
  509. break;
  510. case NONALNUM:
  511. MATCH_WITH_NEGATE_FUNCTION(isalnum)
  512. break;
  513. case NONBLANK:
  514. MATCH_WITH_NEGATE_FUNCTION(isblank)
  515. break;
  516. case NONXDIGIT:
  517. MATCH_WITH_NEGATE_FUNCTION(isxdigit)
  518. break;
  519. case ANY:
  520. error = SLRE_ERROR_NO_MATCH;
  521. if (*ofs < len) {
  522. (*ofs)++;
  523. error = SLRE_OK;
  524. }
  525. pc++;
  526. break;
  527. case ANYOF:
  528. error = SLRE_ERROR_NO_MATCH;
  529. if (*ofs < len)
  530. error = is_any_of(r->data + r->code[pc + 1], r->code[pc + 2],
  531. s, ofs) ? SLRE_OK : SLRE_ERROR_NO_MATCH ;
  532. pc += 3;
  533. break;
  534. case ANYBUT:
  535. error = SLRE_ERROR_NO_MATCH;
  536. if (*ofs < len)
  537. error = is_any_but(r->data + r->code[pc + 1], r->code[pc + 2],
  538. s, ofs) ? SLRE_OK : SLRE_ERROR_NO_MATCH ;
  539. pc += 3;
  540. break;
  541. case BOL:
  542. error = *ofs == 0 ? SLRE_OK : SLRE_ERROR_NO_MATCH;
  543. pc++;
  544. break;
  545. case EOL:
  546. error = *ofs == len ? SLRE_OK : SLRE_ERROR_NO_MATCH;
  547. pc++;
  548. break;
  549. case OPEN:
  550. if (caps != NULL) {
  551. if (caps_size - 2 < r->code[pc + 1]) {
  552. error = SLRE_ERROR_TOO_MANY_PAREN;
  553. } else {
  554. caps[r->code[pc + 1]].ptr = s + *ofs;
  555. }
  556. }
  557. pc += 2;
  558. break;
  559. case CLOSE:
  560. if (caps != NULL) {
  561. assert(r->code[pc + 1] >= 0);
  562. assert(r->code[pc + 1] < caps_size);
  563. caps[r->code[pc + 1]].len = (s + *ofs) -
  564. caps[r->code[pc + 1]].ptr;
  565. }
  566. pc += 2;
  567. break;
  568. case END:
  569. pc++;
  570. break;
  571. default:
  572. printf("unknown cmd (%d) at %d\n", r->code[pc], pc);
  573. assert(0);
  574. break;
  575. }
  576. }
  577. return error;
  578. }
  579. // Return 1 if match, 0 if no match.
  580. // If `captured_substrings' array is not NULL, then it is filled with the
  581. // values of captured substrings. captured_substrings[0] element is always
  582. // a full matched substring. The round bracket captures start from
  583. // captured_substrings[1].
  584. // It is assumed that the size of captured_substrings array is enough to
  585. // hold all captures. The caller function must make sure it is! So, the
  586. // array_size = number_of_round_bracket_pairs + 1
  587. static int match2(const struct slre *r, const char *buf, int len,
  588. struct slre_captured *caps, int caps_size) {
  589. int i, ofs = 0;
  590. int error = SLRE_ERROR_NO_MATCH;
  591. if (caps != NULL) {
  592. memset(caps, 0, caps_size * sizeof(caps[0]));
  593. }
  594. if (r->anchored) {
  595. error = match(r, 0, buf, len, &ofs, caps, caps_size);
  596. } else {
  597. for (i = 0; i < len && error != SLRE_OK; i++) {
  598. ofs = i;
  599. error = match(r, 0, buf, len, &ofs, caps, caps_size);
  600. }
  601. }
  602. return error;
  603. }
  604. static int capture_float(const struct slre_captured *cap, void *p, size_t len) {
  605. const char *fmt;
  606. char buf[20];
  607. switch (len) {
  608. case sizeof(float): fmt = "f"; break;
  609. case sizeof(double): fmt = "lf"; break;
  610. default: return SLRE_ERROR_FLOAT_SIZE;
  611. }
  612. snprintf(buf, sizeof(buf), "%%%d%s", cap->len, fmt);
  613. return sscanf(cap->ptr, buf, p) == 1 ? SLRE_OK : SLRE_ERROR_FLOAT_FAILED;
  614. }
  615. static int capture_string(const struct slre_captured *cap, void *p, size_t len) {
  616. if ((int) len <= cap->len) {
  617. return SLRE_ERROR_STRING_SIZE;
  618. }
  619. memcpy(p, cap->ptr, cap->len);
  620. ((char *) p)[cap->len] = '\0';
  621. return SLRE_OK;
  622. }
  623. static int capture_int(const struct slre_captured *cap, void *p, size_t len) {
  624. const char *fmt;
  625. char buf[20];
  626. switch (len) {
  627. case sizeof(char): fmt = "hh"; break;
  628. case sizeof(short): fmt = "h"; break;
  629. case sizeof(int): fmt = "d"; break;
  630. case sizeof(long long int): fmt = "lld"; break;
  631. default: return SLRE_ERROR_INT_SIZE;
  632. }
  633. snprintf(buf, sizeof(buf), "%%%d%s", cap->len, fmt);
  634. return sscanf(cap->ptr, buf, p) == 1 ? SLRE_OK : SLRE_ERROR_INT_FAILED;
  635. }
  636. static int capture_callback(const struct slre_captured *cap, int i, slre_callback *fp, void * extra) {
  637. return fp(i, cap->ptr, cap->len, extra);
  638. }
  639. static int capture_captured(const struct slre_captured *cap, struct slre_captured *target) {
  640. if(!target) return SLRE_ERROR_NULL_CAPTURED;
  641. (*target) = (*cap);
  642. return 0;
  643. }
  644. static int capture(const struct slre_captured *caps, int num_caps, va_list ap) {
  645. int i, type;
  646. size_t size = 0;
  647. void *p = NULL;
  648. int err = SLRE_OK;
  649. int callback_ok = 0;
  650. slre_callback * fp = NULL;
  651. for (i = 0; i < num_caps; i++) {
  652. /* Callback needs slightly different arguments, only taken once. */
  653. if (!callback_ok) {
  654. type = va_arg(ap, int);
  655. // stop processing captures if the type SLRE_IGNORE is seen.
  656. if (type == SLRE_IGNORE) {
  657. return err;
  658. }
  659. if (type == SLRE_CALLBACK) {
  660. callback_ok = !callback_ok;
  661. fp = va_arg(ap, slre_callback*);
  662. p = va_arg(ap, void *);
  663. } else if (type == SLRE_CAPTURED) {
  664. p = va_arg(ap, void *);
  665. } else {
  666. size = va_arg(ap, size_t);
  667. p = va_arg(ap, void *);
  668. }
  669. }
  670. switch (type) {
  671. case SLRE_INT : err = capture_int(&caps[i], p, size); break;
  672. case SLRE_FLOAT : err = capture_float(&caps[i], p, size); break;
  673. case SLRE_STRING : err = capture_string(&caps[i], p, size); break;
  674. case SLRE_CALLBACK: err = capture_callback(&caps[i], i, fp, p); break;
  675. case SLRE_CAPTURED: err = capture_captured(&caps[i], p); break;
  676. default: err = SLRE_ERROR_UNKNOWN_TYPE; break;
  677. }
  678. }
  679. return err;
  680. }
  681. int slre_match(enum slre_option options, const char *re,
  682. const char *buf, int buf_len, ...) {
  683. struct slre slre;
  684. struct slre_captured caps[SLRE_CAPURES_MAX];
  685. va_list ap;
  686. int error = SLRE_OK;
  687. slre.options = options;
  688. error = compile2(&slre, re);
  689. if(error != SLRE_OK) { return error; }
  690. error = match2(&slre, buf, buf_len, caps, sizeof(caps) / sizeof(caps[0]));
  691. if(error != SLRE_OK) { return error; }
  692. /* Don't capture if not requested. */
  693. if (options & SLRE_NO_CAPTURE) { return error; }
  694. va_start(ap, buf_len);
  695. error = capture(caps + 1, slre.num_caps, ap);
  696. va_end(ap);
  697. return error;
  698. }