#include <stdlib.h>

Include dependency graph for pcre.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
| #define PCRE_ANCHORED 0x0010 |
| #define PCRE_CASELESS 0x0001 |
Definition at line 28 of file pcre.h.
Referenced by atr_match1(), check_filter(), compile_branch(), find_firstassertedchar(), match(), match_ref(), pcre_compile(), pcre_exec(), pcre_study(), process_cmdent(), real_regmatch(), real_regrab(), and set_start_bits().
| #define PCRE_DOLLAR_ENDONLY 0x0020 |
| #define PCRE_DOTALL 0x0004 |
Definition at line 30 of file pcre.h.
Referenced by compile_branch(), is_anchored(), match(), pcre_compile(), and pcre_exec().
| #define PCRE_ERROR_BADMAGIC (-4) |
| #define PCRE_ERROR_BADOPTION (-3) |
| #define PCRE_ERROR_NOMATCH (-1) |
| #define PCRE_ERROR_NOMEMORY (-6) |
Definition at line 50 of file pcre.h.
Referenced by match(), pcre_copy_substring(), and pcre_exec().
| #define PCRE_ERROR_NOSUBSTRING (-7) |
| #define PCRE_ERROR_NULL (-2) |
| #define PCRE_EXTENDED 0x0008 |
| #define PCRE_EXTRA 0x0040 |
Definition at line 34 of file pcre.h.
Referenced by check_escape(), compile_branch(), and pcre_compile().
| #define PCRE_EXTRA_CALLOUT_DATA 0x0004 |
| #define PCRE_EXTRA_MATCH_LIMIT 0x0002 |
| #define PCRE_EXTRA_STUDY_DATA 0x0001 |
| #define PCRE_MULTILINE 0x0002 |
Definition at line 29 of file pcre.h.
Referenced by compile_branch(), is_anchored(), match(), pcre_compile(), and pcre_exec().
| #define PCRE_NO_AUTO_CAPTURE 0x1000 |
| #define PCRE_NOTBOL 0x0080 |
| #define PCRE_NOTEMPTY 0x0400 |
| #define PCRE_NOTEOL 0x0100 |
| #define PCRE_UNGREEDY 0x0200 |
| #define PCRE_UTF8 0x0800 |
| pcre* pcre_compile(const char *, int, const char **, int *, const unsigned char *) |
Definition at line 4365 of file pcre.cpp.
References compile_data::backref_map, BRASTACK_SIZE, compile_data::cbits, cbits_offset, check_escape(), check_posix_syntax(), compile_regex(), ctype_digit, ctype_meta, ctype_space, ctype_word, compile_data::ctypes, ctypes_offset, digitab, DPRINTF, ERR12, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, ERR21, ERR22, ERR23, ERR24, ERR26, ERR28, ERR29, ERR32, ERR39, ERR41, ERR42, ERR6, ESC_b, ESC_Q, ESC_REF, EXTRACT_BASIC_MAX, compile_data::fcc, fcc_offset, find_firstassertedchar(), real_pcre::first_byte, is_anchored(), is_counted_repeat(), is_startline(), compile_data::lcc, lcc_offset, LINK_SIZE, MAGIC_NUMBER, real_pcre::magic_number, MAX_PATTERN_SIZE, MAXLIT, real_pcre::name_count, compile_data::name_entry_size, real_pcre::name_entry_size, compile_data::name_table, compile_data::names_found, NEWLINE, OP_BRA, OP_END, real_pcre::options, PCRE_ANCHORED, PCRE_CASELESS, pcre_default_tables, PCRE_DOTALL, PCRE_EXTENDED, PCRE_EXTRA, PCRE_FIRSTSET, PCRE_ICHANGED, PCRE_IMS, PCRE_MULTILINE, PCRE_NO_AUTO_CAPTURE, PCRE_REQCHSET, PCRE_STARTLINE, PCRE_UNGREEDY, PCRE_UTF8, PUBLIC_OPTIONS, read_repeat_counts(), real_pcre::req_byte, REQ_CASELESS, REQ_VARY, compile_data::req_varyopt, real_pcre::size, compile_data::start_code, real_pcre::tables, real_pcre::top_backref, compile_data::top_backref, and real_pcre::top_bracket.
Referenced by CF_HAND(), check_filter(), real_regmatch(), real_regrab(), and regexp_match().
04366 { 04367 real_pcre *re; 04368 int length = 1 + LINK_SIZE; /* For initial BRA plus length */ 04369 int runlength; 04370 int c, firstbyte, reqbyte; 04371 int bracount = 0; 04372 int branch_extra = 0; 04373 int branch_newextra; 04374 int item_count = -1; 04375 int name_count = 0; 04376 int max_name_size = 0; 04377 bool inescq = false; 04378 unsigned int brastackptr = 0; 04379 size_t size; 04380 uschar *code; 04381 const uschar *codestart; 04382 const uschar *ptr; 04383 compile_data compile_block; 04384 int brastack[BRASTACK_SIZE]; 04385 uschar bralenstack[BRASTACK_SIZE]; 04386 04387 /* We can't pass back an error message if errorptr is NULL; I guess the best we 04388 can do is just return NULL. */ 04389 04390 if (errorptr == NULL) return NULL; 04391 *errorptr = NULL; 04392 04393 /* However, we can give a message for this error */ 04394 04395 if (erroroffset == NULL) 04396 { 04397 *errorptr = ERR16; 04398 return NULL; 04399 } 04400 *erroroffset = 0; 04401 04402 /* Can't support UTF8 unless PCRE has been compiled to include the code. */ 04403 04404 if ((options & PCRE_UTF8) != 0) 04405 { 04406 *errorptr = ERR32; 04407 return NULL; 04408 } 04409 04410 if ((options & ~PUBLIC_OPTIONS) != 0) 04411 { 04412 *errorptr = ERR17; 04413 return NULL; 04414 } 04415 04416 /* Set up pointers to the individual character tables */ 04417 04418 if (tables == NULL) tables = pcre_default_tables; 04419 compile_block.lcc = tables + lcc_offset; 04420 compile_block.fcc = tables + fcc_offset; 04421 compile_block.cbits = tables + cbits_offset; 04422 compile_block.ctypes = tables + ctypes_offset; 04423 04424 /* Maximum back reference and backref bitmap. This is updated for numeric 04425 references during the first pass, but for named references during the actual 04426 compile pass. The bitmap records up to 31 back references to help in deciding 04427 whether (.*) can be treated as anchored or not. 
*/ 04428 04429 compile_block.top_backref = 0; 04430 compile_block.backref_map = 0; 04431 04432 /* Reflect pattern for debugging output */ 04433 04434 DPRINTF(("------------------------------------------------------------------\n")); 04435 DPRINTF(("%s\n", pattern)); 04436 04437 /* The first thing to do is to make a pass over the pattern to compute the 04438 amount of store required to hold the compiled code. This does not have to be 04439 perfect as long as errors are overestimates. At the same time we can detect any 04440 flag settings right at the start, and extract them. Make an attempt to correct 04441 for any counted white space if an "extended" flag setting appears late in the 04442 pattern. We can't be so clever for #-comments. */ 04443 04444 ptr = (const uschar *)(pattern - 1); 04445 while ((c = *(++ptr)) != 0) 04446 { 04447 int min, max; 04448 #if defined(WIN32) && (_MSC_VER == 1200) && defined(_M_IX86) && !defined(__INTEL_COMPILER) 04449 // The addition of 'volatile' works around a bug in Version 12.0 of 04450 // Microsoft's Visual C/C++ compiler (part of Visual Studio 6.0). Without 04451 // volatile, class_optcount is calculated properly, but the compiler 04452 // clobbers the EAX register before tests it as class_optcount. 04453 // 04454 // This is not a problem with the Intel Compiler. 04455 // 04456 volatile int class_optcount; 04457 #else 04458 int class_optcount; 04459 #endif 04460 int bracket_length; 04461 int duplength; 04462 04463 /* If we are inside a \Q...\E sequence, all chars are literal */ 04464 04465 if (inescq) goto NORMAL_CHAR; 04466 04467 /* Otherwise, first check for ignored whitespace and comments */ 04468 04469 if ((options & PCRE_EXTENDED) != 0) 04470 { 04471 if ((compile_block.ctypes[c] & ctype_space) != 0) continue; 04472 if (c == '#') 04473 { 04474 /* The space before the ; is to avoid a warning on a silly compiler 04475 on the Macintosh. 
*/ 04476 while ((c = *(++ptr)) != 0 && c != NEWLINE) ; 04477 if (c == 0) break; 04478 continue; 04479 } 04480 } 04481 04482 item_count++; /* Is zero for the first non-comment item */ 04483 04484 switch(c) 04485 { 04486 /* A backslashed item may be an escaped "normal" character or a 04487 character type. For a "normal" character, put the pointers and 04488 character back so that tests for whitespace etc. in the input 04489 are done correctly. */ 04490 04491 case '\\': 04492 { 04493 const uschar *save_ptr = ptr; 04494 c = check_escape(&ptr, errorptr, bracount, options, false); 04495 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 04496 if (c >= 0) 04497 { 04498 ptr = save_ptr; 04499 c = '\\'; 04500 goto NORMAL_CHAR; 04501 } 04502 } 04503 04504 /* If \Q, enter "literal" mode */ 04505 04506 if (-c == ESC_Q) 04507 { 04508 inescq = true; 04509 continue; 04510 } 04511 04512 /* Other escapes need one byte, and are of length one for repeats */ 04513 04514 length++; 04515 04516 /* A back reference needs an additional 2 bytes, plus either one or 5 04517 bytes for a repeat. We also need to keep the value of the highest 04518 back reference. */ 04519 04520 if (c <= -ESC_REF) 04521 { 04522 int refnum = -c - ESC_REF; 04523 compile_block.backref_map |= (refnum < 32)? 
(1 << refnum) : 1; 04524 if (refnum > compile_block.top_backref) 04525 compile_block.top_backref = refnum; 04526 length += 2; /* For single back reference */ 04527 if (ptr[1] == '{' && is_counted_repeat(ptr+2)) 04528 { 04529 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); 04530 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 04531 if ((min == 0 && (max == 1 || max == -1)) || 04532 (min == 1 && max == -1)) 04533 length++; 04534 else length += 5; 04535 if (ptr[1] == '?') ptr++; 04536 } 04537 } 04538 continue; 04539 04540 case '^': /* Single-byte metacharacters */ 04541 case '.': 04542 case '$': 04543 length++; 04544 continue; 04545 04546 case '*': /* These repeats won't be after brackets; */ 04547 case '+': /* those are handled separately */ 04548 case '?': 04549 length++; 04550 goto POSESSIVE; /* A few lines below */ 04551 04552 /* This covers the cases of braced repeats after a single char, metachar, 04553 class, or back reference. */ 04554 04555 case '{': 04556 if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; 04557 ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); 04558 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 04559 04560 /* These special cases just insert one extra opcode */ 04561 04562 if ((min == 0 && (max == 1 || max == -1)) || 04563 (min == 1 && max == -1)) 04564 length++; 04565 04566 /* These cases might insert additional copies of a preceding character. */ 04567 04568 else 04569 { 04570 04571 /* Not UTF-8 mode: all characters are one byte */ 04572 { 04573 if (min != 1) 04574 { 04575 length--; /* Uncount the original char or metachar */ 04576 if (min > 0) length += 4; 04577 } 04578 04579 length += (max > 0)? 
4 : 2; 04580 } 04581 } 04582 04583 if (ptr[1] == '?') ptr++; /* Needs no extra length */ 04584 04585 POSESSIVE: /* Test for possessive quantifier */ 04586 if (ptr[1] == '+') 04587 { 04588 ptr++; 04589 length += 2 + 2*LINK_SIZE; /* Allow for atomic brackets */ 04590 } 04591 continue; 04592 04593 /* An alternation contains an offset to the next branch or ket. If any ims 04594 options changed in the previous branch(es), and/or if we are in a 04595 lookbehind assertion, extra space will be needed at the start of the 04596 branch. This is handled by branch_extra. */ 04597 04598 case '|': 04599 length += 1 + LINK_SIZE + branch_extra; 04600 continue; 04601 04602 /* A character class uses 33 characters provided that all the character 04603 values are less than 256. Otherwise, it uses a bit map for low valued 04604 characters, and individual items for others. Don't worry about character 04605 types that aren't allowed in classes - they'll get picked up during the 04606 compile. A character class that contains only one single-byte character 04607 uses 2 or 3 bytes, depending on whether it is negated or not. Notice this 04608 where we can. (In UTF-8 mode we can do this only for chars < 128.) 
*/ 04609 04610 case '[': 04611 class_optcount = 0; 04612 04613 if (*(++ptr) == '^') ptr++; 04614 04615 /* Written as a "do" so that an initial ']' is taken as data */ 04616 04617 if (*ptr != 0) do 04618 { 04619 /* Inside \Q...\E everything is literal except \E */ 04620 04621 if (inescq) 04622 { 04623 if (*ptr != '\\' || ptr[1] != 'E') goto NON_SPECIAL_CHARACTER; 04624 inescq = false; 04625 ptr += 1; 04626 continue; 04627 } 04628 04629 /* Outside \Q...\E, check for escapes */ 04630 04631 if (*ptr == '\\') 04632 { 04633 int ch = check_escape(&ptr, errorptr, bracount, options, true); 04634 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 04635 04636 /* \b is backspace inside a class */ 04637 04638 if (-ch == ESC_b) ch = '\b'; 04639 04640 /* \Q enters quoting mode */ 04641 04642 if (-ch == ESC_Q) 04643 { 04644 inescq = true; 04645 continue; 04646 } 04647 04648 /* Handle escapes that turn into characters */ 04649 04650 if (ch >= 0) 04651 { 04652 class_optcount++; /* for possible optimization */ 04653 } 04654 else class_optcount = 10; /* \d, \s etc; make sure > 1 */ 04655 } 04656 04657 /* Check the syntax for POSIX stuff. The bits we actually handle are 04658 checked during the real compile phase. */ 04659 04660 else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block)) 04661 { 04662 ptr++; 04663 class_optcount = 10; /* Make sure > 1 */ 04664 } 04665 04666 /* Anything else just increments the possible optimization count. If 04667 there are wide characters, we are going to have to use an XCLASS. */ 04668 04669 else 04670 { 04671 NON_SPECIAL_CHARACTER: 04672 class_optcount++; 04673 04674 } 04675 } 04676 while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */ 04677 04678 if (*ptr == 0) /* Missing terminating ']' */ 04679 { 04680 *errorptr = ERR6; 04681 goto PCRE_ERROR_RETURN; 04682 } 04683 04684 /* We can optimize when there was only one optimizable character. 
Repeats 04685 for positive and negated single one-byte chars are handled by the general 04686 code. Here, we handle repeats for the class opcodes. */ 04687 04688 if (class_optcount == 1) length += 3; else 04689 { 04690 length += 33; 04691 04692 /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier, 04693 we also need extra for wrapping the whole thing in a sub-pattern. */ 04694 04695 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) 04696 { 04697 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); 04698 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 04699 if ((min == 0 && (max == 1 || max == -1)) || 04700 (min == 1 && max == -1)) 04701 length++; 04702 else length += 5; 04703 if (ptr[1] == '+') 04704 { 04705 ptr++; 04706 length += 2 + 2*LINK_SIZE; 04707 } 04708 else if (ptr[1] == '?') ptr++; 04709 } 04710 } 04711 continue; 04712 04713 /* Brackets may be genuine groups or special things */ 04714 04715 case '(': 04716 branch_newextra = 0; 04717 bracket_length = 1 + LINK_SIZE; 04718 04719 /* Handle special forms of bracket, which all start (? */ 04720 04721 if (ptr[1] == '?') 04722 { 04723 int set, unset; 04724 int *optset; 04725 04726 switch (c = ptr[2]) 04727 { 04728 /* Skip over comments entirely */ 04729 case '#': 04730 ptr += 3; 04731 while (*ptr != 0 && *ptr != ')') ptr++; 04732 if (*ptr == 0) 04733 { 04734 *errorptr = ERR18; 04735 goto PCRE_ERROR_RETURN; 04736 } 04737 continue; 04738 04739 /* Non-referencing groups and lookaheads just move the pointer on, and 04740 then behave like a non-special bracket, except that they don't increment 04741 the count of extracting brackets. Ditto for the "once only" bracket, 04742 which is in Perl from version 5.005. */ 04743 04744 case ':': 04745 case '=': 04746 case '!': 04747 case '>': 04748 ptr += 2; 04749 break; 04750 04751 /* (?R) specifies a recursive call to the regex, which is an extension 04752 to provide the facility which can be obtained by (?p{perl-code}) in 04753 Perl 5.6. 
In Perl 5.8 this has become (??{perl-code}). 04754 04755 From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to 04756 the appropriate numbered brackets. This includes both recursive and 04757 non-recursive calls. (?R) is now synonymous with (?0). */ 04758 04759 case 'R': 04760 ptr++; 04761 04762 case '0': case '1': case '2': case '3': case '4': 04763 case '5': case '6': case '7': case '8': case '9': 04764 ptr += 2; 04765 if (c != 'R') 04766 while ((digitab[*(++ptr)] & ctype_digit) != 0); 04767 if (*ptr != ')') 04768 { 04769 *errorptr = ERR29; 04770 goto PCRE_ERROR_RETURN; 04771 } 04772 length += 1 + LINK_SIZE; 04773 04774 /* If this item is quantified, it will get wrapped inside brackets so 04775 as to use the code for quantified brackets. We jump down and use the 04776 code that handles this for real brackets. */ 04777 04778 if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{') 04779 { 04780 length += 2 + 2 * LINK_SIZE; /* to make bracketed */ 04781 duplength = 5 + 3 * LINK_SIZE; 04782 goto HANDLE_QUANTIFIED_BRACKETS; 04783 } 04784 continue; 04785 04786 /* (?C) is an extension which provides "callout" - to provide a bit of 04787 the functionality of the Perl (?{...}) feature. An optional number may 04788 follow (default is zero). 
*/ 04789 04790 case 'C': 04791 ptr += 2; 04792 while ((digitab[*(++ptr)] & ctype_digit) != 0); 04793 if (*ptr != ')') 04794 { 04795 *errorptr = ERR39; 04796 goto PCRE_ERROR_RETURN; 04797 } 04798 length += 2; 04799 continue; 04800 04801 /* Named subpatterns are an extension copied from Python */ 04802 04803 case 'P': 04804 ptr += 3; 04805 if (*ptr == '<') 04806 { 04807 const uschar *p; /* Don't amalgamate; some compilers */ 04808 p = ++ptr; /* grumble at autoincrement in declaration */ 04809 while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++; 04810 if (*ptr != '>') 04811 { 04812 *errorptr = ERR42; 04813 goto PCRE_ERROR_RETURN; 04814 } 04815 name_count++; 04816 if (ptr - p > max_name_size) max_name_size = (ptr - p); 04817 break; 04818 } 04819 04820 if (*ptr == '=' || *ptr == '>') 04821 { 04822 while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0); 04823 if (*ptr != ')') 04824 { 04825 *errorptr = ERR42; 04826 goto PCRE_ERROR_RETURN; 04827 } 04828 break; 04829 } 04830 04831 /* Unknown character after (?P */ 04832 04833 *errorptr = ERR41; 04834 goto PCRE_ERROR_RETURN; 04835 04836 /* Lookbehinds are in Perl from version 5.005 */ 04837 04838 case '<': 04839 ptr += 3; 04840 if (*ptr == '=' || *ptr == '!') 04841 { 04842 branch_newextra = 1 + LINK_SIZE; 04843 length += 1 + LINK_SIZE; /* For the first branch */ 04844 break; 04845 } 04846 *errorptr = ERR24; 04847 goto PCRE_ERROR_RETURN; 04848 04849 /* Conditionals are in Perl from version 5.005. The bracket must either 04850 be followed by a number (for bracket reference) or by an assertion 04851 group, or (a PCRE extension) by 'R' for a recursion test. 
*/ 04852 04853 case '(': 04854 if (ptr[3] == 'R' && ptr[4] == ')') 04855 { 04856 ptr += 4; 04857 length += 3; 04858 } 04859 else if ((digitab[ptr[3]] & ctype_digit) != 0) 04860 { 04861 ptr += 4; 04862 length += 3; 04863 while ((digitab[*ptr] & ctype_digit) != 0) ptr++; 04864 if (*ptr != ')') 04865 { 04866 *errorptr = ERR26; 04867 goto PCRE_ERROR_RETURN; 04868 } 04869 } 04870 else /* An assertion must follow */ 04871 { 04872 ptr++; /* Can treat like ':' as far as spacing is concerned */ 04873 if (ptr[2] != '?' || 04874 (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) 04875 { 04876 ptr += 2; /* To get right offset in message */ 04877 *errorptr = ERR28; 04878 goto PCRE_ERROR_RETURN; 04879 } 04880 } 04881 break; 04882 04883 /* Else loop checking valid options until ) is met. Anything else is an 04884 error. If we are without any brackets, i.e. at top level, the settings 04885 act as if specified in the options, so massage the options immediately. 04886 This is for backward compatibility with Perl 5.004. */ 04887 04888 default: 04889 set = unset = 0; 04890 optset = &set; 04891 ptr += 2; 04892 04893 for (;; ptr++) 04894 { 04895 c = *ptr; 04896 switch (c) 04897 { 04898 case 'i': 04899 *optset |= PCRE_CASELESS; 04900 continue; 04901 04902 case 'm': 04903 *optset |= PCRE_MULTILINE; 04904 continue; 04905 04906 case 's': 04907 *optset |= PCRE_DOTALL; 04908 continue; 04909 04910 case 'x': 04911 *optset |= PCRE_EXTENDED; 04912 continue; 04913 04914 case 'X': 04915 *optset |= PCRE_EXTRA; 04916 continue; 04917 04918 case 'U': 04919 *optset |= PCRE_UNGREEDY; 04920 continue; 04921 04922 case '-': 04923 optset = &unset; 04924 continue; 04925 04926 /* A termination by ')' indicates an options-setting-only item; if 04927 this is at the very start of the pattern (indicated by item_count 04928 being zero), we use it to set the global options. This is helpful 04929 when analyzing the pattern for first characters, etc. 
Otherwise 04930 nothing is done here and it is handled during the compiling 04931 process. 04932 04933 [Historical note: Up to Perl 5.8, options settings at top level 04934 were always global settings, wherever they appeared in the pattern. 04935 That is, they were equivalent to an external setting. From 5.8 04936 onwards, they apply only to what follows (which is what you might 04937 expect).] */ 04938 04939 case ')': 04940 if (item_count == 0) 04941 { 04942 options = (options | set) & (~unset); 04943 set = unset = 0; /* To save length */ 04944 item_count--; /* To allow for several */ 04945 } 04946 04947 /* Fall through */ 04948 04949 /* A termination by ':' indicates the start of a nested group with 04950 the given options set. This is again handled at compile time, but 04951 we must allow for compiled space if any of the ims options are 04952 set. We also have to allow for resetting space at the end of 04953 the group, which is why 4 is added to the length and not just 2. 04954 If there are several changes of options within the same group, this 04955 will lead to an over-estimate on the length, but this shouldn't 04956 matter very much. We also have to allow for resetting options at 04957 the start of any alternations, which we do by setting 04958 branch_newextra to 2. Finally, we record whether the case-dependent 04959 flag ever changes within the regex. This is used by the "required 04960 character" code. */ 04961 04962 case ':': 04963 if (((set|unset) & PCRE_IMS) != 0) 04964 { 04965 length += 4; 04966 branch_newextra = 2; 04967 if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED; 04968 } 04969 goto END_OPTIONS; 04970 04971 /* Unrecognized option character */ 04972 04973 default: 04974 *errorptr = ERR12; 04975 goto PCRE_ERROR_RETURN; 04976 } 04977 } 04978 04979 /* If we hit a closing bracket, that's it - this is a freestanding 04980 option-setting. We need to ensure that branch_extra is updated if 04981 necessary. 
The only values branch_newextra can have here are 0 or 2. 04982 If the value is 2, then branch_extra must either be 2 or 5, depending 04983 on whether this is a lookbehind group or not. */ 04984 04985 END_OPTIONS: 04986 if (c == ')') 04987 { 04988 if (branch_newextra == 2 && 04989 (branch_extra == 0 || branch_extra == 1+LINK_SIZE)) 04990 branch_extra += branch_newextra; 04991 continue; 04992 } 04993 04994 /* If options were terminated by ':' control comes here. Fall through 04995 to handle the group below. */ 04996 } 04997 } 04998 04999 /* Extracting brackets must be counted so we can process escapes in a 05000 Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to 05001 need an additional 3 bytes of store per extracting bracket. However, if 05002 PCRE_NO_AUTO)CAPTURE is set, unadorned brackets become non-capturing, so we 05003 must leave the count alone (it will aways be zero). */ 05004 05005 else if ((options & PCRE_NO_AUTO_CAPTURE) == 0) 05006 { 05007 bracount++; 05008 if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; 05009 } 05010 05011 /* Save length for computing whole length at end if there's a repeat that 05012 requires duplication of the group. Also save the current value of 05013 branch_extra, and start the new group with the new value. If non-zero, this 05014 will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ 05015 05016 if (brastackptr >= sizeof(brastack)/sizeof(int)) 05017 { 05018 *errorptr = ERR19; 05019 goto PCRE_ERROR_RETURN; 05020 } 05021 05022 bralenstack[brastackptr] = branch_extra; 05023 branch_extra = branch_newextra; 05024 05025 brastack[brastackptr++] = length; 05026 length += bracket_length; 05027 continue; 05028 05029 /* Handle ket. Look for subsequent max/min; for certain sets of values we 05030 have to replicate this bracket up to that many times. 
If brastackptr is 05031 0 this is an unmatched bracket which will generate an error, but take care 05032 not to try to access brastack[-1] when computing the length and restoring 05033 the branch_extra value. */ 05034 05035 case ')': 05036 length += 1 + LINK_SIZE; 05037 if (brastackptr > 0) 05038 { 05039 duplength = length - brastack[--brastackptr]; 05040 branch_extra = bralenstack[brastackptr]; 05041 } 05042 else duplength = 0; 05043 05044 /* The following code is also used when a recursion such as (?3) is 05045 followed by a quantifier, because in that case, it has to be wrapped inside 05046 brackets so that the quantifier works. The value of duplength must be 05047 set before arrival. */ 05048 05049 HANDLE_QUANTIFIED_BRACKETS: 05050 05051 /* Leave ptr at the final char; for read_repeat_counts this happens 05052 automatically; for the others we need an increment. */ 05053 05054 if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) 05055 { 05056 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); 05057 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 05058 } 05059 else if (c == '*') { min = 0; max = -1; ptr++; } 05060 else if (c == '+') { min = 1; max = -1; ptr++; } 05061 else if (c == '?') { min = 0; max = 1; ptr++; } 05062 else { min = 1; max = 1; } 05063 05064 /* If the minimum is zero, we have to allow for an OP_BRAZERO before the 05065 group, and if the maximum is greater than zero, we have to replicate 05066 maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting 05067 bracket set. */ 05068 05069 if (min == 0) 05070 { 05071 length++; 05072 if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE); 05073 } 05074 05075 /* When the minimum is greater than zero, we have to replicate up to 05076 minval-1 times, with no additions required in the copies. 
Then, if there 05077 is a limited maximum we have to replicate up to maxval-1 times allowing 05078 for a BRAZERO item before each optional copy and nesting brackets for all 05079 but one of the optional copies. */ 05080 05081 else 05082 { 05083 length += (min - 1) * duplength; 05084 if (max > min) /* Need this test as max=-1 means no limit */ 05085 length += (max - min) * (duplength + 3 + 2*LINK_SIZE) 05086 - (2 + 2*LINK_SIZE); 05087 } 05088 05089 /* Allow space for once brackets for "possessive quantifier" */ 05090 05091 if (ptr[1] == '+') 05092 { 05093 ptr++; 05094 length += 2 + 2*LINK_SIZE; 05095 } 05096 continue; 05097 05098 /* Non-special character. For a run of such characters the length required 05099 is the number of characters + 2, except that the maximum run length is 05100 MAXLIT. We won't get a skipped space or a non-data escape or the start of a 05101 # comment as the first character, so the length can't be zero. */ 05102 05103 NORMAL_CHAR: 05104 default: 05105 length += 2; 05106 runlength = 0; 05107 do 05108 { 05109 05110 /* If in a \Q...\E sequence, check for end; otherwise it's a literal */ 05111 if (inescq) 05112 { 05113 if (c == '\\' && ptr[1] == 'E') 05114 { 05115 inescq = false; 05116 ptr++; 05117 } 05118 else runlength++; 05119 continue; 05120 } 05121 05122 /* Skip whitespace and comments for /x */ 05123 05124 if ((options & PCRE_EXTENDED) != 0) 05125 { 05126 if ((compile_block.ctypes[c] & ctype_space) != 0) continue; 05127 if (c == '#') 05128 { 05129 /* The space before the ; is to avoid a warning on a silly compiler 05130 on the Macintosh. */ 05131 while ((c = *(++ptr)) != 0 && c != NEWLINE) ; 05132 continue; 05133 } 05134 } 05135 05136 /* Backslash may introduce a data char or a metacharacter; stop the 05137 string before the latter. 
*/ 05138 05139 if (c == '\\') 05140 { 05141 const uschar *saveptr = ptr; 05142 c = check_escape(&ptr, errorptr, bracount, options, false); 05143 if (*errorptr != NULL) goto PCRE_ERROR_RETURN; 05144 if (c < 0) { ptr = saveptr; break; } 05145 05146 /* In UTF-8 mode, add on the number of additional bytes needed to 05147 encode this character, and save the total length in case this is a 05148 final char that is repeated. */ 05149 05150 } 05151 05152 /* Ordinary character or single-char escape */ 05153 05154 runlength++; 05155 } 05156 05157 /* This "while" is the end of the "do" above. */ 05158 05159 while (runlength < MAXLIT && 05160 (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); 05161 05162 /* If we hit a meta-character, back off to point to it */ 05163 05164