mux/src/pcre.h File Reference

#include <stdlib.h>

Include dependency graph for pcre.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  pcre_extra
struct  pcre_callout_block

Defines

#define PCRE_MAJOR   4
#define PCRE_MINOR   5
#define PCRE_DATE   01-December-2003
#define PCRE_DATA_SCOPE   extern
#define PCRE_CASELESS   0x0001
#define PCRE_MULTILINE   0x0002
#define PCRE_DOTALL   0x0004
#define PCRE_EXTENDED   0x0008
#define PCRE_ANCHORED   0x0010
#define PCRE_DOLLAR_ENDONLY   0x0020
#define PCRE_EXTRA   0x0040
#define PCRE_NOTBOL   0x0080
#define PCRE_NOTEOL   0x0100
#define PCRE_UNGREEDY   0x0200
#define PCRE_NOTEMPTY   0x0400
#define PCRE_UTF8   0x0800
#define PCRE_NO_AUTO_CAPTURE   0x1000
#define PCRE_NO_UTF8_CHECK   0x2000
#define PCRE_ERROR_NOMATCH   (-1)
#define PCRE_ERROR_NULL   (-2)
#define PCRE_ERROR_BADOPTION   (-3)
#define PCRE_ERROR_BADMAGIC   (-4)
#define PCRE_ERROR_UNKNOWN_NODE   (-5)
#define PCRE_ERROR_NOMEMORY   (-6)
#define PCRE_ERROR_NOSUBSTRING   (-7)
#define PCRE_ERROR_MATCHLIMIT   (-8)
#define PCRE_ERROR_CALLOUT   (-9)
#define PCRE_ERROR_BADUTF8   (-10)
#define PCRE_ERROR_BADUTF8_OFFSET   (-11)
#define PCRE_INFO_OPTIONS   0
#define PCRE_INFO_SIZE   1
#define PCRE_INFO_CAPTURECOUNT   2
#define PCRE_INFO_BACKREFMAX   3
#define PCRE_INFO_FIRSTBYTE   4
#define PCRE_INFO_FIRSTCHAR   4
#define PCRE_INFO_FIRSTTABLE   5
#define PCRE_INFO_LASTLITERAL   6
#define PCRE_INFO_NAMEENTRYSIZE   7
#define PCRE_INFO_NAMECOUNT   8
#define PCRE_INFO_NAMETABLE   9
#define PCRE_INFO_STUDYSIZE   10
#define PCRE_CONFIG_UTF8   0
#define PCRE_CONFIG_NEWLINE   1
#define PCRE_CONFIG_LINK_SIZE   2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD   3
#define PCRE_CONFIG_MATCH_LIMIT   4
#define PCRE_CONFIG_STACKRECURSE   5
#define PCRE_EXTRA_STUDY_DATA   0x0001
#define PCRE_EXTRA_MATCH_LIMIT   0x0002
#define PCRE_EXTRA_CALLOUT_DATA   0x0004

Typedefs

typedef struct real_pcre pcre

Functions

pcre * pcre_compile (const char *, int, const char **, int *, const unsigned char *)
int pcre_copy_substring (const char *, int *, int, int, char *, int)
int pcre_exec (const pcre *, const pcre_extra *, const char *, int, int, int, int *, int)
const unsigned char * pcre_maketables (void)
pcre_extra * pcre_study (const pcre *, int, const char **)


Define Documentation

#define PCRE_ANCHORED   0x0010

Definition at line 32 of file pcre.h.

Referenced by pcre_compile(), pcre_exec(), and pcre_study().

#define PCRE_CASELESS   0x0001

Definition at line 28 of file pcre.h.

Referenced by atr_match1(), check_filter(), compile_branch(), find_firstassertedchar(), match(), match_ref(), pcre_compile(), pcre_exec(), pcre_study(), process_cmdent(), real_regmatch(), real_regrab(), and set_start_bits().

#define PCRE_CONFIG_LINK_SIZE   2

Definition at line 76 of file pcre.h.

#define PCRE_CONFIG_MATCH_LIMIT   4

Definition at line 78 of file pcre.h.

#define PCRE_CONFIG_NEWLINE   1

Definition at line 75 of file pcre.h.

#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD   3

Definition at line 77 of file pcre.h.

#define PCRE_CONFIG_STACKRECURSE   5

Definition at line 79 of file pcre.h.

#define PCRE_CONFIG_UTF8   0

Definition at line 74 of file pcre.h.

#define PCRE_DATA_SCOPE   extern

Definition at line 18 of file pcre.h.

#define PCRE_DATE   01-December-2003

Definition at line 15 of file pcre.h.

#define PCRE_DOLLAR_ENDONLY   0x0020

Definition at line 33 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_DOTALL   0x0004

Definition at line 30 of file pcre.h.

Referenced by compile_branch(), is_anchored(), match(), pcre_compile(), and pcre_exec().

#define PCRE_ERROR_BADMAGIC   (-4)

Definition at line 48 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_ERROR_BADOPTION   (-3)

Definition at line 47 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_ERROR_BADUTF8   (-10)

Definition at line 54 of file pcre.h.

#define PCRE_ERROR_BADUTF8_OFFSET   (-11)

Definition at line 55 of file pcre.h.

#define PCRE_ERROR_CALLOUT   (-9)

Definition at line 53 of file pcre.h.

#define PCRE_ERROR_MATCHLIMIT   (-8)

Definition at line 52 of file pcre.h.

Referenced by match().

#define PCRE_ERROR_NOMATCH   (-1)

Definition at line 45 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_ERROR_NOMEMORY   (-6)

Definition at line 50 of file pcre.h.

Referenced by match(), pcre_copy_substring(), and pcre_exec().

#define PCRE_ERROR_NOSUBSTRING   (-7)

Definition at line 51 of file pcre.h.

Referenced by pcre_copy_substring().

#define PCRE_ERROR_NULL   (-2)

Definition at line 46 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_ERROR_UNKNOWN_NODE   (-5)

Definition at line 49 of file pcre.h.

Referenced by match().

#define PCRE_EXTENDED   0x0008

Definition at line 31 of file pcre.h.

Referenced by compile_branch(), and pcre_compile().

#define PCRE_EXTRA   0x0040

Definition at line 34 of file pcre.h.

Referenced by check_escape(), compile_branch(), and pcre_compile().

#define PCRE_EXTRA_CALLOUT_DATA   0x0004

Definition at line 85 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_EXTRA_MATCH_LIMIT   0x0002

Definition at line 84 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_EXTRA_STUDY_DATA   0x0001

Definition at line 83 of file pcre.h.

Referenced by pcre_exec(), and pcre_study().

#define PCRE_INFO_BACKREFMAX   3

Definition at line 62 of file pcre.h.

#define PCRE_INFO_CAPTURECOUNT   2

Definition at line 61 of file pcre.h.

#define PCRE_INFO_FIRSTBYTE   4

Definition at line 63 of file pcre.h.

#define PCRE_INFO_FIRSTCHAR   4

Definition at line 64 of file pcre.h.

#define PCRE_INFO_FIRSTTABLE   5

Definition at line 65 of file pcre.h.

#define PCRE_INFO_LASTLITERAL   6

Definition at line 66 of file pcre.h.

#define PCRE_INFO_NAMECOUNT   8

Definition at line 68 of file pcre.h.

#define PCRE_INFO_NAMEENTRYSIZE   7

Definition at line 67 of file pcre.h.

#define PCRE_INFO_NAMETABLE   9

Definition at line 69 of file pcre.h.

#define PCRE_INFO_OPTIONS   0

Definition at line 59 of file pcre.h.

#define PCRE_INFO_SIZE   1

Definition at line 60 of file pcre.h.

#define PCRE_INFO_STUDYSIZE   10

Definition at line 70 of file pcre.h.

#define PCRE_MAJOR   4

Definition at line 13 of file pcre.h.

#define PCRE_MINOR   5

Definition at line 14 of file pcre.h.

#define PCRE_MULTILINE   0x0002

Definition at line 29 of file pcre.h.

Referenced by compile_branch(), is_anchored(), match(), pcre_compile(), and pcre_exec().

#define PCRE_NO_AUTO_CAPTURE   0x1000

Definition at line 40 of file pcre.h.

Referenced by compile_branch(), and pcre_compile().

#define PCRE_NO_UTF8_CHECK   0x2000

Definition at line 41 of file pcre.h.

#define PCRE_NOTBOL   0x0080

Definition at line 35 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_NOTEMPTY   0x0400

Definition at line 38 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_NOTEOL   0x0100

Definition at line 36 of file pcre.h.

Referenced by pcre_exec().

#define PCRE_UNGREEDY   0x0200

Definition at line 37 of file pcre.h.

Referenced by compile_branch(), and pcre_compile().

#define PCRE_UTF8   0x0800

Definition at line 39 of file pcre.h.

Referenced by pcre_compile(), pcre_exec(), and pcre_study().


Typedef Documentation

typedef struct real_pcre pcre

Definition at line 90 of file pcre.h.


Function Documentation

pcre* pcre_compile ( const char *  ,
int  ,
const char **  ,
int *  ,
const unsigned char *   
)

Definition at line 4365 of file pcre.cpp.

References compile_data::backref_map, BRASTACK_SIZE, compile_data::cbits, cbits_offset, check_escape(), check_posix_syntax(), compile_regex(), ctype_digit, ctype_meta, ctype_space, ctype_word, compile_data::ctypes, ctypes_offset, digitab, DPRINTF, ERR12, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, ERR21, ERR22, ERR23, ERR24, ERR26, ERR28, ERR29, ERR32, ERR39, ERR41, ERR42, ERR6, ESC_b, ESC_Q, ESC_REF, EXTRACT_BASIC_MAX, compile_data::fcc, fcc_offset, find_firstassertedchar(), real_pcre::first_byte, is_anchored(), is_counted_repeat(), is_startline(), compile_data::lcc, lcc_offset, LINK_SIZE, MAGIC_NUMBER, real_pcre::magic_number, MAX_PATTERN_SIZE, MAXLIT, real_pcre::name_count, compile_data::name_entry_size, real_pcre::name_entry_size, compile_data::name_table, compile_data::names_found, NEWLINE, OP_BRA, OP_END, real_pcre::options, PCRE_ANCHORED, PCRE_CASELESS, pcre_default_tables, PCRE_DOTALL, PCRE_EXTENDED, PCRE_EXTRA, PCRE_FIRSTSET, PCRE_ICHANGED, PCRE_IMS, PCRE_MULTILINE, PCRE_NO_AUTO_CAPTURE, PCRE_REQCHSET, PCRE_STARTLINE, PCRE_UNGREEDY, PCRE_UTF8, PUBLIC_OPTIONS, read_repeat_counts(), real_pcre::req_byte, REQ_CASELESS, REQ_VARY, compile_data::req_varyopt, real_pcre::size, compile_data::start_code, real_pcre::tables, real_pcre::top_backref, compile_data::top_backref, and real_pcre::top_bracket.

Referenced by CF_HAND(), check_filter(), real_regmatch(), real_regrab(), and regexp_match().

04366 {
04367 real_pcre *re;
04368 int length = 1 + LINK_SIZE;      /* For initial BRA plus length */
04369 int runlength;
04370 int c, firstbyte, reqbyte;
04371 int bracount = 0;
04372 int branch_extra = 0;
04373 int branch_newextra;
04374 int item_count = -1;
04375 int name_count = 0;
04376 int max_name_size = 0;
04377 bool inescq = false;
04378 unsigned int brastackptr = 0;
04379 size_t size;
04380 uschar *code;
04381 const uschar *codestart;
04382 const uschar *ptr;
04383 compile_data compile_block;
04384 int brastack[BRASTACK_SIZE];
04385 uschar bralenstack[BRASTACK_SIZE];
04386 
04387 /* We can't pass back an error message if errorptr is NULL; I guess the best we
04388 can do is just return NULL. */
04389 
04390 if (errorptr == NULL) return NULL;
04391 *errorptr = NULL;
04392 
04393 /* However, we can give a message for this error */
04394 
04395 if (erroroffset == NULL)
04396   {
04397   *errorptr = ERR16;
04398   return NULL;
04399   }
04400 *erroroffset = 0;
04401 
04402 /* Can't support UTF8 unless PCRE has been compiled to include the code. */
04403 
04404 if ((options & PCRE_UTF8) != 0)
04405   {
04406   *errorptr = ERR32;
04407   return NULL;
04408   }
04409 
04410 if ((options & ~PUBLIC_OPTIONS) != 0)
04411   {
04412   *errorptr = ERR17;
04413   return NULL;
04414   }
04415 
04416 /* Set up pointers to the individual character tables */
04417 
04418 if (tables == NULL) tables = pcre_default_tables;
04419 compile_block.lcc = tables + lcc_offset;
04420 compile_block.fcc = tables + fcc_offset;
04421 compile_block.cbits = tables + cbits_offset;
04422 compile_block.ctypes = tables + ctypes_offset;
04423 
04424 /* Maximum back reference and backref bitmap. This is updated for numeric
04425 references during the first pass, but for named references during the actual
04426 compile pass. The bitmap records up to 31 back references to help in deciding
04427 whether (.*) can be treated as anchored or not. */
04428 
04429 compile_block.top_backref = 0;
04430 compile_block.backref_map = 0;
04431 
04432 /* Reflect pattern for debugging output */
04433 
04434 DPRINTF(("------------------------------------------------------------------\n"));
04435 DPRINTF(("%s\n", pattern));
04436 
04437 /* The first thing to do is to make a pass over the pattern to compute the
04438 amount of store required to hold the compiled code. This does not have to be
04439 perfect as long as errors are overestimates. At the same time we can detect any
04440 flag settings right at the start, and extract them. Make an attempt to correct
04441 for any counted white space if an "extended" flag setting appears late in the
04442 pattern. We can't be so clever for #-comments. */
04443 
04444 ptr = (const uschar *)(pattern - 1);
04445 while ((c = *(++ptr)) != 0)
04446   {
04447   int min, max;
04448 #if defined(WIN32) && (_MSC_VER == 1200) && defined(_M_IX86) && !defined(__INTEL_COMPILER)
04449   // The addition of 'volatile' works around a bug in Version 12.0 of
04450   // Microsoft's Visual C/C++ compiler (part of Visual Studio 6.0). Without
04451   // volatile, class_optcount is calculated properly, but the compiler
04452   // clobbers the EAX register before it tests it as class_optcount.
04453   //
04454   // This is not a problem with the Intel Compiler.
04455   //
04456   volatile int class_optcount;
04457 #else
04458   int class_optcount;
04459 #endif
04460   int bracket_length;
04461   int duplength;
04462 
04463   /* If we are inside a \Q...\E sequence, all chars are literal */
04464 
04465   if (inescq) goto NORMAL_CHAR;
04466 
04467   /* Otherwise, first check for ignored whitespace and comments */
04468 
04469   if ((options & PCRE_EXTENDED) != 0)
04470     {
04471     if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
04472     if (c == '#')
04473       {
04474       /* The space before the ; is to avoid a warning on a silly compiler
04475       on the Macintosh. */
04476       while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
04477       if (c == 0) break;
04478       continue;
04479       }
04480     }
04481 
04482   item_count++;    /* Is zero for the first non-comment item */
04483 
04484   switch(c)
04485     {
04486     /* A backslashed item may be an escaped "normal" character or a
04487     character type. For a "normal" character, put the pointers and
04488     character back so that tests for whitespace etc. in the input
04489     are done correctly. */
04490 
04491     case '\\':
04492       {
04493       const uschar *save_ptr = ptr;
04494       c = check_escape(&ptr, errorptr, bracount, options, false);
04495       if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04496       if (c >= 0)
04497         {
04498         ptr = save_ptr;
04499         c = '\\';
04500         goto NORMAL_CHAR;
04501         }
04502       }
04503 
04504     /* If \Q, enter "literal" mode */
04505 
04506     if (-c == ESC_Q)
04507       {
04508       inescq = true;
04509       continue;
04510       }
04511 
04512     /* Other escapes need one byte, and are of length one for repeats */
04513 
04514     length++;
04515 
04516     /* A back reference needs an additional 2 bytes, plus either one or 5
04517     bytes for a repeat. We also need to keep the value of the highest
04518     back reference. */
04519 
04520     if (c <= -ESC_REF)
04521       {
04522       int refnum = -c - ESC_REF;
04523       compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;
04524       if (refnum > compile_block.top_backref)
04525         compile_block.top_backref = refnum;
04526       length += 2;   /* For single back reference */
04527       if (ptr[1] == '{' && is_counted_repeat(ptr+2))
04528         {
04529         ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
04530         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04531         if ((min == 0 && (max == 1 || max == -1)) ||
04532           (min == 1 && max == -1))
04533             length++;
04534         else length += 5;
04535         if (ptr[1] == '?') ptr++;
04536         }
04537       }
04538     continue;
04539 
04540     case '^':     /* Single-byte metacharacters */
04541     case '.':
04542     case '$':
04543     length++;
04544     continue;
04545 
04546     case '*':            /* These repeats won't be after brackets; */
04547     case '+':            /* those are handled separately */
04548     case '?':
04549     length++;
04550     goto POSESSIVE;      /* A few lines below */
04551 
04552     /* This covers the cases of braced repeats after a single char, metachar,
04553     class, or back reference. */
04554 
04555     case '{':
04556     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
04557     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
04558     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04559 
04560     /* These special cases just insert one extra opcode */
04561 
04562     if ((min == 0 && (max == 1 || max == -1)) ||
04563       (min == 1 && max == -1))
04564         length++;
04565 
04566     /* These cases might insert additional copies of a preceding character. */
04567 
04568     else
04569       {
04570 
04571       /* Not UTF-8 mode: all characters are one byte */
04572         {
04573         if (min != 1)
04574           {
04575           length--;   /* Uncount the original char or metachar */
04576           if (min > 0) length += 4;
04577           }
04578 
04579         length += (max > 0)? 4 : 2;
04580         }
04581       }
04582 
04583     if (ptr[1] == '?') ptr++;      /* Needs no extra length */
04584 
04585     POSESSIVE:                     /* Test for possessive quantifier */
04586     if (ptr[1] == '+')
04587       {
04588       ptr++;
04589       length += 2 + 2*LINK_SIZE;   /* Allow for atomic brackets */
04590       }
04591     continue;
04592 
04593     /* An alternation contains an offset to the next branch or ket. If any ims
04594     options changed in the previous branch(es), and/or if we are in a
04595     lookbehind assertion, extra space will be needed at the start of the
04596     branch. This is handled by branch_extra. */
04597 
04598     case '|':
04599     length += 1 + LINK_SIZE + branch_extra;
04600     continue;
04601 
04602     /* A character class uses 33 characters provided that all the character
04603     values are less than 256. Otherwise, it uses a bit map for low valued
04604     characters, and individual items for others. Don't worry about character
04605     types that aren't allowed in classes - they'll get picked up during the
04606     compile. A character class that contains only one single-byte character
04607     uses 2 or 3 bytes, depending on whether it is negated or not. Notice this
04608     where we can. (In UTF-8 mode we can do this only for chars < 128.) */
04609 
04610     case '[':
04611     class_optcount = 0;
04612 
04613     if (*(++ptr) == '^') ptr++;
04614 
04615     /* Written as a "do" so that an initial ']' is taken as data */
04616 
04617     if (*ptr != 0) do
04618       {
04619       /* Inside \Q...\E everything is literal except \E */
04620 
04621       if (inescq)
04622         {
04623         if (*ptr != '\\' || ptr[1] != 'E') goto NON_SPECIAL_CHARACTER;
04624         inescq = false;
04625         ptr += 1;
04626         continue;
04627         }
04628 
04629       /* Outside \Q...\E, check for escapes */
04630 
04631       if (*ptr == '\\')
04632         {
04633         int ch = check_escape(&ptr, errorptr, bracount, options, true);
04634         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04635 
04636         /* \b is backspace inside a class */
04637 
04638         if (-ch == ESC_b) ch = '\b';
04639 
04640         /* \Q enters quoting mode */
04641 
04642         if (-ch == ESC_Q)
04643           {
04644           inescq = true;
04645           continue;
04646           }
04647 
04648         /* Handle escapes that turn into characters */
04649 
04650         if (ch >= 0)
04651           {
04652           class_optcount++;            /* for possible optimization */
04653           }
04654         else class_optcount = 10;      /* \d, \s etc; make sure > 1 */
04655         }
04656 
04657       /* Check the syntax for POSIX stuff. The bits we actually handle are
04658       checked during the real compile phase. */
04659 
04660       else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))
04661         {
04662         ptr++;
04663         class_optcount = 10;    /* Make sure > 1 */
04664         }
04665 
04666       /* Anything else just increments the possible optimization count. If
04667       there are wide characters, we are going to have to use an XCLASS. */
04668 
04669       else
04670         {
04671         NON_SPECIAL_CHARACTER:
04672         class_optcount++;
04673 
04674         }
04675       }
04676     while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */
04677 
04678     if (*ptr == 0)                          /* Missing terminating ']' */
04679       {
04680       *errorptr = ERR6;
04681       goto PCRE_ERROR_RETURN;
04682       }
04683 
04684     /* We can optimize when there was only one optimizable character. Repeats
04685     for positive and negated single one-byte chars are handled by the general
04686     code. Here, we handle repeats for the class opcodes. */
04687 
04688     if (class_optcount == 1) length += 3; else
04689       {
04690       length += 33;
04691 
04692       /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,
04693       we also need extra for wrapping the whole thing in a sub-pattern. */
04694 
04695       if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
04696         {
04697         ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
04698         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04699         if ((min == 0 && (max == 1 || max == -1)) ||
04700           (min == 1 && max == -1))
04701             length++;
04702         else length += 5;
04703         if (ptr[1] == '+')
04704           {
04705           ptr++;
04706           length += 2 + 2*LINK_SIZE;
04707           }
04708         else if (ptr[1] == '?') ptr++;
04709         }
04710       }
04711     continue;
04712 
04713     /* Brackets may be genuine groups or special things */
04714 
04715     case '(':
04716     branch_newextra = 0;
04717     bracket_length = 1 + LINK_SIZE;
04718 
04719     /* Handle special forms of bracket, which all start (? */
04720 
04721     if (ptr[1] == '?')
04722       {
04723       int set, unset;
04724       int *optset;
04725 
04726       switch (c = ptr[2])
04727         {
04728         /* Skip over comments entirely */
04729         case '#':
04730         ptr += 3;
04731         while (*ptr != 0 && *ptr != ')') ptr++;
04732         if (*ptr == 0)
04733           {
04734           *errorptr = ERR18;
04735           goto PCRE_ERROR_RETURN;
04736           }
04737         continue;
04738 
04739         /* Non-referencing groups and lookaheads just move the pointer on, and
04740         then behave like a non-special bracket, except that they don't increment
04741         the count of extracting brackets. Ditto for the "once only" bracket,
04742         which is in Perl from version 5.005. */
04743 
04744         case ':':
04745         case '=':
04746         case '!':
04747         case '>':
04748         ptr += 2;
04749         break;
04750 
04751         /* (?R) specifies a recursive call to the regex, which is an extension
04752         to provide the facility which can be obtained by (?p{perl-code}) in
04753         Perl 5.6. In Perl 5.8 this has become (??{perl-code}).
04754 
04755         From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to
04756         the appropriate numbered brackets. This includes both recursive and
04757         non-recursive calls. (?R) is now synonymous with (?0). */
04758 
04759         case 'R':
04760         ptr++;
04761 
04762         case '0': case '1': case '2': case '3': case '4':
04763         case '5': case '6': case '7': case '8': case '9':
04764         ptr += 2;
04765         if (c != 'R')
04766           while ((digitab[*(++ptr)] & ctype_digit) != 0);
04767         if (*ptr != ')')
04768           {
04769           *errorptr = ERR29;
04770           goto PCRE_ERROR_RETURN;
04771           }
04772         length += 1 + LINK_SIZE;
04773 
04774         /* If this item is quantified, it will get wrapped inside brackets so
04775         as to use the code for quantified brackets. We jump down and use the
04776         code that handles this for real brackets. */
04777 
04778         if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')
04779           {
04780           length += 2 + 2 * LINK_SIZE;       /* to make bracketed */
04781           duplength = 5 + 3 * LINK_SIZE;
04782           goto HANDLE_QUANTIFIED_BRACKETS;
04783           }
04784         continue;
04785 
04786         /* (?C) is an extension which provides "callout" - to provide a bit of
04787         the functionality of the Perl (?{...}) feature. An optional number may
04788         follow (default is zero). */
04789 
04790         case 'C':
04791         ptr += 2;
04792         while ((digitab[*(++ptr)] & ctype_digit) != 0);
04793         if (*ptr != ')')
04794           {
04795           *errorptr = ERR39;
04796           goto PCRE_ERROR_RETURN;
04797           }
04798         length += 2;
04799         continue;
04800 
04801         /* Named subpatterns are an extension copied from Python */
04802 
04803         case 'P':
04804         ptr += 3;
04805         if (*ptr == '<')
04806           {
04807           const uschar *p;    /* Don't amalgamate; some compilers */
04808           p = ++ptr;          /* grumble at autoincrement in declaration */
04809           while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;
04810           if (*ptr != '>')
04811             {
04812             *errorptr = ERR42;
04813             goto PCRE_ERROR_RETURN;
04814             }
04815           name_count++;
04816           if (ptr - p > max_name_size) max_name_size = (ptr - p);
04817           break;
04818           }
04819 
04820         if (*ptr == '=' || *ptr == '>')
04821           {
04822           while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
04823           if (*ptr != ')')
04824             {
04825             *errorptr = ERR42;
04826             goto PCRE_ERROR_RETURN;
04827             }
04828           break;
04829           }
04830 
04831         /* Unknown character after (?P */
04832 
04833         *errorptr = ERR41;
04834         goto PCRE_ERROR_RETURN;
04835 
04836         /* Lookbehinds are in Perl from version 5.005 */
04837 
04838         case '<':
04839         ptr += 3;
04840         if (*ptr == '=' || *ptr == '!')
04841           {
04842           branch_newextra = 1 + LINK_SIZE;
04843           length += 1 + LINK_SIZE;         /* For the first branch */
04844           break;
04845           }
04846         *errorptr = ERR24;
04847         goto PCRE_ERROR_RETURN;
04848 
04849         /* Conditionals are in Perl from version 5.005. The bracket must either
04850         be followed by a number (for bracket reference) or by an assertion
04851         group, or (a PCRE extension) by 'R' for a recursion test. */
04852 
04853         case '(':
04854         if (ptr[3] == 'R' && ptr[4] == ')')
04855           {
04856           ptr += 4;
04857           length += 3;
04858           }
04859         else if ((digitab[ptr[3]] & ctype_digit) != 0)
04860           {
04861           ptr += 4;
04862           length += 3;
04863           while ((digitab[*ptr] & ctype_digit) != 0) ptr++;
04864           if (*ptr != ')')
04865             {
04866             *errorptr = ERR26;
04867             goto PCRE_ERROR_RETURN;
04868             }
04869           }
04870         else   /* An assertion must follow */
04871           {
04872           ptr++;   /* Can treat like ':' as far as spacing is concerned */
04873           if (ptr[2] != '?' ||
04874              (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
04875             {
04876             ptr += 2;    /* To get right offset in message */
04877             *errorptr = ERR28;
04878             goto PCRE_ERROR_RETURN;
04879             }
04880           }
04881         break;
04882 
04883         /* Else loop checking valid options until ) is met. Anything else is an
04884         error. If we are without any brackets, i.e. at top level, the settings
04885         act as if specified in the options, so massage the options immediately.
04886         This is for backward compatibility with Perl 5.004. */
04887 
04888         default:
04889         set = unset = 0;
04890         optset = &set;
04891         ptr += 2;
04892 
04893         for (;; ptr++)
04894           {
04895           c = *ptr;
04896           switch (c)
04897             {
04898             case 'i':
04899             *optset |= PCRE_CASELESS;
04900             continue;
04901 
04902             case 'm':
04903             *optset |= PCRE_MULTILINE;
04904             continue;
04905 
04906             case 's':
04907             *optset |= PCRE_DOTALL;
04908             continue;
04909 
04910             case 'x':
04911             *optset |= PCRE_EXTENDED;
04912             continue;
04913 
04914             case 'X':
04915             *optset |= PCRE_EXTRA;
04916             continue;
04917 
04918             case 'U':
04919             *optset |= PCRE_UNGREEDY;
04920             continue;
04921 
04922             case '-':
04923             optset = &unset;
04924             continue;
04925 
04926             /* A termination by ')' indicates an options-setting-only item; if
04927             this is at the very start of the pattern (indicated by item_count
04928             being zero), we use it to set the global options. This is helpful
04929             when analyzing the pattern for first characters, etc. Otherwise
04930             nothing is done here and it is handled during the compiling
04931             process.
04932 
04933             [Historical note: Up to Perl 5.8, options settings at top level
04934             were always global settings, wherever they appeared in the pattern.
04935             That is, they were equivalent to an external setting. From 5.8
04936             onwards, they apply only to what follows (which is what you might
04937             expect).] */
04938 
04939             case ')':
04940             if (item_count == 0)
04941               {
04942               options = (options | set) & (~unset);
04943               set = unset = 0;     /* To save length */
04944               item_count--;        /* To allow for several */
04945               }
04946 
04947             /* Fall through */
04948 
04949             /* A termination by ':' indicates the start of a nested group with
04950             the given options set. This is again handled at compile time, but
04951             we must allow for compiled space if any of the ims options are
04952             set. We also have to allow for resetting space at the end of
04953             the group, which is why 4 is added to the length and not just 2.
04954             If there are several changes of options within the same group, this
04955             will lead to an over-estimate on the length, but this shouldn't
04956             matter very much. We also have to allow for resetting options at
04957             the start of any alternations, which we do by setting
04958             branch_newextra to 2. Finally, we record whether the case-dependent
04959             flag ever changes within the regex. This is used by the "required
04960             character" code. */
04961 
04962             case ':':
04963             if (((set|unset) & PCRE_IMS) != 0)
04964               {
04965               length += 4;
04966               branch_newextra = 2;
04967               if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
04968               }
04969             goto END_OPTIONS;
04970 
04971             /* Unrecognized option character */
04972 
04973             default:
04974             *errorptr = ERR12;
04975             goto PCRE_ERROR_RETURN;
04976             }
04977           }
04978 
04979         /* If we hit a closing bracket, that's it - this is a freestanding
04980         option-setting. We need to ensure that branch_extra is updated if
04981         necessary. The only values branch_newextra can have here are 0 or 2.
04982         If the value is 2, then branch_extra must either be 2 or 5, depending
04983         on whether this is a lookbehind group or not. */
04984 
04985         END_OPTIONS:
04986         if (c == ')')
04987           {
04988           if (branch_newextra == 2 &&
04989               (branch_extra == 0 || branch_extra == 1+LINK_SIZE))
04990             branch_extra += branch_newextra;
04991           continue;
04992           }
04993 
04994         /* If options were terminated by ':' control comes here. Fall through
04995         to handle the group below. */
04996         }
04997       }
04998 
04999     /* Extracting brackets must be counted so we can process escapes in a
05000     Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to
05001     need an additional 3 bytes of store per extracting bracket. However, if
05002     PCRE_NO_AUTO_CAPTURE is set, unadorned brackets become non-capturing, so we
05003     must leave the count alone (it will always be zero). */
05004 
05005     else if ((options & PCRE_NO_AUTO_CAPTURE) == 0)
05006       {
05007       bracount++;
05008       if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
05009       }
05010 
05011     /* Save length for computing whole length at end if there's a repeat that
05012     requires duplication of the group. Also save the current value of
05013     branch_extra, and start the new group with the new value. If non-zero, this
05014     will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
05015 
05016     if (brastackptr >= sizeof(brastack)/sizeof(int))
05017       {
05018       *errorptr = ERR19;
05019       goto PCRE_ERROR_RETURN;
05020       }
05021 
05022     bralenstack[brastackptr] = branch_extra;
05023     branch_extra = branch_newextra;
05024 
05025     brastack[brastackptr++] = length;
05026     length += bracket_length;
05027     continue;
05028 
05029     /* Handle ket. Look for subsequent max/min; for certain sets of values we
05030     have to replicate this bracket up to that many times. If brastackptr is
05031     0 this is an unmatched bracket which will generate an error, but take care
05032     not to try to access brastack[-1] when computing the length and restoring
05033     the branch_extra value. */
05034 
05035     case ')':
05036     length += 1 + LINK_SIZE;
05037     if (brastackptr > 0)
05038       {
05039       duplength = length - brastack[--brastackptr];
05040       branch_extra = bralenstack[brastackptr];
05041       }
05042     else duplength = 0;
05043 
05044     /* The following code is also used when a recursion such as (?3) is
05045     followed by a quantifier, because in that case, it has to be wrapped inside
05046     brackets so that the quantifier works. The value of duplength must be
05047     set before arrival. */
05048 
05049     HANDLE_QUANTIFIED_BRACKETS:
05050 
05051     /* Leave ptr at the final char; for read_repeat_counts this happens
05052     automatically; for the others we need an increment. */
05053 
05054     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
05055       {
05056       ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
05057       if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
05058       }
05059     else if (c == '*') { min = 0; max = -1; ptr++; }
05060     else if (c == '+') { min = 1; max = -1; ptr++; }
05061     else if (c == '?') { min = 0; max = 1;  ptr++; }
05062     else { min = 1; max = 1; }
05063 
05064     /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
05065     group, and if the maximum is greater than zero, we have to replicate
05066     maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
05067     bracket set. */
05068 
05069     if (min == 0)
05070       {
05071       length++;
05072       if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE);
05073       }
05074 
05075     /* When the minimum is greater than zero, we have to replicate up to
05076     minval-1 times, with no additions required in the copies. Then, if there
05077     is a limited maximum we have to replicate up to maxval-1 times allowing
05078     for a BRAZERO item before each optional copy and nesting brackets for all
05079     but one of the optional copies. */
05080 
05081     else
05082       {
05083       length += (min - 1) * duplength;
05084       if (max > min)   /* Need this test as max=-1 means no limit */
05085         length += (max - min) * (duplength + 3 + 2*LINK_SIZE)
05086           - (2 + 2*LINK_SIZE);
05087       }
05088 
05089     /* Allow space for once brackets for "possessive quantifier" */
05090 
05091     if (ptr[1] == '+')
05092       {
05093       ptr++;
05094       length += 2 + 2*LINK_SIZE;
05095       }
05096     continue;
05097 
05098     /* Non-special character. For a run of such characters the length required
05099     is the number of characters + 2, except that the maximum run length is
05100     MAXLIT. We won't get a skipped space or a non-data escape or the start of a
05101     # comment as the first character, so the length can't be zero. */
05102 
05103     NORMAL_CHAR:
05104     default:
05105     length += 2;
05106     runlength = 0;
05107     do
05108       {
05109 
05110       /* If in a \Q...\E sequence, check for end; otherwise it's a literal */
05111       if (inescq)
05112         {
05113         if (c == '\\' && ptr[1] == 'E')
05114           {
05115           inescq = false;
05116           ptr++;
05117           }
05118         else runlength++;
05119         continue;
05120         }
05121 
05122       /* Skip whitespace and comments for /x */
05123 
05124       if ((options & PCRE_EXTENDED) != 0)
05125         {
05126         if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
05127         if (c == '#')
05128           {
05129           /* The space before the ; is to avoid a warning on a silly compiler
05130           on the Macintosh. */
05131           while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
05132           continue;
05133           }
05134         }
05135 
05136       /* Backslash may introduce a data char or a metacharacter; stop the
05137       string before the latter. */
05138 
05139       if (c == '\\')
05140         {
05141         const uschar *saveptr = ptr;
05142         c = check_escape(&ptr, errorptr, bracount, options, false);
05143         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
05144         if (c < 0) { ptr = saveptr; break; }
05145 
05146         /* In UTF-8 mode, add on the number of additional bytes needed to
05147         encode this character, and save the total length in case this is a
05148         final char that is repeated. */
05149 
05150         }
05151 
05152       /* Ordinary character or single-char escape */
05153 
05154       runlength++;
05155       }
05156 
05157     /* This "while" is the end of the "do" above. */
05158 
05159     while (runlength < MAXLIT &&
05160       (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
05161 
05162     /* If we hit a meta-character, back off to point to it */
05163 
05164