mux/src/pcre.cpp

Go to the documentation of this file.
00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 
00006 /* This is a library of functions to support regular expressions whose syntax
00007 and semantics are as close as possible to those of the Perl 5 language. See
00008 the file Tech.Notes for some information on the internals.
00009 
00010 Written by: Philip Hazel <ph10@cam.ac.uk>
00011 
00012            Copyright (c) 1997-2003 University of Cambridge
00013 
00014 -----------------------------------------------------------------------------
00015 Permission is granted to anyone to use this software for any purpose on any
00016 computer system, and to redistribute it freely, subject to the following
00017 restrictions:
00018 
00019 1. This software is distributed in the hope that it will be useful,
00020    but WITHOUT ANY WARRANTY; without even the implied warranty of
00021    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00022 
00023 2. The origin of this software must not be misrepresented, either by
00024    explicit claim or by omission.
00025 
00026 3. Altered versions must be plainly marked as such, and must not be
00027    misrepresented as being the original software.
00028 
00029 4. If PCRE is embedded in any software that is released under the GNU
00030    General Purpose Licence (GPL), then the terms of that licence shall
00031    supersede any condition above with which it is incompatible.
00032 -----------------------------------------------------------------------------
00033 */
00034 
00035 /* Modified by Shawn Wagner for MUX to fit in one file and remove
00036    things we don't use, like a bunch of API functions and utf-8
00037    support. If you want the full thing, see http://www.pcre.org.
00038 
00039    Patched by Alierak to protect against integer overflow in repeat
00040    counts.
00041  */
00042 
00043 #include "autoconf.h"
00044 #include "config.h"
00045 
00046 #include <limits.h>
00047 #include <string.h>
00048 #include <ctype.h>
00049 #include <stdlib.h>
00050 #include <stddef.h>
00051 #include "pcre.h"
00052 
00053 #include "externs.h"
00054 #include "timeutil.h"
00055 
00056 /* Bits of PCRE's config.h, set here because this build lives in one file */
00057 #define LINK_SIZE 2 /* bytes per stored offset; sizes the 1+LINK_SIZE entries of OP_LENGTHS */
00058 #define MATCH_LIMIT 100000 /* NOTE(review): presumably the default for match_data.match_limit -- confirm */
00059 #define NEWLINE '\n' /* ESC_n is defined as this unless it is already defined */
00060 
00061 /* Bits of internal.h */
00062 /* This header contains definitions that are shared between the different
00063 modules, but which are not relevant to the outside. */
00064 
00065 
00066 /* PCRE keeps offsets in its compiled code as 2-byte quantities by default.
00067 These are used, for example, to link from the start of a subpattern to its
00068 alternatives and its end. The use of 2 bytes per offset limits the size of the
00069 compiled regex to around 64K, which is big enough for almost everybody.
00070 However, I received a request for an even bigger limit. For this reason, and
00071 also to make the code easier to maintain, the storing and loading of offsets
00072 from the byte string is now handled by the macros that are defined here.
00073 
00074 The macros are controlled by the value of LINK_SIZE. Upstream PCRE defaults it
00075 to 2 in config.h and lets -D or "configure" override it; this single-file copy
00076 defines LINK_SIZE as 2 above and carries only the 2-byte forms of the macros. */
00077 
00078 #define PUT(a,n,d)   /* store offset d at a[n], most significant byte first */ \
00079   (((a)[n] = (d) >> 8), \
00080   ((a)[(n)+1] = (d) & 255))
00081 /* PUT is one parenthesized expression; a, n and d are evaluated more than once. */
00082 #define GET(a,n)     /* load the 2-byte offset stored at a[n] by PUT */ \
00083   (((a)[n] << 8) | (a)[(n)+1])
00084 
00085 #define MAX_PATTERN_SIZE (1 << 16)   /* 64K, the reach of a 2-byte offset */
00086 
00087 
00088 /* Convenience macro defined in terms of the others: store d with PUT, then
00089 advance a by LINK_SIZE. Parenthesized so it stays a single expression. */
00090 #define PUTINC(a,n,d)   (PUT(a,n,d), (a) += LINK_SIZE)
00091 
00092 
00093 /* PCRE uses some other 2-byte quantities that do not change when the size of
00094 offsets changes. These are used for repeat counts and for other things such as
00095 capturing parenthesis numbers in back references. */
00096 /* PUT2 is a single expression (not two statements), so an unbraced if covers both stores. */
00097 #define PUT2(a,n,d)   /* store d at a[n], most significant byte first */ \
00098   (((a)[n] = (d) >> 8), \
00099   ((a)[(n)+1] = (d) & 255))
00100 
00101 #define GET2(a,n)     /* load the 2-byte value stored at a[n] by PUT2 */ \
00102   (((a)[n] << 8) | (a)[(n)+1])
00103 /* PUT2INC stores d with PUT2, then advances a by the two bytes written. */
00104 #define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += 2)
00105 
00106 /* These are the public options that can change during matching; PCRE_IMS is the mask of all three. */
00107 
00108 #define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
00109 
00110 /* Private options flags start at the most significant end of the four bytes,
00111 but skip the top bit so we can use ints for convenience without getting tangled
00112 with negative values. The public options defined in pcre.h start at the least
00113 significant end. Make sure they don't overlap, though now that we have expanded
00114 to four bytes there is plenty of space. */
00115 
00116 #define PCRE_FIRSTSET      0x40000000  /* first_byte is set */
00117 #define PCRE_REQCHSET      0x20000000  /* req_byte is set */
00118 #define PCRE_STARTLINE     0x10000000  /* start after \n for multiline */
00119 #define PCRE_ICHANGED      0x08000000  /* i option changes within regex */
00120 
00121 /* Options for the "extra" block produced by pcre_study(). */
00122 
00123 #define PCRE_STUDY_MAPPED   0x01     /* a map of starting chars exists */
00124 
00125 /* Masks for identifying the public options which are permitted at compile
00126 time, run time or study time, respectively. */
00127 
00128 #define PUBLIC_OPTIONS \
00129   (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
00130    PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
00131    PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
00132 
00133 #define PUBLIC_EXEC_OPTIONS \
00134   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
00135 
00136 #define PUBLIC_STUDY_OPTIONS 0   /* None defined */
00137 
00138 /* Magic number to provide a small check against being handed junk. */
00139 
00140 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
00141 
00142 /* Negative values for the firstchar and reqchar variables */
00143 
00144 #define REQ_UNSET (-2) /* NOTE(review): presumably "not yet determined" -- confirm at the use sites */
00145 #define REQ_NONE  (-1) /* NOTE(review): presumably "known to have no such byte" -- confirm */
00146 
00147 /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
00148 variable-length repeat, or anything other than literal characters. */
00149 
00150 #define REQ_CASELESS 0x0100    /* indicates caselessness */
00151 #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
00152 
00153 /* Miscellaneous definitions */
00154 
00155 /* Escape items that are just an encoding of a particular data value. Note that
00156 ESC_n is defined as yet another macro, NEWLINE, which upstream sets in config.h
00157 to either \n (the default) or \r; this file defines NEWLINE as \n near the top. */
00158 
00159 #ifndef ESC_e
00160 #define ESC_e 27
00161 #endif
00162 
00163 #ifndef ESC_f
00164 #define ESC_f '\f'
00165 #endif
00166 
00167 #ifndef ESC_n
00168 #define ESC_n NEWLINE
00169 #endif
00170 
00171 #ifndef ESC_r
00172 #define ESC_r '\r'
00173 #endif
00174 
00175 /* We can't officially use ESC_t because it is a POSIX reserved identifier
00176 (presumably because of all the others like size_t). */
00177 
00178 #ifndef ESC_tee
00179 #define ESC_tee '\t'
00180 #endif
00181 
00182 /* These are escaped items that aren't just an encoding of a particular data
00183 value such as \n. They must have non-zero values, as check_escape() returns
00184 their negation. Also, they must appear in the same order as in the opcode
00185 definitions below, up to ESC_z. There's a dummy for OP_ANY because it
00186 corresponds to "." rather than an escape sequence. The final one must be
00187 ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
00188 tests in the code for an escape greater than ESC_b and less than ESC_Z to
00189 detect the types that may be repeated. These are the types that consume a
00190 character. If any new escapes are put in between that don't consume a
00191 character, that code will have to change. */
00192 
00193 enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
00194        ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
00195 
00196 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
00197 contain UTF-8 characters with values greater than 255. */
00198 /* XCL_NOT and XCL_MAP are bit flags; XCL_END, XCL_SINGLE and XCL_RANGE are item type codes. */
00199 #define XCL_NOT    0x01    /* Flag: this is a negative class */
00200 #define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
00201 
00202 #define XCL_END       0    /* Marks end of individual items */
00203 #define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
00204 #define XCL_RANGE     2    /* A range (two multibyte chars) follows */
00205 
00206 
00207 /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
00208 that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
00209 OP_EOD must correspond in order to the list of escapes immediately above.
00210 Note that whenever this list is updated, the two macro definitions that follow
00211 (OP_NAME_LIST and OP_LENGTHS) must also be updated to match. */
00212 
00213 enum {
00214   OP_END,            /* 0 End of pattern */
00215 
00216   /* Values corresponding to backslashed metacharacters */
00217 
00218   OP_SOD,            /* 1 Start of data: \A */
00219   OP_SOM,            /* 2 Start of match (subject + offset): \G */
00220   OP_NOT_WORD_BOUNDARY,  /*  3 \B */
00221   OP_WORD_BOUNDARY,      /*  4 \b */
00222   OP_NOT_DIGIT,          /*  5 \D */
00223   OP_DIGIT,              /*  6 \d */
00224   OP_NOT_WHITESPACE,     /*  7 \S */
00225   OP_WHITESPACE,         /*  8 \s */
00226   OP_NOT_WORDCHAR,       /*  9 \W */
00227   OP_WORDCHAR,           /* 10 \w */
00228   OP_ANY,            /* 11 Match any character */
00229   OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
00230   OP_EODN,           /* 13 End of data or \n at end of data: \Z. */
00231   OP_EOD,            /* 14 End of data: \z */
00232 
00233   OP_OPT,            /* 15 Set runtime options */
00234   OP_CIRC,           /* 16 Start of line - varies with multiline switch */
00235   OP_DOLL,           /* 17 End of line - varies with multiline switch */
00236   OP_CHARS,          /* 18 Match string of characters */
00237   OP_NOT,            /* 19 Match anything but the following char */
00238 
00239   OP_STAR,           /* 20 The maximizing and minimizing versions of */
00240   OP_MINSTAR,        /* 21 all these opcodes must come in pairs, with */
00241   OP_PLUS,           /* 22 the minimizing one second. */
00242   OP_MINPLUS,        /* 23 This first set applies to single characters */
00243   OP_QUERY,          /* 24 */
00244   OP_MINQUERY,       /* 25 */
00245   OP_UPTO,           /* 26 From 0 to n matches */
00246   OP_MINUPTO,        /* 27 */
00247   OP_EXACT,          /* 28 Exactly n matches */
00248 
00249   OP_NOTSTAR,        /* 29 The maximizing and minimizing versions of */
00250   OP_NOTMINSTAR,     /* 30 all these opcodes must come in pairs, with */
00251   OP_NOTPLUS,        /* 31 the minimizing one second. */
00252   OP_NOTMINPLUS,     /* 32 This set applies to "not" single characters */
00253   OP_NOTQUERY,       /* 33 */
00254   OP_NOTMINQUERY,    /* 34 */
00255   OP_NOTUPTO,        /* 35 From 0 to n matches */
00256   OP_NOTMINUPTO,     /* 36 */
00257   OP_NOTEXACT,       /* 37 Exactly n matches */
00258 
00259   OP_TYPESTAR,       /* 38 The maximizing and minimizing versions of */
00260   OP_TYPEMINSTAR,    /* 39 all these opcodes must come in pairs, with */
00261   OP_TYPEPLUS,       /* 40 the minimizing one second. These codes must */
00262   OP_TYPEMINPLUS,    /* 41 be in exactly the same order as those above. */
00263   OP_TYPEQUERY,      /* 42 This set applies to character types such as \d */
00264   OP_TYPEMINQUERY,   /* 43 */
00265   OP_TYPEUPTO,       /* 44 From 0 to n matches */
00266   OP_TYPEMINUPTO,    /* 45 */
00267   OP_TYPEEXACT,      /* 46 Exactly n matches */
00268 
00269   OP_CRSTAR,         /* 47 The maximizing and minimizing versions of */
00270   OP_CRMINSTAR,      /* 48 all these opcodes must come in pairs, with */
00271   OP_CRPLUS,         /* 49 the minimizing one second. These codes must */
00272   OP_CRMINPLUS,      /* 50 be in exactly the same order as those above. */
00273   OP_CRQUERY,        /* 51 These are for character classes and back refs */
00274   OP_CRMINQUERY,     /* 52 */
00275   OP_CRRANGE,        /* 53 These are different to the three sets above. */
00276   OP_CRMINRANGE,     /* 54 */
00277 
00278   OP_CLASS,          /* 55 Match a character class, chars < 256 only */
00279   OP_NCLASS,         /* 56 Same, but the bitmap was created from a negative
00280                            class - the difference is relevant only when a UTF-8
00281                            character > 255 is encountered. */
00282 
00283   OP_XCLASS,         /* 57 Extended class for handling UTF-8 chars within the
00284                            class. This does both positive and negative. */
00285 
00286   OP_REF,            /* 58 Match a back reference */
00287   OP_RECURSE,        /* 59 Match a numbered subpattern (possibly recursive) */
00288   OP_CALLOUT,        /* 60 Call out to external function if provided */
00289 
00290   OP_ALT,            /* 61 Start of alternation */
00291   OP_KET,            /* 62 End of group that doesn't have an unbounded repeat */
00292   OP_KETRMAX,        /* 63 These two must remain together and in this */
00293   OP_KETRMIN,        /* 64 order. They are for groups that repeat for ever. */
00294 
00295   /* The assertions must come before ONCE and COND */
00296 
00297   OP_ASSERT,         /* 65 Positive lookahead */
00298   OP_ASSERT_NOT,     /* 66 Negative lookahead */
00299   OP_ASSERTBACK,     /* 67 Positive lookbehind */
00300   OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
00301   OP_REVERSE,        /* 69 Move pointer back - used in lookbehind assertions */
00302 
00303   /* ONCE and COND must come after the assertions, with ONCE first, as there's
00304   a test for >= ONCE for a subpattern that isn't an assertion. */
00305 
00306   OP_ONCE,           /* 70 Once matched, don't back up into the subpattern */
00307   OP_COND,           /* 71 Conditional group */
00308   OP_CREF,           /* 72 Used to hold an extraction string number (cond ref) */
00309 
00310   OP_BRAZERO,        /* 73 These two must remain together and in this */
00311   OP_BRAMINZERO,     /* 74 order. */
00312 
00313   OP_BRANUMBER,      /* 75 Used for extracting brackets whose number is greater
00314                            than can fit into an opcode. */
00315 
00316   OP_BRA             /* 76 This and greater values are used for brackets that
00317                            extract substrings up to a basic limit. After that,
00318                            use is made of OP_BRANUMBER. */
00319 };
00320 
00321 /* WARNING: There is an implicit assumption in study.c that all opcodes are
00322 less than 128 in value. This makes handling UTF-8 character sequences easier.
00323 */
00324 
00325 
00326 /* This macro defines textual names for all the opcodes, one string per opcode
00327 from OP_END to OP_BRA in enum order. These are used only for debugging, in pcre.c
00328 when DEBUG is defined, and also in pcretest.c. Referenced only in printint.c. */
00329 
00330 #define OP_NAME_LIST \
00331   "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \
00332   "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z",     \
00333   "Opt", "^", "$", "chars", "not",                                \
00334   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
00335   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
00336   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
00337   "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
00338   "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
00339   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
00340   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
00341   "Brazero", "Braminzero", "Branumber", "Bra"
00342 
00343 
00344 /* This macro defines the length of fixed length operations in the compiled
00345 regex. The lengths are used when searching for specific things, and also in the
00346 debugging printing of a compiled regex. We use a macro so that it can be
00347 incorporated both into pcre.c and pcretest.c without being publicly exposed.
00348 
00349 As things have been extended, some of these are no longer fixed lengths, but are
00350 minima instead. For example, the length of a single-character repeat may vary
00351 in UTF-8 mode. The code that uses this table must know about such things. */
00352 
00353 #define OP_LENGTHS \
00354   1,                             /* End                                    */ \
00355   1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
00356   1, 1, 1, 1, 2, 1, 1,           /* Any, Anybyte, \Z, \z, Opt, ^, $        */ \
00357   2,                             /* Chars - the minimum length             */ \
00358   2,                             /* not                                    */ \
00359   /* Positive single-char repeats                            ** These are  */ \
00360   2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
00361   4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
00362   /* Negative single-char repeats - only for chars < 256                   */ \
00363   2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
00364   4, 4, 4,                       /* NOT upto, minupto, exact               */ \
00365   /* Positive type repeats                                                 */ \
00366   2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
00367   4, 4, 4,                       /* Type upto, minupto, exact              */ \
00368   /* Character class & ref repeats                                         */ \
00369   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
00370   5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
00371  33,                             /* CLASS                                  */ \
00372  33,                             /* NCLASS                                 */ \
00373   0,                             /* XCLASS - variable length               */ \
00374   3,                             /* REF                                    */ \
00375   1+LINK_SIZE,                   /* RECURSE                                */ \
00376   2,                             /* CALLOUT                                */ \
00377   1+LINK_SIZE,                   /* Alt                                    */ \
00378   1+LINK_SIZE,                   /* Ket                                    */ \
00379   1+LINK_SIZE,                   /* KetRmax                                */ \
00380   1+LINK_SIZE,                   /* KetRmin                                */ \
00381   1+LINK_SIZE,                   /* Assert                                 */ \
00382   1+LINK_SIZE,                   /* Assert not                             */ \
00383   1+LINK_SIZE,                   /* Assert behind                          */ \
00384   1+LINK_SIZE,                   /* Assert behind not                      */ \
00385   1+LINK_SIZE,                   /* Reverse                                */ \
00386   1+LINK_SIZE,                   /* Once                                   */ \
00387   1+LINK_SIZE,                   /* COND                                   */ \
00388   3,                             /* CREF                                   */ \
00389   1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
00390   3,                             /* BRANUMBER                              */ \
00391   1+LINK_SIZE                    /* BRA                                    */ \
00392 
00393 
00394 /* The highest extraction number before we have to start using additional
00395 bytes. (Originally PCRE didn't have support for extraction counts higher than
00396 this number.) The value is limited by the number of opcodes left after OP_BRA,
00397 i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
00398 opcodes. */
00399 
00400 #define EXTRACT_BASIC_MAX  150
00401 
00402 /* A magic value for OP_CREF to indicate the "in recursion" condition. */
00403 
00404 #define CREF_RECURSE  0xffff
00405 
00406 /* The texts of compile-time error messages are defined as macros here so that
00407 they can be accessed by the POSIX wrapper and converted into error codes.  Yes,
00408 I could have used error codes in the first place, but didn't feel like changing
00409 just to accommodate the POSIX wrapper. */
00410 
00411 #define ERR1  "\\ at end of pattern"
00412 #define ERR2  "\\c at end of pattern"
00413 #define ERR3  "unrecognized character follows \\"
00414 #define ERR4  "numbers out of order in {} quantifier"
00415 #define ERR5  "number too big in {} quantifier"
00416 #define ERR6  "missing terminating ] for character class"
00417 #define ERR7  "invalid escape sequence in character class"
00418 #define ERR8  "range out of order in character class"
00419 #define ERR9  "nothing to repeat"
00420 #define ERR10 "operand of unlimited repeat could match the empty string"
00421 #define ERR11 "internal error: unexpected repeat"
00422 #define ERR12 "unrecognized character after (?"
00423 #define ERR13 "POSIX named classes are supported only within a class"
00424 #define ERR14 "missing )"
00425 #define ERR15 "reference to non-existent subpattern"
00426 #define ERR16 "erroffset passed as NULL"
00427 #define ERR17 "unknown option bit(s) set"
00428 #define ERR18 "missing ) after comment"
00429 #define ERR19 "parentheses nested too deeply"
00430 #define ERR20 "regular expression too large"
00431 #define ERR21 "failed to get memory"
00432 #define ERR22 "unmatched parentheses"
00433 #define ERR23 "internal error: code overflow"
00434 #define ERR24 "unrecognized character after (?<"
00435 #define ERR25 "lookbehind assertion is not fixed length"
00436 #define ERR26 "malformed number after (?("
00437 #define ERR27 "conditional group contains more than two branches"
00438 #define ERR28 "assertion expected after (?("
00439 #define ERR29 "(?R or (?digits must be followed by )"
00440 #define ERR30 "unknown POSIX class name"
00441 #define ERR31 "POSIX collating elements are not supported"
00442 #define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support" /* the file header says UTF-8 support was removed from this copy */
00443 #define ERR33 "spare error" /* NOTE(review): looks like an unused placeholder slot -- confirm */
00444 #define ERR34 "character value in \\x{...} sequence is too large"
00445 #define ERR35 "invalid condition (?(0)"
00446 #define ERR36 "\\C not allowed in lookbehind assertion"
00447 #define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
00448 #define ERR38 "number after (?C is > 255"
00449 #define ERR39 "closing ) for (?C expected"
00450 #define ERR40 "recursive call could loop indefinitely"
00451 #define ERR41 "unrecognized character after (?P"
00452 #define ERR42 "syntax error after (?P"
00453 #define ERR43 "two named groups have the same name"
00454 #define ERR44 "invalid UTF-8 string"
00455 
00456 /* All character handling must be done as unsigned characters. Otherwise there
00457 are problems with top-bit-set characters and functions such as isspace().
00458 However, we leave the interface to the outside world as char *, because that
00459 should make things easier for callers. We define a short type for unsigned char
00460 to save lots of typing. I tried "uchar", but it causes problems on Digital
00461 Unix, where it is defined in sys/types, so use "uschar" instead. */
00462 
00463 typedef unsigned char uschar; /* used below for table, code and subject pointers */
00464 
00465 /* The real format of the start of the pcre block; the index of names and the
00466 code vector run on as long as necessary after the end. */
00467 
00468 typedef struct real_pcre {
00469   unsigned long int magic_number;     /* NOTE(review): presumably MAGIC_NUMBER in a valid block -- confirm */
00470   size_t size;                        /* Total that was malloced */
00471   const unsigned char *tables;        /* Pointer to tables */
00472   unsigned long int options;          /* NOTE(review): presumably public options plus the private PCRE_FIRSTSET.. flags -- confirm */
00473   unsigned short int top_bracket;     /* NOTE(review): presumably the highest capturing bracket number -- confirm */
00474   unsigned short int top_backref;     /* cf. compile_data.top_backref (maximum back reference) */
00475   unsigned short int first_byte;      /* PCRE_FIRSTSET in options says whether this is set */
00476   unsigned short int req_byte;        /* PCRE_REQCHSET in options says whether this is set */
00477   unsigned short int name_entry_size; /* Size of any name items; 0 => none */
00478   unsigned short int name_count;      /* Number of name items */
00479 } real_pcre;
00480 
00481 /* The format of the block used to store data from pcre_study(). */
00482 
00483 typedef struct pcre_study_data {
00484   size_t size;                        /* Total that was malloced */
00485   uschar options;                     /* NOTE(review): presumably PCRE_STUDY_* flags such as PCRE_STUDY_MAPPED -- confirm */
00486   uschar start_bits[32];              /* 32 bytes = 256 bits; NOTE(review): presumably the map of starting chars -- confirm */
00487 } pcre_study_data;
00488 
00489 /* Structure for passing "static" information around between the functions
00490 doing the compiling, so that they are thread-safe. */
00491 
00492 typedef struct compile_data {
00493   const uschar *lcc;            /* Points to lower casing table */
00494   const uschar *fcc;            /* Points to case-flipping table */
00495   const uschar *cbits;          /* Points to character type table */
00496   const uschar *ctypes;         /* Points to table of type maps */
00497   const uschar *start_code;     /* The start of the compiled code */
00498   uschar *name_table;           /* The name/number table */
00499   int  names_found;             /* Number of entries so far (in name_table, presumably) */
00500   int  name_entry_size;         /* Size of each entry */
00501   int  top_backref;             /* Maximum back reference */
00502   unsigned int backref_map;     /* Bitmap of low back refs */
00503   int  req_varyopt;             /* "After variable item" flag for reqbyte */
00504 } compile_data;
00505 
00506 /* Structure for maintaining a chain of pointers to the currently incomplete
00507 branches, for testing for left recursion. */
00508 
00509 typedef struct branch_chain {
00510   struct branch_chain *outer;   /* next link of the chain; NOTE(review): presumably the enclosing branch, NULL at the outermost -- confirm */
00511   uschar *current;              /* NOTE(review): presumably the start of this incomplete branch in the code -- confirm */
00512 } branch_chain;
00513 
00514 /* Structure for items in a linked list that represents an explicit recursive
00515 call within the pattern. Items link through prevrec; see match_data.recursive. */
00516 
00517 typedef struct recursion_info {
00518   struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
00519   int group_num;                /* Number of group that was called */
00520   const uschar *after_call;     /* "Return value": points after the call in the expr */
00521   const uschar *save_start;     /* Old value of md->start_match */
00522   int *offset_save;             /* Pointer to start of saved offsets */
00523   int saved_max;                /* Number of saved offsets */
00524 } recursion_info;
00525 
00526 /* When compiling in a mode that doesn't use recursive calls to match(),
00527 a structure is used to remember local variables on the heap. It is defined in
00528 pcre.c, close to the match() function, so that it is easy to keep it in step
00529 with any changes of local variable. However, the pointer to the current frame
00530 must be saved in some "static" place over a longjmp(). NOTE: no such structure
00531 or frame pointer is declared here or in match_data below; that mode isn't used
00532 for a "normal" compilation of pcre. */
00533 
00534 /* Structure for passing "static" information around between the functions
00535 doing the matching, so that they are thread-safe. */
00536 
00537 typedef struct match_data {
00538   unsigned long int match_call_count; /* As it says */
00539   unsigned long int match_limit;/* As it says */
00540   int   *offset_vector;         /* Offset vector */
00541   int    offset_end;            /* One past the end */
00542   int    offset_max;            /* The maximum usable for return data */
00543   const uschar *lcc;            /* Points to lower casing table */
00544   const uschar *ctypes;         /* Points to table of type maps */
00545   bool   offset_overflow;       /* Set if too many extractions */
00546   bool   notbol;                /* NOTBOL flag */
00547   bool   noteol;                /* NOTEOL flag */
00548   bool   utf8;                  /* UTF8 flag */
00549   bool   endonly;               /* Dollar not before final \n */
00550   bool   notempty;              /* Empty string match not wanted */
00551   const uschar *start_code;     /* For use when recursing */
00552   const uschar *start_subject;  /* Start of the subject string */
00553   const uschar *end_subject;    /* End of the subject string */
00554   const uschar *start_match;    /* Start of this match attempt */
00555   const uschar *end_match_ptr;  /* Subject position at end match */
00556   int    end_offset_top;        /* Highwater mark at end of match */
00557   int    capture_last;          /* Most recent capture number */
00558   int    start_offset;          /* The start offset value */
00559   recursion_info *recursive;    /* Linked list of recursion data */
00560   void  *callout_data;          /* To pass back to callouts */
00561 } match_data;
00562 
00563 /* Bit definitions for entries in the pcre_ctypes table. */
00564 
00565 #define ctype_space   0x01
00566 #define ctype_letter  0x02
00567 #define ctype_digit   0x04
00568 #define ctype_xdigit  0x08
00569 #define ctype_word    0x10   /* alphameric or '_' */
00570 #define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
00571 
00572 /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
00573 of bits for a class map. Some classes are built by combining these tables.
00574 The offsets step by 32 because each map is 32 bytes long. */
00575 #define cbit_space     0      /* [:space:] or \s */
00576 #define cbit_xdigit   32      /* [:xdigit:] */
00577 #define cbit_digit    64      /* [:digit:] or \d */
00578 #define cbit_upper    96      /* [:upper:] */
00579 #define cbit_lower   128      /* [:lower:] */
00580 #define cbit_word    160      /* [:word:] or \w */
00581 #define cbit_graph   192      /* [:graph:] */
00582 #define cbit_print   224      /* [:print:] */
00583 #define cbit_punct   256      /* [:punct:] */
00584 #define cbit_cntrl   288      /* [:cntrl:] */
00585 #define cbit_length  320      /* Length of the cbits table: ten 32-byte maps */
00586 
00587 /* Offsets of the various tables from the base tables pointer, and
00588 total length. */
00589 
00590 #define lcc_offset      0 /* lower casing table, 256 bytes */
00591 #define fcc_offset    256 /* case flipping table, 256 bytes */
00592 #define cbits_offset  512 /* class bit maps, cbit_length bytes */
00593 #define ctypes_offset (cbits_offset + cbit_length) /* = 832 */
00594 #define tables_length (ctypes_offset + 256) /* = 1088 */
00595 
00596 /* End of internal.h */
00597 /* chartables.c */
00598 /*************************************************
00599 *      Perl-Compatible Regular Expressions       *
00600 *************************************************/
00601 
00602 /* This file is automatically written by the dftables auxiliary
00603 program. If you edit it by hand, you might like to edit the Makefile to
00604 prevent its ever being regenerated.
00605 
00606 This file is #included in the compilation of pcre.c to build the default
00607 character tables which are used when no tables are passed to the compile
00608 function. */
00609 
/* pcre_default_tables holds the four built-in tables back to back, in the
order given by the *_offset macros: lower casing (256 bytes), case flipping
(256 bytes), character class bit maps (ten maps of 32 bytes each) and
character types (256 bytes). The values below describe 7-bit ASCII only:
bytes 128-255 map to themselves and belong to no class. */
00610 static unsigned char pcre_default_tables[] = {
00611 
00612 /* This table is a lower casing table: entry c is the lower case form of
      byte c ('A'-'Z' map to 'a'-'z', everything else maps to itself). */
00613 
00614     0,  1,  2,  3,  4,  5,  6,  7,
00615     8,  9, 10, 11, 12, 13, 14, 15,
00616    16, 17, 18, 19, 20, 21, 22, 23,
00617    24, 25, 26, 27, 28, 29, 30, 31,
00618    32, 33, 34, 35, 36, 37, 38, 39,
00619    40, 41, 42, 43, 44, 45, 46, 47,
00620    48, 49, 50, 51, 52, 53, 54, 55,
00621    56, 57, 58, 59, 60, 61, 62, 63,
00622    64, 97, 98, 99,100,101,102,103,
00623   104,105,106,107,108,109,110,111,
00624   112,113,114,115,116,117,118,119,
00625   120,121,122, 91, 92, 93, 94, 95,
00626    96, 97, 98, 99,100,101,102,103,
00627   104,105,106,107,108,109,110,111,
00628   112,113,114,115,116,117,118,119,
00629   120,121,122,123,124,125,126,127,
00630   128,129,130,131,132,133,134,135,
00631   136,137,138,139,140,141,142,143,
00632   144,145,146,147,148,149,150,151,
00633   152,153,154,155,156,157,158,159,
00634   160,161,162,163,164,165,166,167,
00635   168,169,170,171,172,173,174,175,
00636   176,177,178,179,180,181,182,183,
00637   184,185,186,187,188,189,190,191,
00638   192,193,194,195,196,197,198,199,
00639   200,201,202,203,204,205,206,207,
00640   208,209,210,211,212,213,214,215,
00641   216,217,218,219,220,221,222,223,
00642   224,225,226,227,228,229,230,231,
00643   232,233,234,235,236,237,238,239,
00644   240,241,242,243,244,245,246,247,
00645   248,249,250,251,252,253,254,255,
00646 
00647 /* This table is a case flipping table: entry c is byte c with its case
      swapped ('A'-'Z' <-> 'a'-'z'); every other byte maps to itself. */
00648 
00649     0,  1,  2,  3,  4,  5,  6,  7,
00650     8,  9, 10, 11, 12, 13, 14, 15,
00651    16, 17, 18, 19, 20, 21, 22, 23,
00652    24, 25, 26, 27, 28, 29, 30, 31,
00653    32, 33, 34, 35, 36, 37, 38, 39,
00654    40, 41, 42, 43, 44, 45, 46, 47,
00655    48, 49, 50, 51, 52, 53, 54, 55,
00656    56, 57, 58, 59, 60, 61, 62, 63,
00657    64, 97, 98, 99,100,101,102,103,
00658   104,105,106,107,108,109,110,111,
00659   112,113,114,115,116,117,118,119,
00660   120,121,122, 91, 92, 93, 94, 95,
00661    96, 65, 66, 67, 68, 69, 70, 71,
00662    72, 73, 74, 75, 76, 77, 78, 79,
00663    80, 81, 82, 83, 84, 85, 86, 87,
00664    88, 89, 90,123,124,125,126,127,
00665   128,129,130,131,132,133,134,135,
00666   136,137,138,139,140,141,142,143,
00667   144,145,146,147,148,149,150,151,
00668   152,153,154,155,156,157,158,159,
00669   160,161,162,163,164,165,166,167,
00670   168,169,170,171,172,173,174,175,
00671   176,177,178,179,180,181,182,183,
00672   184,185,186,187,188,189,190,191,
00673   192,193,194,195,196,197,198,199,
00674   200,201,202,203,204,205,206,207,
00675   208,209,210,211,212,213,214,215,
00676   216,217,218,219,220,221,222,223,
00677   224,225,226,227,228,229,230,231,
00678   232,233,234,235,236,237,238,239,
00679   240,241,242,243,244,245,246,247,
00680   248,249,250,251,252,253,254,255,
00681 
00682 /* This table contains bit maps for various character classes.
00683 Each map is 32 bytes long and the bits run from the least
00684 significant end of each byte, so byte c is found in map byte c/8 at
      bit c&7. The classes that have their own
00685 maps are, in this order: space, xdigit, digit, upper, lower, word, graph,
00686 print, punct, and cntrl. Other classes are built from combinations. */
00687 
/* space: bytes 9-13 (TAB, LF, VT, FF, CR) and 32 (' ') */
00688   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
00689   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00690   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00691   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00692 
/* xdigit: '0'-'9', 'A'-'F', 'a'-'f' */
00693   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00694   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
00695   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00696   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00697 
/* digit: '0'-'9' */
00698   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00699   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00700   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00701   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00702 
/* upper: 'A'-'Z' */
00703   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00704   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
00705   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00706   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00707 
/* lower: 'a'-'z' */
00708   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00709   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
00710   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00711   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00712 
/* word: '0'-'9', 'A'-'Z', 'a'-'z' and '_' */
00713   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00714   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
00715   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00716   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00717 
/* graph: bytes 33-126 */
00718   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
00719   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
00720   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00721   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00722 
/* print: bytes 32-126 */
00723   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
00724   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
00725   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00726   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00727 
/* punct: bytes 33-47, 58-64, 91-96 and 123-126 */
00728   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
00729   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
00730   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00731   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00732 
/* cntrl: bytes 0-31 and 127 */
00733   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
00734   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
00735   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00736   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00737 
00738 /* This table identifies various classes of character by individual bits:
00739   0x01   white space character
00740   0x02   letter
00741   0x04   decimal digit
00742   0x08   hexadecimal digit
00743   0x10   alphanumeric or '_'
00744   0x80   regular expression metacharacter or binary zero
      Note that VT (byte 11) carries no white space bit here, although it is
      set in the "space" bit map above.
00745 */
00746 
00747   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
00748   0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
00749   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
00750   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
00751   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
00752   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
00753   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
00754   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
00755   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
00756   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
00757   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
00758   0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /*  X - _  */
00759   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
00760   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
00761   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
00762   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
00763   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
00764   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
00765   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
00766   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
00767   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
00768   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
00769   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
00770   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
00771   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
00772   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
00773   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
00774   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
00775   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
00776   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
00777   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
00778   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
00779 
00780 /* End of chartables.c */
00781 /* get.c */
00782 /*************************************************
00783 *      Copy captured string to given buffer      *
00784 *************************************************/
00785 
00786 /* This function copies a single captured substring into a given buffer.
00787 Note that we use memcpy() rather than strncpy() in case there are binary zeros
00788 in the string.
00789 
00790 Arguments:
00791   subject        the subject string that was matched
00792   ovector        pointer to the offsets table
00793   stringcount    the number of substrings that were captured
00794                    (i.e. the yield of the pcre_exec call, unless
00795                    that was zero, in which case it should be 1/3
00796                    of the offset table size)
00797   stringnumber   the number of the required substring
00798   buffer         where to put the substring
00799   size           the size of the buffer
00800 
00801 Returns:         if successful:
00802                    the length of the copied string, not including the zero
00803                    that is put on the end; can be zero
00804                  if not successful:
00805                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00806                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00807 */
00808 
00809 int
00810 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
00811   int stringnumber, char *buffer, int size)
00812 {
00813 int yield;
00814 if (stringnumber < 0 || stringnumber >= stringcount)
00815   return PCRE_ERROR_NOSUBSTRING;
00816 stringnumber *= 2;
00817 yield = ovector[stringnumber+1] - ovector[stringnumber];
00818 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
00819 memcpy(buffer, subject + ovector[stringnumber], yield);
00820 buffer[yield] = 0;
00821 return yield;
00822 }
00823 
00824 /* End of get.c */
00825 /* maketables.c */
00826 /*************************************************
00827 *           Create PCRE character tables         *
00828 *************************************************/
00829 
00830 /* This function builds a set of character tables for use by PCRE and returns
00831 a pointer to them. They are built using the ctype functions, and consequently
00832 their contents will depend upon the current locale setting. The store for the
00833 tables is obtained via malloc() and is not freed here, so releasing it is left
00834 to the caller.
00835 
00836 Arguments:   none
00837 Returns:     pointer to the contiguous block of data, or NULL if malloc() fails
00838 */
00839 
00840 const unsigned char *
00841 pcre_maketables(void)
00842 {
00843 unsigned char *yield, *p;
00844 int i;
00845 
00846 yield = static_cast<unsigned char*>(malloc(tables_length));
00847 
00848 if (yield == NULL) return NULL;
00849 p = yield;
00850 
00851 /* First comes the lower casing table */
00852 
00853 for (i = 0; i < 256; i++) *p++ = tolower(i);
00854 
00855 /* Next the case-flipping table */
00856 
00857 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
00858 
00859 /* Then the character class tables. Don't try to be clever and save effort
00860 on exclusive ones - in some locales things may be different. Note that the
00861 table for "space" includes everything "isspace" gives, including VT in the
00862 default locale. This makes it work for the POSIX class [:space:]. */
00863 
00864 memset(p, 0, cbit_length);
00865 for (i = 0; i < 256; i++)
00866   {
00867   if (isdigit(i))
00868     {
00869     p[cbit_digit  + i/8] |= 1 << (i&7);
00870     p[cbit_word   + i/8] |= 1 << (i&7);
00871     }
00872   if (isupper(i))
00873     {
00874     p[cbit_upper  + i/8] |= 1 << (i&7);
00875     p[cbit_word   + i/8] |= 1 << (i&7);
00876     }
00877   if (islower(i))
00878     {
00879     p[cbit_lower  + i/8] |= 1 << (i&7);
00880     p[cbit_word   + i/8] |= 1 << (i&7);
00881     }
00882   if (i == '_')   p[cbit_word   + i/8] |= 1 << (i&7);
00883   if (isspace(i)) p[cbit_space  + i/8] |= 1 << (i&7);
00884   if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
00885   if (isgraph(i)) p[cbit_graph  + i/8] |= 1 << (i&7);
00886   if (isprint(i)) p[cbit_print  + i/8] |= 1 << (i&7);
00887   if (ispunct(i)) p[cbit_punct  + i/8] |= 1 << (i&7);
00888   if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1 << (i&7);
00889   }
00890 p += cbit_length;
00891 
00892 /* Finally, the character type table. In this, we exclude VT from the white
00893 space chars, because Perl doesn't recognize it as such for \s and for comments
00894 within regexes. */
00895 
00896 for (i = 0; i < 256; i++)
00897   {
00898   int x = 0;
00899   if (i != 0x0b && isspace(i)) x += ctype_space;
00900   if (isalpha(i)) x += ctype_letter;
00901   if (isdigit(i)) x += ctype_digit;
00902   if (isxdigit(i)) x += ctype_xdigit;
00903   if (isalnum(i) || i == '_') x += ctype_word;
00904   if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
00905   *p++ = x;
00906   }
00907 
00908 return yield;
00909 }
00910 
00911 /* End of maketables.c */
00912 /* study.c */
00913 
00914 /*************************************************
00915 *      Set a bit and maybe its alternate case    *
00916 *************************************************/
00917 
00918 /* Given a character, set its bit in the table, and also the bit for the other
00919 version of a letter if we are caseless.
00920 
00921 Arguments:
00922   start_bits    points to the bit map
00923   c             is the character
00924   caseless      the caseless flag
00925   cd            the block with char table pointers
00926 
00927 Returns:        nothing
00928 */
00929 
00930 static void
00931 set_bit(uschar *start_bits, int c, bool caseless, compile_data *cd)
00932 {
00933 start_bits[c/8] |= (1 << (c&7));
00934 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
00935   start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
00936 }
00937 
00938 
00939 
00940 /*************************************************
00941 *          Create bitmap of starting chars       *
00942 *************************************************/
00943 
00944 /* This function scans a compiled unanchored expression and attempts to build a
00945 bitmap of the set of initial characters. If it can't, it returns false. As time
00946 goes by, we may be able to get more clever at doing this.
00947 
00948 Arguments:
00949   code         points to an expression
00950   start_bits   points to a 32-byte table, initialized to 0
00951   caseless     the current state of the caseless flag
00952   utf8         true if in UTF-8 mode
00953   cd           the block with char table pointers
00954 
00955 Returns:       true if table built, false otherwise
00956 */
00957 
00958 static bool
00959 set_start_bits(const uschar *code, uschar *start_bits, bool caseless,
00960   bool utf8, compile_data *cd)
00961 {
00962 register int c;
00963 
00964 /* This next statement and the later reference to dummy are here in order to
00965 trick the optimizer of the IBM C compiler for OS/2 into generating correct
00966 code. Apparently IBM isn't going to fix the problem, and we would rather not
00967 disable optimization (in this module it actually makes a big difference, and
00968 the pcre module can use all the optimization it can get). */
00969 
00970 volatile int dummy;
00971 
00972 do
00973   {
00974   const uschar *tcode = code + 1 + LINK_SIZE;
00975   bool try_next = true;
00976 
00977   while (try_next)
00978     {
00979     /* If a branch starts with a bracket or a positive lookahead assertion,
00980     recurse to set bits from within them. That's all for this branch. */
00981 
00982     if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
00983       {
00984       if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
00985         return false;
00986       try_next = false;
00987       }
00988 
00989     else switch(*tcode)
00990       {
00991       default:
00992       return false;
00993 
00994       /* Skip over callout */
00995 
00996       case OP_CALLOUT:
00997       tcode += 2;
00998       break;
00999 
01000       /* Skip over extended extraction bracket number */
01001 
01002       case OP_BRANUMBER:
01003       tcode += 3;
01004       break;
01005 
01006       /* Skip over lookbehind and negative l