basic_regex_parser.hpp 109 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE basic_regex_parser.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares template class basic_regex_parser.
  16. */
  17. #ifndef BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
  18. #define BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
  19. namespace boost{
  20. namespace BOOST_REGEX_DETAIL_NS{
  21. #ifdef BOOST_REGEX_MSVC
  22. #pragma warning(push)
  23. #pragma warning(disable:4244 4459)
  24. #if BOOST_REGEX_MSVC < 1910
  25. #pragma warning(disable:4800)
  26. #endif
  27. #endif
  // Tag-dispatch target chosen when std::intmax_t has fewer value bits than
  // std::size_t: the answer is the largest std::intmax_t.
  inline std::intmax_t umax(std::integral_constant<bool, false> const&)
  {
      // Escape hatch in case numeric_limits has no specialization for intmax_t:
      if (!std::numeric_limits<std::intmax_t>::is_specialized)
          return INT_MAX;
      return (std::numeric_limits<std::intmax_t>::max)();
  }
  // Tag-dispatch target chosen when std::intmax_t has at least as many value
  // bits as std::size_t: the answer is the largest std::size_t.
  inline std::intmax_t umax(std::integral_constant<bool, true> const&)
  {
      const std::size_t largest_size = (std::numeric_limits<std::size_t>::max)();
      return static_cast<std::intmax_t>(largest_size);
  }
  // Entry point: compares the digit counts of the two types at compile time
  // and forwards to the matching overload above.
  inline std::intmax_t umax()
  {
      typedef std::integral_constant<
          bool,
          std::numeric_limits<std::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits
      > size_t_fits_tag;
      return umax(size_t_fits_tag());
  }
  41. template <class charT, class traits>
  42. class basic_regex_parser : public basic_regex_creator<charT, traits>
  43. {
  44. public:
  45. basic_regex_parser(regex_data<charT, traits>* data);
  46. void parse(const charT* p1, const charT* p2, unsigned flags);
  47. void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
  48. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
  49. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
  50. {
  51. fail(error_code, position, message, position);
  52. }
  53. bool parse_all();
  54. bool parse_basic();
  55. bool parse_extended();
  56. bool parse_literal();
  57. bool parse_open_paren();
  58. bool parse_basic_escape();
  59. bool parse_extended_escape();
  60. bool parse_match_any();
  61. bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
  62. bool parse_repeat_range(bool isbasic);
  63. bool parse_alt();
  64. bool parse_set();
  65. bool parse_backref();
  66. void parse_set_literal(basic_char_set<charT, traits>& char_set);
  67. bool parse_inner_set(basic_char_set<charT, traits>& char_set);
  68. bool parse_QE();
  69. bool parse_perl_extension();
  70. bool parse_perl_verb();
  71. bool match_verb(const char*);
  72. bool add_emacs_code(bool negate);
  73. bool unwind_alts(std::ptrdiff_t last_paren_start);
  74. digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
  75. charT unescape_character();
  76. regex_constants::syntax_option_type parse_options();
  77. private:
  78. typedef bool (basic_regex_parser::*parser_proc_type)();
  79. typedef typename traits::string_type string_type;
  80. typedef typename traits::char_class_type char_class_type;
  81. parser_proc_type m_parser_proc; // the main parser to use
  82. const charT* m_base; // the start of the string being parsed
  83. const charT* m_end; // the end of the string being parsed
  84. const charT* m_position; // our current parser position
  85. unsigned m_mark_count; // how many sub-expressions we have
  86. int m_mark_reset; // used to indicate that we're inside a (?|...) block.
  87. unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
  88. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
  89. std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
  90. bool m_has_case_change; // true if somewhere in the current block the case has changed
  91. unsigned m_recursion_count; // How many times we've called parse_all.
  92. #if defined(BOOST_REGEX_MSVC) && defined(_M_IX86)
  93. // This is an ugly warning suppression workaround (for warnings *inside* std::vector
  94. // that can not otherwise be suppressed)...
  95. static_assert(sizeof(long) >= sizeof(void*), "Long isn't long enough!");
  96. std::vector<long> m_alt_jumps; // list of alternative in the current scope.
  97. #else
  98. std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
  99. #endif
  100. basic_regex_parser& operator=(const basic_regex_parser&);
  101. basic_regex_parser(const basic_regex_parser&);
  102. };
  103. template <class charT, class traits>
  104. basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
  105. : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
  106. m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
  107. {
  108. }
  109. template <class charT, class traits>
  110. void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
  111. {
  112. // pass l_flags on to base class:
  113. this->init(l_flags);
  114. // set up pointers:
  115. m_position = m_base = p1;
  116. m_end = p2;
  117. // empty strings are errors:
  118. if((p1 == p2) &&
  119. (
  120. ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
  121. || (l_flags & regbase::no_empty_expressions)
  122. )
  123. )
  124. {
  125. fail(regex_constants::error_empty, 0);
  126. return;
  127. }
  128. // select which parser to use:
  129. switch(l_flags & regbase::main_option_type)
  130. {
  131. case regbase::perl_syntax_group:
  132. {
  133. m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
  134. //
  135. // Add a leading paren with index zero to give recursions a target:
  136. //
  137. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  138. br->index = 0;
  139. br->icase = this->flags() & regbase::icase;
  140. break;
  141. }
  142. case regbase::basic_syntax_group:
  143. m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
  144. break;
  145. case regbase::literal:
  146. m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
  147. break;
  148. default:
  149. // Oops, someone has managed to set more than one of the main option flags,
  150. // so this must be an error:
  151. fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
  152. return;
  153. }
  154. // parse all our characters:
  155. bool result = parse_all();
  156. //
  157. // Unwind our alternatives:
  158. //
  159. unwind_alts(-1);
  160. // reset l_flags as a global scope (?imsx) may have altered them:
  161. this->flags(l_flags);
  162. // if we haven't gobbled up all the characters then we must
  163. // have had an unexpected ')' :
  164. if(!result)
  165. {
  166. fail(regex_constants::error_paren, std::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
  167. return;
  168. }
  169. // if an error has been set then give up now:
  170. if(this->m_pdata->m_status)
  171. return;
  172. // fill in our sub-expression count:
  173. this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
  174. this->finalize(p1, p2);
  175. }
  176. template <class charT, class traits>
  177. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
  178. {
  179. // get the error message:
  180. std::string message = this->m_pdata->m_ptraits->error_string(error_code);
  181. fail(error_code, position, message);
  182. }
  183. template <class charT, class traits>
  184. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
  185. {
  186. if(0 == this->m_pdata->m_status) // update the error code if not already set
  187. this->m_pdata->m_status = error_code;
  188. m_position = m_end; // don't bother parsing anything else
  189. //
  190. // Augment error message with the regular expression text:
  191. //
  192. if(start_pos == position)
  193. start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
  194. std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
  195. if(error_code != regex_constants::error_empty)
  196. {
  197. if((start_pos != 0) || (end_pos != (m_end - m_base)))
  198. message += " The error occurred while parsing the regular expression fragment: '";
  199. else
  200. message += " The error occurred while parsing the regular expression: '";
  201. if(start_pos != end_pos)
  202. {
  203. message += std::string(m_base + start_pos, m_base + position);
  204. message += ">>>HERE>>>";
  205. message += std::string(m_base + position, m_base + end_pos);
  206. }
  207. message += "'.";
  208. }
  209. #ifndef BOOST_NO_EXCEPTIONS
  210. if(0 == (this->flags() & regex_constants::no_except))
  211. {
  212. boost::regex_error e(message, error_code, position);
  213. e.raise();
  214. }
  215. #else
  216. (void)position; // suppress warnings.
  217. #endif
  218. }
  219. template <class charT, class traits>
  220. bool basic_regex_parser<charT, traits>::parse_all()
  221. {
  222. if (++m_recursion_count > 400)
  223. {
  224. // exceeded internal limits
  225. fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
  226. }
  227. bool result = true;
  228. while(result && (m_position != m_end))
  229. {
  230. result = (this->*m_parser_proc)();
  231. }
  232. --m_recursion_count;
  233. return result;
  234. }
  235. #ifdef BOOST_REGEX_MSVC
  236. #pragma warning(push)
  237. #pragma warning(disable:4702)
  238. #endif
  239. template <class charT, class traits>
  240. bool basic_regex_parser<charT, traits>::parse_basic()
  241. {
  242. switch(this->m_traits.syntax_type(*m_position))
  243. {
  244. case regex_constants::syntax_escape:
  245. return parse_basic_escape();
  246. case regex_constants::syntax_dot:
  247. return parse_match_any();
  248. case regex_constants::syntax_caret:
  249. ++m_position;
  250. this->append_state(syntax_element_start_line);
  251. break;
  252. case regex_constants::syntax_dollar:
  253. ++m_position;
  254. this->append_state(syntax_element_end_line);
  255. break;
  256. case regex_constants::syntax_star:
  257. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
  258. return parse_literal();
  259. else
  260. {
  261. ++m_position;
  262. return parse_repeat();
  263. }
  264. case regex_constants::syntax_plus:
  265. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  266. return parse_literal();
  267. else
  268. {
  269. ++m_position;
  270. return parse_repeat(1);
  271. }
  272. case regex_constants::syntax_question:
  273. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  274. return parse_literal();
  275. else
  276. {
  277. ++m_position;
  278. return parse_repeat(0, 1);
  279. }
  280. case regex_constants::syntax_open_set:
  281. return parse_set();
  282. case regex_constants::syntax_newline:
  283. if(this->flags() & regbase::newline_alt)
  284. return parse_alt();
  285. else
  286. return parse_literal();
  287. default:
  288. return parse_literal();
  289. }
  290. return true;
  291. }
  292. #ifdef BOOST_REGEX_MSVC
  293. # pragma warning(push)
  294. #if BOOST_REGEX_MSVC >= 1800
  295. #pragma warning(disable:26812)
  296. #endif
  297. #endif
  298. template <class charT, class traits>
  299. bool basic_regex_parser<charT, traits>::parse_extended()
  300. {
  301. bool result = true;
  302. switch(this->m_traits.syntax_type(*m_position))
  303. {
  304. case regex_constants::syntax_open_mark:
  305. return parse_open_paren();
  306. case regex_constants::syntax_close_mark:
  307. return false;
  308. case regex_constants::syntax_escape:
  309. return parse_extended_escape();
  310. case regex_constants::syntax_dot:
  311. return parse_match_any();
  312. case regex_constants::syntax_caret:
  313. ++m_position;
  314. this->append_state(
  315. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
  316. break;
  317. case regex_constants::syntax_dollar:
  318. ++m_position;
  319. this->append_state(
  320. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
  321. break;
  322. case regex_constants::syntax_star:
  323. if(m_position == this->m_base)
  324. {
  325. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
  326. return false;
  327. }
  328. ++m_position;
  329. return parse_repeat();
  330. case regex_constants::syntax_question:
  331. if(m_position == this->m_base)
  332. {
  333. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
  334. return false;
  335. }
  336. ++m_position;
  337. return parse_repeat(0,1);
  338. case regex_constants::syntax_plus:
  339. if(m_position == this->m_base)
  340. {
  341. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
  342. return false;
  343. }
  344. ++m_position;
  345. return parse_repeat(1);
  346. case regex_constants::syntax_open_brace:
  347. ++m_position;
  348. return parse_repeat_range(false);
  349. case regex_constants::syntax_close_brace:
  350. if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
  351. {
  352. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  353. return false;
  354. }
  355. result = parse_literal();
  356. break;
  357. case regex_constants::syntax_or:
  358. return parse_alt();
  359. case regex_constants::syntax_open_set:
  360. return parse_set();
  361. case regex_constants::syntax_newline:
  362. if(this->flags() & regbase::newline_alt)
  363. return parse_alt();
  364. else
  365. return parse_literal();
  366. case regex_constants::syntax_hash:
  367. //
  368. // If we have a mod_x flag set, then skip until
  369. // we get to a newline character:
  370. //
  371. if((this->flags()
  372. & (regbase::no_perl_ex|regbase::mod_x))
  373. == regbase::mod_x)
  374. {
  375. while((m_position != m_end) && !is_separator(*m_position++)){}
  376. return true;
  377. }
  378. BOOST_REGEX_FALLTHROUGH;
  379. default:
  380. result = parse_literal();
  381. break;
  382. }
  383. return result;
  384. }
  385. #ifdef BOOST_REGEX_MSVC
  386. # pragma warning(pop)
  387. #endif
  388. #ifdef BOOST_REGEX_MSVC
  389. #pragma warning(pop)
  390. #endif
  391. template <class charT, class traits>
  392. bool basic_regex_parser<charT, traits>::parse_literal()
  393. {
  394. // append this as a literal provided it's not a space character
  395. // or the perl option regbase::mod_x is not set:
  396. if(
  397. ((this->flags()
  398. & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
  399. != regbase::mod_x)
  400. || !this->m_traits.isctype(*m_position, this->m_mask_space))
  401. this->append_literal(*m_position);
  402. ++m_position;
  403. return true;
  404. }
  405. template <class charT, class traits>
  406. bool basic_regex_parser<charT, traits>::parse_open_paren()
  407. {
  408. //
  409. // skip the '(' and error check:
  410. //
  411. if(++m_position == m_end)
  412. {
  413. fail(regex_constants::error_paren, m_position - m_base);
  414. return false;
  415. }
  416. //
  417. // begin by checking for a perl-style (?...) extension:
  418. //
  419. if(
  420. ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
  421. || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  422. )
  423. {
  424. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  425. return parse_perl_extension();
  426. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
  427. return parse_perl_verb();
  428. }
  429. //
  430. // update our mark count, and append the required state:
  431. //
  432. unsigned markid = 0;
  433. if(0 == (this->flags() & regbase::nosubs))
  434. {
  435. markid = ++m_mark_count;
  436. if(this->flags() & regbase::save_subexpression_location)
  437. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
  438. }
  439. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  440. pb->index = markid;
  441. pb->icase = this->flags() & regbase::icase;
  442. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  443. // back up insertion point for alternations, and set new point:
  444. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  445. this->m_pdata->m_data.align();
  446. m_alt_insert_point = this->m_pdata->m_data.size();
  447. //
  448. // back up the current flags in case we have a nested (?imsx) group:
  449. //
  450. regex_constants::syntax_option_type opts = this->flags();
  451. bool old_case_change = m_has_case_change;
  452. m_has_case_change = false; // no changes to this scope as yet...
  453. //
  454. // Back up branch reset data in case we have a nested (?|...)
  455. //
  456. int mark_reset = m_mark_reset;
  457. m_mark_reset = -1;
  458. //
  459. // now recursively add more states, this will terminate when we get to a
  460. // matching ')' :
  461. //
  462. parse_all();
  463. //
  464. // Unwind pushed alternatives:
  465. //
  466. if(0 == unwind_alts(last_paren_start))
  467. return false;
  468. //
  469. // restore flags:
  470. //
  471. if(m_has_case_change)
  472. {
  473. // the case has changed in one or more of the alternatives
  474. // within the scoped (...) block: we have to add a state
  475. // to reset the case sensitivity:
  476. static_cast<re_case*>(
  477. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  478. )->icase = opts & regbase::icase;
  479. }
  480. this->flags(opts);
  481. m_has_case_change = old_case_change;
  482. //
  483. // restore branch reset:
  484. //
  485. m_mark_reset = mark_reset;
  486. //
  487. // we either have a ')' or we have run out of characters prematurely:
  488. //
  489. if(m_position == m_end)
  490. {
  491. this->fail(regex_constants::error_paren, std::distance(m_base, m_end));
  492. return false;
  493. }
  494. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  495. return false;
  496. if(markid && (this->flags() & regbase::save_subexpression_location))
  497. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
  498. ++m_position;
  499. //
  500. // append closing parenthesis state:
  501. //
  502. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  503. pb->index = markid;
  504. pb->icase = this->flags() & regbase::icase;
  505. this->m_paren_start = last_paren_start;
  506. //
  507. // restore the alternate insertion point:
  508. //
  509. this->m_alt_insert_point = last_alt_point;
  510. //
  511. // allow backrefs to this mark:
  512. //
  513. if(markid > 0)
  514. this->m_backrefs.set(markid);
  515. return true;
  516. }
  517. template <class charT, class traits>
  518. bool basic_regex_parser<charT, traits>::parse_basic_escape()
  519. {
  520. if(++m_position == m_end)
  521. {
  522. fail(regex_constants::error_paren, m_position - m_base);
  523. return false;
  524. }
  525. bool result = true;
  526. switch(this->m_traits.escape_syntax_type(*m_position))
  527. {
  528. case regex_constants::syntax_open_mark:
  529. return parse_open_paren();
  530. case regex_constants::syntax_close_mark:
  531. return false;
  532. case regex_constants::syntax_plus:
  533. if(this->flags() & regex_constants::bk_plus_qm)
  534. {
  535. ++m_position;
  536. return parse_repeat(1);
  537. }
  538. else
  539. return parse_literal();
  540. case regex_constants::syntax_question:
  541. if(this->flags() & regex_constants::bk_plus_qm)
  542. {
  543. ++m_position;
  544. return parse_repeat(0, 1);
  545. }
  546. else
  547. return parse_literal();
  548. case regex_constants::syntax_open_brace:
  549. if(this->flags() & regbase::no_intervals)
  550. return parse_literal();
  551. ++m_position;
  552. return parse_repeat_range(true);
  553. case regex_constants::syntax_close_brace:
  554. if(this->flags() & regbase::no_intervals)
  555. return parse_literal();
  556. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  557. return false;
  558. case regex_constants::syntax_or:
  559. if(this->flags() & regbase::bk_vbar)
  560. return parse_alt();
  561. else
  562. result = parse_literal();
  563. break;
  564. case regex_constants::syntax_digit:
  565. return parse_backref();
  566. case regex_constants::escape_type_start_buffer:
  567. if(this->flags() & regbase::emacs_ex)
  568. {
  569. ++m_position;
  570. this->append_state(syntax_element_buffer_start);
  571. }
  572. else
  573. result = parse_literal();
  574. break;
  575. case regex_constants::escape_type_end_buffer:
  576. if(this->flags() & regbase::emacs_ex)
  577. {
  578. ++m_position;
  579. this->append_state(syntax_element_buffer_end);
  580. }
  581. else
  582. result = parse_literal();
  583. break;
  584. case regex_constants::escape_type_word_assert:
  585. if(this->flags() & regbase::emacs_ex)
  586. {
  587. ++m_position;
  588. this->append_state(syntax_element_word_boundary);
  589. }
  590. else
  591. result = parse_literal();
  592. break;
  593. case regex_constants::escape_type_not_word_assert:
  594. if(this->flags() & regbase::emacs_ex)
  595. {
  596. ++m_position;
  597. this->append_state(syntax_element_within_word);
  598. }
  599. else
  600. result = parse_literal();
  601. break;
  602. case regex_constants::escape_type_left_word:
  603. if(this->flags() & regbase::emacs_ex)
  604. {
  605. ++m_position;
  606. this->append_state(syntax_element_word_start);
  607. }
  608. else
  609. result = parse_literal();
  610. break;
  611. case regex_constants::escape_type_right_word:
  612. if(this->flags() & regbase::emacs_ex)
  613. {
  614. ++m_position;
  615. this->append_state(syntax_element_word_end);
  616. }
  617. else
  618. result = parse_literal();
  619. break;
  620. default:
  621. if(this->flags() & regbase::emacs_ex)
  622. {
  623. bool negate = true;
  624. switch(*m_position)
  625. {
  626. case 'w':
  627. negate = false;
  628. BOOST_REGEX_FALLTHROUGH;
  629. case 'W':
  630. {
  631. basic_char_set<charT, traits> char_set;
  632. if(negate)
  633. char_set.negate();
  634. char_set.add_class(this->m_word_mask);
  635. if(0 == this->append_set(char_set))
  636. {
  637. fail(regex_constants::error_ctype, m_position - m_base);
  638. return false;
  639. }
  640. ++m_position;
  641. return true;
  642. }
  643. case 's':
  644. negate = false;
  645. BOOST_REGEX_FALLTHROUGH;
  646. case 'S':
  647. return add_emacs_code(negate);
  648. case 'c':
  649. case 'C':
  650. // not supported yet:
  651. fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
  652. return false;
  653. default:
  654. break;
  655. }
  656. }
  657. result = parse_literal();
  658. break;
  659. }
  660. return result;
  661. }
  662. template <class charT, class traits>
  663. bool basic_regex_parser<charT, traits>::parse_extended_escape()
  664. {
  665. ++m_position;
  666. if(m_position == m_end)
  667. {
  668. fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
  669. return false;
  670. }
  671. bool negate = false; // in case this is a character class escape: \w \d etc
  672. switch(this->m_traits.escape_syntax_type(*m_position))
  673. {
  674. case regex_constants::escape_type_not_class:
  675. negate = true;
  676. BOOST_REGEX_FALLTHROUGH;
  677. case regex_constants::escape_type_class:
  678. {
  679. escape_type_class_jump:
  680. typedef typename traits::char_class_type m_type;
  681. m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  682. if(m != 0)
  683. {
  684. basic_char_set<charT, traits> char_set;
  685. if(negate)
  686. char_set.negate();
  687. char_set.add_class(m);
  688. if(0 == this->append_set(char_set))
  689. {
  690. fail(regex_constants::error_ctype, m_position - m_base);
  691. return false;
  692. }
  693. ++m_position;
  694. return true;
  695. }
  696. //
  697. // not a class, just a regular unknown escape:
  698. //
  699. this->append_literal(unescape_character());
  700. break;
  701. }
  702. case regex_constants::syntax_digit:
  703. return parse_backref();
  704. case regex_constants::escape_type_left_word:
  705. ++m_position;
  706. this->append_state(syntax_element_word_start);
  707. break;
  708. case regex_constants::escape_type_right_word:
  709. ++m_position;
  710. this->append_state(syntax_element_word_end);
  711. break;
  712. case regex_constants::escape_type_start_buffer:
  713. ++m_position;
  714. this->append_state(syntax_element_buffer_start);
  715. break;
  716. case regex_constants::escape_type_end_buffer:
  717. ++m_position;
  718. this->append_state(syntax_element_buffer_end);
  719. break;
  720. case regex_constants::escape_type_word_assert:
  721. ++m_position;
  722. this->append_state(syntax_element_word_boundary);
  723. break;
  724. case regex_constants::escape_type_not_word_assert:
  725. ++m_position;
  726. this->append_state(syntax_element_within_word);
  727. break;
  728. case regex_constants::escape_type_Z:
  729. ++m_position;
  730. this->append_state(syntax_element_soft_buffer_end);
  731. break;
  732. case regex_constants::escape_type_Q:
  733. return parse_QE();
  734. case regex_constants::escape_type_C:
  735. return parse_match_any();
  736. case regex_constants::escape_type_X:
  737. ++m_position;
  738. this->append_state(syntax_element_combining);
  739. break;
  740. case regex_constants::escape_type_G:
  741. ++m_position;
  742. this->append_state(syntax_element_restart_continue);
  743. break;
  744. case regex_constants::escape_type_not_property:
  745. negate = true;
  746. BOOST_REGEX_FALLTHROUGH;
  747. case regex_constants::escape_type_property:
  748. {
  749. ++m_position;
  750. char_class_type m;
  751. if(m_position == m_end)
  752. {
  753. fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
  754. return false;
  755. }
  756. // maybe have \p{ddd}
  757. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  758. {
  759. const charT* base = m_position;
  760. // skip forward until we find enclosing brace:
  761. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  762. ++m_position;
  763. if(m_position == m_end)
  764. {
  765. fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
  766. return false;
  767. }
  768. m = this->m_traits.lookup_classname(++base, m_position++);
  769. }
  770. else
  771. {
  772. m = this->m_traits.lookup_classname(m_position, m_position+1);
  773. ++m_position;
  774. }
  775. if(m != 0)
  776. {
  777. basic_char_set<charT, traits> char_set;
  778. if(negate)
  779. char_set.negate();
  780. char_set.add_class(m);
  781. if(0 == this->append_set(char_set))
  782. {
  783. fail(regex_constants::error_ctype, m_position - m_base);
  784. return false;
  785. }
  786. return true;
  787. }
  788. fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
  789. return false;
  790. }
  791. case regex_constants::escape_type_reset_start_mark:
  792. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  793. {
  794. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  795. pb->index = -5;
  796. pb->icase = this->flags() & regbase::icase;
  797. this->m_pdata->m_data.align();
  798. ++m_position;
  799. return true;
  800. }
  801. goto escape_type_class_jump;
  802. case regex_constants::escape_type_line_ending:
  803. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  804. {
  805. const charT* e = get_escape_R_string<charT>();
  806. const charT* old_position = m_position;
  807. const charT* old_end = m_end;
  808. const charT* old_base = m_base;
  809. m_position = e;
  810. m_base = e;
  811. m_end = e + traits::length(e);
  812. bool r = parse_all();
  813. m_position = ++old_position;
  814. m_end = old_end;
  815. m_base = old_base;
  816. return r;
  817. }
  818. goto escape_type_class_jump;
  819. case regex_constants::escape_type_extended_backref:
  820. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  821. {
  822. bool have_brace = false;
  823. bool negative = false;
  824. static const char incomplete_message[] = "Incomplete \\g escape found.";
  825. if(++m_position == m_end)
  826. {
  827. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  828. return false;
  829. }
  830. // maybe have \g{ddd}
  831. regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
  832. regex_constants::syntax_type syn_end = 0;
  833. if((syn == regex_constants::syntax_open_brace)
  834. || (syn == regex_constants::escape_type_left_word)
  835. || (syn == regex_constants::escape_type_end_buffer))
  836. {
  837. if(++m_position == m_end)
  838. {
  839. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  840. return false;
  841. }
  842. have_brace = true;
  843. switch(syn)
  844. {
  845. case regex_constants::syntax_open_brace:
  846. syn_end = regex_constants::syntax_close_brace;
  847. break;
  848. case regex_constants::escape_type_left_word:
  849. syn_end = regex_constants::escape_type_right_word;
  850. break;
  851. default:
  852. syn_end = regex_constants::escape_type_end_buffer;
  853. break;
  854. }
  855. }
  856. negative = (*m_position == static_cast<charT>('-'));
  857. if((negative) && (++m_position == m_end))
  858. {
  859. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  860. return false;
  861. }
  862. const charT* pc = m_position;
  863. std::intmax_t i = this->m_traits.toi(pc, m_end, 10);
  864. if((i < 0) && syn_end)
  865. {
  866. // Check for a named capture, get the leftmost one if there is more than one:
  867. const charT* base = m_position;
  868. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
  869. {
  870. ++m_position;
  871. }
  872. i = hash_value_from_capture_name(base, m_position);
  873. pc = m_position;
  874. }
  875. if(negative)
  876. i = 1 + (static_cast<std::intmax_t>(m_mark_count) - i);
  877. if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test((std::size_t)i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id((int)i)))))
  878. {
  879. m_position = pc;
  880. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  881. pb->index = (int)i;
  882. pb->icase = this->flags() & regbase::icase;
  883. }
  884. else
  885. {
  886. fail(regex_constants::error_backref, m_position - m_base);
  887. return false;
  888. }
  889. m_position = pc;
  890. if(have_brace)
  891. {
  892. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
  893. {
  894. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  895. return false;
  896. }
  897. ++m_position;
  898. }
  899. return true;
  900. }
  901. goto escape_type_class_jump;
  902. case regex_constants::escape_type_control_v:
  903. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  904. goto escape_type_class_jump;
  905. BOOST_REGEX_FALLTHROUGH;
  906. default:
  907. this->append_literal(unescape_character());
  908. break;
  909. }
  910. return true;
  911. }
  912. template <class charT, class traits>
  913. bool basic_regex_parser<charT, traits>::parse_match_any()
  914. {
  915. //
  916. // we have a '.' that can match any character:
  917. //
  918. ++m_position;
  919. static_cast<re_dot*>(
  920. this->append_state(syntax_element_wild, sizeof(re_dot))
  921. )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
  922. ? BOOST_REGEX_DETAIL_NS::force_not_newline
  923. : this->flags() & regbase::mod_s ?
  924. BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
  925. return true;
  926. }
  927. template <class charT, class traits>
  928. bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
  929. {
  930. bool greedy = true;
  931. bool possessive = false;
  932. std::size_t insert_point;
  933. //
  934. // when we get to here we may have a non-greedy ? mark still to come:
  935. //
  936. if((m_position != m_end)
  937. && (
  938. (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  939. || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
  940. )
  941. )
  942. {
  943. // OK we have a perl or emacs regex, check for a '?':
  944. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  945. {
  946. // whitespace skip:
  947. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  948. ++m_position;
  949. }
  950. if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
  951. {
  952. greedy = false;
  953. ++m_position;
  954. }
  955. // for perl regexes only check for possessive ++ repeats.
  956. if((m_position != m_end)
  957. && (0 == (this->flags() & regbase::main_option_type))
  958. && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
  959. {
  960. possessive = true;
  961. ++m_position;
  962. }
  963. }
  964. if(0 == this->m_last_state)
  965. {
  966. fail(regex_constants::error_badrepeat, std::distance(m_base, m_position), "Nothing to repeat.");
  967. return false;
  968. }
  969. if(this->m_last_state->type == syntax_element_endmark)
  970. {
  971. // insert a repeat before the '(' matching the last ')':
  972. insert_point = this->m_paren_start;
  973. }
  974. else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
  975. {
  976. // the last state was a literal with more than one character, split it in two:
  977. re_literal* lit = static_cast<re_literal*>(this->m_last_state);
  978. charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
  979. lit->length -= 1;
  980. // now append new state:
  981. lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
  982. lit->length = 1;
  983. (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
  984. insert_point = this->getoffset(this->m_last_state);
  985. }
  986. else
  987. {
  988. // repeat the last state whatever it was, need to add some error checking here:
  989. switch(this->m_last_state->type)
  990. {
  991. case syntax_element_start_line:
  992. case syntax_element_end_line:
  993. case syntax_element_word_boundary:
  994. case syntax_element_within_word:
  995. case syntax_element_word_start:
  996. case syntax_element_word_end:
  997. case syntax_element_buffer_start:
  998. case syntax_element_buffer_end:
  999. case syntax_element_alt:
  1000. case syntax_element_soft_buffer_end:
  1001. case syntax_element_restart_continue:
  1002. case syntax_element_jump:
  1003. case syntax_element_startmark:
  1004. case syntax_element_backstep:
  1005. // can't legally repeat any of the above:
  1006. fail(regex_constants::error_badrepeat, m_position - m_base);
  1007. return false;
  1008. default:
  1009. // do nothing...
  1010. break;
  1011. }
  1012. insert_point = this->getoffset(this->m_last_state);
  1013. }
  1014. //
  1015. // OK we now know what to repeat, so insert the repeat around it:
  1016. //
  1017. re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
  1018. rep->min = low;
  1019. rep->max = high;
  1020. rep->greedy = greedy;
  1021. rep->leading = false;
  1022. // store our repeater position for later:
  1023. std::ptrdiff_t rep_off = this->getoffset(rep);
  1024. // and append a back jump to the repeat:
  1025. re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1026. jmp->alt.i = rep_off - this->getoffset(jmp);
  1027. this->m_pdata->m_data.align();
  1028. // now fill in the alt jump for the repeat:
  1029. rep = static_cast<re_repeat*>(this->getaddress(rep_off));
  1030. rep->alt.i = this->m_pdata->m_data.size() - rep_off;
  1031. //
  1032. // If the repeat is possessive then bracket the repeat with a (?>...)
  1033. // independent sub-expression construct:
  1034. //
  1035. if(possessive)
  1036. {
  1037. if(m_position != m_end)
  1038. {
  1039. //
  1040. // Check for illegal following quantifier, we have to do this here, because
  1041. // the extra states we insert below circumvents our usual error checking :-(
  1042. //
  1043. bool contin = false;
  1044. do
  1045. {
  1046. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  1047. {
  1048. // whitespace skip:
  1049. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1050. ++m_position;
  1051. }
  1052. if (m_position != m_end)
  1053. {
  1054. switch (this->m_traits.syntax_type(*m_position))
  1055. {
  1056. case regex_constants::syntax_star:
  1057. case regex_constants::syntax_plus:
  1058. case regex_constants::syntax_question:
  1059. case regex_constants::syntax_open_brace:
  1060. fail(regex_constants::error_badrepeat, m_position - m_base);
  1061. return false;
  1062. case regex_constants::syntax_open_mark:
  1063. // Do we have a comment? If so we need to skip it here...
  1064. if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
  1065. && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
  1066. {
  1067. while ((m_position != m_end)
  1068. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
  1069. }
  1070. contin = true;
  1071. }
  1072. else
  1073. contin = false;
  1074. }
  1075. }
  1076. else
  1077. contin = false;
  1078. } while (contin);
  1079. }
  1080. re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
  1081. pb->index = -3;
  1082. pb->icase = this->flags() & regbase::icase;
  1083. jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
  1084. this->m_pdata->m_data.align();
  1085. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  1086. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  1087. pb->index = -3;
  1088. pb->icase = this->flags() & regbase::icase;
  1089. }
  1090. return true;
  1091. }
  1092. template <class charT, class traits>
  1093. bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
  1094. {
  1095. static const char incomplete_message[] = "Missing } in quantified repetition.";
  1096. //
  1097. // parse a repeat-range:
  1098. //
  1099. std::size_t min, max;
  1100. std::intmax_t v;
  1101. // skip whitespace:
  1102. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1103. ++m_position;
  1104. if(this->m_position == this->m_end)
  1105. {
  1106. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1107. {
  1108. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1109. return false;
  1110. }
  1111. // Treat the opening '{' as a literal character, rewind to start of error:
  1112. --m_position;
  1113. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1114. return parse_literal();
  1115. }
  1116. // get min:
  1117. v = this->m_traits.toi(m_position, m_end, 10);
  1118. // skip whitespace:
  1119. if((v < 0) || (v > umax()))
  1120. {
  1121. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1122. {
  1123. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1124. return false;
  1125. }
  1126. // Treat the opening '{' as a literal character, rewind to start of error:
  1127. --m_position;
  1128. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1129. return parse_literal();
  1130. }
  1131. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1132. ++m_position;
  1133. if(this->m_position == this->m_end)
  1134. {
  1135. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1136. {
  1137. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1138. return false;
  1139. }
  1140. // Treat the opening '{' as a literal character, rewind to start of error:
  1141. --m_position;
  1142. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1143. return parse_literal();
  1144. }
  1145. min = static_cast<std::size_t>(v);
  1146. // see if we have a comma:
  1147. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
  1148. {
  1149. // move on and error check:
  1150. ++m_position;
  1151. // skip whitespace:
  1152. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1153. ++m_position;
  1154. if(this->m_position == this->m_end)
  1155. {
  1156. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1157. {
  1158. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1159. return false;
  1160. }
  1161. // Treat the opening '{' as a literal character, rewind to start of error:
  1162. --m_position;
  1163. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1164. return parse_literal();
  1165. }
  1166. // get the value if any:
  1167. v = this->m_traits.toi(m_position, m_end, 10);
  1168. max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
  1169. }
  1170. else
  1171. {
  1172. // no comma, max = min:
  1173. max = min;
  1174. }
  1175. // skip whitespace:
  1176. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1177. ++m_position;
  1178. // OK now check trailing }:
  1179. if(this->m_position == this->m_end)
  1180. {
  1181. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1182. {
  1183. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1184. return false;
  1185. }
  1186. // Treat the opening '{' as a literal character, rewind to start of error:
  1187. --m_position;
  1188. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1189. return parse_literal();
  1190. }
  1191. if(isbasic)
  1192. {
  1193. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
  1194. {
  1195. ++m_position;
  1196. if(this->m_position == this->m_end)
  1197. {
  1198. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1199. return false;
  1200. }
  1201. }
  1202. else
  1203. {
  1204. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1205. return false;
  1206. }
  1207. }
  1208. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
  1209. ++m_position;
  1210. else
  1211. {
  1212. // Treat the opening '{' as a literal character, rewind to start of error:
  1213. --m_position;
  1214. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1215. return parse_literal();
  1216. }
  1217. //
  1218. // finally go and add the repeat, unless error:
  1219. //
  1220. if(min > max)
  1221. {
  1222. // Backtrack to error location:
  1223. m_position -= 2;
  1224. while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
  1225. ++m_position;
  1226. fail(regex_constants::error_badbrace, m_position - m_base);
  1227. return false;
  1228. }
  1229. return parse_repeat(min, max);
  1230. }
  1231. template <class charT, class traits>
  1232. bool basic_regex_parser<charT, traits>::parse_alt()
  1233. {
  1234. //
  1235. // error check: if there have been no previous states,
  1236. // or if the last state was a '(' then error:
  1237. //
  1238. if(
  1239. ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
  1240. &&
  1241. !(
  1242. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  1243. &&
  1244. ((this->flags() & regbase::no_empty_expressions) == 0)
  1245. )
  1246. )
  1247. {
  1248. fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
  1249. return false;
  1250. }
  1251. //
  1252. // Reset mark count if required:
  1253. //
  1254. if(m_max_mark < m_mark_count)
  1255. m_max_mark = m_mark_count;
  1256. if(m_mark_reset >= 0)
  1257. m_mark_count = m_mark_reset;
  1258. ++m_position;
  1259. //
  1260. // we need to append a trailing jump:
  1261. //
  1262. re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
  1263. std::ptrdiff_t jump_offset = this->getoffset(pj);
  1264. //
  1265. // now insert the alternative:
  1266. //
  1267. re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
  1268. jump_offset += re_alt_size;
  1269. this->m_pdata->m_data.align();
  1270. palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
  1271. //
  1272. // update m_alt_insert_point so that the next alternate gets
  1273. // inserted at the start of the second of the two we've just created:
  1274. //
  1275. this->m_alt_insert_point = this->m_pdata->m_data.size();
  1276. //
  1277. // the start of this alternative must have a case changes state
  1278. // if the current block has messed around with case changes:
  1279. //
  1280. if(m_has_case_change)
  1281. {
  1282. static_cast<re_case*>(
  1283. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1284. )->icase = this->m_icase;
  1285. }
  1286. //
  1287. // push the alternative onto our stack, a recursive
  1288. // implementation here is easier to understand (and faster
  1289. // as it happens), but causes all kinds of stack overflow problems
  1290. // on programs with small stacks (COM+).
  1291. //
  1292. m_alt_jumps.push_back(jump_offset);
  1293. return true;
  1294. }
  1295. template <class charT, class traits>
  1296. bool basic_regex_parser<charT, traits>::parse_set()
  1297. {
  1298. static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1299. ++m_position;
  1300. if(m_position == m_end)
  1301. {
  1302. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1303. return false;
  1304. }
  1305. basic_char_set<charT, traits> char_set;
  1306. const charT* base = m_position; // where the '[' was
  1307. const charT* item_base = m_position; // where the '[' or '^' was
  1308. while(m_position != m_end)
  1309. {
  1310. switch(this->m_traits.syntax_type(*m_position))
  1311. {
  1312. case regex_constants::syntax_caret:
  1313. if(m_position == base)
  1314. {
  1315. char_set.negate();
  1316. ++m_position;
  1317. item_base = m_position;
  1318. }
  1319. else
  1320. parse_set_literal(char_set);
  1321. break;
  1322. case regex_constants::syntax_close_set:
  1323. if(m_position == item_base)
  1324. {
  1325. parse_set_literal(char_set);
  1326. break;
  1327. }
  1328. else
  1329. {
  1330. ++m_position;
  1331. if(0 == this->append_set(char_set))
  1332. {
  1333. fail(regex_constants::error_ctype, m_position - m_base);
  1334. return false;
  1335. }
  1336. }
  1337. return true;
  1338. case regex_constants::syntax_open_set:
  1339. if(parse_inner_set(char_set))
  1340. break;
  1341. return true;
  1342. case regex_constants::syntax_escape:
  1343. {
  1344. //
  1345. // look ahead and see if this is a character class shortcut
  1346. // \d \w \s etc...
  1347. //
  1348. ++m_position;
  1349. if(this->m_traits.escape_syntax_type(*m_position)
  1350. == regex_constants::escape_type_class)
  1351. {
  1352. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1353. if(m != 0)
  1354. {
  1355. char_set.add_class(m);
  1356. ++m_position;
  1357. break;
  1358. }
  1359. }
  1360. else if(this->m_traits.escape_syntax_type(*m_position)
  1361. == regex_constants::escape_type_not_class)
  1362. {
  1363. // negated character class:
  1364. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1365. if(m != 0)
  1366. {
  1367. char_set.add_negated_class(m);
  1368. ++m_position;
  1369. break;
  1370. }
  1371. }
  1372. // not a character class, just a regular escape:
  1373. --m_position;
  1374. parse_set_literal(char_set);
  1375. break;
  1376. }
  1377. default:
  1378. parse_set_literal(char_set);
  1379. break;
  1380. }
  1381. }
  1382. return m_position != m_end;
  1383. }
  1384. template <class charT, class traits>
  1385. bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
  1386. {
  1387. static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1388. //
  1389. // we have either a character class [:name:]
  1390. // a collating element [.name.]
  1391. // or an equivalence class [=name=]
  1392. //
  1393. if(m_end == ++m_position)
  1394. {
  1395. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1396. return false;
  1397. }
  1398. switch(this->m_traits.syntax_type(*m_position))
  1399. {
  1400. case regex_constants::syntax_dot:
  1401. //
  1402. // a collating element is treated as a literal:
  1403. //
  1404. --m_position;
  1405. parse_set_literal(char_set);
  1406. return true;
  1407. case regex_constants::syntax_colon:
  1408. {
  1409. // check that character classes are actually enabled:
  1410. if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
  1411. == (regbase::basic_syntax_group | regbase::no_char_classes))
  1412. {
  1413. --m_position;
  1414. parse_set_literal(char_set);
  1415. return true;
  1416. }
  1417. // skip the ':'
  1418. if(m_end == ++m_position)
  1419. {
  1420. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1421. return false;
  1422. }
  1423. const charT* name_first = m_position;
  1424. // skip at least one character, then find the matching ':]'
  1425. if(m_end == ++m_position)
  1426. {
  1427. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1428. return false;
  1429. }
  1430. while((m_position != m_end)
  1431. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
  1432. ++m_position;
  1433. const charT* name_last = m_position;
  1434. if(m_end == m_position)
  1435. {
  1436. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1437. return false;
  1438. }
  1439. if((m_end == ++m_position)
  1440. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1441. {
  1442. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1443. return false;
  1444. }
  1445. //
  1446. // check for negated class:
  1447. //
  1448. bool negated = false;
  1449. if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
  1450. {
  1451. ++name_first;
  1452. negated = true;
  1453. }
  1454. typedef typename traits::char_class_type m_type;
  1455. m_type m = this->m_traits.lookup_classname(name_first, name_last);
  1456. if(m == 0)
  1457. {
  1458. if(char_set.empty() && (name_last - name_first == 1))
  1459. {
  1460. // maybe a special case:
  1461. ++m_position;
  1462. if( (m_position != m_end)
  1463. && (this->m_traits.syntax_type(*m_position)
  1464. == regex_constants::syntax_close_set))
  1465. {
  1466. if(this->m_traits.escape_syntax_type(*name_first)
  1467. == regex_constants::escape_type_left_word)
  1468. {
  1469. ++m_position;
  1470. this->append_state(syntax_element_word_start);
  1471. return false;
  1472. }
  1473. if(this->m_traits.escape_syntax_type(*name_first)
  1474. == regex_constants::escape_type_right_word)
  1475. {
  1476. ++m_position;
  1477. this->append_state(syntax_element_word_end);
  1478. return false;
  1479. }
  1480. }
  1481. }
  1482. fail(regex_constants::error_ctype, name_first - m_base);
  1483. return false;
  1484. }
  1485. if(!negated)
  1486. char_set.add_class(m);
  1487. else
  1488. char_set.add_negated_class(m);
  1489. ++m_position;
  1490. break;
  1491. }
  1492. case regex_constants::syntax_equal:
  1493. {
  1494. // skip the '='
  1495. if(m_end == ++m_position)
  1496. {
  1497. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1498. return false;
  1499. }
  1500. const charT* name_first = m_position;
  1501. // skip at least one character, then find the matching '=]'
  1502. if(m_end == ++m_position)
  1503. {
  1504. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1505. return false;
  1506. }
  1507. while((m_position != m_end)
  1508. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
  1509. ++m_position;
  1510. const charT* name_last = m_position;
  1511. if(m_end == m_position)
  1512. {
  1513. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1514. return false;
  1515. }
  1516. if((m_end == ++m_position)
  1517. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1518. {
  1519. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1520. return false;
  1521. }
  1522. string_type m = this->m_traits.lookup_collatename(name_first, name_last);
  1523. if(m.empty() || (m.size() > 2))
  1524. {
  1525. fail(regex_constants::error_collate, name_first - m_base);
  1526. return false;
  1527. }
  1528. digraph<charT> d;
  1529. d.first = m[0];
  1530. if(m.size() > 1)
  1531. d.second = m[1];
  1532. else
  1533. d.second = 0;
  1534. char_set.add_equivalent(d);
  1535. ++m_position;
  1536. break;
  1537. }
  1538. default:
  1539. --m_position;
  1540. parse_set_literal(char_set);
  1541. break;
  1542. }
  1543. return true;
  1544. }
  1545. template <class charT, class traits>
  1546. void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
  1547. {
  1548. digraph<charT> start_range(get_next_set_literal(char_set));
  1549. if(m_end == m_position)
  1550. {
  1551. fail(regex_constants::error_brack, m_position - m_base);
  1552. return;
  1553. }
  1554. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1555. {
  1556. // we have a range:
  1557. if(m_end == ++m_position)
  1558. {
  1559. fail(regex_constants::error_brack, m_position - m_base);
  1560. return;
  1561. }
  1562. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
  1563. {
  1564. digraph<charT> end_range = get_next_set_literal(char_set);
  1565. char_set.add_range(start_range, end_range);
  1566. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1567. {
  1568. if(m_end == ++m_position)
  1569. {
  1570. fail(regex_constants::error_brack, m_position - m_base);
  1571. return;
  1572. }
  1573. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
  1574. {
  1575. // trailing - :
  1576. --m_position;
  1577. return;
  1578. }
  1579. fail(regex_constants::error_range, m_position - m_base);
  1580. return;
  1581. }
  1582. return;
  1583. }
  1584. --m_position;
  1585. }
  1586. char_set.add_single(start_range);
  1587. }
  1588. template <class charT, class traits>
  1589. digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
  1590. {
  1591. digraph<charT> result;
  1592. switch(this->m_traits.syntax_type(*m_position))
  1593. {
  1594. case regex_constants::syntax_dash:
  1595. if(!char_set.empty())
  1596. {
  1597. // see if we are at the end of the set:
  1598. if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1599. {
  1600. fail(regex_constants::error_range, m_position - m_base);
  1601. return result;
  1602. }
  1603. --m_position;
  1604. }
  1605. result.first = *m_position++;
  1606. return result;
  1607. case regex_constants::syntax_escape:
  1608. // check to see if escapes are supported first:
  1609. if(this->flags() & regex_constants::no_escape_in_lists)
  1610. {
  1611. result = *m_position++;
  1612. break;
  1613. }
  1614. ++m_position;
  1615. result = unescape_character();
  1616. break;
  1617. case regex_constants::syntax_open_set:
  1618. {
  1619. if(m_end == ++m_position)
  1620. {
  1621. fail(regex_constants::error_collate, m_position - m_base);
  1622. return result;
  1623. }
  1624. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
  1625. {
  1626. --m_position;
  1627. result.first = *m_position;
  1628. ++m_position;
  1629. return result;
  1630. }
  1631. if(m_end == ++m_position)
  1632. {
  1633. fail(regex_constants::error_collate, m_position - m_base);
  1634. return result;
  1635. }
  1636. const charT* name_first = m_position;
  1637. // skip at least one character, then find the matching ':]'
  1638. if(m_end == ++m_position)
  1639. {
  1640. fail(regex_constants::error_collate, name_first - m_base);
  1641. return result;
  1642. }
  1643. while((m_position != m_end)
  1644. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
  1645. ++m_position;
  1646. const charT* name_last = m_position;
  1647. if(m_end == m_position)
  1648. {
  1649. fail(regex_constants::error_collate, name_first - m_base);
  1650. return result;
  1651. }
  1652. if((m_end == ++m_position)
  1653. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1654. {
  1655. fail(regex_constants::error_collate, name_first - m_base);
  1656. return result;
  1657. }
  1658. ++m_position;
  1659. string_type s = this->m_traits.lookup_collatename(name_first, name_last);
  1660. if(s.empty() || (s.size() > 2))
  1661. {
  1662. fail(regex_constants::error_collate, name_first - m_base);
  1663. return result;
  1664. }
  1665. result.first = s[0];
  1666. if(s.size() > 1)
  1667. result.second = s[1];
  1668. else
  1669. result.second = 0;
  1670. return result;
  1671. }
  1672. default:
  1673. result = *m_position++;
  1674. }
  1675. return result;
  1676. }
  //
  // Does the value v fit in the character type charT?
  //
  // The two tagged overloads are selected by the dispatcher at the bottom,
  // depending on whether charT is narrower than std::intmax_t.
  //
  template <class charT>
  bool valid_value(charT, std::intmax_t v, const std::integral_constant<bool, true>&)
  {
     // charT is narrower than intmax_t: v is accepted when nothing is left
     // after shifting out as many bits as charT holds.
     const std::intmax_t excess = v >> (sizeof(charT) * CHAR_BIT);
     return excess == 0;
  }
  template <class charT>
  bool valid_value(charT, std::intmax_t, const std::integral_constant<bool, false>&)
  {
     // charT is at least as wide as intmax_t, so v will always fit
     return true;
  }
  template <class charT>
  bool valid_value(charT c, std::intmax_t v)
  {
     typedef std::integral_constant<bool, (sizeof(charT) < sizeof(std::intmax_t))> is_narrower;
     return valid_value(c, v, is_narrower());
  }
  1695. template <class charT, class traits>
  1696. charT basic_regex_parser<charT, traits>::unescape_character()
  1697. {
  1698. #ifdef BOOST_REGEX_MSVC
  1699. #pragma warning(push)
  1700. #pragma warning(disable:4127)
  1701. #endif
  1702. charT result(0);
  1703. if(m_position == m_end)
  1704. {
  1705. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
  1706. return false;
  1707. }
  1708. switch(this->m_traits.escape_syntax_type(*m_position))
  1709. {
  1710. case regex_constants::escape_type_control_a:
  1711. result = charT('\a');
  1712. break;
  1713. case regex_constants::escape_type_e:
  1714. result = charT(27);
  1715. break;
  1716. case regex_constants::escape_type_control_f:
  1717. result = charT('\f');
  1718. break;
  1719. case regex_constants::escape_type_control_n:
  1720. result = charT('\n');
  1721. break;
  1722. case regex_constants::escape_type_control_r:
  1723. result = charT('\r');
  1724. break;
  1725. case regex_constants::escape_type_control_t:
  1726. result = charT('\t');
  1727. break;
  1728. case regex_constants::escape_type_control_v:
  1729. result = charT('\v');
  1730. break;
  1731. case regex_constants::escape_type_word_assert:
  1732. result = charT('\b');
  1733. break;
  1734. case regex_constants::escape_type_ascii_control:
  1735. ++m_position;
  1736. if(m_position == m_end)
  1737. {
  1738. // Rewind to start of escape:
  1739. --m_position;
  1740. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1741. fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
  1742. return result;
  1743. }
  1744. result = static_cast<charT>(*m_position % 32);
  1745. break;
  1746. case regex_constants::escape_type_hex:
  1747. ++m_position;
  1748. if(m_position == m_end)
  1749. {
  1750. // Rewind to start of escape:
  1751. --m_position;
  1752. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1753. fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
  1754. return result;
  1755. }
  1756. // maybe have \x{ddd}
  1757. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1758. {
  1759. ++m_position;
  1760. if(m_position == m_end)
  1761. {
  1762. // Rewind to start of escape:
  1763. --m_position;
  1764. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1765. fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
  1766. return result;
  1767. }
  1768. std::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
  1769. if((m_position == m_end)
  1770. || (i < 0)
  1771. || ((std::numeric_limits<charT>::is_specialized) && (i > (std::intmax_t)(std::numeric_limits<charT>::max)()))
  1772. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1773. {
  1774. // Rewind to start of escape:
  1775. --m_position;
  1776. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1777. fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
  1778. return result;
  1779. }
  1780. ++m_position;
  1781. result = charT(i);
  1782. }
  1783. else
  1784. {
  1785. std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
  1786. std::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
  1787. if((i < 0)
  1788. || !valid_value(charT(0), i))
  1789. {
  1790. // Rewind to start of escape:
  1791. --m_position;
  1792. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1793. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
  1794. return result;
  1795. }
  1796. result = charT(i);
  1797. }
  1798. return result;
  1799. case regex_constants::syntax_digit:
  1800. {
  1801. // an octal escape sequence, the first character must be a zero
  1802. // followed by up to 3 octal digits:
  1803. std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
  1804. const charT* bp = m_position;
  1805. std::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
  1806. if(val != 0)
  1807. {
  1808. // Rewind to start of escape:
  1809. --m_position;
  1810. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1811. // Oops not an octal escape after all:
  1812. fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
  1813. return result;
  1814. }
  1815. val = this->m_traits.toi(m_position, m_position + len, 8);
  1816. if((val < 0) || (val > (std::intmax_t)(std::numeric_limits<charT>::max)()))
  1817. {
  1818. // Rewind to start of escape:
  1819. --m_position;
  1820. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1821. fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
  1822. return result;
  1823. }
  1824. return static_cast<charT>(val);
  1825. }
  1826. case regex_constants::escape_type_named_char:
  1827. {
  1828. ++m_position;
  1829. if(m_position == m_end)
  1830. {
  1831. // Rewind to start of escape:
  1832. --m_position;
  1833. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1834. fail(regex_constants::error_escape, m_position - m_base);
  1835. return false;
  1836. }
  1837. // maybe have \N{name}
  1838. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1839. {
  1840. const charT* base = m_position;
  1841. // skip forward until we find enclosing brace:
  1842. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1843. ++m_position;
  1844. if(m_position == m_end)
  1845. {
  1846. // Rewind to start of escape:
  1847. --m_position;
  1848. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1849. fail(regex_constants::error_escape, m_position - m_base);
  1850. return false;
  1851. }
  1852. string_type s = this->m_traits.lookup_collatename(++base, m_position++);
  1853. if(s.empty())
  1854. {
  1855. // Rewind to start of escape:
  1856. --m_position;
  1857. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1858. fail(regex_constants::error_collate, m_position - m_base);
  1859. return false;
  1860. }
  1861. if(s.size() == 1)
  1862. {
  1863. return s[0];
  1864. }
  1865. }
  1866. // fall through is a failure:
  1867. // Rewind to start of escape:
  1868. --m_position;
  1869. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1870. fail(regex_constants::error_escape, m_position - m_base);
  1871. return false;
  1872. }
  1873. default:
  1874. result = *m_position;
  1875. break;
  1876. }
  1877. ++m_position;
  1878. return result;
  1879. #ifdef BOOST_REGEX_MSVC
  1880. #pragma warning(pop)
  1881. #endif
  1882. }
  1883. template <class charT, class traits>
  1884. bool basic_regex_parser<charT, traits>::parse_backref()
  1885. {
  1886. BOOST_REGEX_ASSERT(m_position != m_end);
  1887. const charT* pc = m_position;
  1888. std::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
  1889. if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
  1890. {
  1891. // not a backref at all but an octal escape sequence:
  1892. charT c = unescape_character();
  1893. this->append_literal(c);
  1894. }
  1895. else if((i > 0) && (this->m_backrefs.test((std::size_t)i)))
  1896. {
  1897. m_position = pc;
  1898. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  1899. pb->index = (int)i;
  1900. pb->icase = this->flags() & regbase::icase;
  1901. }
  1902. else
  1903. {
  1904. // Rewind to start of escape:
  1905. --m_position;
  1906. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1907. fail(regex_constants::error_backref, m_position - m_base);
  1908. return false;
  1909. }
  1910. return true;
  1911. }
  1912. template <class charT, class traits>
  1913. bool basic_regex_parser<charT, traits>::parse_QE()
  1914. {
  1915. #ifdef BOOST_REGEX_MSVC
  1916. #pragma warning(push)
  1917. #pragma warning(disable:4127)
  1918. #endif
  1919. //
  1920. // parse a \Q...\E sequence:
  1921. //
  1922. ++m_position; // skip the Q
  1923. const charT* start = m_position;
  1924. const charT* end;
  1925. do
  1926. {
  1927. while((m_position != m_end)
  1928. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
  1929. ++m_position;
  1930. if(m_position == m_end)
  1931. {
  1932. // a \Q...\E sequence may terminate with the end of the expression:
  1933. end = m_position;
  1934. break;
  1935. }
  1936. if(++m_position == m_end) // skip the escape
  1937. {
  1938. fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
  1939. return false;
  1940. }
  1941. // check to see if it's a \E:
  1942. if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
  1943. {
  1944. ++m_position;
  1945. end = m_position - 2;
  1946. break;
  1947. }
  1948. // otherwise go round again:
  1949. }while(true);
  1950. //
  1951. // now add all the character between the two escapes as literals:
  1952. //
  1953. while(start != end)
  1954. {
  1955. this->append_literal(*start);
  1956. ++start;
  1957. }
  1958. return true;
  1959. #ifdef BOOST_REGEX_MSVC
  1960. #pragma warning(pop)
  1961. #endif
  1962. }
  1963. template <class charT, class traits>
  1964. bool basic_regex_parser<charT, traits>::parse_perl_extension()
  1965. {
  1966. if(++m_position == m_end)
  1967. {
  1968. // Rewind to start of (? sequence:
  1969. --m_position;
  1970. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  1971. fail(regex_constants::error_perl_extension, m_position - m_base);
  1972. return false;
  1973. }
  1974. //
  1975. // treat comments as a special case, as these
  1976. // are the only ones that don't start with a leading
  1977. // startmark state:
  1978. //
  1979. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
  1980. {
  1981. while((m_position != m_end)
  1982. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
  1983. {}
  1984. return true;
  1985. }
  1986. //
  1987. // backup some state, and prepare the way:
  1988. //
  1989. int markid = 0;
  1990. std::ptrdiff_t jump_offset = 0;
  1991. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  1992. pb->icase = this->flags() & regbase::icase;
  1993. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  1994. // back up insertion point for alternations, and set new point:
  1995. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  1996. this->m_pdata->m_data.align();
  1997. m_alt_insert_point = this->m_pdata->m_data.size();
  1998. std::ptrdiff_t expected_alt_point = m_alt_insert_point;
  1999. bool restore_flags = true;
  2000. regex_constants::syntax_option_type old_flags = this->flags();
  2001. bool old_case_change = m_has_case_change;
  2002. m_has_case_change = false;
  2003. charT name_delim;
  2004. int mark_reset = m_mark_reset;
  2005. int max_mark = m_max_mark;
  2006. m_mark_reset = -1;
  2007. m_max_mark = m_mark_count;
  2008. std::intmax_t v;
  2009. //
  2010. // select the actual extension used:
  2011. //
  2012. switch(this->m_traits.syntax_type(*m_position))
  2013. {
  2014. case regex_constants::syntax_or:
  2015. m_mark_reset = m_mark_count;
  2016. BOOST_REGEX_FALLTHROUGH;
  2017. case regex_constants::syntax_colon:
  2018. //
  2019. // a non-capturing mark:
  2020. //
  2021. pb->index = markid = 0;
  2022. ++m_position;
  2023. break;
  2024. case regex_constants::syntax_digit:
  2025. {
  2026. //
  2027. // a recursive subexpression:
  2028. //
  2029. v = this->m_traits.toi(m_position, m_end, 10);
  2030. if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2031. {
  2032. // Rewind to start of (? sequence:
  2033. --m_position;
  2034. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2035. fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
  2036. return false;
  2037. }
  2038. insert_recursion:
  2039. pb->index = markid = 0;
  2040. re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
  2041. pr->alt.i = (std::ptrdiff_t)v;
  2042. pr->state_id = 0;
  2043. static_cast<re_case*>(
  2044. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2045. )->icase = this->flags() & regbase::icase;
  2046. break;
  2047. }
  2048. case regex_constants::syntax_plus:
  2049. //
  2050. // A forward-relative recursive subexpression:
  2051. //
  2052. ++m_position;
  2053. v = this->m_traits.toi(m_position, m_end, 10);
  2054. if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2055. {
  2056. // Rewind to start of (? sequence:
  2057. --m_position;
  2058. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2059. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2060. return false;
  2061. }
  2062. if ((std::numeric_limits<std::intmax_t>::max)() - m_mark_count < v)
  2063. {
  2064. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2065. return false;
  2066. }
  2067. v += m_mark_count;
  2068. goto insert_recursion;
  2069. case regex_constants::syntax_dash:
  2070. //
  2071. // Possibly a backward-relative recursive subexpression:
  2072. //
  2073. ++m_position;
  2074. v = this->m_traits.toi(m_position, m_end, 10);
  2075. if(v <= 0)
  2076. {
  2077. --m_position;
  2078. // Oops not a relative recursion at all, but a (?-imsx) group:
  2079. goto option_group_jump;
  2080. }
  2081. v = static_cast<std::intmax_t>(m_mark_count) + 1 - v;
  2082. if(v <= 0)
  2083. {
  2084. // Rewind to start of (? sequence:
  2085. --m_position;
  2086. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2087. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2088. return false;
  2089. }
  2090. goto insert_recursion;
  2091. case regex_constants::syntax_equal:
  2092. pb->index = markid = -1;
  2093. ++m_position;
  2094. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2095. this->m_pdata->m_data.align();
  2096. m_alt_insert_point = this->m_pdata->m_data.size();
  2097. break;
  2098. case regex_constants::syntax_not:
  2099. pb->index = markid = -2;
  2100. ++m_position;
  2101. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2102. this->m_pdata->m_data.align();
  2103. m_alt_insert_point = this->m_pdata->m_data.size();
  2104. break;
  2105. case regex_constants::escape_type_left_word:
  2106. {
  2107. // a lookbehind assertion:
  2108. if(++m_position == m_end)
  2109. {
  2110. // Rewind to start of (? sequence:
  2111. --m_position;
  2112. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2113. fail(regex_constants::error_perl_extension, m_position - m_base);
  2114. return false;
  2115. }
  2116. regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
  2117. if(t == regex_constants::syntax_not)
  2118. pb->index = markid = -2;
  2119. else if(t == regex_constants::syntax_equal)
  2120. pb->index = markid = -1;
  2121. else
  2122. {
  2123. // Probably a named capture which also starts (?< :
  2124. name_delim = '>';
  2125. --m_position;
  2126. goto named_capture_jump;
  2127. }
  2128. ++m_position;
  2129. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2130. this->append_state(syntax_element_backstep, sizeof(re_brace));
  2131. this->m_pdata->m_data.align();
  2132. m_alt_insert_point = this->m_pdata->m_data.size();
  2133. break;
  2134. }
  2135. case regex_constants::escape_type_right_word:
  2136. //
  2137. // an independent sub-expression:
  2138. //
  2139. pb->index = markid = -3;
  2140. ++m_position;
  2141. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2142. this->m_pdata->m_data.align();
  2143. m_alt_insert_point = this->m_pdata->m_data.size();
  2144. break;
  2145. case regex_constants::syntax_open_mark:
  2146. {
  2147. // a conditional expression:
  2148. pb->index = markid = -4;
  2149. if(++m_position == m_end)
  2150. {
  2151. // Rewind to start of (? sequence:
  2152. --m_position;
  2153. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2154. fail(regex_constants::error_perl_extension, m_position - m_base);
  2155. return false;
  2156. }
  2157. v = this->m_traits.toi(m_position, m_end, 10);
  2158. if(m_position == m_end)
  2159. {
  2160. // Rewind to start of (? sequence:
  2161. --m_position;
  2162. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2163. fail(regex_constants::error_perl_extension, m_position - m_base);
  2164. return false;
  2165. }
  2166. if(*m_position == charT('R'))
  2167. {
  2168. if(++m_position == m_end)
  2169. {
  2170. // Rewind to start of (? sequence:
  2171. --m_position;
  2172. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2173. fail(regex_constants::error_perl_extension, m_position - m_base);
  2174. return false;
  2175. }
  2176. if(*m_position == charT('&'))
  2177. {
  2178. const charT* base = ++m_position;
  2179. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2180. ++m_position;
  2181. if(m_position == m_end)
  2182. {
  2183. // Rewind to start of (? sequence:
  2184. --m_position;
  2185. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2186. fail(regex_constants::error_perl_extension, m_position - m_base);
  2187. return false;
  2188. }
  2189. v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
  2190. }
  2191. else
  2192. {
  2193. v = -this->m_traits.toi(m_position, m_end, 10);
  2194. }
  2195. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2196. br->index = v < 0 ? (int)(v - 1) : 0;
  2197. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2198. {
  2199. // Rewind to start of (? sequence:
  2200. --m_position;
  2201. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2202. fail(regex_constants::error_perl_extension, m_position - m_base);
  2203. return false;
  2204. }
  2205. if(++m_position == m_end)
  2206. {
  2207. // Rewind to start of (? sequence:
  2208. --m_position;
  2209. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2210. fail(regex_constants::error_perl_extension, m_position - m_base);
  2211. return false;
  2212. }
  2213. }
  2214. else if((*m_position == charT('\'')) || (*m_position == charT('<')))
  2215. {
  2216. const charT* base = ++m_position;
  2217. while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
  2218. ++m_position;
  2219. if(m_position == m_end)
  2220. {
  2221. // Rewind to start of (? sequence:
  2222. --m_position;
  2223. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2224. fail(regex_constants::error_perl_extension, m_position - m_base);
  2225. return false;
  2226. }
  2227. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2228. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2229. br->index = (int)v;
  2230. if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
  2231. {
  2232. // Rewind to start of (? sequence:
  2233. --m_position;
  2234. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2235. fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
  2236. return false;
  2237. }
  2238. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2239. {
  2240. // Rewind to start of (? sequence:
  2241. --m_position;
  2242. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2243. fail(regex_constants::error_perl_extension, m_position - m_base);
  2244. return false;
  2245. }
  2246. if(++m_position == m_end)
  2247. {
  2248. // Rewind to start of (? sequence:
  2249. --m_position;
  2250. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2251. fail(regex_constants::error_perl_extension, m_position - m_base);
  2252. return false;
  2253. }
  2254. }
  2255. else if(*m_position == charT('D'))
  2256. {
  2257. const char* def = "DEFINE";
  2258. while(*def && (m_position != m_end) && (*m_position == charT(*def)))
  2259. ++m_position, ++def;
  2260. if((m_position == m_end) || *def)
  2261. {
  2262. // Rewind to start of (? sequence:
  2263. --m_position;
  2264. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2265. fail(regex_constants::error_perl_extension, m_position - m_base);
  2266. return false;
  2267. }
  2268. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2269. br->index = 9999; // special magic value!
  2270. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2271. {
  2272. // Rewind to start of (? sequence:
  2273. --m_position;
  2274. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2275. fail(regex_constants::error_perl_extension, m_position - m_base);
  2276. return false;
  2277. }
  2278. if(++m_position == m_end)
  2279. {
  2280. // Rewind to start of (? sequence:
  2281. --m_position;
  2282. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2283. fail(regex_constants::error_perl_extension, m_position - m_base);
  2284. return false;
  2285. }
  2286. }
  2287. else if(v > 0)
  2288. {
  2289. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2290. br->index = (int)v;
  2291. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2292. {
  2293. // Rewind to start of (? sequence:
  2294. --m_position;
  2295. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2296. fail(regex_constants::error_perl_extension, m_position - m_base);
  2297. return false;
  2298. }
  2299. if(++m_position == m_end)
  2300. {
  2301. // Rewind to start of (? sequence:
  2302. --m_position;
  2303. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2304. fail(regex_constants::error_perl_extension, m_position - m_base);
  2305. return false;
  2306. }
  2307. }
  2308. else
  2309. {
  2310. // verify that we have a lookahead or lookbehind assert:
  2311. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
  2312. {
  2313. // Rewind to start of (? sequence:
  2314. --m_position;
  2315. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2316. fail(regex_constants::error_perl_extension, m_position - m_base);
  2317. return false;
  2318. }
  2319. if(++m_position == m_end)
  2320. {
  2321. // Rewind to start of (? sequence:
  2322. --m_position;
  2323. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2324. fail(regex_constants::error_perl_extension, m_position - m_base);
  2325. return false;
  2326. }
  2327. if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
  2328. {
  2329. if(++m_position == m_end)
  2330. {
  2331. // Rewind to start of (? sequence:
  2332. --m_position;
  2333. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2334. fail(regex_constants::error_perl_extension, m_position - m_base);
  2335. return false;
  2336. }
  2337. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2338. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2339. {
  2340. // Rewind to start of (? sequence:
  2341. --m_position;
  2342. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2343. fail(regex_constants::error_perl_extension, m_position - m_base);
  2344. return false;
  2345. }
  2346. m_position -= 3;
  2347. }
  2348. else
  2349. {
  2350. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2351. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2352. {
  2353. // Rewind to start of (? sequence:
  2354. --m_position;
  2355. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2356. fail(regex_constants::error_perl_extension, m_position - m_base);
  2357. return false;
  2358. }
  2359. m_position -= 2;
  2360. }
  2361. }
  2362. break;
  2363. }
  2364. case regex_constants::syntax_close_mark:
  2365. // Rewind to start of (? sequence:
  2366. --m_position;
  2367. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2368. fail(regex_constants::error_perl_extension, m_position - m_base);
  2369. return false;
  2370. case regex_constants::escape_type_end_buffer:
  2371. {
  2372. name_delim = *m_position;
  2373. named_capture_jump:
  2374. markid = 0;
  2375. if(0 == (this->flags() & regbase::nosubs))
  2376. {
  2377. markid = ++m_mark_count;
  2378. if(this->flags() & regbase::save_subexpression_location)
  2379. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
  2380. }
  2381. pb->index = markid;
  2382. const charT* base = ++m_position;
  2383. if(m_position == m_end)
  2384. {
  2385. // Rewind to start of (? sequence:
  2386. --m_position;
  2387. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2388. fail(regex_constants::error_perl_extension, m_position - m_base);
  2389. return false;
  2390. }
  2391. while((m_position != m_end) && (*m_position != name_delim))
  2392. ++m_position;
  2393. if(m_position == m_end)
  2394. {
  2395. // Rewind to start of (? sequence:
  2396. --m_position;
  2397. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2398. fail(regex_constants::error_perl_extension, m_position - m_base);
  2399. return false;
  2400. }
  2401. this->m_pdata->set_name(base, m_position, markid);
  2402. ++m_position;
  2403. break;
  2404. }
  2405. default:
  2406. if(*m_position == charT('R'))
  2407. {
  2408. ++m_position;
  2409. v = 0;
  2410. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2411. {
  2412. // Rewind to start of (? sequence:
  2413. --m_position;
  2414. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2415. fail(regex_constants::error_perl_extension, m_position - m_base);
  2416. return false;
  2417. }
  2418. goto insert_recursion;
  2419. }
  2420. if(*m_position == charT('&'))
  2421. {
  2422. ++m_position;
  2423. const charT* base = m_position;
  2424. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2425. ++m_position;
  2426. if(m_position == m_end)
  2427. {
  2428. // Rewind to start of (? sequence:
  2429. --m_position;
  2430. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2431. fail(regex_constants::error_perl_extension, m_position - m_base);
  2432. return false;
  2433. }
  2434. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2435. goto insert_recursion;
  2436. }
  2437. if(*m_position == charT('P'))
  2438. {
  2439. ++m_position;
  2440. if(m_position == m_end)
  2441. {
  2442. // Rewind to start of (? sequence:
  2443. --m_position;
  2444. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2445. fail(regex_constants::error_perl_extension, m_position - m_base);
  2446. return false;
  2447. }
  2448. if(*m_position == charT('>'))
  2449. {
  2450. ++m_position;
  2451. const charT* base = m_position;
  2452. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2453. ++m_position;
  2454. if(m_position == m_end)
  2455. {
  2456. // Rewind to start of (? sequence:
  2457. --m_position;
  2458. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2459. fail(regex_constants::error_perl_extension, m_position - m_base);
  2460. return false;
  2461. }
  2462. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2463. goto insert_recursion;
  2464. }
  2465. }
  2466. //
  2467. // lets assume that we have a (?imsx) group and try and parse it:
  2468. //
  2469. option_group_jump:
  2470. regex_constants::syntax_option_type opts = parse_options();
  2471. if(m_position == m_end)
  2472. {
  2473. // Rewind to start of (? sequence:
  2474. --m_position;
  2475. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2476. fail(regex_constants::error_perl_extension, m_position - m_base);
  2477. return false;
  2478. }
  2479. // make a note of whether we have a case change:
  2480. m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
  2481. pb->index = markid = 0;
  2482. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
  2483. {
  2484. // update flags and carry on as normal:
  2485. this->flags(opts);
  2486. restore_flags = false;
  2487. old_case_change |= m_has_case_change; // defer end of scope by one ')'
  2488. }
  2489. else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
  2490. {
  2491. // update flags and carry on until the matching ')' is found:
  2492. this->flags(opts);
  2493. ++m_position;
  2494. }
  2495. else
  2496. {
  2497. // Rewind to start of (? sequence:
  2498. --m_position;
  2499. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2500. fail(regex_constants::error_perl_extension, m_position - m_base);
  2501. return false;
  2502. }
  2503. // finally append a case change state if we need it:
  2504. if(m_has_case_change)
  2505. {
  2506. static_cast<re_case*>(
  2507. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2508. )->icase = opts & regbase::icase;
  2509. }
  2510. }
  2511. //
  2512. // now recursively add more states, this will terminate when we get to a
  2513. // matching ')' :
  2514. //
  2515. parse_all();
  2516. //
  2517. // Unwind alternatives:
  2518. //
  2519. if(0 == unwind_alts(last_paren_start))
  2520. {
  2521. // Rewind to start of (? sequence:
  2522. --m_position;
  2523. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2524. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
  2525. return false;
  2526. }
  2527. //
  2528. // we either have a ')' or we have run out of characters prematurely:
  2529. //
  2530. if(m_position == m_end)
  2531. {
  2532. // Rewind to start of (? sequence:
  2533. --m_position;
  2534. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2535. this->fail(regex_constants::error_paren, std::distance(m_base, m_end));
  2536. return false;
  2537. }
  2538. BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  2539. ++m_position;
  2540. //
  2541. // restore the flags:
  2542. //
  2543. if(restore_flags)
  2544. {
  2545. // append a case change state if we need it:
  2546. if(m_has_case_change)
  2547. {
  2548. static_cast<re_case*>(
  2549. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2550. )->icase = old_flags & regbase::icase;
  2551. }
  2552. this->flags(old_flags);
  2553. }
  2554. //
  2555. // set up the jump pointer if we have one:
  2556. //
  2557. if(jump_offset)
  2558. {
  2559. this->m_pdata->m_data.align();
  2560. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2561. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  2562. if((this->m_last_state == jmp) && (markid != -2))
  2563. {
  2564. // Oops... we didn't have anything inside the assertion.
  2565. // Note we don't get here for negated forward lookahead as (?!)
  2566. // does have some uses.
  2567. // Rewind to start of (? sequence:
  2568. --m_position;
  2569. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2570. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
  2571. return false;
  2572. }
  2573. }
  2574. //
  2575. // verify that if this is conditional expression, that we do have
  2576. // an alternative, if not add one:
  2577. //
  2578. if(markid == -4)
  2579. {
  2580. re_syntax_base* b = this->getaddress(expected_alt_point);
  2581. // Make sure we have exactly one alternative following this state:
  2582. if(b->type != syntax_element_alt)
  2583. {
  2584. re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
  2585. alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
  2586. }
  2587. else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
  2588. {
  2589. // Can't have seen more than one alternative:
  2590. // Rewind to start of (? sequence:
  2591. --m_position;
  2592. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2593. fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
  2594. return false;
  2595. }
  2596. else
  2597. {
  2598. // We must *not* have seen an alternative inside a (DEFINE) block:
  2599. b = this->getaddress(b->next.i, b);
  2600. if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
  2601. {
  2602. // Rewind to start of (? sequence:
  2603. --m_position;
  2604. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2605. fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
  2606. return false;
  2607. }
  2608. }
  2609. // check for invalid repetition of next state:
  2610. b = this->getaddress(expected_alt_point);
  2611. b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
  2612. if((b->type != syntax_element_assert_backref)
  2613. && (b->type != syntax_element_startmark))
  2614. {
  2615. // Rewind to start of (? sequence:
  2616. --m_position;
  2617. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2618. fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
  2619. return false;
  2620. }
  2621. }
  2622. //
  2623. // append closing parenthesis state:
  2624. //
  2625. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  2626. pb->index = markid;
  2627. pb->icase = this->flags() & regbase::icase;
  2628. this->m_paren_start = last_paren_start;
  2629. //
  2630. // restore the alternate insertion point:
  2631. //
  2632. this->m_alt_insert_point = last_alt_point;
  2633. //
  2634. // and the case change data:
  2635. //
  2636. m_has_case_change = old_case_change;
  2637. //
  2638. // And the mark_reset data:
  2639. //
  2640. if(m_max_mark > m_mark_count)
  2641. {
  2642. m_mark_count = m_max_mark;
  2643. }
  2644. m_mark_reset = mark_reset;
  2645. m_max_mark = max_mark;
  2646. if(markid > 0)
  2647. {
  2648. if(this->flags() & regbase::save_subexpression_location)
  2649. this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
  2650. //
  2651. // allow backrefs to this mark:
  2652. //
  2653. this->m_backrefs.set(markid);
  2654. }
  2655. return true;
  2656. }
  2657. template <class charT, class traits>
  2658. bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
  2659. {
  2660. while(*verb)
  2661. {
  2662. if(static_cast<charT>(*verb) != *m_position)
  2663. {
  2664. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2665. fail(regex_constants::error_perl_extension, m_position - m_base);
  2666. return false;
  2667. }
  2668. if(++m_position == m_end)
  2669. {
  2670. --m_position;
  2671. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2672. fail(regex_constants::error_perl_extension, m_position - m_base);
  2673. return false;
  2674. }
  2675. ++verb;
  2676. }
  2677. return true;
  2678. }
  2679. #ifdef BOOST_REGEX_MSVC
  2680. # pragma warning(push)
  2681. #if BOOST_REGEX_MSVC >= 1800
  2682. #pragma warning(disable:26812)
  2683. #endif
  2684. #endif
  2685. template <class charT, class traits>
  2686. bool basic_regex_parser<charT, traits>::parse_perl_verb()
  2687. {
  2688. if(++m_position == m_end)
  2689. {
  2690. // Rewind to start of (* sequence:
  2691. --m_position;
  2692. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2693. fail(regex_constants::error_perl_extension, m_position - m_base);
  2694. return false;
  2695. }
  2696. switch(*m_position)
  2697. {
  2698. case 'F':
  2699. if(++m_position == m_end)
  2700. {
  2701. // Rewind to start of (* sequence:
  2702. --m_position;
  2703. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2704. fail(regex_constants::error_perl_extension, m_position - m_base);
  2705. return false;
  2706. }
  2707. if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
  2708. {
  2709. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2710. {
  2711. // Rewind to start of (* sequence:
  2712. --m_position;
  2713. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2714. fail(regex_constants::error_perl_extension, m_position - m_base);
  2715. return false;
  2716. }
  2717. ++m_position;
  2718. this->append_state(syntax_element_fail);
  2719. return true;
  2720. }
  2721. break;
  2722. case 'A':
  2723. if(++m_position == m_end)
  2724. {
  2725. // Rewind to start of (* sequence:
  2726. --m_position;
  2727. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2728. fail(regex_constants::error_perl_extension, m_position - m_base);
  2729. return false;
  2730. }
  2731. if(match_verb("CCEPT"))
  2732. {
  2733. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2734. {
  2735. // Rewind to start of (* sequence:
  2736. --m_position;
  2737. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2738. fail(regex_constants::error_perl_extension, m_position - m_base);
  2739. return false;
  2740. }
  2741. ++m_position;
  2742. this->append_state(syntax_element_accept);
  2743. return true;
  2744. }
  2745. break;
  2746. case 'C':
  2747. if(++m_position == m_end)
  2748. {
  2749. // Rewind to start of (* sequence:
  2750. --m_position;
  2751. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2752. fail(regex_constants::error_perl_extension, m_position - m_base);
  2753. return false;
  2754. }
  2755. if(match_verb("OMMIT"))
  2756. {
  2757. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2758. {
  2759. // Rewind to start of (* sequence:
  2760. --m_position;
  2761. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2762. fail(regex_constants::error_perl_extension, m_position - m_base);
  2763. return false;
  2764. }
  2765. ++m_position;
  2766. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
  2767. this->m_pdata->m_disable_match_any = true;
  2768. return true;
  2769. }
  2770. break;
  2771. case 'P':
  2772. if(++m_position == m_end)
  2773. {
  2774. // Rewind to start of (* sequence:
  2775. --m_position;
  2776. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2777. fail(regex_constants::error_perl_extension, m_position - m_base);
  2778. return false;
  2779. }
  2780. if(match_verb("RUNE"))
  2781. {
  2782. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2783. {
  2784. // Rewind to start of (* sequence:
  2785. --m_position;
  2786. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2787. fail(regex_constants::error_perl_extension, m_position - m_base);
  2788. return false;
  2789. }
  2790. ++m_position;
  2791. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
  2792. this->m_pdata->m_disable_match_any = true;
  2793. return true;
  2794. }
  2795. break;
  2796. case 'S':
  2797. if(++m_position == m_end)
  2798. {
  2799. // Rewind to start of (* sequence:
  2800. --m_position;
  2801. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2802. fail(regex_constants::error_perl_extension, m_position - m_base);
  2803. return false;
  2804. }
  2805. if(match_verb("KIP"))
  2806. {
  2807. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2808. {
  2809. // Rewind to start of (* sequence:
  2810. --m_position;
  2811. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2812. fail(regex_constants::error_perl_extension, m_position - m_base);
  2813. return false;
  2814. }
  2815. ++m_position;
  2816. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
  2817. this->m_pdata->m_disable_match_any = true;
  2818. return true;
  2819. }
  2820. break;
  2821. case 'T':
  2822. if(++m_position == m_end)
  2823. {
  2824. // Rewind to start of (* sequence:
  2825. --m_position;
  2826. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2827. fail(regex_constants::error_perl_extension, m_position - m_base);
  2828. return false;
  2829. }
  2830. if(match_verb("HEN"))
  2831. {
  2832. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2833. {
  2834. // Rewind to start of (* sequence:
  2835. --m_position;
  2836. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2837. fail(regex_constants::error_perl_extension, m_position - m_base);
  2838. return false;
  2839. }
  2840. ++m_position;
  2841. this->append_state(syntax_element_then);
  2842. this->m_pdata->m_disable_match_any = true;
  2843. return true;
  2844. }
  2845. break;
  2846. }
  2847. // Rewind to start of (* sequence:
  2848. --m_position;
  2849. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2850. fail(regex_constants::error_perl_extension, m_position - m_base);
  2851. return false;
  2852. }
  2853. #ifdef BOOST_REGEX_MSVC
  2854. # pragma warning(pop)
  2855. #endif
  2856. template <class charT, class traits>
  2857. bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
  2858. {
  2859. //
  2860. // parses an emacs style \sx or \Sx construct.
  2861. //
  2862. if(++m_position == m_end)
  2863. {
  2864. // Rewind to start of sequence:
  2865. --m_position;
  2866. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  2867. fail(regex_constants::error_escape, m_position - m_base);
  2868. return false;
  2869. }
  2870. basic_char_set<charT, traits> char_set;
  2871. if(negate)
  2872. char_set.negate();
  2873. static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
  2874. switch(*m_position)
  2875. {
  2876. case 's':
  2877. case ' ':
  2878. char_set.add_class(this->m_mask_space);
  2879. break;
  2880. case 'w':
  2881. char_set.add_class(this->m_word_mask);
  2882. break;
  2883. case '_':
  2884. char_set.add_single(digraph<charT>(charT('$')));
  2885. char_set.add_single(digraph<charT>(charT('&')));
  2886. char_set.add_single(digraph<charT>(charT('*')));
  2887. char_set.add_single(digraph<charT>(charT('+')));
  2888. char_set.add_single(digraph<charT>(charT('-')));
  2889. char_set.add_single(digraph<charT>(charT('_')));
  2890. char_set.add_single(digraph<charT>(charT('<')));
  2891. char_set.add_single(digraph<charT>(charT('>')));
  2892. break;
  2893. case '.':
  2894. char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
  2895. break;
  2896. case '(':
  2897. char_set.add_single(digraph<charT>(charT('(')));
  2898. char_set.add_single(digraph<charT>(charT('[')));
  2899. char_set.add_single(digraph<charT>(charT('{')));
  2900. break;
  2901. case ')':
  2902. char_set.add_single(digraph<charT>(charT(')')));
  2903. char_set.add_single(digraph<charT>(charT(']')));
  2904. char_set.add_single(digraph<charT>(charT('}')));
  2905. break;
  2906. case '"':
  2907. char_set.add_single(digraph<charT>(charT('"')));
  2908. char_set.add_single(digraph<charT>(charT('\'')));
  2909. char_set.add_single(digraph<charT>(charT('`')));
  2910. break;
  2911. case '\'':
  2912. char_set.add_single(digraph<charT>(charT('\'')));
  2913. char_set.add_single(digraph<charT>(charT(',')));
  2914. char_set.add_single(digraph<charT>(charT('#')));
  2915. break;
  2916. case '<':
  2917. char_set.add_single(digraph<charT>(charT(';')));
  2918. break;
  2919. case '>':
  2920. char_set.add_single(digraph<charT>(charT('\n')));
  2921. char_set.add_single(digraph<charT>(charT('\f')));
  2922. break;
  2923. default:
  2924. fail(regex_constants::error_ctype, m_position - m_base);
  2925. return false;
  2926. }
  2927. if(0 == this->append_set(char_set))
  2928. {
  2929. fail(regex_constants::error_ctype, m_position - m_base);
  2930. return false;
  2931. }
  2932. ++m_position;
  2933. return true;
  2934. }
  2935. template <class charT, class traits>
  2936. regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
  2937. {
  2938. // we have a (?imsx-imsx) group, convert it into a set of flags:
  2939. regex_constants::syntax_option_type f = this->flags();
  2940. bool breakout = false;
  2941. do
  2942. {
  2943. switch(*m_position)
  2944. {
  2945. case 's':
  2946. f |= regex_constants::mod_s;
  2947. f &= ~regex_constants::no_mod_s;
  2948. break;
  2949. case 'm':
  2950. f &= ~regex_constants::no_mod_m;
  2951. break;
  2952. case 'i':
  2953. f |= regex_constants::icase;
  2954. break;
  2955. case 'x':
  2956. f |= regex_constants::mod_x;
  2957. break;
  2958. default:
  2959. breakout = true;
  2960. continue;
  2961. }
  2962. if(++m_position == m_end)
  2963. {
  2964. // Rewind to start of (? sequence:
  2965. --m_position;
  2966. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2967. fail(regex_constants::error_paren, m_position - m_base);
  2968. return false;
  2969. }
  2970. }
  2971. while(!breakout);
  2972. breakout = false;
  2973. if(*m_position == static_cast<charT>('-'))
  2974. {
  2975. if(++m_position == m_end)
  2976. {
  2977. // Rewind to start of (? sequence:
  2978. --m_position;
  2979. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2980. fail(regex_constants::error_paren, m_position - m_base);
  2981. return false;
  2982. }
  2983. do
  2984. {
  2985. switch(*m_position)
  2986. {
  2987. case 's':
  2988. f &= ~regex_constants::mod_s;
  2989. f |= regex_constants::no_mod_s;
  2990. break;
  2991. case 'm':
  2992. f |= regex_constants::no_mod_m;
  2993. break;
  2994. case 'i':
  2995. f &= ~regex_constants::icase;
  2996. break;
  2997. case 'x':
  2998. f &= ~regex_constants::mod_x;
  2999. break;
  3000. default:
  3001. breakout = true;
  3002. continue;
  3003. }
  3004. if(++m_position == m_end)
  3005. {
  3006. // Rewind to start of (? sequence:
  3007. --m_position;
  3008. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3009. fail(regex_constants::error_paren, m_position - m_base);
  3010. return false;
  3011. }
  3012. }
  3013. while(!breakout);
  3014. }
  3015. return f;
  3016. }
  3017. template <class charT, class traits>
  3018. bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
  3019. {
  3020. //
  3021. // If we didn't actually add any states after the last
  3022. // alternative then that's an error:
  3023. //
  3024. if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
  3025. && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
  3026. &&
  3027. !(
  3028. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  3029. &&
  3030. ((this->flags() & regbase::no_empty_expressions) == 0)
  3031. )
  3032. )
  3033. {
  3034. fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
  3035. return false;
  3036. }
  3037. //
  3038. // Fix up our alternatives:
  3039. //
  3040. while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
  3041. {
  3042. //
  3043. // fix up the jump to point to the end of the states
  3044. // that we've just added:
  3045. //
  3046. std::ptrdiff_t jump_offset = m_alt_jumps.back();
  3047. m_alt_jumps.pop_back();
  3048. this->m_pdata->m_data.align();
  3049. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  3050. BOOST_REGEX_ASSERT(jmp->type == syntax_element_jump);
  3051. jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
  3052. }
  3053. return true;
  3054. }
  3055. #ifdef BOOST_REGEX_MSVC
  3056. #pragma warning(pop)
  3057. #endif
  3058. } // namespace BOOST_REGEX_DETAIL_NS
  3059. } // namespace boost
  3060. #endif