perl_matcher_common.hpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_REGEX_MSVC
  21. # pragma warning(push)
  22. #pragma warning(disable:4459)
  23. #if BOOST_REGEX_MSVC < 1910
  24. #pragma warning(disable:4800)
  25. #endif
  26. #endif
  27. namespace boost{
  28. namespace BOOST_REGEX_DETAIL_NS{
  29. #ifdef BOOST_REGEX_MSVC
  30. # pragma warning(push)
  31. #pragma warning(disable:26812)
  32. #endif
  33. template <class BidiIterator, class Allocator, class traits>
  34. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  35. {
  36. typedef typename std::iterator_traits<BidiIterator>::iterator_category category;
  37. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  38. if(e.empty())
  39. {
  40. // precondition failure: e is not a valid regex.
  41. std::invalid_argument ex("Invalid regular expression object");
  42. #ifndef BOOST_REGEX_STANDALONE
  43. boost::throw_exception(ex);
  44. #else
  45. throw e;
  46. #endif
  47. }
  48. pstate = 0;
  49. m_match_flags = f;
  50. estimate_max_state_count(static_cast<category*>(0));
  51. expression_flag_type re_f = re.flags();
  52. icase = re_f & regex_constants::icase;
  53. if(!(m_match_flags & (match_perl|match_posix)))
  54. {
  55. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  56. m_match_flags |= match_perl;
  57. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  58. m_match_flags |= match_perl;
  59. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  60. m_match_flags |= match_perl;
  61. else
  62. m_match_flags |= match_posix;
  63. }
  64. if(m_match_flags & match_posix)
  65. {
  66. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  67. m_presult = m_temp_match.get();
  68. }
  69. else
  70. m_presult = &m_result;
  71. m_stack_base = 0;
  72. m_backup_state = 0;
  73. // find the value to use for matching word boundaries:
  74. m_word_mask = re.get_data().m_word_mask;
  75. // find bitmask to use for matching '.':
  76. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
  77. // Disable match_any if requested in the state machine:
  78. if(e.get_data().m_disable_match_any)
  79. m_match_flags &= regex_constants::match_not_any;
  80. }
  81. #ifdef BOOST_REGEX_MSVC
  82. # pragma warning(pop)
  83. #endif
  84. template <class BidiIterator, class Allocator, class traits>
  85. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  86. {
  87. //
  88. // How many states should we allow our machine to visit before giving up?
  89. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  90. // where N is the length of the string, and S is the number of states
  91. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  92. // but these take unreasonably amounts of time to bale out in pathological
  93. // cases.
  94. //
  95. // Calculate NS^2 first:
  96. //
  97. static const std::ptrdiff_t k = 100000;
  98. std::ptrdiff_t dist = std::distance(base, last);
  99. if(dist == 0)
  100. dist = 1;
  101. std::ptrdiff_t states = re.size();
  102. if(states == 0)
  103. states = 1;
  104. if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states)
  105. {
  106. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  107. return;
  108. }
  109. states *= states;
  110. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  111. {
  112. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  113. return;
  114. }
  115. states *= dist;
  116. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  117. {
  118. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  119. return;
  120. }
  121. states += k;
  122. max_state_count = states;
  123. //
  124. // Now calculate N^2:
  125. //
  126. states = dist;
  127. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  128. {
  129. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  130. return;
  131. }
  132. states *= dist;
  133. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  134. {
  135. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  136. return;
  137. }
  138. states += k;
  139. //
  140. // N^2 can be a very large number indeed, to prevent things getting out
  141. // of control, cap the max states:
  142. //
  143. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  144. states = BOOST_REGEX_MAX_STATE_COUNT;
  145. //
  146. // If (the possibly capped) N^2 is larger than our first estimate,
  147. // use this instead:
  148. //
  149. if(states > max_state_count)
  150. max_state_count = states;
  151. }
  152. template <class BidiIterator, class Allocator, class traits>
  153. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  154. {
  155. // we don't know how long the sequence is:
  156. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  157. }
  158. template <class BidiIterator, class Allocator, class traits>
  159. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  160. {
  161. return match_imp();
  162. }
  163. template <class BidiIterator, class Allocator, class traits>
  164. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  165. {
  166. // initialise our stack if we are non-recursive:
  167. save_state_init init(&m_stack_base, &m_backup_state);
  168. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  169. #if !defined(BOOST_NO_EXCEPTIONS)
  170. try{
  171. #endif
  172. // reset our state machine:
  173. position = base;
  174. search_base = base;
  175. state_count = 0;
  176. m_match_flags |= regex_constants::match_all;
  177. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  178. m_presult->set_base(base);
  179. m_presult->set_named_subs(this->re.get_named_subs());
  180. if(m_match_flags & match_posix)
  181. m_result = *m_presult;
  182. verify_options(re.flags(), m_match_flags);
  183. if(0 == match_prefix())
  184. return false;
  185. return (m_result[0].second == last) && (m_result[0].first == base);
  186. #if !defined(BOOST_NO_EXCEPTIONS)
  187. }
  188. catch(...)
  189. {
  190. // unwind all pushed states, apart from anything else this
  191. // ensures that all the states are correctly destructed
  192. // not just the memory freed.
  193. while(unwind(true)){}
  194. throw;
  195. }
  196. #endif
  197. }
  198. template <class BidiIterator, class Allocator, class traits>
  199. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  200. {
  201. return find_imp();
  202. }
  203. template <class BidiIterator, class Allocator, class traits>
  204. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  205. {
  206. static matcher_proc_type const s_find_vtable[7] =
  207. {
  208. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  209. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  210. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  211. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  212. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  213. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  214. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  215. };
  216. // initialise our stack if we are non-recursive:
  217. save_state_init init(&m_stack_base, &m_backup_state);
  218. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  219. #if !defined(BOOST_NO_EXCEPTIONS)
  220. try{
  221. #endif
  222. state_count = 0;
  223. if((m_match_flags & regex_constants::match_init) == 0)
  224. {
  225. // reset our state machine:
  226. search_base = position = base;
  227. pstate = re.get_first_state();
  228. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  229. m_presult->set_base(base);
  230. m_presult->set_named_subs(this->re.get_named_subs());
  231. m_match_flags |= regex_constants::match_init;
  232. }
  233. else
  234. {
  235. // start again:
  236. search_base = position = m_result[0].second;
  237. // If last match was null and match_not_null was not set then increment
  238. // our start position, otherwise we go into an infinite loop:
  239. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  240. {
  241. if(position == last)
  242. return false;
  243. else
  244. ++position;
  245. }
  246. // reset $` start:
  247. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  248. //if((base != search_base) && (base == backstop))
  249. // m_match_flags |= match_prev_avail;
  250. }
  251. if(m_match_flags & match_posix)
  252. {
  253. m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  254. m_result.set_base(base);
  255. }
  256. verify_options(re.flags(), m_match_flags);
  257. // find out what kind of expression we have:
  258. unsigned type = (m_match_flags & match_continuous) ?
  259. static_cast<unsigned int>(regbase::restart_continue)
  260. : static_cast<unsigned int>(re.get_restart_type());
  261. // call the appropriate search routine:
  262. matcher_proc_type proc = s_find_vtable[type];
  263. return (this->*proc)();
  264. #if !defined(BOOST_NO_EXCEPTIONS)
  265. }
  266. catch(...)
  267. {
  268. // unwind all pushed states, apart from anything else this
  269. // ensures that all the states are correctly destructed
  270. // not just the memory freed.
  271. while(unwind(true)){}
  272. throw;
  273. }
  274. #endif
  275. }
  276. template <class BidiIterator, class Allocator, class traits>
  277. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  278. {
  279. m_has_partial_match = false;
  280. m_has_found_match = false;
  281. pstate = re.get_first_state();
  282. m_presult->set_first(position);
  283. restart = position;
  284. match_all_states();
  285. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  286. {
  287. m_has_found_match = true;
  288. m_presult->set_second(last, 0, false);
  289. position = last;
  290. if((m_match_flags & match_posix) == match_posix)
  291. {
  292. m_result.maybe_assign(*m_presult);
  293. }
  294. }
  295. #ifdef BOOST_REGEX_MATCH_EXTRA
  296. if(m_has_found_match && (match_extra & m_match_flags))
  297. {
  298. //
  299. // we have a match, reverse the capture information:
  300. //
  301. for(unsigned i = 0; i < m_presult->size(); ++i)
  302. {
  303. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  304. std::reverse(seq.begin(), seq.end());
  305. }
  306. }
  307. #endif
  308. if(!m_has_found_match)
  309. position = restart; // reset search postion
  310. return m_has_found_match;
  311. }
  312. template <class BidiIterator, class Allocator, class traits>
  313. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  314. {
  315. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  316. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  317. //
  318. // compare string with what we stored in
  319. // our records:
  320. for(unsigned int i = 0; i < len; ++i, ++position)
  321. {
  322. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  323. return false;
  324. }
  325. pstate = pstate->next.p;
  326. return true;
  327. }
  328. template <class BidiIterator, class Allocator, class traits>
  329. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  330. {
  331. if(position == backstop)
  332. {
  333. if((m_match_flags & match_prev_avail) == 0)
  334. {
  335. if((m_match_flags & match_not_bol) == 0)
  336. {
  337. pstate = pstate->next.p;
  338. return true;
  339. }
  340. return false;
  341. }
  342. }
  343. else if(m_match_flags & match_single_line)
  344. return false;
  345. // check the previous value character:
  346. BidiIterator t(position);
  347. --t;
  348. if(position != last)
  349. {
  350. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  351. {
  352. pstate = pstate->next.p;
  353. return true;
  354. }
  355. }
  356. else if(is_separator(*t))
  357. {
  358. pstate = pstate->next.p;
  359. return true;
  360. }
  361. return false;
  362. }
  363. template <class BidiIterator, class Allocator, class traits>
  364. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  365. {
  366. if(position != last)
  367. {
  368. if(m_match_flags & match_single_line)
  369. return false;
  370. // we're not yet at the end so *first is always valid:
  371. if(is_separator(*position))
  372. {
  373. if((position != backstop) || (m_match_flags & match_prev_avail))
  374. {
  375. // check that we're not in the middle of \r\n sequence
  376. BidiIterator t(position);
  377. --t;
  378. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  379. {
  380. return false;
  381. }
  382. }
  383. pstate = pstate->next.p;
  384. return true;
  385. }
  386. }
  387. else if((m_match_flags & match_not_eol) == 0)
  388. {
  389. pstate = pstate->next.p;
  390. return true;
  391. }
  392. return false;
  393. }
  394. template <class BidiIterator, class Allocator, class traits>
  395. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  396. {
  397. if(position == last)
  398. return false;
  399. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  400. return false;
  401. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  402. return false;
  403. pstate = pstate->next.p;
  404. ++position;
  405. return true;
  406. }
  407. template <class BidiIterator, class Allocator, class traits>
  408. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  409. {
  410. bool b; // indcates whether next character is a word character
  411. if(position != last)
  412. {
  413. // prev and this character must be opposites:
  414. b = traits_inst.isctype(*position, m_word_mask);
  415. }
  416. else
  417. {
  418. if (m_match_flags & match_not_eow)
  419. return false;
  420. b = false;
  421. }
  422. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  423. {
  424. if(m_match_flags & match_not_bow)
  425. return false;
  426. else
  427. b ^= false;
  428. }
  429. else
  430. {
  431. --position;
  432. b ^= traits_inst.isctype(*position, m_word_mask);
  433. ++position;
  434. }
  435. if(b)
  436. {
  437. pstate = pstate->next.p;
  438. return true;
  439. }
  440. return false; // no match if we get to here...
  441. }
  442. template <class BidiIterator, class Allocator, class traits>
  443. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  444. {
  445. if(position == last)
  446. return false;
  447. // both prev and this character must be m_word_mask:
  448. bool prev = traits_inst.isctype(*position, m_word_mask);
  449. {
  450. bool b;
  451. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  452. return false;
  453. else
  454. {
  455. --position;
  456. b = traits_inst.isctype(*position, m_word_mask);
  457. ++position;
  458. }
  459. if(b == prev)
  460. {
  461. pstate = pstate->next.p;
  462. return true;
  463. }
  464. }
  465. return false;
  466. }
  467. template <class BidiIterator, class Allocator, class traits>
  468. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  469. {
  470. if(position == last)
  471. return false; // can't be starting a word if we're already at the end of input
  472. if(!traits_inst.isctype(*position, m_word_mask))
  473. return false; // next character isn't a word character
  474. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  475. {
  476. if(m_match_flags & match_not_bow)
  477. return false; // no previous input
  478. }
  479. else
  480. {
  481. // otherwise inside buffer:
  482. BidiIterator t(position);
  483. --t;
  484. if(traits_inst.isctype(*t, m_word_mask))
  485. return false; // previous character not non-word
  486. }
  487. // OK we have a match:
  488. pstate = pstate->next.p;
  489. return true;
  490. }
  491. template <class BidiIterator, class Allocator, class traits>
  492. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  493. {
  494. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  495. return false; // start of buffer can't be end of word
  496. BidiIterator t(position);
  497. --t;
  498. if(traits_inst.isctype(*t, m_word_mask) == false)
  499. return false; // previous character wasn't a word character
  500. if(position == last)
  501. {
  502. if(m_match_flags & match_not_eow)
  503. return false; // end of buffer but not end of word
  504. }
  505. else
  506. {
  507. // otherwise inside buffer:
  508. if(traits_inst.isctype(*position, m_word_mask))
  509. return false; // next character is a word character
  510. }
  511. pstate = pstate->next.p;
  512. return true; // if we fall through to here then we've succeeded
  513. }
  514. template <class BidiIterator, class Allocator, class traits>
  515. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  516. {
  517. if((position != backstop) || (m_match_flags & match_not_bob))
  518. return false;
  519. // OK match:
  520. pstate = pstate->next.p;
  521. return true;
  522. }
  523. template <class BidiIterator, class Allocator, class traits>
  524. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  525. {
  526. if((position != last) || (m_match_flags & match_not_eob))
  527. return false;
  528. // OK match:
  529. pstate = pstate->next.p;
  530. return true;
  531. }
  532. template <class BidiIterator, class Allocator, class traits>
  533. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  534. {
  535. //
  536. // Compare with what we previously matched.
  537. // Note that this succeeds if the backref did not partisipate
  538. // in the match, this is in line with ECMAScript, but not Perl
  539. // or PCRE.
  540. //
  541. int index = static_cast<const re_brace*>(pstate)->index;
  542. if(index >= hash_value_mask)
  543. {
  544. named_subexpressions::range_type r = re.get_data().equal_range(index);
  545. BOOST_REGEX_ASSERT(r.first != r.second);
  546. do
  547. {
  548. index = r.first->index;
  549. ++r.first;
  550. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  551. }
  552. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  553. return false;
  554. BidiIterator i = (*m_presult)[index].first;
  555. BidiIterator j = (*m_presult)[index].second;
  556. while(i != j)
  557. {
  558. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  559. return false;
  560. ++i;
  561. ++position;
  562. }
  563. pstate = pstate->next.p;
  564. return true;
  565. }
  566. template <class BidiIterator, class Allocator, class traits>
  567. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  568. {
  569. typedef typename traits::char_class_type char_class_type;
  570. // let the traits class do the work:
  571. if(position == last)
  572. return false;
  573. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  574. if(t != position)
  575. {
  576. pstate = pstate->next.p;
  577. position = t;
  578. return true;
  579. }
  580. return false;
  581. }
  582. template <class BidiIterator, class Allocator, class traits>
  583. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  584. {
  585. if(position == last)
  586. return false;
  587. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  588. {
  589. pstate = pstate->next.p;
  590. ++position;
  591. return true;
  592. }
  593. return false;
  594. }
  595. template <class BidiIterator, class Allocator, class traits>
  596. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  597. {
  598. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  599. return true;
  600. }
  601. template <class BidiIterator, class Allocator, class traits>
  602. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  603. {
  604. if(position == last)
  605. return false;
  606. if(is_combining(traits_inst.translate(*position, icase)))
  607. return false;
  608. ++position;
  609. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  610. ++position;
  611. pstate = pstate->next.p;
  612. return true;
  613. }
  614. template <class BidiIterator, class Allocator, class traits>
  615. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  616. {
  617. if(m_match_flags & match_not_eob)
  618. return false;
  619. BidiIterator p(position);
  620. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  621. if(p != last)
  622. return false;
  623. pstate = pstate->next.p;
  624. return true;
  625. }
  626. template <class BidiIterator, class Allocator, class traits>
  627. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  628. {
  629. if(position == search_base)
  630. {
  631. pstate = pstate->next.p;
  632. return true;
  633. }
  634. return false;
  635. }
  636. template <class BidiIterator, class Allocator, class traits>
  637. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  638. {
  639. #ifdef BOOST_REGEX_MSVC
  640. #pragma warning(push)
  641. #pragma warning(disable:4127)
  642. #endif
  643. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  644. {
  645. std::ptrdiff_t maxlen = std::distance(backstop, position);
  646. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  647. return false;
  648. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  649. }
  650. else
  651. {
  652. int c = static_cast<const re_brace*>(pstate)->index;
  653. while(c--)
  654. {
  655. if(position == backstop)
  656. return false;
  657. --position;
  658. }
  659. }
  660. pstate = pstate->next.p;
  661. return true;
  662. #ifdef BOOST_REGEX_MSVC
  663. #pragma warning(pop)
  664. #endif
  665. }
  666. template <class BidiIterator, class Allocator, class traits>
  667. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  668. {
  669. // return true if marked sub-expression N has been matched:
  670. int index = static_cast<const re_brace*>(pstate)->index;
  671. bool result = false;
  672. if(index == 9999)
  673. {
  674. // Magic value for a (DEFINE) block:
  675. return false;
  676. }
  677. else if(index > 0)
  678. {
  679. // Have we matched subexpression "index"?
  680. // Check if index is a hash value:
  681. if(index >= hash_value_mask)
  682. {
  683. named_subexpressions::range_type r = re.get_data().equal_range(index);
  684. while(r.first != r.second)
  685. {
  686. if((*m_presult)[r.first->index].matched)
  687. {
  688. result = true;
  689. break;
  690. }
  691. ++r.first;
  692. }
  693. }
  694. else
  695. {
  696. result = (*m_presult)[index].matched;
  697. }
  698. pstate = pstate->next.p;
  699. }
  700. else
  701. {
  702. // Have we recursed into subexpression "index"?
  703. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  704. int idx = -(index+1);
  705. if(idx >= hash_value_mask)
  706. {
  707. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  708. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  709. while(r.first != r.second)
  710. {
  711. result |= (stack_index == r.first->index);
  712. if(result)break;
  713. ++r.first;
  714. }
  715. }
  716. else
  717. {
  718. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  719. }
  720. pstate = pstate->next.p;
  721. }
  722. return result;
  723. }
  724. template <class BidiIterator, class Allocator, class traits>
  725. bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
  726. {
  727. // Just force a backtrack:
  728. return false;
  729. }
  730. template <class BidiIterator, class Allocator, class traits>
  731. bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
  732. {
  733. if(!recursion_stack.empty())
  734. {
  735. return skip_until_paren(recursion_stack.back().idx);
  736. }
  737. else
  738. {
  739. return skip_until_paren(INT_MAX);
  740. }
  741. }
  742. template <class BidiIterator, class Allocator, class traits>
  743. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  744. {
  745. #ifdef BOOST_REGEX_MSVC
  746. #pragma warning(push)
  747. #pragma warning(disable:4127)
  748. #endif
  749. const unsigned char* _map = re.get_map();
  750. while(true)
  751. {
  752. // skip everything we can't match:
  753. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  754. ++position;
  755. if(position == last)
  756. {
  757. // run out of characters, try a null match if possible:
  758. if(re.can_be_null())
  759. return match_prefix();
  760. break;
  761. }
  762. // now try and obtain a match:
  763. if(match_prefix())
  764. return true;
  765. if(position == last)
  766. return false;
  767. ++position;
  768. }
  769. return false;
  770. #ifdef BOOST_REGEX_MSVC
  771. #pragma warning(pop)
  772. #endif
  773. }
  774. template <class BidiIterator, class Allocator, class traits>
  775. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  776. {
  777. #ifdef BOOST_REGEX_MSVC
  778. #pragma warning(push)
  779. #pragma warning(disable:4127)
  780. #endif
  781. // do search optimised for word starts:
  782. const unsigned char* _map = re.get_map();
  783. if((m_match_flags & match_prev_avail) || (position != base))
  784. --position;
  785. else if(match_prefix())
  786. return true;
  787. do
  788. {
  789. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  790. ++position;
  791. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  792. ++position;
  793. if(position == last)
  794. break;
  795. if(can_start(*position, _map, (unsigned char)mask_any) )
  796. {
  797. if(match_prefix())
  798. return true;
  799. }
  800. if(position == last)
  801. break;
  802. } while(true);
  803. return false;
  804. #ifdef BOOST_REGEX_MSVC
  805. #pragma warning(pop)
  806. #endif
  807. }
  808. template <class BidiIterator, class Allocator, class traits>
  809. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  810. {
  811. // do search optimised for line starts:
  812. const unsigned char* _map = re.get_map();
  813. if(match_prefix())
  814. return true;
  815. while(position != last)
  816. {
  817. while((position != last) && !is_separator(*position))
  818. ++position;
  819. if(position == last)
  820. return false;
  821. ++position;
  822. if(position == last)
  823. {
  824. if(re.can_be_null() && match_prefix())
  825. return true;
  826. return false;
  827. }
  828. if( can_start(*position, _map, (unsigned char)mask_any) )
  829. {
  830. if(match_prefix())
  831. return true;
  832. }
  833. if(position == last)
  834. return false;
  835. //++position;
  836. }
  837. return false;
  838. }
  839. template <class BidiIterator, class Allocator, class traits>
  840. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  841. {
  842. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  843. return match_prefix();
  844. return false;
  845. }
  846. template <class BidiIterator, class Allocator, class traits>
  847. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  848. {
  849. return false;
  850. }
  851. } // namespace BOOST_REGEX_DETAIL_NS
  852. } // namespace boost
  853. #ifdef BOOST_REGEX_MSVC
  854. # pragma warning(pop)
  855. #endif
  856. #endif