// File: LaTeX.g4 (5.7 KB)
  /*
   ANTLR4 LaTeX Math Grammar

   Ported from latex2sympy by @augustt198 (https://github.com/augustt198/latex2sympy).
   See license in LICENSE.txt.
   */
  /*
   After changing this file, you must run `python setup.py antlr` in the root directory of
   the repository. This regenerates the code in `sympy/parsing/latex/_antlr/*.py`.
   */
  // Combined lexer/parser grammar named LaTeX; code is generated for the Python3 target.
  grammar LaTeX;

  options { language = Python3; }
  // ---- Skipped input -------------------------------------------------------
  // Every rule in this group ends in `-> skip`, so its matches are discarded by
  // the lexer and never reach the parser.

  // Plain whitespace.
  WS            : [ \t\r\n]+ -> skip ;

  // LaTeX spacing commands (short and long spellings).
  THINSPACE     : ( '\\,' | '\\thinspace' ) -> skip ;
  MEDSPACE      : ( '\\:' | '\\medspace' ) -> skip ;
  THICKSPACE    : ( '\\;' | '\\thickspace' ) -> skip ;
  QUAD          : '\\quad' -> skip ;
  QQUAD         : '\\qquad' -> skip ;
  NEGTHINSPACE  : ( '\\!' | '\\negthinspace' ) -> skip ;
  NEGMEDSPACE   : '\\negmedspace' -> skip ;
  NEGTHICKSPACE : '\\negthickspace' -> skip ;

  // Bare \left / \right prefixes are dropped. The longer literals '\\left|' and
  // '\\right|' (L_BAR / R_BAR) still win by longest match when a bar follows.
  CMD_LEFT      : '\\left' -> skip ;
  CMD_RIGHT     : '\\right' -> skip ;
  // Miscellaneous TeX commands that are matched and then discarded (`-> skip`).
  // The parentheses are required so that the skip command applies to every
  // alternative rather than only the last one.
  // NOTE(review): '\\(' is skipped but there is no matching '\\)' alternative —
  // confirm whether that asymmetry is intentional.
  IGNORE
      : ( '\\vrule'
        | '\\vcenter'
        | '\\vbox'
        | '\\vskip'
        | '\\vspace'
        | '\\hfil'
        | '\\*'
        | '\\-'
        | '\\.'
        | '\\/'
        | '\\"'
        | '\\('
        | '\\='
        ) -> skip
      ;
  // ---- Arithmetic operator characters --------------------------------------
  ADD             : '+' ;
  SUB             : '-' ;
  MUL             : '*' ;
  DIV             : '/' ;

  // ---- Grouping delimiters -------------------------------------------------
  L_PAREN         : '(' ;
  R_PAREN         : ')' ;
  L_BRACE         : '{' ;
  R_BRACE         : '}' ;
  // Escaped braces, i.e. the two-character sequences \{ and \}.
  L_BRACE_LITERAL : '\\{' ;
  R_BRACE_LITERAL : '\\}' ;
  L_BRACKET       : '[' ;
  R_BRACKET       : ']' ;

  // ---- Bars and angle brackets ---------------------------------------------
  BAR             : '|' ;
  // \right| and \left| are single tokens; being longer than the skipped
  // '\\right' / '\\left' literals, they take precedence by longest match.
  R_BAR           : '\\right|' ;
  L_BAR           : '\\left|' ;
  L_ANGLE         : '\\langle' ;
  R_ANGLE         : '\\rangle' ;
  // Limit operator and the arrow spellings accepted between the limit variable
  // and the value it approaches (see `limit_sub`).
  FUNC_LIM : '\\lim' ;

  LIM_APPROACH_SYM
      : '\\to'
      | '\\rightarrow'
      | '\\Rightarrow'
      | '\\longrightarrow'
      | '\\Longrightarrow'
      ;
  // ---- Big operators -------------------------------------------------------
  // \int, optionally written with a trailing \limits, is one token either way.
  FUNC_INT
      : '\\int'
      | '\\int\\limits'
      ;
  FUNC_SUM    : '\\sum' ;
  FUNC_PROD   : '\\prod' ;

  // ---- Exponential and logarithms ------------------------------------------
  FUNC_EXP    : '\\exp' ;
  FUNC_LOG    : '\\log' ;
  FUNC_LG     : '\\lg' ;
  FUNC_LN     : '\\ln' ;

  // ---- Trigonometric functions ---------------------------------------------
  FUNC_SIN    : '\\sin' ;
  FUNC_COS    : '\\cos' ;
  FUNC_TAN    : '\\tan' ;
  FUNC_CSC    : '\\csc' ;
  FUNC_SEC    : '\\sec' ;
  FUNC_COT    : '\\cot' ;

  // ---- Inverse trigonometric functions -------------------------------------
  FUNC_ARCSIN : '\\arcsin' ;
  FUNC_ARCCOS : '\\arccos' ;
  FUNC_ARCTAN : '\\arctan' ;
  FUNC_ARCCSC : '\\arccsc' ;
  FUNC_ARCSEC : '\\arcsec' ;
  FUNC_ARCCOT : '\\arccot' ;

  // ---- Hyperbolic functions and their inverses -----------------------------
  FUNC_SINH   : '\\sinh' ;
  FUNC_COSH   : '\\cosh' ;
  FUNC_TANH   : '\\tanh' ;
  FUNC_ARSINH : '\\arsinh' ;
  FUNC_ARCOSH : '\\arcosh' ;
  FUNC_ARTANH : '\\artanh' ;
  // ---- Floor / ceiling delimiters ------------------------------------------
  L_FLOOR       : '\\lfloor' ;
  R_FLOOR       : '\\rfloor' ;
  L_CEIL        : '\\lceil' ;
  R_CEIL        : '\\rceil' ;

  // ---- Commands that take braced arguments ---------------------------------
  FUNC_SQRT     : '\\sqrt' ;
  FUNC_OVERLINE : '\\overline' ;

  // ---- Multiplication / division commands ----------------------------------
  CMD_TIMES     : '\\times' ;
  CMD_CDOT      : '\\cdot' ;
  CMD_DIV       : '\\div' ;

  // All three fraction spellings share one token type.
  CMD_FRAC
      : '\\frac'
      | '\\dfrac'
      | '\\tfrac'
      ;

  // The binomial spellings are kept as separate token types; the `binom`
  // parser rule accepts any of them.
  CMD_BINOM     : '\\binom' ;
  CMD_DBINOM    : '\\dbinom' ;
  CMD_TBINOM    : '\\tbinom' ;

  CMD_MATHIT    : '\\mathit' ;
  // ---- Script and ratio characters -----------------------------------------
  UNDERSCORE : '_' ;
  CARET      : '^' ;
  COLON      : ':' ;

  // Helper for use inside other lexer rules only; a fragment never becomes a token.
  fragment WS_CHAR : [ \t\r\n] ;

  // A 'd', optional whitespace, then either one letter or a backslash command
  // name (e.g. "dx", "d x", "d\theta"). Declared before LETTER and matching more
  // characters, so such input lexes as a single DIFFERENTIAL token.
  DIFFERENTIAL
      : 'd' WS_CHAR*? ( [a-zA-Z] | '\\' [a-zA-Z]+ )
      ;

  // Single-character atoms; multi-digit numbers are assembled by the `number` parser rule.
  LETTER     : [a-zA-Z] ;
  DIGIT      : [0-9] ;

  // '=' with an optional '&' on one side, optionally separated by whitespace:
  // "=", "&=", "& =", "=&", "= &".
  EQUAL
      : ( ( '&' WS_CHAR*? )? '=' )
      | ( '=' ( WS_CHAR*? '&' )? )
      ;
  // ---- Relational operators ------------------------------------------------
  NEQ   : '\\neq' ;

  LT    : '<' ;
  // LTE also covers the text of LTE_Q and LTE_S. Because LTE is declared first,
  // an equal-length match is emitted as LTE rather than LTE_Q / LTE_S.
  LTE   : '\\leq' | '\\le' | LTE_Q | LTE_S ;
  LTE_Q : '\\leqq' ;
  LTE_S : '\\leqslant' ;

  GT    : '>' ;
  // Same arrangement as LTE: GTE is declared ahead of GTE_Q / GTE_S.
  GTE   : '\\geq' | '\\ge' | GTE_Q | GTE_S ;
  GTE_Q : '\\geqq' ;
  GTE_S : '\\geqslant' ;

  // ---- Postfix marks -------------------------------------------------------
  BANG          : '!' ;
  // One or more consecutive apostrophes form a single token.
  SINGLE_QUOTES : '\''+ ;

  // Generic backslash command: a backslash followed by letters. Declared after
  // the specific command tokens, so those win whenever the matched text is identical.
  SYMBOL
      : '\\' [a-zA-Z]+
      ;
  // ==== Parser rules ========================================================

  // Top-level rule: a single relation. No other rule in this grammar refers to it.
  math
      : relation
      ;

  // Two relations joined by a comparison operator (left-recursive), or a bare expression.
  relation
      : relation (EQUAL | LT | LTE | GT | GTE | NEQ) relation
      | expr
      ;

  // Exactly one '=' between two expressions; used where only an equation is
  // allowed (e.g. inside `subeq` and the `eval_at_*` rules).
  equality
      : expr EQUAL expr
      ;

  expr
      : additive
      ;

  // Sums and differences (left-recursive) over multiplicative parts.
  additive
      : additive (ADD | SUB) additive
      | mp
      ;
  // Multiplicative part: operands joined by any multiplication or division
  // operator, including ':' used as a ratio.
  mp
      : mp (MUL | CMD_TIMES | CMD_CDOT | DIV | CMD_DIV | COLON) mp
      | unary
      ;

  // Same shape as `mp`, but built on `unary_nofunc`. Used for function arguments
  // written without parentheses (see `func_arg_noparens`).
  mp_nofunc
      : mp_nofunc (MUL | CMD_TIMES | CMD_CDOT | DIV | CMD_DIV | COLON) mp_nofunc
      | unary_nofunc
      ;
  // Optional chain of leading signs, then one or more juxtaposed postfix terms.
  unary
      : (ADD | SUB) unary
      | postfix+
      ;

  // As `unary`, except that only the first juxtaposed term may be a full
  // `postfix`; the following terms must be `postfix_nofunc`.
  unary_nofunc
      : (ADD | SUB) unary_nofunc
      | postfix postfix_nofunc*
      ;

  // An exponentiation followed by any number of postfix operators.
  postfix
      : exp postfix_op*
      ;

  postfix_nofunc
      : exp_nofunc postfix_op*
      ;
  // Postfix operator: a '!' or an "evaluated at" bar.
  postfix_op
      : BANG
      | eval_at
      ;

  // A bar followed by a superscript, a subscript, or a superscript then a subscript.
  eval_at
      : BAR (eval_at_sup | eval_at_sub | eval_at_sup eval_at_sub)
      ;

  // Braced subscript of an evaluation bar: an expression or an equality.
  eval_at_sub
      : UNDERSCORE L_BRACE (expr | equality) R_BRACE
      ;

  // Braced superscript of an evaluation bar: an expression or an equality.
  eval_at_sup
      : CARET L_BRACE (expr | equality) R_BRACE
      ;
  // Exponentiation (left-recursive). The exponent is a single atom or a braced
  // expression, and may be followed by a subscript.
  exp
      : exp CARET (atom | L_BRACE expr R_BRACE) subexpr?
      | comp
      ;

  // Same as `exp`, bottoming out at `comp_nofunc` instead of `comp`.
  exp_nofunc
      : exp_nofunc CARET (atom | L_BRACE expr R_BRACE) subexpr?
      | comp_nofunc
      ;
  // Primary component of an expression.
  comp
      : group
      | abs_group
      | func
      | atom
      | floor
      | ceil
      ;

  // `comp` with the `func` alternative removed.
  comp_nofunc
      : group
      | abs_group
      | atom
      | floor
      | ceil
      ;
  // An expression enclosed in (), [], {} or the escaped braces \{ \}.
  group
      : L_PAREN expr R_PAREN
      | L_BRACKET expr R_BRACKET
      | L_BRACE expr R_BRACE
      | L_BRACE_LITERAL expr R_BRACE_LITERAL
      ;

  // An expression between two plain bars.
  abs_group
      : BAR expr BAR
      ;
  // Digits, optional comma-separated groups of exactly three digits, and an
  // optional fractional part: 12, 1,234, 3.14, 1,234.5
  number
      : DIGIT+ (',' DIGIT DIGIT DIGIT)* ('.' DIGIT+)?
      ;

  // Smallest operands. A letter or generic command may carry a subscript and
  // primes in either order; the other alternatives delegate to dedicated rules.
  atom
      : (LETTER | SYMBOL) (subexpr? SINGLE_QUOTES? | SINGLE_QUOTES? subexpr?)
      | number
      | DIFFERENTIAL
      | mathit
      | frac
      | binom
      | bra
      | ket
      ;
  // \langle expr closed by a bar (either '\right|' or a plain '|').
  bra
      : L_ANGLE expr (R_BAR | BAR)
      ;

  // A bar (either '\left|' or a plain '|') followed by expr \rangle.
  ket
      : (L_BAR | BAR) expr R_ANGLE
      ;

  // \mathit{...} whose braces contain only letters (possibly none).
  mathit
      : CMD_MATHIT L_BRACE mathit_text R_BRACE
      ;

  mathit_text
      : LETTER*
      ;
  // Fraction command followed by numerator and denominator. Each part is either
  // a single bare digit (labels `upperd` / `lowerd`) or a braced expression
  // (labels `upper` / `lower`).
  frac
      : CMD_FRAC
        (upperd=DIGIT | L_BRACE upper=expr R_BRACE)
        (lowerd=DIGIT | L_BRACE lower=expr R_BRACE)
      ;

  // Any binomial command followed by two braced expressions, labelled `n` and `k`.
  binom
      : (CMD_BINOM | CMD_DBINOM | CMD_TBINOM)
        L_BRACE n=expr R_BRACE
        L_BRACE k=expr R_BRACE
      ;

  // \lfloor expr \rfloor, with the inner expression labelled `val`.
  floor
      : L_FLOOR val=expr R_FLOOR
      ;

  // \lceil expr \rceil, with the inner expression labelled `val`.
  ceil
      : L_CEIL val=expr R_CEIL
      ;
  // Named functions that share the generic call syntax of the first `func`
  // alternative (optional sub/superscript, then an argument).
  func_normal
      : FUNC_EXP    | FUNC_LOG    | FUNC_LG     | FUNC_LN
      | FUNC_SIN    | FUNC_COS    | FUNC_TAN
      | FUNC_CSC    | FUNC_SEC    | FUNC_COT
      | FUNC_ARCSIN | FUNC_ARCCOS | FUNC_ARCTAN
      | FUNC_ARCCSC | FUNC_ARCSEC | FUNC_ARCCOT
      | FUNC_SINH   | FUNC_COSH   | FUNC_TANH
      | FUNC_ARSINH | FUNC_ARCOSH | FUNC_ARTANH
      ;
  // Function-like constructs, one alternative per syntactic form.
  func
      // Named function with optional subscript/superscript in either order, then
      // a parenthesised argument list or an unparenthesised argument.
      : func_normal (subexpr? supexpr? | supexpr? subexpr?)
        ( L_PAREN func_arg R_PAREN
        | func_arg_noparens
        )

      // Letter or generic command applied to parenthesised arguments,
      // e.g. f(x), f_1'(x)
      | (LETTER | SYMBOL) (subexpr? SINGLE_QUOTES? | SINGLE_QUOTES? subexpr?)
        L_PAREN args R_PAREN

      // Integral: both bounds or neither, then an integrand ending in a
      // differential, a fraction, or a plain additive expression.
      | FUNC_INT (subexpr supexpr | supexpr subexpr)?
        ( additive? DIFFERENTIAL
        | frac
        | additive
        )

      // Square root with an optional bracketed index (`root`) and a braced radicand (`base`).
      | FUNC_SQRT (L_BRACKET root=expr R_BRACKET)? L_BRACE base=expr R_BRACE

      // \overline over a braced expression (`base`).
      | FUNC_OVERLINE L_BRACE base=expr R_BRACE

      // Sum or product: an equality subscript and a superscript in either order,
      // followed by the operand.
      | (FUNC_SUM | FUNC_PROD) (subeq supexpr | supexpr subeq) mp

      // Limit: the "variable -> value" subscript, followed by the operand.
      | FUNC_LIM limit_sub mp
      ;
  // Comma-separated expressions (right-recursive); at least one is required.
  args
      : expr ',' args
      | expr
      ;

  // Subscript of \lim: _{ variable arrow expr }, where the expression may be
  // followed by a direction marker written as ^+, ^-, ^{+} or ^{-}.
  limit_sub
      : UNDERSCORE L_BRACE
        (LETTER | SYMBOL) LIM_APPROACH_SYM expr
        ( CARET (L_BRACE (ADD | SUB) R_BRACE | ADD | SUB) )?
        R_BRACE
      ;

  // Argument list of a named function: one expression, or several separated by commas.
  func_arg
      : expr
      | expr ',' func_arg
      ;

  // Argument of a named function written without parentheses.
  func_arg_noparens
      : mp_nofunc
      ;
  // Subscript: '_' followed by a single atom or a braced expression.
  subexpr
      : UNDERSCORE (atom | L_BRACE expr R_BRACE)
      ;

  // Superscript: '^' followed by a single atom or a braced expression.
  supexpr
      : CARET (atom | L_BRACE expr R_BRACE)
      ;

  // Subscript whose braces must contain an equality, e.g. _{i = 1}.
  subeq
      : UNDERSCORE L_BRACE equality R_BRACE
      ;
  // Superscript whose braces must contain an equality, e.g. ^{i = 1}.
  // Fix: this rule previously began with UNDERSCORE, which made it a
  // token-for-token duplicate of `subeq`. A superscript is introduced by CARET,
  // as in `supexpr` and `eval_at_sup`. No other rule in this grammar references
  // `supeq`, so existing parses are unaffected.
  supeq
      : CARET L_BRACE equality R_BRACE
      ;