Math.cuh 120 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375
  1. #pragma once
  2. #include <ATen/AccumulateType.h>
  3. #include <ATen/jit_macros.h>
  4. #include <c10/macros/Macros.h>
  5. #include <ATen/native/cuda/jit_utils.h>
  6. namespace at {
  7. namespace native {
  8. // See note [Jiterator]
  9. // TODO: elaborate in this comment on the structure of math.cuh
  10. #if AT_USE_JITERATOR()
  11. const auto ndtri_string = jiterator_stringify(
  12. /*
  13. * This function is derived from the implementation of the digamma function in the Cephes Math Library.
  14. * See note [3-Clause BSD License for the Cephes Math Library].
  15. *
  16. * Evaluates polynomial of degree N:
  17. *
  18. * 2 N
  19. * y = C + C x + C x +...+ C x
  20. * 0 1 2 N
  21. *
  22. * Coefficients are stored in reverse order:
  23. *
  24. * coef[0] = C , ..., coef[N] = C .
  25. * N 0
  26. */
  27. template <typename T>
  28. T polevl(const T x, const T A[], const int len) {
  29. // NOTE: This `polevl` is different from other `polevl`
  30. // implementation (in PyTorch) which expect the `len` to be
  31. // `len(A) - 1` instead of `len(A)`.
  32. T result = 0;
  33. for (int i = 0; i < len; ++i) {
  34. result = result * x + A[i];
  35. }
  36. return result;
  37. }
  38. /*
  39. * This function is derived from the implementation of the i1e function in the Cephes Math Library.
  40. * See note [3-Clause BSD License for the Cephes Math Library].
  41. *
  42. * Computes the argument, x, for which the area under the Gaussian probability density function
  43. * (integrated from minus infinity to x) is equal to y.
  44. */
  45. template <typename T>
  46. T ndtri(T y0) {
  47. constexpr T zero = 0;
  48. constexpr T one = 1;
  49. // Handles special cases
  50. if (y0 == zero) {
  51. return NEG_INFINITY;
  52. }
  53. if (y0 == one) {
  54. return POS_INFINITY;
  55. }
  56. if (y0 < zero || y0 > one) {
  57. return NAN;
  58. }
  59. bool code = true;
  60. T y = y0;
  61. // Note: the constant 0.135... is equal to exp(-2)
  62. if (y > one - T{0.13533528323661269189}) {
  63. y = one - y;
  64. code = false;
  65. }
  66. if (y > T{0.13533528323661269189}) {
  67. /* approximation for 0 <= |y - 0.5| <= 3/8 */
  68. static const T P0[5] = {
  69. -5.99633501014107895267E1,
  70. 9.80010754185999661536E1,
  71. -5.66762857469070293439E1,
  72. 1.39312609387279679503E1,
  73. -1.23916583867381258016E0,
  74. };
  75. static const T Q0[9] = {
  76. 1.00000000000000000000E0,
  77. 1.95448858338141759834E0,
  78. 4.67627912898881538453E0,
  79. 8.63602421390890590575E1,
  80. -2.25462687854119370527E2,
  81. 2.00260212380060660359E2,
  82. -8.20372256168333339912E1,
  83. 1.59056225126211695515E1,
  84. -1.18331621121330003142E0,
  85. };
  86. /* sqrt(2pi) */
  87. constexpr T s2pi = 2.50662827463100050242E0;
  88. y = y - T{0.5};
  89. const T y2 = y * y;
  90. T x = y + y * (y2 * polevl(y2, P0, int{5}) / polevl(y2, Q0, int{9}));
  91. return x * s2pi;
  92. }
  93. T x = sqrt(T{-2.} * log(y));
  94. const T x0 = x - (log(x) / x);
  95. const T z = one / x;
  96. T x1;
  97. /* y > exp(-32) = 1.2664165549e-14 */
  98. if (x < T{8.0}) {
  99. /* Approximation for interval z = sqrt(-2 log y ) between 2 and 8
  100. * i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.
  101. */
  102. static const T P1[9] = {
  103. 4.05544892305962419923E0,
  104. 3.15251094599893866154E1,
  105. 5.71628192246421288162E1,
  106. 4.40805073893200834700E1,
  107. 1.46849561928858024014E1,
  108. 2.18663306850790267539E0,
  109. -1.40256079171354495875E-1,
  110. -3.50424626827848203418E-2,
  111. -8.57456785154685413611E-4,
  112. };
  113. static const T Q1[9] = {
  114. 1.00000000000000000000E0,
  115. 1.57799883256466749731E1,
  116. 4.53907635128879210584E1,
  117. 4.13172038254672030440E1,
  118. 1.50425385692907503408E1,
  119. 2.50464946208309415979E0,
  120. -1.42182922854787788574E-1,
  121. -3.80806407691578277194E-2,
  122. -9.33259480895457427372E-4,
  123. };
  124. x1 = z * polevl(z, P1, int{9}) / polevl(z, Q1, int{9});
  125. } else {
  126. /* Approximation for interval z = sqrt(-2 log y ) between 8 and 64
  127. * i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
  128. */
  129. static const T P2[9] = {
  130. 3.23774891776946035970E0,
  131. 6.91522889068984211695E0,
  132. 3.93881025292474443415E0,
  133. 1.33303460815807542389E0,
  134. 2.01485389549179081538E-1,
  135. 1.23716634817820021358E-2,
  136. 3.01581553508235416007E-4,
  137. 2.65806974686737550832E-6,
  138. 6.23974539184983293730E-9,
  139. };
  140. static const T Q2[9] = {
  141. 1.00000000000000000000E0,
  142. 6.02427039364742014255E0,
  143. 3.67983563856160859403E0,
  144. 1.37702099489081330271E0,
  145. 2.16236993594496635890E-1,
  146. 1.34204006088543189037E-2,
  147. 3.28014464682127739104E-4,
  148. 2.89247864745380683936E-6,
  149. 6.79019408009981274425E-9,
  150. };
  151. x1 = z * polevl(z, P2, int{9}) / polevl(z, Q2, int{9});
  152. }
  153. x = x0 - x1;
  154. return (!code) ? x : -x;
  155. }
  156. ); // ndtri_string
  157. const auto log_ndtr_string = jiterator_stringify(
  158. template <typename T>
  159. T log_ndtr(T x) {
  160. constexpr T SQRT1_2{0.707106781186547524400844362104849039}; // 1/sqrt(2)
  161. T t = x * SQRT1_2;
  162. if (x < T{-1.0}) {
  163. return log(erfcx(-t) / 2) - t * t;
  164. } else {
  165. return log1p(-erfc(t) / 2);
  166. }
  167. }
  168. ); // log_ndtr_string
  169. const auto gcd_string = jiterator_stringify(
  170. template <typename T>
  171. T gcd(const T a_in, const T b_in) {
  172. T a = abs(a_in);
  173. T b = abs(b_in);
  174. while (a != T{0}) {
  175. T c = a;
  176. a = b % a;
  177. b = c;
  178. }
  179. return b;
  180. }
  181. ); // gcd_string
  182. const auto lcm_string = jiterator_stringify(
  183. template <typename T>
  184. T gcd(const T a_in, const T b_in) {
  185. T a = abs(a_in);
  186. T b = abs(b_in);
  187. while (a != T{0}) {
  188. T c = a;
  189. a = b % a;
  190. b = c;
  191. }
  192. return b;
  193. }
  194. template <typename T>
  195. T lcm(const T a, const T b) {
  196. T g = gcd(a, b);
  197. return (g == T{0}) ? T{0} : abs(a / g * b);
  198. }
  199. ); // lcm_string
  200. /*
  201. * For licensing information, please refer to the the cpu implementation located in "ATen/native/Math.h".
  202. */
  203. // [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma
  204. const auto digamma_string = jiterator_stringify(
  205. template <typename T>
  206. T digamma(T x) {
  207. static const double PI_f64 = 3.14159265358979323846;
  208. // Short-circuits if x is +/- 0 and returns -/+ ∞ per the C++ standard
  209. if (x == 0) {
  210. return copysign(POS_INFINITY, -x);
  211. }
  212. T result = 0;
  213. if (x < 0) {
  214. // Short-circuits if x is a negative integer and returns NaN
  215. // per the C++ standard
  216. const bool x_is_integer = (x == trunc(x));
  217. if (x_is_integer) {
  218. return NAN;
  219. }
  220. // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
  221. // accurate than tan(pi * x). While these operations are mathematically equivalent
  222. // since both x and r are in radians and tan() has a periodicity of pi, in practice
  223. // the computation of pi * x is a source of error (when |x| > 1).
  224. double q, r;
  225. r = modf(static_cast<double>(x), &q);
  226. result = - PI_f64 / tan(PI_f64 * r);
  227. x = 1 - x;
  228. }
  229. while (x < T{10}) {
  230. result -= T{1} / x;
  231. x += T{1};
  232. }
  233. if (x == T{10}) {
  234. return result + T{2.25175258906672110764};
  235. }
  236. T y = 0;
  237. if (x < T{1.0e17}) {
  238. const T A[] = {
  239. 8.33333333333333333333E-2,
  240. -2.10927960927960927961E-2,
  241. 7.57575757575757575758E-3,
  242. -4.16666666666666666667E-3,
  243. 3.96825396825396825397E-3,
  244. -8.33333333333333333333E-3,
  245. 8.33333333333333333333E-2,
  246. };
  247. T z = T{1} / (x * x);
  248. T polevl_result = 0;
  249. for (int i = 0; i <= 6; i++) {
  250. polevl_result = polevl_result * z + A[i];
  251. }
  252. y = z * polevl_result;
  253. }
  254. return log(x) - (T{0.5} / x) - y + result;
  255. }
  256. ); // digamma_string
  257. /*
  258. * This function is derived from the implementation of the zeta function in the Cephes Math Library.
  259. * See note [3-Clause BSD License for the Cephes Math Library].
  260. */
  261. const auto zeta_string = jiterator_stringify(
  262. template <typename T>
  263. T zeta(T x, T q) {
  264. const T MACHEP{1.11022302462515654042E-16};
  265. constexpr T zero{0};
  266. constexpr T half{0.5};
  267. constexpr T one{1};
  268. static const T A[] = {
  269. 12.0,
  270. -720.0,
  271. 30240.0,
  272. -1209600.0,
  273. 47900160.0,
  274. -1.8924375803183791606e9, /*1.307674368e12/691*/
  275. 7.47242496e10,
  276. -2.950130727918164224e12, /*1.067062284288e16/3617*/
  277. 1.1646782814350067249e14, /*5.109094217170944e18/43867*/
  278. -4.5979787224074726105e15, /*8.028576626982912e20/174611*/
  279. 1.8152105401943546773e17, /*1.5511210043330985984e23/854513*/
  280. -7.1661652561756670113e18 /*1.6938241367317436694528e27/236364091*/
  281. };
  282. int i = 0;
  283. T a, b, k, s, t, w;
  284. // Short-circuits x -> +infty
  285. if (x == one) {
  286. return POS_INFINITY;
  287. }
  288. // Short-circuits x < 1 -> NaN
  289. if (x < one) {
  290. return NAN;
  291. }
  292. // Short-circuits negative q integers map to +infty,
  293. // negative q non-integers map to NaN
  294. if (q <= zero) {
  295. if (q == floor(q)) {
  296. return POS_INFINITY;
  297. }
  298. if (x != floor(x)) {
  299. return NAN;
  300. }
  301. }
  302. s = pow(q, -x);
  303. a = q;
  304. i = 0;
  305. b = zero;
  306. while ((i < 9) || (a <= T{9.0})) {
  307. i += 1;
  308. a += one;
  309. b = pow(a, -x);
  310. s += b;
  311. if ((-MACHEP * s < b) && (b < MACHEP * s)) {
  312. return s;
  313. }
  314. };
  315. w = a;
  316. s += b * w / (x - one);
  317. s -= half * b;
  318. a = one;
  319. k = zero;
  320. for (int i = 0; i < 12; i++) {
  321. a *= x + k;
  322. b /= w;
  323. t = a * b / A[i];
  324. s = s + t;
  325. t = fabs(t / s);
  326. if (t < MACHEP) {
  327. return s;
  328. }
  329. k += one;
  330. a *= x + k;
  331. b /= w;
  332. k += one;
  333. }
  334. return s;
  335. }
  336. ); // zeta_string
  337. const auto trigamma_string = jiterator_stringify(
  338. template <typename T>
  339. T trigamma(T x) {
  340. const T PI{3.14159265358979323846};
  341. T sign = 1;
  342. T result = 0;
  343. if (x < T{0.5}) {
  344. sign = -1;
  345. T sin_pi_x = sin(PI * x);
  346. result -= (PI * PI) / (sin_pi_x * sin_pi_x);
  347. x = 1 - x;
  348. }
  349. for (int i = 0; i < 6; ++i) {
  350. result += T{1} / (x * x);
  351. x += 1;
  352. }
  353. const T one{1};
  354. const T ixx = one / (x*x);
  355. result += (one + one / (T{2}*x) + ixx * (one/T{6} - ixx * (one/T{30} - ixx * (one/T{42})))) / x;
  356. return sign * result;
  357. }
  358. ); // trigamma_string
  359. const auto lgamma_string = jiterator_stringify(
  360. template <typename T>
  361. T lgamma_kernel(T a) {
  362. return lgamma(a);
  363. }
  364. ); // lgamma_string
  365. const auto polygamma_string = zeta_string + jiterator_stringify(
  366. template <typename T>
  367. T polygamma(T x, int n) {
  368. // already blocked if n <= 1
  369. const auto one = T{1};
  370. return ((n % 2) ? one : -one) * exp(lgamma(static_cast<T>(n) + one)) *
  371. zeta<T>(static_cast<T>(n + 1), x);
  372. }
  373. ); // polygamma_string
  374. const auto exp2_string = jiterator_stringify(
  375. template <typename T>
  376. T exp2_impl(T a) {
  377. return exp2(a);
  378. }
  379. namespace std { template <typename _Ty> class complex; }
  380. template <typename T>
  381. std::complex<T> exp2_impl(std::complex<T> x) {
  382. // There is no std::exp2 overload for complex, so instead
  383. // use the identity 2^x = e^(ln(2) * x)
  384. const auto ln_2 = static_cast<T>(0.693147180559945309417232121458176);
  385. return exp(ln_2 * x);
  386. }
  387. template <typename T>
  388. T exp2_kernel(T a) {
  389. return exp2_impl(a);
  390. }
  391. ); // exp2_string
  392. const auto erfc_string = jiterator_stringify(
  393. template <typename T>
  394. T erfc_kernel(T a) {
  395. return erfc(a);
  396. }
  397. ); // erfc_string
  398. const auto erfinv_string = jiterator_stringify(
  399. template <typename T>
  400. T erfinv_kernel(T a) {
  401. return erfinv(a);
  402. }
  403. ); // erfinv_string
  404. const auto entr_string = jiterator_stringify(
  405. template <typename T>
  406. T entr(T a) {
  407. if (a != a) {
  408. return a;
  409. }
  410. if (a > 0) {
  411. return -a * log(a);
  412. }
  413. if (a == 0) {
  414. return 0;
  415. }
  416. return NEG_INFINITY;
  417. }
  418. ); // entr_string
  419. // NOTE: `kaiser_window_string` depends on `i0_string`
  420. // for its implementation.
  421. const auto i0_string = jiterator_stringify(
  422. template<typename T>
  423. T chbevl(T x, const T array[], const int len) {
  424. T b0, b1, b2;
  425. b0 = array[0];
  426. b1 = 0;
  427. for (int i = 1; i < len; ++i) {
  428. b2 = b1;
  429. b1 = b0;
  430. b0 = x * b1 - b2 + array[i];
  431. }
  432. return T{0.5} * (b0 - b2);
  433. }
  434. template<typename T>
  435. T i0(T _x) {
  436. T x = fabs(_x);
  437. if (x <= T{8.0}) {
  438. /* Chebyshev coefficients for exp(-x) I0(x)
  439. * in the interval [0,8].
  440. *
  441. * lim(x->0){ exp(-x) I0(x) } = 1.
  442. */
  443. static const T A[] = {
  444. -4.41534164647933937950E-18, 3.33079451882223809783E-17,
  445. -2.43127984654795469359E-16, 1.71539128555513303061E-15,
  446. -1.16853328779934516808E-14, 7.67618549860493561688E-14,
  447. -4.85644678311192946090E-13, 2.95505266312963983461E-12,
  448. -1.72682629144155570723E-11, 9.67580903537323691224E-11,
  449. -5.18979560163526290666E-10, 2.65982372468238665035E-9,
  450. -1.30002500998624804212E-8, 6.04699502254191894932E-8,
  451. -2.67079385394061173391E-7, 1.11738753912010371815E-6,
  452. -4.41673835845875056359E-6, 1.64484480707288970893E-5,
  453. -5.75419501008210370398E-5, 1.88502885095841655729E-4,
  454. -5.76375574538582365885E-4, 1.63947561694133579842E-3,
  455. -4.32430999505057594430E-3, 1.05464603945949983183E-2,
  456. -2.37374148058994688156E-2, 4.93052842396707084878E-2,
  457. -9.49010970480476444210E-2, 1.71620901522208775349E-1,
  458. -3.04682672343198398683E-1, 6.76795274409476084995E-1};
  459. T y = (x / T{2.0}) - T{2.0};
  460. return exp(x) * chbevl(y, A, int{30});
  461. }
  462. // Handles x > 8 case
  463. /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x)
  464. * in the inverted interval [8,infinity].
  465. *
  466. * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi).
  467. */
  468. const T B[] = {
  469. -7.23318048787475395456E-18, -4.83050448594418207126E-18,
  470. 4.46562142029675999901E-17, 3.46122286769746109310E-17,
  471. -2.82762398051658348494E-16, -3.42548561967721913462E-16,
  472. 1.77256013305652638360E-15, 3.81168066935262242075E-15,
  473. -9.55484669882830764870E-15, -4.15056934728722208663E-14,
  474. 1.54008621752140982691E-14, 3.85277838274214270114E-13,
  475. 7.18012445138366623367E-13, -1.79417853150680611778E-12,
  476. -1.32158118404477131188E-11, -3.14991652796324136454E-11,
  477. 1.18891471078464383424E-11, 4.94060238822496958910E-10,
  478. 3.39623202570838634515E-9, 2.26666899049817806459E-8,
  479. 2.04891858946906374183E-7, 2.89137052083475648297E-6,
  480. 6.88975834691682398426E-5, 3.36911647825569408990E-3,
  481. 8.04490411014108831608E-1};
  482. return (exp(x) * chbevl(T{32.0} / x - T{2.0}, B, int{25})) / sqrt(x);
  483. }
  484. ); // i0_string
  485. const auto i1_string = jiterator_stringify(
  486. template<typename T>
  487. T chbevl(const T x, const T array[], const int len) {
  488. T b0, b1, b2;
  489. b0 = array[0];
  490. b1 = 0;
  491. for (int i = 1; i < len; ++i) {
  492. b2 = b1;
  493. b1 = b0;
  494. b0 = x * b1 - b2 + array[i];
  495. }
  496. return T{0.5} * (b0 - b2);
  497. }
  498. template <typename T>
  499. T i1(T _x) {
  500. const T x = fabs(_x);
  501. if (x <= T{8.0}) {
  502. // Chebyshev coefficients for exp(-x) i1(x) in the internal [0, 8]
  503. // lim(x->0){ exp(-x) i1(x) / x } = 1/2
  504. static const T coefficients[] = {
  505. 2.77791411276104639959E-18, -2.11142121435816608115E-17,
  506. 1.55363195773620046921E-16, -1.10559694773538630805E-15,
  507. 7.60068429473540693410E-15, -5.04218550472791168711E-14,
  508. 3.22379336594557470981E-13, -1.98397439776494371520E-12,
  509. 1.17361862988909016308E-11, -6.66348972350202774223E-11,
  510. 3.62559028155211703701E-10, -1.88724975172282928790E-9,
  511. 9.38153738649577178388E-9, -4.44505912879632808065E-8,
  512. 2.00329475355213526229E-7, -8.56872026469545474066E-7,
  513. 3.47025130813767847674E-6, -1.32731636560394358279E-5,
  514. 4.78156510755005422638E-5, -1.61760815825896745588E-4,
  515. 5.12285956168575772895E-4, -1.51357245063125314899E-3,
  516. 4.15642294431288815669E-3, -1.05640848946261981558E-2,
  517. 2.47264490306265168283E-2, -5.29459812080949914269E-2,
  518. 1.02643658689847095384E-1, -1.76416518357834055153E-1,
  519. 2.52587186443633654823E-1};
  520. const T y = x / T{2.0} - T{2.0};
  521. const T out = exp(x) * x * chbevl(y, coefficients, int{29});
  522. return (_x < T{0.0}) ? -out : out;
  523. }
  524. // Chebyshev coefficients for exp(-x) sqrt(x) i1(x)
  525. // in the inverted interval [8, infinity]
  526. // lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi)
  527. static const T coefficients[] = {
  528. 7.51729631084210481353E-18, 4.41434832307170791151E-18,
  529. -4.65030536848935832153E-17, -3.20952592199342395980E-17,
  530. 2.96262899764595013876E-16, 3.30820231092092828324E-16,
  531. -1.88035477551078244854E-15, -3.81440307243700780478E-15,
  532. 1.04202769841288027642E-14, 4.27244001671195135429E-14,
  533. -2.10154184277266431302E-14, -4.08355111109219731823E-13,
  534. -7.19855177624590851209E-13, 2.03562854414708950722E-12,
  535. 1.41258074366137813316E-11, 3.25260358301548823856E-11,
  536. -1.89749581235054123450E-11, -5.58974346219658380687E-10,
  537. -3.83538038596423702205E-9, -2.63146884688951950684E-8,
  538. -2.51223623787020892529E-7, -3.88256480887769039346E-6,
  539. -1.10588938762623716291E-4, -9.76109749136146840777E-3,
  540. 7.78576235018280120474E-1};
  541. const T out = (exp(x) * chbevl(T{32.} / x - T{2.}, coefficients, int{25})) / sqrt(x);
  542. return (_x < T{0.}) ? -out : out;
  543. }
  544. ); // i1_string
  545. const auto i1e_string = jiterator_stringify(
  546. template<typename T>
  547. T chbevl(const T x, const T array[], const int len) {
  548. T b0, b1, b2;
  549. b0 = array[0];
  550. b1 = 0;
  551. for (int i = 1; i < len; ++i) {
  552. b2 = b1;
  553. b1 = b0;
  554. b0 = x * b1 - b2 + array[i];
  555. }
  556. return T{0.5} * (b0 - b2);
  557. }
  558. // See double and float instantiations below
  559. template <typename T>
  560. T i1e(T _x) { }
  561. // Double specialization (uses different coefficients than the float version)
  562. template<>
  563. double i1e(double _x) {
  564. const double x = fabs(_x);
  565. if (x <= double{8.}) {
  566. // Chebyshev double coefficients for exp(-x) i1(x) in the interval [0,8].
  567. // Note: lim(x->0){ exp(-x) i1(x) / x } = 1/2.
  568. static const double coefficients[] = {
  569. 2.77791411276104639959E-18, -2.11142121435816608115E-17,
  570. 1.55363195773620046921E-16, -1.10559694773538630805E-15,
  571. 7.60068429473540693410E-15, -5.04218550472791168711E-14,
  572. 3.22379336594557470981E-13, -1.98397439776494371520E-12,
  573. 1.17361862988909016308E-11, -6.66348972350202774223E-11,
  574. 3.62559028155211703701E-10, -1.88724975172282928790E-9,
  575. 9.38153738649577178388E-9, -4.44505912879632808065E-8,
  576. 2.00329475355213526229E-7, -8.56872026469545474066E-7,
  577. 3.47025130813767847674E-6, -1.32731636560394358279E-5,
  578. 4.78156510755005422638E-5, -1.61760815825896745588E-4,
  579. 5.12285956168575772895E-4, -1.51357245063125314899E-3,
  580. 4.15642294431288815669E-3, -1.05640848946261981558E-2,
  581. 2.47264490306265168283E-2, -5.29459812080949914269E-2,
  582. 1.02643658689847095384E-1, -1.76416518357834055153E-1,
  583. 2.52587186443633654823E-1};
  584. const double y = x / double{2.} - double{2.};
  585. const double out = chbevl(y, coefficients, int{29}) * x;
  586. return (_x < 0.) ? -out : out;
  587. }
  588. // Chebyshev coefficients for exp(-x) sqrt(x) i1(x)
  589. // in the inverted interval (8, infinity].
  590. // Note: lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi).
  591. // TODO: what's an "inverted interval"? Open on the left
  592. // and closed on the right?
  593. static const double coefficients[] = {
  594. 7.51729631084210481353E-18, 4.41434832307170791151E-18,
  595. -4.65030536848935832153E-17, -3.20952592199342395980E-17,
  596. 2.96262899764595013876E-16, 3.30820231092092828324E-16,
  597. -1.88035477551078244854E-15, -3.81440307243700780478E-15,
  598. 1.04202769841288027642E-14, 4.27244001671195135429E-14,
  599. -2.10154184277266431302E-14, -4.08355111109219731823E-13,
  600. -7.19855177624590851209E-13, 2.03562854414708950722E-12,
  601. 1.41258074366137813316E-11, 3.25260358301548823856E-11,
  602. -1.89749581235054123450E-11, -5.58974346219658380687E-10,
  603. -3.83538038596423702205E-9, -2.63146884688951950684E-8,
  604. -2.51223623787020892529E-7, -3.88256480887769039346E-6,
  605. -1.10588938762623716291E-4, -9.76109749136146840777E-3,
  606. 7.78576235018280120474E-1};
  607. const double out = chbevl(double{32.} / x - double{2.}, coefficients, int{25}) / sqrt(x);
  608. return (_x < double{0.}) ? -out : out;
  609. }
  610. // Float specialization (uses different coefficients than the double version)
  611. template<>
  612. float i1e(float _x) {
  613. const float x = fabsf(_x);
  614. if (x <= float{8.}) {
  615. // Chebyshev double coefficients for exp(-x) i1(x) in the interval [0,8].
  616. // Note: lim(x->0){ exp(-x) i1(x) / x } = 1/2.
  617. static const float coefficients[] = {
  618. 9.38153738649577178388E-9f,
  619. -4.44505912879632808065E-8f,
  620. 2.00329475355213526229E-7f,
  621. -8.56872026469545474066E-7f,
  622. 3.47025130813767847674E-6f,
  623. -1.32731636560394358279E-5f,
  624. 4.78156510755005422638E-5f,
  625. -1.61760815825896745588E-4f,
  626. 5.12285956168575772895E-4f,
  627. -1.51357245063125314899E-3f,
  628. 4.15642294431288815669E-3f,
  629. -1.05640848946261981558E-2f,
  630. 2.47264490306265168283E-2f,
  631. -5.29459812080949914269E-2f,
  632. 1.02643658689847095384E-1f,
  633. -1.76416518357834055153E-1f,
  634. 2.52587186443633654823E-1f};
  635. const float y = x / float{2.} - float{2.};
  636. const float out = chbevl(y, coefficients, int{17}) * x;
  637. return (_x < 0.) ? -out : out;
  638. }
  639. // Chebyshev coefficients for exp(-x) sqrt(x) i1(x)
  640. // in the inverted interval (8, infinity].
  641. // Note: lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi).
  642. // TODO: what's an "inverted interval"? Open on the left
  643. // and closed on the right?
  644. static const float coefficients[] = {
  645. -3.83538038596423702205E-9f,
  646. -2.63146884688951950684E-8f,
  647. -2.51223623787020892529E-7f,
  648. -3.88256480887769039346E-6f,
  649. -1.10588938762623716291E-4f,
  650. -9.76109749136146840777E-3f,
  651. 7.78576235018280120474E-1f};
  652. const float out = chbevl(float{32.} / x - float{2.}, coefficients, int{7}) / sqrt(x);
  653. return (_x < float{0.}) ? -out : out;
  654. }
  655. ); // i1e_string
  656. const auto kaiser_window_string = i0_string + jiterator_stringify(
  657. template <typename T>
  658. T kaiser_window(T a, T inv_alpha, T beta, T inv_i0_beta) {
  659. T x = a * inv_alpha - T{1};
  660. T y = max(T{0}, T{1} - x * x);
  661. return i0(beta * sqrt(y)) * inv_i0_beta;
  662. }
  663. ); // kaiser_window_string
  664. const auto sinc_string = jiterator_stringify(
  665. template <typename T>
  666. T sinc(T a) {
  667. if (a == T(0)) {
  668. return T(1);
  669. } else {
  670. constexpr T pi = T(3.14159265358979323846L);
  671. T product = pi * a;
  672. return std::sin(product) / product;
  673. }
  674. }
  675. ); // sinc_string
  676. const auto erfcx_string = jiterator_stringify(
  677. /* The next function is taken from http://ab-initio.mit.edu/Faddeev */
  678. /* Copyright (c) 2012 Massachusetts Institute of Technology
  679. *
  680. * Permission is hereby granted, free of charge, to any person obtaining
  681. * a copy of this software and associated documentation files (the
  682. * "Software"), to deal in the Software without restriction, including
  683. * without limitation the rights to use, copy, modify, merge, publish,
  684. * distribute, sublicense, and/or sell copies of the Software, and to
  685. * permit persons to whom the Software is furnished to do so, subject to
  686. * the following conditions:
  687. *
  688. * The above copyright notice and this permission notice shall be
  689. * included in all copies or substantial portions of the Software.
  690. *
  691. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  692. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  693. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  694. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  695. * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  696. * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  697. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  698. */
  699. /* erfcx(x) = exp(x^2) erfc(x) function, for real x, written by
  700. Steven G. Johnson, October 2012.
  701. This function combines a few different ideas.
  702. First, for x > 50, it uses a continued-fraction expansion (same as
  703. for the Faddeeva function, but with algebraic simplifications for z=i*x).
  704. Second, for 0 <= x <= 50, it uses Chebyshev polynomial approximations,
  705. but with two twists:
  706. a) It maps x to y = 4 / (4+x) in [0,1]. This simple transformation,
  707. inspired by a similar transformation in the octave-forge/specfun
  708. erfcx by Soren Hauberg, results in much faster Chebyshev convergence
  709. than other simple transformations I have examined.
  710. b) Instead of using a single Chebyshev polynomial for the entire
  711. [0,1] y interval, we break the interval up into 100 equal
  712. subintervals, with a switch/lookup table, and use much lower
  713. degree Chebyshev polynomials in each subinterval. This greatly
  714. improves performance in my tests.
  715. For x < 0, we use the relationship erfcx(-x) = 2 exp(x^2) - erfc(x),
  716. with the usual checks for overflow etcetera.
  717. Performance-wise, it seems to be substantially faster than either
  718. the SLATEC DERFC function [or an erfcx function derived therefrom]
  719. or Cody's CALERF function (from netlib.org/specfun), while
  720. retaining near machine precision in accuracy.
  721. */
  722. /* Given y100 = 100 * y, where y = 4 / (4 + x) for x >= 0, compute erfc(x).
  723. Uses a look-up table of 100 different Chebyshev polynomials
  724. for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated
  725. with the help of Maple and a little shell script. This allows
  726. the Chebyshev polynomials to be of significantly lower degree (about 1/4)
  727. compared to fitting the whole [0,1] interval with a single polynomial.
  728. */
  729. // TODO: review if this is computing in double when given a float input
  730. template <typename T>
  731. T erfcx_y100(T y100) {
  732. switch (static_cast<int>(y100)) {
  733. case 0: {
  734. T t = 2*y100 - 1;
  735. return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t;
  736. }
  737. case 1: {
  738. T t = 2*y100 - 3;
  739. return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t;
  740. }
  741. case 2: {
  742. T t = 2*y100 - 5;
  743. return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t;
  744. }
  745. case 3: {
  746. T t = 2*y100 - 7;
  747. return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t;
  748. }
  749. case 4: {
  750. T t = 2*y100 - 9;
  751. return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t;
  752. }
  753. case 5: {
  754. T t = 2*y100 - 11;
  755. return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t;
  756. }
  757. case 6: {
  758. T t = 2*y100 - 13;
  759. return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t;
  760. }
  761. case 7: {
  762. T t = 2*y100 - 15;
  763. return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t;
  764. }
  765. case 8: {
  766. T t = 2*y100 - 17;
  767. return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t;
  768. }
  769. case 9: {
  770. T t = 2*y100 - 19;
  771. return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t;
  772. }
  773. case 10: {
  774. T t = 2*y100 - 21;
  775. return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t;
  776. }
  777. case 11: {
  778. T t = 2*y100 - 23;
  779. return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t;
  780. }
  781. case 12: {
  782. T t = 2*y100 - 25;
  783. return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t;
  784. }
  785. case 13: {
  786. T t = 2*y100 - 27;
  787. return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t;
  788. }
  789. case 14: {
  790. T t = 2*y100 - 29;
  791. return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t;
  792. }
  793. case 15: {
  794. T t = 2*y100 - 31;
  795. return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t;
  796. }
  797. case 16: {
  798. T t = 2*y100 - 33;
  799. return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t;
  800. }
  801. case 17: {
  802. T t = 2*y100 - 35;
  803. return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t;
  804. }
  805. case 18: {
  806. T t = 2*y100 - 37;
  807. return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t;
  808. }
  809. case 19: {
  810. T t = 2*y100 - 39;
  811. return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t;
  812. }
  813. case 20: {
  814. T t = 2*y100 - 41;
  815. return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t;
  816. }
  817. case 21: {
  818. T t = 2*y100 - 43;
  819. return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t;
  820. }
  821. case 22: {
  822. T t = 2*y100 - 45;
  823. return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t;
  824. }
  825. case 23: {
  826. T t = 2*y100 - 47;
  827. return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t;
  828. }
  829. case 24: {
  830. T t = 2*y100 - 49;
  831. return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t;
  832. }
  833. case 25: {
  834. T t = 2*y100 - 51;
  835. return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t;
  836. }
  837. case 26: {
  838. T t = 2*y100 - 53;
  839. return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t;
  840. }
  841. case 27: {
  842. T t = 2*y100 - 55;
  843. return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t;
  844. }
  845. case 28: {
  846. T t = 2*y100 - 57;
  847. return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t;
  848. }
  849. case 29: {
  850. T t = 2*y100 - 59;
  851. return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t;
  852. }
  853. case 30: {
  854. T t = 2*y100 - 61;
  855. return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t;
  856. }
  857. case 31: {
  858. T t = 2*y100 - 63;
  859. return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t;
  860. }
  861. case 32: {
  862. T t = 2*y100 - 65;
  863. return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t;
  864. }
  865. case 33: {
  866. T t = 2*y100 - 67;
  867. return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t;
  868. }
  869. case 34: {
  870. T t = 2*y100 - 69;
  871. return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t;
  872. }
  873. case 35: {
  874. T t = 2*y100 - 71;
  875. return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t;
  876. }
  877. case 36: {
  878. T t = 2*y100 - 73;
  879. return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t;
  880. }
  881. case 37: {
  882. T t = 2*y100 - 75;
  883. return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t;
  884. }
  885. case 38: {
  886. T t = 2*y100 - 77;
  887. return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t;
  888. }
  889. case 39: {
  890. T t = 2*y100 - 79;
  891. return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t;
  892. }
  893. case 40: {
  894. T t = 2*y100 - 81;
  895. return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t;
  896. }
  897. case 41: {
  898. T t = 2*y100 - 83;
  899. return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t;
  900. }
  901. case 42: {
  902. T t = 2*y100 - 85;
  903. return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t;
  904. }
  905. case 43: {
  906. T t = 2*y100 - 87;
  907. return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t;
  908. }
  909. case 44: {
  910. T t = 2*y100 - 89;
  911. return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t;
  912. }
  913. case 45: {
  914. T t = 2*y100 - 91;
  915. return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t;
  916. }
  917. case 46: {
  918. T t = 2*y100 - 93;
  919. return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t;
  920. }
  921. case 47: {
  922. T t = 2*y100 - 95;
  923. return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t;
  924. }
  925. case 48: {
  926. T t = 2*y100 - 97;
  927. return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t;
  928. }
  929. case 49: {
  930. T t = 2*y100 - 99;
  931. return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t;
  932. }
  933. case 50: {
  934. T t = 2*y100 - 101;
  935. return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t;
  936. }
  937. case 51: {
  938. T t = 2*y100 - 103;
  939. return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t;
  940. }
  941. case 52: {
  942. T t = 2*y100 - 105;
  943. return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t;
  944. }
  945. case 53: {
  946. T t = 2*y100 - 107;
  947. return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t;
  948. }
  949. case 54: {
  950. T t = 2*y100 - 109;
  951. return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t;
  952. }
  953. case 55: {
  954. T t = 2*y100 - 111;
  955. return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t;
  956. }
  957. case 56: {
  958. T t = 2*y100 - 113;
  959. return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t;
  960. }
  961. case 57: {
  962. T t = 2*y100 - 115;
  963. return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t;
  964. }
  965. case 58: {
  966. T t = 2*y100 - 117;
  967. return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t;
  968. }
  969. case 59: {
  970. T t = 2*y100 - 119;
  971. return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t;
  972. }
  973. case 60: {
  974. T t = 2*y100 - 121;
  975. return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t;
  976. }
  977. case 61: {
  978. T t = 2*y100 - 123;
  979. return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t;
  980. }
  981. case 62: {
  982. T t = 2*y100 - 125;
  983. return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t;
  984. }
  985. case 63: {
  986. T t = 2*y100 - 127;
  987. return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t;
  988. }
  989. case 64: {
  990. T t = 2*y100 - 129;
  991. return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t;
  992. }
  993. case 65: {
  994. T t = 2*y100 - 131;
  995. return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t;
  996. }
  997. case 66: {
  998. T t = 2*y100 - 133;
  999. return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t;
  1000. }
  1001. case 67: {
  1002. T t = 2*y100 - 135;
  1003. return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t;
  1004. }
  1005. case 68: {
  1006. T t = 2*y100 - 137;
  1007. return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t;
  1008. }
  1009. case 69: {
  1010. T t = 2*y100 - 139;
  1011. return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t;
  1012. }
  1013. case 70: {
  1014. T t = 2*y100 - 141;
  1015. return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t;
  1016. }
  1017. case 71: {
  1018. T t = 2*y100 - 143;
  1019. return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t;
  1020. }
  1021. case 72: {
  1022. T t = 2*y100 - 145;
  1023. return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t;
  1024. }
  1025. case 73: {
  1026. T t = 2*y100 - 147;
  1027. return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t;
  1028. }
  1029. case 74: {
  1030. T t = 2*y100 - 149;
  1031. return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t;
  1032. }
  1033. case 75: {
  1034. T t = 2*y100 - 151;
  1035. return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t;
  1036. }
  1037. case 76: {
  1038. T t = 2*y100 - 153;
  1039. return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t;
  1040. }
  1041. case 77: {
  1042. T t = 2*y100 - 155;
  1043. return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t;
  1044. }
  1045. case 78: {
  1046. T t = 2*y100 - 157;
  1047. return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t;
  1048. }
  1049. case 79: {
  1050. T t = 2*y100 - 159;
  1051. return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t;
  1052. }
  1053. case 80: {
  1054. T t = 2*y100 - 161;
  1055. return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t;
  1056. }
  1057. case 81: {
  1058. T t = 2*y100 - 163;
  1059. return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t;
  1060. }
  1061. case 82: {
  1062. T t = 2*y100 - 165;
  1063. return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t;
  1064. }
  1065. case 83: {
  1066. T t = 2*y100 - 167;
  1067. return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t;
  1068. }
  1069. case 84: {
  1070. T t = 2*y100 - 169;
  1071. return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t;
  1072. }
  1073. case 85: {
  1074. T t = 2*y100 - 171;
  1075. return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t;
  1076. }
  1077. case 86: {
  1078. T t = 2*y100 - 173;
  1079. return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t;
  1080. }
  1081. case 87: {
  1082. T t = 2*y100 - 175;
  1083. return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t;
  1084. }
  1085. case 88: {
  1086. T t = 2*y100 - 177;
  1087. return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t;
  1088. }
  1089. case 89: {
  1090. T t = 2*y100 - 179;
  1091. return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t;
  1092. }
  1093. case 90: {
  1094. T t = 2*y100 - 181;
  1095. return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t;
  1096. }
  1097. case 91: {
  1098. T t = 2*y100 - 183;
  1099. return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t;
  1100. }
  1101. case 92: {
  1102. T t = 2*y100 - 185;
  1103. return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t;
  1104. }
  1105. case 93: {
  1106. T t = 2*y100 - 187;
  1107. return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t;
  1108. }
  1109. case 94: {
  1110. T t = 2*y100 - 189;
  1111. return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t;
  1112. }
  1113. case 95: {
  1114. T t = 2*y100 - 191;
  1115. return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t;
  1116. }
  1117. case 96: {
  1118. T t = 2*y100 - 193;
  1119. return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t;
  1120. }
  1121. case 97: {
  1122. T t = 2*y100 - 195;
  1123. return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t;
  1124. }
  1125. case 98: {
  1126. T t = 2*y100 - 197;
  1127. return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t;
  1128. }
  1129. case 99: {
  1130. T t = 2*y100 - 199;
  1131. return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t;
  1132. }
  1133. }
  1134. // we only get here if y = 1, i.e. |x| < 4*eps, in which case
  1135. // erfcx is within 1e-15 of 1..
  1136. return 1.;
  1137. }
  1138. template <typename T>
  1139. T erfcx(T x) {
  1140. // Short-circuits on NaN (returning NaN)
  1141. if (x != x) {
  1142. return x;
  1143. }
  1144. if (x >= 0) {
  1145. if (x > T{50}) { // continued-fraction expansion is faster
  1146. const T ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi)
  1147. if (x > T{5e7}) { // 1-term expansion, important to avoid overflow
  1148. return ispi / x;
  1149. }
  1150. /* 5-term expansion (rely on compiler for CSE), simplified from:
  1151. ispi / (x+0.5/(x+1/(x+1.5/(x+2/x)))) */
  1152. return ispi * ((x*x) * (x*x+T{4.5}) + T{2}) / (x * ((x*x) * (x*x+T{5}) + T{3.75}));
  1153. }
  1154. // x >= 0 x <= 50
  1155. return erfcx_y100(T{400} / (T{4} + x));
  1156. }
  1157. // x < 0
  1158. if (x < T{-26.7}) {
  1159. return POS_INFINITY;
  1160. } else if (x < T{-6.1}) {
  1161. return T{2} * exp(x * x);
  1162. }
  1163. // x < 0 and x >= -6.1
  1164. return T{2} * exp(x * x) - erfcx_y100(T{400} / (T{4} - x));
  1165. }
  1166. ); // erfcx_string
  1167. const auto airy_ai_string = jiterator_stringify(
  1168. template<typename T>
  1169. T airy_ai_forward(T x) {
  1170. static const T AN[] = {
  1171. +3.46538101525629032477e-01,
  1172. +1.20075952739645805542e+01,
  1173. +7.62796053615234516538e+01,
  1174. +1.68089224934630576269e+02,
  1175. +1.59756391350164413639e+02,
  1176. +7.05360906840444183113e+01,
  1177. +1.40264691163389668864e+01,
  1178. +9.99999999999999995305e-01,
  1179. };
  1180. static const T AD[] = {
  1181. +5.67594532638770212846e-01,
  1182. +1.47562562584847203173e+01,
  1183. +8.45138970141474626562e+01,
  1184. +1.77318088145400459522e+02,
  1185. +1.64234692871529701831e+02,
  1186. +7.14778400825575695274e+01,
  1187. +1.40959135607834029598e+01,
  1188. +1.00000000000000000470e+00,
  1189. };
  1190. static const T AFN[] = {
  1191. -1.31696323418331795333e-01,
  1192. -6.26456544431912369773e-01,
  1193. -6.93158036036933542233e-01,
  1194. -2.79779981545119124951e-01,
  1195. -4.91900132609500318020e-02,
  1196. -4.06265923594885404393e-03,
  1197. -1.59276496239262096340e-04,
  1198. -2.77649108155232920844e-06,
  1199. -1.67787698489114633780e-08,
  1200. };
  1201. static const T AFD[] = {
  1202. +1.33560420706553243746e+01,
  1203. +3.26825032795224613948e+01,
  1204. +2.67367040941499554804e+01,
  1205. +9.18707402907259625840e+00,
  1206. +1.47529146771666414581e+00,
  1207. +1.15687173795188044134e-01,
  1208. +4.40291641615211203805e-03,
  1209. +7.54720348287414296618e-05,
  1210. +4.51850092970580378464e-07,
  1211. };
  1212. static const T AGN[] = {
  1213. +1.97339932091685679179e-02,
  1214. +3.91103029615688277255e-01,
  1215. +1.06579897599595591108e+00,
  1216. +9.39169229816650230044e-01,
  1217. +3.51465656105547619242e-01,
  1218. +6.33888919628925490927e-02,
  1219. +5.85804113048388458567e-03,
  1220. +2.82851600836737019778e-04,
  1221. +6.98793669997260967291e-06,
  1222. +8.11789239554389293311e-08,
  1223. +3.41551784765923618484e-10,
  1224. };
  1225. static const T AGD[] = {
  1226. +9.30892908077441974853e+00,
  1227. +1.98352928718312140417e+01,
  1228. +1.55646628932864612953e+01,
  1229. +5.47686069422975497931e+00,
  1230. +9.54293611618961883998e-01,
  1231. +8.64580826352392193095e-02,
  1232. +4.12656523824222607191e-03,
  1233. +1.01259085116509135510e-04,
  1234. +1.17166733214413521882e-06,
  1235. +4.91834570062930015649e-09,
  1236. };
  1237. int domain_flag = 0;
  1238. T ai;
  1239. if (isinf(x)) {
  1240. return NAN;
  1241. }
  1242. if (x > T(103.892)) {
  1243. return T(0.0);
  1244. }
  1245. T f;
  1246. T g;
  1247. T k;
  1248. if (x < T(-2.09)) {
  1249. T z = T(1.0) / (T(-2.0) * x * sqrt(-x) / T(3.0));
  1250. T afn = 0.0;
  1251. for (uint8_t index = 0; index <= 8; index++) {
  1252. afn = afn * (z * z) + AFN[index];
  1253. }
  1254. T afd = 0.0;
  1255. for (uint8_t index = 0; index <= 8; index++) {
  1256. afd = afd * (z * z) + AFD[index];
  1257. }
  1258. T agn = 0.0;
  1259. for (uint8_t index = 0; index <= 10 + 0; index++) {
  1260. agn = agn * (z * z) + AGN[index];
  1261. }
  1262. T agd = 0.0;
  1263. for (uint8_t index = 0; index <= 10 - 1; index++) {
  1264. agd = agd * (z * z) + AGD[index];
  1265. }
  1266. T t = T(-2.0) * x * sqrt(-x) / T(3.0) + T(0.25) * T(3.14159265358979323846);
  1267. return T(5.64189583547756286948e-01) / sqrt(sqrt(-x)) * (sin(t) * (T(1.0) + z * z * afn / afd) - cos(t) * (z * agn / agd));
  1268. }
  1269. if (x >= T(2.09)) {
  1270. domain_flag = 5;
  1271. T zeta = T(2.0) * x * sqrt(x) / T(3.0);
  1272. T an = 0.0;
  1273. for (uint8_t index = 0; index <= 7; index++) {
  1274. an = an * (T(1.0) / zeta) + AN[index];
  1275. }
  1276. T ad = 0.0;
  1277. for (uint8_t index = 0; index <= 7; index++) {
  1278. ad = ad * (T(1.0) / zeta) + AD[index];
  1279. }
  1280. ai = T(5.64189583547756286948e-01) * (an / ad) / (T(2.0) * sqrt(sqrt(x)) * exp(zeta));
  1281. if (x > T(8.3203353)) {
  1282. return ai;
  1283. }
  1284. }
  1285. f = 1.0;
  1286. g = x;
  1287. k = 1.0;
  1288. T m = 1.0;
  1289. T n = x;
  1290. T t = 1.0;
  1291. T z = x * x * x;
  1292. while (t > T(1.11022302462515654042e-16)) {
  1293. m *= z;
  1294. k += T(1.0);
  1295. m /= k;
  1296. n *= z;
  1297. k += T(1.0);
  1298. n /= k;
  1299. m /= k;
  1300. f += m;
  1301. k += T(1.0);
  1302. n /= k;
  1303. g += n;
  1304. t = abs(m / f);
  1305. }
  1306. if ((domain_flag & 1) == 0) {
  1307. return T(0.355028053887817239260) * f - T(0.258819403792806798405) * g;
  1308. }
  1309. return ai;
  1310. } // T airy_ai(T x)
  1311. ); // airy_ai_string
  1312. const auto bessel_j0_string = jiterator_stringify(
  1313. template<typename T>
  1314. T bessel_j0_forward(T x) {
  1315. static const T PP[] = {
  1316. +7.96936729297347051624e-04,
  1317. +8.28352392107440799803e-02,
  1318. +1.23953371646414299388e+00,
  1319. +5.44725003058768775090e+00,
  1320. +8.74716500199817011941e+00,
  1321. +5.30324038235394892183e+00,
  1322. +9.99999999999999997821e-01,
  1323. };
  1324. static const T PQ[] = {
  1325. +9.24408810558863637013e-04,
  1326. +8.56288474354474431428e-02,
  1327. +1.25352743901058953537e+00,
  1328. +5.47097740330417105182e+00,
  1329. +8.76190883237069594232e+00,
  1330. +5.30605288235394617618e+00,
  1331. +1.00000000000000000218e+00,
  1332. };
  1333. static const T QP[] = {
  1334. -1.13663838898469149931e-02,
  1335. -1.28252718670509318512e+00,
  1336. -1.95539544257735972385e+01,
  1337. -9.32060152123768231369e+01,
  1338. -1.77681167980488050595e+02,
  1339. -1.47077505154951170175e+02,
  1340. -5.14105326766599330220e+01,
  1341. -6.05014350600728481186e+00,
  1342. };
  1343. static const T QQ[] = {
  1344. +6.43178256118178023184e+01,
  1345. +8.56430025976980587198e+02,
  1346. +3.88240183605401609683e+03,
  1347. +7.24046774195652478189e+03,
  1348. +5.93072701187316984827e+03,
  1349. +2.06209331660327847417e+03,
  1350. +2.42005740240291393179e+02,
  1351. };
  1352. static const T RP[] = {
  1353. -4.79443220978201773821e+09,
  1354. +1.95617491946556577543e+12,
  1355. -2.49248344360967716204e+14,
  1356. +9.70862251047306323952e+15,
  1357. };
  1358. static const T RQ[] = {
  1359. +4.99563147152651017219e+02,
  1360. +1.73785401676374683123e+05,
  1361. +4.84409658339962045305e+07,
  1362. +1.11855537045356834862e+10,
  1363. +2.11277520115489217587e+12,
  1364. +3.10518229857422583814e+14,
  1365. +3.18121955943204943306e+16,
  1366. +1.71086294081043136091e+18,
  1367. };
  1368. if (x < T(0)) {
  1369. x = -x;
  1370. }
  1371. if (x <= T(5.0)) {
  1372. if (x < T(0.00001)) {
  1373. return T(1.0) - x * x / T(4.0);
  1374. }
  1375. T rp = 0.0;
  1376. for (uint8_t index = 0; index <= 3; index++) {
  1377. rp = rp * (x * x) + RP[index];
  1378. }
  1379. T rq = 0.0;
  1380. for (uint8_t index = 0; index <= 7; index++) {
  1381. rq = rq * (x * x) + RQ[index];
  1382. }
  1383. return (x * x - T(5.78318596294678452118e+00)) * (x * x - T(3.04712623436620863991e+01)) * rp / rq;
  1384. }
  1385. T pp = 0.0;
  1386. for (uint8_t index = 0; index <= 6; index++) {
  1387. pp = pp * (T(25.0) / (x * x)) + PP[index];
  1388. }
  1389. T pq = 0.0;
  1390. for (uint8_t index = 0; index <= 6; index++) {
  1391. pq = pq * (T(25.0) / (x * x)) + PQ[index];
  1392. }
  1393. T qp = 0.0;
  1394. for (uint8_t index = 0; index <= 7; index++) {
  1395. qp = qp * (T(25.0) / (x * x)) + QP[index];
  1396. }
  1397. T qq = 0.0;
  1398. for (uint8_t index = 0; index <= 6; index++) {
  1399. qq = qq * (T(25.0) / (x * x)) + QQ[index];
  1400. }
  1401. return (pp / pq * cos(x - T(0.785398163397448309615660845819875721)) - T(5.0) / x * (qp / qq) * sin(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / sqrt(x);
  1402. } // bessel_j0_forward(T x)
  1403. ); // bessel_j0_string
  1404. const auto bessel_y0_string = bessel_j0_string + jiterator_stringify(
  1405. template<typename T>
  1406. T bessel_y0_forward(T x) {
  1407. static const T PP[] = {
  1408. +7.96936729297347051624e-04,
  1409. +8.28352392107440799803e-02,
  1410. +1.23953371646414299388e+00,
  1411. +5.44725003058768775090e+00,
  1412. +8.74716500199817011941e+00,
  1413. +5.30324038235394892183e+00,
  1414. +9.99999999999999997821e-01,
  1415. };
  1416. static const T PQ[] = {
  1417. +9.24408810558863637013e-04,
  1418. +8.56288474354474431428e-02,
  1419. +1.25352743901058953537e+00,
  1420. +5.47097740330417105182e+00,
  1421. +8.76190883237069594232e+00,
  1422. +5.30605288235394617618e+00,
  1423. +1.00000000000000000218e+00,
  1424. };
  1425. static const T QP[] = {
  1426. -1.13663838898469149931e-02,
  1427. -1.28252718670509318512e+00,
  1428. -1.95539544257735972385e+01,
  1429. -9.32060152123768231369e+01,
  1430. -1.77681167980488050595e+02,
  1431. -1.47077505154951170175e+02,
  1432. -5.14105326766599330220e+01,
  1433. -6.05014350600728481186e+00,
  1434. };
  1435. static const T QQ[] = {
  1436. +6.43178256118178023184e+01,
  1437. +8.56430025976980587198e+02,
  1438. +3.88240183605401609683e+03,
  1439. +7.24046774195652478189e+03,
  1440. +5.93072701187316984827e+03,
  1441. +2.06209331660327847417e+03,
  1442. +2.42005740240291393179e+02,
  1443. };
  1444. static const T YP[] = {
  1445. +1.55924367855235737965e+04,
  1446. -1.46639295903971606143e+07,
  1447. +5.43526477051876500413e+09,
  1448. -9.82136065717911466409e+11,
  1449. +8.75906394395366999549e+13,
  1450. -3.46628303384729719441e+15,
  1451. +4.42733268572569800351e+16,
  1452. -1.84950800436986690637e+16,
  1453. };
  1454. static const T YQ[] = {
  1455. +1.04128353664259848412e+03,
  1456. +6.26107330137134956842e+05,
  1457. +2.68919633393814121987e+08,
  1458. +8.64002487103935000337e+10,
  1459. +2.02979612750105546709e+13,
  1460. +3.17157752842975028269e+15,
  1461. +2.50596256172653059228e+17,
  1462. };
  1463. if (x <= T(5.0)) {
  1464. if (x == T(0.0)) {
  1465. return NEG_INFINITY;
  1466. }
  1467. if (x < T(0.0)) {
  1468. NAN;
  1469. }
  1470. T yp = 0.0;
  1471. for (uint8_t index = 0; index <= 7; index++) {
  1472. yp = yp * (x * x) + YP[index];
  1473. }
  1474. T yq = 0.0;
  1475. for (uint8_t index = 0; index <= 6; index++) {
  1476. yq = yq * (x * x) + YQ[index];
  1477. }
  1478. return yp / yq + (T(0.636619772367581343075535053490057448) * log(x) * bessel_j0_forward(x));
  1479. }
  1480. T pp = 0.0;
  1481. for (uint8_t index = 0; index <= 6; index++) {
  1482. pp = pp * (T(25.0) / (x * x)) + PP[index];
  1483. }
  1484. T pq = 0.0;
  1485. for (uint8_t index = 0; index <= 6; index++) {
  1486. pq = pq * (T(25.0) / (x * x)) + PQ[index];
  1487. }
  1488. T qp = 0.0;
  1489. for (uint8_t index = 0; index <= 7; index++) {
  1490. qp = qp * (T(25.0) / (x * x)) + QP[index];
  1491. }
  1492. T qq = 0.0;
  1493. for (uint8_t index = 0; index <= 6; index++) {
  1494. qq = qq * (T(25.0) / (x * x)) + QQ[index];
  1495. }
  1496. return (pp / pq * sin(x - T(0.785398163397448309615660845819875721)) + T(5.0) / x * (qp / qq) * cos(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / sqrt(x);
  1497. } // bessel_y0_forward(T x)
  1498. ); // bessel_y0_string
  1499. const auto bessel_j1_string = jiterator_stringify(
  1500. template<typename T>
  1501. T bessel_j1_forward(T x) {
  1502. static const T PP[] = {
  1503. +7.62125616208173112003e-04,
  1504. +7.31397056940917570436e-02,
  1505. +1.12719608129684925192e+00,
  1506. +5.11207951146807644818e+00,
  1507. +8.42404590141772420927e+00,
  1508. +5.21451598682361504063e+00,
  1509. +1.00000000000000000254e+00,
  1510. };
  1511. static const T PQ[] = {
  1512. +5.71323128072548699714e-04,
  1513. +6.88455908754495404082e-02,
  1514. +1.10514232634061696926e+00,
  1515. +5.07386386128601488557e+00,
  1516. +8.39985554327604159757e+00,
  1517. +5.20982848682361821619e+00,
  1518. +9.99999999999999997461e-01,
  1519. };
  1520. static const T QP[] = {
  1521. +5.10862594750176621635e-02,
  1522. +4.98213872951233449420e+00,
  1523. +7.58238284132545283818e+01,
  1524. +3.66779609360150777800e+02,
  1525. +7.10856304998926107277e+02,
  1526. +5.97489612400613639965e+02,
  1527. +2.11688757100572135698e+02,
  1528. +2.52070205858023719784e+01,
  1529. };
  1530. static const T QQ[] = {
  1531. +7.42373277035675149943e+01,
  1532. +1.05644886038262816351e+03,
  1533. +4.98641058337653607651e+03,
  1534. +9.56231892404756170795e+03,
  1535. +7.99704160447350683650e+03,
  1536. +2.82619278517639096600e+03,
  1537. +3.36093607810698293419e+02,
  1538. };
  1539. static const T RP[] = {
  1540. -8.99971225705559398224e+08,
  1541. +4.52228297998194034323e+11,
  1542. -7.27494245221818276015e+13,
  1543. +3.68295732863852883286e+15,
  1544. };
  1545. static const T RQ[] = {
  1546. +6.20836478118054335476e+02,
  1547. +2.56987256757748830383e+05,
  1548. +8.35146791431949253037e+07,
  1549. +2.21511595479792499675e+10,
  1550. +4.74914122079991414898e+12,
  1551. +7.84369607876235854894e+14,
  1552. +8.95222336184627338078e+16,
  1553. +5.32278620332680085395e+18,
  1554. };
  1555. if (x < T(0.0)) {
  1556. return -bessel_j1_forward(-x);
  1557. }
  1558. if (x <= T(5.0)) {
  1559. T rp = 0.0;
  1560. for (uint8_t index = 0; index <= 3; index++) {
  1561. rp = rp * (x * x) + RP[index];
  1562. }
  1563. T rq = 0.0;
  1564. for (uint8_t index = 0; index <= 7; index++) {
  1565. rq = rq * (x * x) + RQ[index];
  1566. }
  1567. return rp / rq * x * (x * x - T(1.46819706421238932572e+01)) * (x * x - T(4.92184563216946036703e+01));
  1568. }
  1569. T pp = 0.0;
  1570. for (uint8_t index = 0; index <= 6; index++) {
  1571. pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index];
  1572. }
  1573. T pq = 0.0;
  1574. for (uint8_t index = 0; index <= 6; index++) {
  1575. pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index];
  1576. }
  1577. T qp = 0.0;
  1578. for (uint8_t index = 0; index <= 7; index++) {
  1579. qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index];
  1580. }
  1581. T qq = 0.0;
  1582. for (uint8_t index = 0; index <= 6; index++) {
  1583. qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index];
  1584. }
  1585. return (pp / pq * cos(x - T(2.356194490192344928846982537459627163)) - T(5.0) / x * (qp / qq) * sin(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / sqrt(x);
  1586. } // bessel_j1_forward(T x)
  1587. ); // bessel_j1_string
  1588. const auto bessel_y1_string = bessel_j1_string + jiterator_stringify(
  1589. template<typename T>
  1590. T bessel_y1_forward(T x) {
  1591. static const T PP[] = {
  1592. +7.62125616208173112003e-04,
  1593. +7.31397056940917570436e-02,
  1594. +1.12719608129684925192e+00,
  1595. +5.11207951146807644818e+00,
  1596. +8.42404590141772420927e+00,
  1597. +5.21451598682361504063e+00,
  1598. +1.00000000000000000254e+00,
  1599. };
  1600. static const T PQ[] = {
  1601. +5.71323128072548699714e-04,
  1602. +6.88455908754495404082e-02,
  1603. +1.10514232634061696926e+00,
  1604. +5.07386386128601488557e+00,
  1605. +8.39985554327604159757e+00,
  1606. +5.20982848682361821619e+00,
  1607. +9.99999999999999997461e-01,
  1608. };
  1609. static const T QP[] = {
  1610. +5.10862594750176621635e-02,
  1611. +4.98213872951233449420e+00,
  1612. +7.58238284132545283818e+01,
  1613. +3.66779609360150777800e+02,
  1614. +7.10856304998926107277e+02,
  1615. +5.97489612400613639965e+02,
  1616. +2.11688757100572135698e+02,
  1617. +2.52070205858023719784e+01,
  1618. };
  1619. static const T QQ[] = {
  1620. +7.42373277035675149943e+01,
  1621. +1.05644886038262816351e+03,
  1622. +4.98641058337653607651e+03,
  1623. +9.56231892404756170795e+03,
  1624. +7.99704160447350683650e+03,
  1625. +2.82619278517639096600e+03,
  1626. +3.36093607810698293419e+02,
  1627. };
  1628. static const T YP[] = {
  1629. +1.26320474790178026440e+09,
  1630. -6.47355876379160291031e+11,
  1631. +1.14509511541823727583e+14,
  1632. -8.12770255501325109621e+15,
  1633. +2.02439475713594898196e+17,
  1634. -7.78877196265950026825e+17,
  1635. };
  1636. static const T YQ[] = {
  1637. +5.94301592346128195359e+02,
  1638. +2.35564092943068577943e+05,
  1639. +7.34811944459721705660e+07,
  1640. +1.87601316108706159478e+10,
  1641. +3.88231277496238566008e+12,
  1642. +6.20557727146953693363e+14,
  1643. +6.87141087355300489866e+16,
  1644. +3.97270608116560655612e+18,
  1645. };
  1646. if (x <= T(5.0)) {
  1647. if (x == T(0.0)) {
  1648. return NEG_INFINITY;
  1649. }
  1650. if (x <= T(0.0)) {
  1651. return NAN;
  1652. }
  1653. T yp = 0.0;
  1654. for (uint8_t index = 0; index <= 5; index++) {
  1655. yp = yp * (x * x) + YP[index];
  1656. }
  1657. T yq = 0.0;
  1658. for (uint8_t index = 0; index <= 7; index++) {
  1659. yq = yq * (x * x) + YQ[index];
  1660. }
  1661. return x * (yp / yq) + (T(0.636619772367581343075535053490057448) * (bessel_j1_forward(x) * log(x) - T(1.0) / x));
  1662. }
  1663. T pp = 0.0;
  1664. for (uint8_t index = 0; index <= 6; index++) {
  1665. pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index];
  1666. }
  1667. T pq = 0.0;
  1668. for (uint8_t index = 0; index <= 6; index++) {
  1669. pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index];
  1670. }
  1671. T qp = 0.0;
  1672. for (uint8_t index = 0; index <= 7; index++) {
  1673. qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index];
  1674. }
  1675. T qq = 0.0;
  1676. for (uint8_t index = 0; index <= 6; index++) {
  1677. qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index];
  1678. }
  1679. return (pp / pq * sin(x - T(2.356194490192344928846982537459627163)) + T(5.0) / x * (qp / qq) * cos(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / sqrt(x);
  1680. } // bessel_y1_forward(T x)
  1681. ); // bessel_y1_string
  1682. const auto chebyshev_polynomial_t_string = jiterator_stringify(
  1683. template<typename T>
  1684. T chebyshev_polynomial_t_forward(T x, int64_t n) {
  1685. if (n < 0) {
  1686. return T(0.0);
  1687. }
  1688. if (abs(x) == T(1.0)) {
  1689. if (x > T(0.0) || n % 2 == 0) {
  1690. return T(1.0);
  1691. }
  1692. return T(-1.0);
  1693. }
  1694. if ((n > 6) && (abs(x) < T(1.0))) {
  1695. return cos(n * acos(x));
  1696. }
  1697. if (n == 0) {
  1698. return T(1.0);
  1699. }
  1700. if (n == 1) {
  1701. return x;
  1702. }
  1703. T p = T(1.0);
  1704. T q = x;
  1705. T r;
  1706. for (int64_t k = 2; k <= n; k++) {
  1707. r = (x + x) * q - p;
  1708. p = q;
  1709. q = r;
  1710. }
  1711. return r;
  1712. } // chebyshev_polynomial_t_forward(T x, int64_t n)
  1713. template<typename T>
  1714. T chebyshev_polynomial_t_forward(T x, T n) {
  1715. return chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
  1716. } // chebyshev_polynomial_t_forward(T x, T n)
  1717. ); // chebyshev_polynomial_t_string
  1718. const auto chebyshev_polynomial_u_string = jiterator_stringify(
  1719. template<typename T>
  1720. T chebyshev_polynomial_u_forward(T x, int64_t n) {
  1721. if (n < 0) {
  1722. return T(0.0);
  1723. }
  1724. if (abs(x) == T(1.0)) {
  1725. if (x > T(0.0) || n % 2 == 0) {
  1726. return n + 1;
  1727. }
  1728. return -(n + 1);
  1729. }
  1730. if ((n > 8) && (abs(x) < T(1.0))) {
  1731. if (sin(acos(x)) != T(0.0)) {
  1732. return sin((n + 1) * acos(x)) / sin(acos(x));
  1733. }
  1734. return (n + 1) * cos((n + 1) * acos(x)) / x;
  1735. }
  1736. if (n == 0) {
  1737. return T(1.0);
  1738. }
  1739. if (n == 1) {
  1740. return x + x;
  1741. }
  1742. T p = T(1.0);
  1743. T q = x + x;
  1744. T r;
  1745. for (int64_t k = 2; k <= n; k++) {
  1746. r = (x + x) * q - p;
  1747. p = q;
  1748. q = r;
  1749. }
  1750. return r;
  1751. } // chebyshev_polynomial_u_forward(T x, int64_t n)
  1752. template<typename T>
  1753. T chebyshev_polynomial_u_forward(T x, T n) {
  1754. return chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
  1755. } // chebyshev_polynomial_u_forward(T x, T n)
  1756. ); // chebyshev_polynomial_u_string
  1757. const auto chebyshev_polynomial_v_string = jiterator_stringify(
  1758. template<typename T>
  1759. T chebyshev_polynomial_v_forward(T x, int64_t n) {
  1760. if (n < 0) {
  1761. return T(0.0);
  1762. }
  1763. if (abs(x) == T(1.0)) {
  1764. if (x > T(0.0)) {
  1765. return T(1.0);
  1766. }
  1767. if (n % 2 == 0) {
  1768. return n + n + 1;
  1769. }
  1770. return -(n + n + 1);
  1771. }
  1772. if ((n > 8) && (abs(x) < T(1.0))) {
  1773. if (sin(acos(x) / T(2.0)) != T(1.0)) {
  1774. return cos((n + T(0.5)) * acos(x)) / cos(acos(x) / T(2.0));
  1775. }
  1776. if (n % 2 == 0) {
  1777. return n + n + 1;
  1778. }
  1779. return -(n + n + 1);
  1780. }
  1781. if (n == 0) {
  1782. return T(1.0);
  1783. }
  1784. if (n == 1) {
  1785. return x + x - T(1.0);
  1786. }
  1787. T p = T(1.0);
  1788. T q = x + x - T(1.0);
  1789. T r;
  1790. for (int64_t k = 2; k <= n; k++) {
  1791. r = (x + x) * q - p;
  1792. p = q;
  1793. q = r;
  1794. }
  1795. return r;
  1796. } // chebyshev_polynomial_v_forward(T x, int64_t n)
  1797. template<typename T>
  1798. T chebyshev_polynomial_v_forward(T x, T n) {
  1799. return chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
  1800. } // chebyshev_polynomial_v_forward(T x, T n)
  1801. ); // chebyshev_polynomial_v_string
  1802. const auto chebyshev_polynomial_w_string = jiterator_stringify(
  1803. template<typename T>
  1804. T chebyshev_polynomial_w_forward(T x, int64_t n) {
  1805. if (n < 0) {
  1806. return T(0.0);
  1807. }
  1808. if (abs(x) == T(1.0)) {
  1809. if (x > T(0.0)) {
  1810. return n + n + 1;
  1811. }
  1812. if (n % 2 == 0) {
  1813. return T(1.0);
  1814. }
  1815. return T(-1.0);
  1816. }
  1817. if ((n > 8) && (abs(x) < T(1.0))) {
  1818. if (cos(acos(x) / T(2.0)) != T(1.0)) {
  1819. return sin((n + T(0.5)) * acos(x)) / sin(acos(x) / T(2.0));
  1820. }
  1821. if (x > T(0.0)) {
  1822. return n + n + 1;
  1823. }
  1824. if (n % 2 == 0) {
  1825. return T(1.0);
  1826. }
  1827. return T(-1.0);
  1828. }
  1829. if (n == 0) {
  1830. return T(1.0);
  1831. }
  1832. if (n == 1) {
  1833. return x + x + T(1.0);
  1834. }
  1835. T p = T(1.0);
  1836. T q = x + x + T(1.0);
  1837. T r;
  1838. for (int64_t k = 2; k <= n; k++) {
  1839. r = (x + x) * q - p;
  1840. p = q;
  1841. q = r;
  1842. }
  1843. return r;
  1844. } // chebyshev_polynomial_w_forward(T x, int64_t n)
  1845. template<typename T>
  1846. T chebyshev_polynomial_w_forward(T x, T n) {
  1847. return chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
  1848. } // chebyshev_polynomial_w_forward(T x, T n)
  1849. ); // chebyshev_polynomial_w_string
  1850. const auto hermite_polynomial_h_string = jiterator_stringify(
  1851. template<typename T>
  1852. T hermite_polynomial_h_forward(T x, int64_t n) {
  1853. if (n < 0) {
  1854. return T(0.0);
  1855. }
  1856. if (n == 0) {
  1857. return T(1.0);
  1858. }
  1859. if (n == 1) {
  1860. return x + x;
  1861. }
  1862. T p = T(1.0);
  1863. T q = x + x;
  1864. T r;
  1865. for (int64_t k = 2; k < n + n; k += 2) {
  1866. r = (x + x) * q - k * p;
  1867. p = q;
  1868. q = r;
  1869. }
  1870. return r;
  1871. } // hermite_polynomial_h_forward(T x, int64_t n)
  1872. template<typename T>
  1873. T hermite_polynomial_h_forward(T x, T n) {
  1874. return hermite_polynomial_h_forward(x, static_cast<int64_t>(n));
  1875. } // hermite_polynomial_h_forward(T x, T n)
  1876. ); // hermite_polynomial_h_string
  1877. const auto hermite_polynomial_he_string = jiterator_stringify(
  1878. template<typename T>
  1879. T hermite_polynomial_he_forward(T x, int64_t n) {
  1880. if (n < 0) {
  1881. return T(0.0);
  1882. }
  1883. if (n == 0) {
  1884. return T(1.0);
  1885. }
  1886. if (n == 1) {
  1887. return x;
  1888. }
  1889. T p = T(1.0);
  1890. T q = x;
  1891. T r;
  1892. for (int64_t k = 1; k < n; k++) {
  1893. r = x * q - k * p;
  1894. p = q;
  1895. q = r;
  1896. }
  1897. return r;
  1898. } // hermite_polynomial_he_forward(T x, int64_t n)
  1899. template<typename T>
  1900. T hermite_polynomial_he_forward(T x, T n) {
  1901. return hermite_polynomial_he_forward(x, static_cast<int64_t>(n));
  1902. } // hermite_polynomial_he_forward(T x, T n)
  1903. ); // hermite_polynomial_he_string
  1904. const auto laguerre_polynomial_l_string = jiterator_stringify(
  1905. template<typename T>
  1906. T laguerre_polynomial_l_forward(T x, int64_t n) {
  1907. if (n < 0) {
  1908. return T(0.0);
  1909. }
  1910. if (abs(x) == T(0.0)) {
  1911. return T(1.0);
  1912. }
  1913. if (n == 0) {
  1914. return T(1.0);
  1915. }
  1916. if (n == 1) {
  1917. return T(1.0) - x;
  1918. }
  1919. T p = T(1.0);
  1920. T q = T(1.0) - x;
  1921. T r;
  1922. for (int64_t k = 1; k < n; k++) {
  1923. r = (((k + k) + (T(1.0) - x)) * q - k * p) / (k + 1);
  1924. p = q;
  1925. q = r;
  1926. }
  1927. return r;
  1928. } // laguerre_polynomial_l_forward(T x, int64_t n)
  1929. template<typename T>
  1930. T laguerre_polynomial_l_forward(T x, T n) {
  1931. return laguerre_polynomial_l_forward(x, static_cast<int64_t>(n));
  1932. } // laguerre_polynomial_l_forward(T x, T n)
  1933. ); // laguerre_polynomial_l_string
  1934. const auto legendre_polynomial_p_string = jiterator_stringify(
  1935. template<typename T>
  1936. T legendre_polynomial_p_forward(T x, int64_t n) {
  1937. if (n < 0) {
  1938. return T(0.0);
  1939. }
  1940. if (abs(x) == T(1.0)) {
  1941. if (x > T(0.0) || n % 2 == 0) {
  1942. return T(1.0);
  1943. }
  1944. return T(-1.0);
  1945. }
  1946. if (n == 0) {
  1947. return T(1.0);
  1948. }
  1949. if (n == 1) {
  1950. return x;
  1951. }
  1952. T p = T(1.0);
  1953. T q = x;
  1954. T r;
  1955. for (int64_t k = 1; k < n; k++) {
  1956. r = ((k + k + 1) * x * q - k * p) / (k + 1);
  1957. p = q;
  1958. q = r;
  1959. }
  1960. return r;
  1961. } // legendre_polynomial_p_forward(T x, int64_t n)
  1962. template<typename T>
  1963. T legendre_polynomial_p_forward(T x, T n) {
  1964. return legendre_polynomial_p_forward(x, static_cast<int64_t>(n));
  1965. } // legendre_polynomial_p_forward(T x, T n)
  1966. ); // legendre_polynomial_p_string
  1967. const auto modified_bessel_i0_string = jiterator_stringify(
  1968. template<typename T>
  1969. T modified_bessel_i0_forward(T x) {
  1970. static const T A[] = {
  1971. -4.41534164647933937950e-18,
  1972. +3.33079451882223809783e-17,
  1973. -2.43127984654795469359e-16,
  1974. +1.71539128555513303061e-15,
  1975. -1.16853328779934516808e-14,
  1976. +7.67618549860493561688e-14,
  1977. -4.85644678311192946090e-13,
  1978. +2.95505266312963983461e-12,
  1979. -1.72682629144155570723e-11,
  1980. +9.67580903537323691224e-11,
  1981. -5.18979560163526290666e-10,
  1982. +2.65982372468238665035e-09,
  1983. -1.30002500998624804212e-08,
  1984. +6.04699502254191894932e-08,
  1985. -2.67079385394061173391e-07,
  1986. +1.11738753912010371815e-06,
  1987. -4.41673835845875056359e-06,
  1988. +1.64484480707288970893e-05,
  1989. -5.75419501008210370398e-05,
  1990. +1.88502885095841655729e-04,
  1991. -5.76375574538582365885e-04,
  1992. +1.63947561694133579842e-03,
  1993. -4.32430999505057594430e-03,
  1994. +1.05464603945949983183e-02,
  1995. -2.37374148058994688156e-02,
  1996. +4.93052842396707084878e-02,
  1997. -9.49010970480476444210e-02,
  1998. +1.71620901522208775349e-01,
  1999. -3.04682672343198398683e-01,
  2000. +6.76795274409476084995e-01,
  2001. };
  2002. static const T B[] = {
  2003. -7.23318048787475395456e-18,
  2004. -4.83050448594418207126e-18,
  2005. +4.46562142029675999901e-17,
  2006. +3.46122286769746109310e-17,
  2007. -2.82762398051658348494e-16,
  2008. -3.42548561967721913462e-16,
  2009. +1.77256013305652638360e-15,
  2010. +3.81168066935262242075e-15,
  2011. -9.55484669882830764870e-15,
  2012. -4.15056934728722208663e-14,
  2013. +1.54008621752140982691e-14,
  2014. +3.85277838274214270114e-13,
  2015. +7.18012445138366623367e-13,
  2016. -1.79417853150680611778e-12,
  2017. -1.32158118404477131188e-11,
  2018. -3.14991652796324136454e-11,
  2019. +1.18891471078464383424e-11,
  2020. +4.94060238822496958910e-10,
  2021. +3.39623202570838634515e-09,
  2022. +2.26666899049817806459e-08,
  2023. +2.04891858946906374183e-07,
  2024. +2.89137052083475648297e-06,
  2025. +6.88975834691682398426e-05,
  2026. +3.36911647825569408990e-03,
  2027. +8.04490411014108831608e-01,
  2028. };
  2029. T p;
  2030. T q = 0.0;
  2031. if (abs(x) <= T(8.0)) {
  2032. T a = A[0];
  2033. for (uint8_t index = 1; index < 30; index++) {
  2034. p = q;
  2035. q = a;
  2036. a = ((abs(x) / T(2.0)) - T(2.0)) * q - p + A[index];
  2037. }
  2038. return exp(abs(x)) * (T(0.5) * (a - p));
  2039. }
  2040. T b = B[0];
  2041. for (uint8_t index = 1; index < 25; index++) {
  2042. p = q;
  2043. q = b;
  2044. b = (T(32.0) / abs(x) - T(2.0)) * q - p + B[index];
  2045. }
  2046. return exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x));
  2047. } // modified_bessel_i0_forward(T x)
  2048. ); // modified_bessel_i0_string
  2049. const auto modified_bessel_i1_string = jiterator_stringify(
  2050. template<typename T>
  2051. T modified_bessel_i1_forward(T x) {
  2052. static const T A[] = {
  2053. +2.77791411276104639959e-18,
  2054. -2.11142121435816608115e-17,
  2055. +1.55363195773620046921e-16,
  2056. -1.10559694773538630805e-15,
  2057. +7.60068429473540693410e-15,
  2058. -5.04218550472791168711e-14,
  2059. +3.22379336594557470981e-13,
  2060. -1.98397439776494371520e-12,
  2061. +1.17361862988909016308e-11,
  2062. -6.66348972350202774223e-11,
  2063. +3.62559028155211703701e-10,
  2064. -1.88724975172282928790e-09,
  2065. +9.38153738649577178388e-09,
  2066. -4.44505912879632808065e-08,
  2067. +2.00329475355213526229e-07,
  2068. -8.56872026469545474066e-07,
  2069. +3.47025130813767847674e-06,
  2070. -1.32731636560394358279e-05,
  2071. +4.78156510755005422638e-05,
  2072. -1.61760815825896745588e-04,
  2073. +5.12285956168575772895e-04,
  2074. -1.51357245063125314899e-03,
  2075. +4.15642294431288815669e-03,
  2076. -1.05640848946261981558e-02,
  2077. +2.47264490306265168283e-02,
  2078. -5.29459812080949914269e-02,
  2079. +1.02643658689847095384e-01,
  2080. -1.76416518357834055153e-01,
  2081. +2.52587186443633654823e-01,
  2082. };
  2083. static const T B[] = {
  2084. +7.51729631084210481353e-18,
  2085. +4.41434832307170791151e-18,
  2086. -4.65030536848935832153e-17,
  2087. -3.20952592199342395980e-17,
  2088. +2.96262899764595013876e-16,
  2089. +3.30820231092092828324e-16,
  2090. -1.88035477551078244854e-15,
  2091. -3.81440307243700780478e-15,
  2092. +1.04202769841288027642e-14,
  2093. +4.27244001671195135429e-14,
  2094. -2.10154184277266431302e-14,
  2095. -4.08355111109219731823e-13,
  2096. -7.19855177624590851209e-13,
  2097. +2.03562854414708950722e-12,
  2098. +1.41258074366137813316e-11,
  2099. +3.25260358301548823856e-11,
  2100. -1.89749581235054123450e-11,
  2101. -5.58974346219658380687e-10,
  2102. -3.83538038596423702205e-09,
  2103. -2.63146884688951950684e-08,
  2104. -2.51223623787020892529e-07,
  2105. -3.88256480887769039346e-06,
  2106. -1.10588938762623716291e-04,
  2107. -9.76109749136146840777e-03,
  2108. +7.78576235018280120474e-01,
  2109. };
  2110. T p;
  2111. T q = 0.0;
  2112. if (abs(x) <= T(8.0)) {
  2113. T a = A[0];
  2114. for (uint8_t index = 1; index < 29; index++) {
  2115. p = q;
  2116. q = a;
  2117. a = ((abs(x) / T(2.0)) - T(2.0)) * q - p + A[index];
  2118. }
  2119. if (x < T(0.0)) {
  2120. return -(T(0.5) * (a - p) * abs(x) * exp(abs(x)));
  2121. }
  2122. return T(0.5) * (a - p) * abs(x) * exp(abs(x));
  2123. }
  2124. T b = B[0];
  2125. for (uint8_t index = 1; index < 25; index++) {
  2126. p = q;
  2127. q = b;
  2128. b = (T(32.0) / abs(x) - T(2.0)) * q - p + B[index];
  2129. }
  2130. if (x < T(0.0)) {
  2131. return -(exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x)));
  2132. }
  2133. return exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x));
  2134. } // modified_bessel_i1_forward(T x)
  2135. ); // modified_bessel_i1_string
  2136. const auto modified_bessel_k0_string = modified_bessel_i0_string + jiterator_stringify(
  2137. template<typename T>
  2138. T modified_bessel_k0_forward(T x) {
  2139. static const T A[] = {
  2140. +1.37446543561352307156e-16,
  2141. +4.25981614279661018399e-14,
  2142. +1.03496952576338420167e-11,
  2143. +1.90451637722020886025e-09,
  2144. +2.53479107902614945675e-07,
  2145. +2.28621210311945178607e-05,
  2146. +1.26461541144692592338e-03,
  2147. +3.59799365153615016266e-02,
  2148. +3.44289899924628486886e-01,
  2149. -5.35327393233902768720e-01,
  2150. };
  2151. static const T B[] = {
  2152. +5.30043377268626276149e-18,
  2153. -1.64758043015242134646e-17,
  2154. +5.21039150503902756861e-17,
  2155. -1.67823109680541210385e-16,
  2156. +5.51205597852431940784e-16,
  2157. -1.84859337734377901440e-15,
  2158. +6.34007647740507060557e-15,
  2159. -2.22751332699166985548e-14,
  2160. +8.03289077536357521100e-14,
  2161. -2.98009692317273043925e-13,
  2162. +1.14034058820847496303e-12,
  2163. -4.51459788337394416547e-12,
  2164. +1.85594911495471785253e-11,
  2165. -7.95748924447710747776e-11,
  2166. +3.57739728140030116597e-10,
  2167. -1.69753450938905987466e-09,
  2168. +8.57403401741422608519e-09,
  2169. -4.66048989768794782956e-08,
  2170. +2.76681363944501510342e-07,
  2171. -1.83175552271911948767e-06,
  2172. +1.39498137188764993662e-05,
  2173. -1.28495495816278026384e-04,
  2174. +1.56988388573005337491e-03,
  2175. -3.14481013119645005427e-02,
  2176. +2.44030308206595545468e+00,
  2177. };
  2178. if (x == T(0.0)) {
  2179. return INFINITY;
  2180. }
  2181. if (x < T(0.0)) {
  2182. return NAN;
  2183. }
  2184. T p;
  2185. T q = 0.0;
  2186. if (x <= T(2.0)) {
  2187. T a = A[0];
  2188. for (uint8_t index = 1; index < 10; index++) {
  2189. p = q;
  2190. q = a;
  2191. a = (x * x - T(2.0)) * q - p + A[index];
  2192. }
  2193. return T(0.5) * (a - p) - log(0.5 * x) * modified_bessel_i0_forward(x);
  2194. }
  2195. T b = B[0];
  2196. for (uint8_t index = 1; index < 25; index++) {
  2197. p = q;
  2198. q = b;
  2199. b = (T(8.0) / x - T(2.0)) * q - p + B[index];
  2200. }
  2201. return exp(-x) * (T(0.5) * (b - p)) / sqrt(x);
  2202. } // modified_bessel_k0_forward(T x)
  2203. ); // modified_bessel_k0_string
  2204. const auto scaled_modified_bessel_k0_string = modified_bessel_i0_string + jiterator_stringify(
  2205. template<typename T>
  2206. T scaled_modified_bessel_k0_forward(T x) {
  2207. static const T A[] = {
  2208. +1.37446543561352307156e-16,
  2209. +4.25981614279661018399e-14,
  2210. +1.03496952576338420167e-11,
  2211. +1.90451637722020886025e-09,
  2212. +2.53479107902614945675e-07,
  2213. +2.28621210311945178607e-05,
  2214. +1.26461541144692592338e-03,
  2215. +3.59799365153615016266e-02,
  2216. +3.44289899924628486886e-01,
  2217. -5.35327393233902768720e-01,
  2218. };
  2219. static const T B[] = {
  2220. +5.30043377268626276149e-18,
  2221. -1.64758043015242134646e-17,
  2222. +5.21039150503902756861e-17,
  2223. -1.67823109680541210385e-16,
  2224. +5.51205597852431940784e-16,
  2225. -1.84859337734377901440e-15,
  2226. +6.34007647740507060557e-15,
  2227. -2.22751332699166985548e-14,
  2228. +8.03289077536357521100e-14,
  2229. -2.98009692317273043925e-13,
  2230. +1.14034058820847496303e-12,
  2231. -4.51459788337394416547e-12,
  2232. +1.85594911495471785253e-11,
  2233. -7.95748924447710747776e-11,
  2234. +3.57739728140030116597e-10,
  2235. -1.69753450938905987466e-09,
  2236. +8.57403401741422608519e-09,
  2237. -4.66048989768794782956e-08,
  2238. +2.76681363944501510342e-07,
  2239. -1.83175552271911948767e-06,
  2240. +1.39498137188764993662e-05,
  2241. -1.28495495816278026384e-04,
  2242. +1.56988388573005337491e-03,
  2243. -3.14481013119645005427e-02,
  2244. +2.44030308206595545468e+00,
  2245. };
  2246. if (x == T(0.0)) {
  2247. return INFINITY;
  2248. }
  2249. if (x < T(0.0)) {
  2250. return NAN;
  2251. }
  2252. T p;
  2253. T q = 0.0;
  2254. if (x <= T(2.0)) {
  2255. T a = A[0];
  2256. for (uint8_t index = 1; index < 10; index++) {
  2257. p = q;
  2258. q = a;
  2259. a = (x * x - T(2.0)) * q - p + A[index];
  2260. }
  2261. return (T(0.5) * (a - p) - log(T(0.5) * x) * modified_bessel_i0_forward(x)) * exp(x);
  2262. }
  2263. T b = B[0];
  2264. for (uint8_t index = 1; index < 25; index++) {
  2265. p = q;
  2266. q = b;
  2267. b = (T(8.0) / x - T(2.0)) * q - p + B[index];
  2268. }
  2269. return T(0.5) * (b - p) / sqrt(x);
  2270. } // T scaled_modified_bessel_k0_forward(T x)
  2271. ); // scaled_modified_bessel_k0_string
  2272. const auto modified_bessel_k1_string = modified_bessel_i1_string + jiterator_stringify(
  2273. template<typename T>
  2274. T modified_bessel_k1_forward(T x) {
  2275. static const T A[] = {
  2276. -7.02386347938628759343e-18,
  2277. -2.42744985051936593393e-15,
  2278. -6.66690169419932900609e-13,
  2279. -1.41148839263352776110e-10,
  2280. -2.21338763073472585583e-08,
  2281. -2.43340614156596823496e-06,
  2282. -1.73028895751305206302e-04,
  2283. -6.97572385963986435018e-03,
  2284. -1.22611180822657148235e-01,
  2285. -3.53155960776544875667e-01,
  2286. +1.52530022733894777053e+00,
  2287. };
  2288. static const T B[] = {
  2289. -5.75674448366501715755e-18,
  2290. +1.79405087314755922667e-17,
  2291. -5.68946255844285935196e-17,
  2292. +1.83809354436663880070e-16,
  2293. -6.05704724837331885336e-16,
  2294. +2.03870316562433424052e-15,
  2295. -7.01983709041831346144e-15,
  2296. +2.47715442448130437068e-14,
  2297. -8.97670518232499435011e-14,
  2298. +3.34841966607842919884e-13,
  2299. -1.28917396095102890680e-12,
  2300. +5.13963967348173025100e-12,
  2301. -2.12996783842756842877e-11,
  2302. +9.21831518760500529508e-11,
  2303. -4.19035475934189648750e-10,
  2304. +2.01504975519703286596e-09,
  2305. -1.03457624656780970260e-08,
  2306. +5.74108412545004946722e-08,
  2307. -3.50196060308781257119e-07,
  2308. +2.40648494783721712015e-06,
  2309. -1.93619797416608296024e-05,
  2310. +1.95215518471351631108e-04,
  2311. -2.85781685962277938680e-03,
  2312. +1.03923736576817238437e-01,
  2313. +2.72062619048444266945e+00,
  2314. };
  2315. if (x == T(0.0)) {
  2316. return INFINITY;
  2317. }
  2318. if (x < T(0.0)) {
  2319. return NAN;
  2320. }
  2321. T p;
  2322. T q = 0.0;
  2323. if (x <= T(2.0)) {
  2324. T a = A[0];
  2325. for (uint8_t index = 1; index < 11; index++) {
  2326. p = q;
  2327. q = a;
  2328. a = (x * x - T(2.0)) * q - p + A[index];
  2329. }
  2330. return log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x;
  2331. }
  2332. T b = B[0];
  2333. for (uint8_t index = 1; index < 25; index++) {
  2334. p = q;
  2335. q = b;
  2336. b = (T(8.0) / x - T(2.0)) * q - p + B[index];
  2337. }
  2338. return exp(-x) * (T(0.5) * (b - p)) / sqrt(x);
  2339. } // modified_bessel_k1_forward(T x)
  2340. ); // modified_bessel_k1_string
  2341. const auto scaled_modified_bessel_k1_string = modified_bessel_i1_string + jiterator_stringify(
  2342. template<typename T>
  2343. T scaled_modified_bessel_k1_forward(T x) {
  2344. static const T A[] = {
  2345. -7.02386347938628759343e-18,
  2346. -2.42744985051936593393e-15,
  2347. -6.66690169419932900609e-13,
  2348. -1.41148839263352776110e-10,
  2349. -2.21338763073472585583e-08,
  2350. -2.43340614156596823496e-06,
  2351. -1.73028895751305206302e-04,
  2352. -6.97572385963986435018e-03,
  2353. -1.22611180822657148235e-01,
  2354. -3.53155960776544875667e-01,
  2355. +1.52530022733894777053e+00,
  2356. };
  2357. static const T B[] = {
  2358. -5.75674448366501715755e-18,
  2359. +1.79405087314755922667e-17,
  2360. -5.68946255844285935196e-17,
  2361. +1.83809354436663880070e-16,
  2362. -6.05704724837331885336e-16,
  2363. +2.03870316562433424052e-15,
  2364. -7.01983709041831346144e-15,
  2365. +2.47715442448130437068e-14,
  2366. -8.97670518232499435011e-14,
  2367. +3.34841966607842919884e-13,
  2368. -1.28917396095102890680e-12,
  2369. +5.13963967348173025100e-12,
  2370. -2.12996783842756842877e-11,
  2371. +9.21831518760500529508e-11,
  2372. -4.19035475934189648750e-10,
  2373. +2.01504975519703286596e-09,
  2374. -1.03457624656780970260e-08,
  2375. +5.74108412545004946722e-08,
  2376. -3.50196060308781257119e-07,
  2377. +2.40648494783721712015e-06,
  2378. -1.93619797416608296024e-05,
  2379. +1.95215518471351631108e-04,
  2380. -2.85781685962277938680e-03,
  2381. +1.03923736576817238437e-01,
  2382. +2.72062619048444266945e+00,
  2383. };
  2384. if (x == T(0.0)) {
  2385. return INFINITY;
  2386. }
  2387. if (x < T(0.0)) {
  2388. return NAN;
  2389. }
  2390. T p;
  2391. T q = 0.0;
  2392. if (x <= T(2.0)) {
  2393. T a = A[0];
  2394. for (uint8_t index = 1; index < 11; index++) {
  2395. p = q;
  2396. q = a;
  2397. a = (x * x - T(2.0)) * q - p + A[index];
  2398. }
  2399. return (log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x) * exp(x);
  2400. }
  2401. T b = B[0];
  2402. for (uint8_t index = 1; index < 25; index++) {
  2403. p = q;
  2404. q = b;
  2405. b = (T(8.0) / x - T(2.0)) * q - p + B[index];
  2406. }
  2407. return (T(0.5) * (b - p) / sqrt(x));
  2408. } // T scaled_modified_bessel_k1_forward(T x)
  2409. ); // scaled_modified_bessel_k1_string
  2410. const auto shifted_chebyshev_polynomial_t_string = jiterator_stringify(
  2411. template<typename T>
  2412. T shifted_chebyshev_polynomial_t_forward(T x, int64_t n) {
  2413. if (n < 0) {
  2414. return T(0.0);
  2415. }
  2416. if (x == T(1.0)) {
  2417. return T(1.0);
  2418. }
  2419. if (x == T(0.0)) {
  2420. if (n % 2 == 0) {
  2421. return T(1.0);
  2422. }
  2423. return T(-1.0);
  2424. }
  2425. if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) {
  2426. return cos(n * acos(x + x - T(1.0)));
  2427. }
  2428. if (n == 0) {
  2429. return T(1.0);
  2430. }
  2431. if (n == 1) {
  2432. return x + x - T(1.0);
  2433. }
  2434. T p = T(1.0);
  2435. T q = x + x - T(1.0);
  2436. T r;
  2437. for (int64_t k = 2; k <= n; k++) {
  2438. r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
  2439. p = q;
  2440. q = r;
  2441. }
  2442. return r;
  2443. } // shifted_chebyshev_polynomial_t_forward(T x, int64_t n)
  2444. template<typename T>
  2445. T shifted_chebyshev_polynomial_t_forward(T x, T n) {
  2446. return shifted_chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
  2447. } // shifted_chebyshev_polynomial_t_forward(T x, T n)
  2448. ); // shifted_chebyshev_polynomial_t_string
  2449. const auto shifted_chebyshev_polynomial_u_string = jiterator_stringify(
  2450. template<typename T>
  2451. T shifted_chebyshev_polynomial_u_forward(T x, int64_t n) {
  2452. if (n < 0) {
  2453. return T(0.0);
  2454. }
  2455. if (x == T(1.0)) {
  2456. return n + 1;
  2457. }
  2458. if (x == T(0.0)) {
  2459. if (n % 2 == 0) {
  2460. return n + 1;
  2461. }
  2462. return -(n + 1);
  2463. }
  2464. if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) {
  2465. if (sin(acos(x + x - T(1.0))) != T(0.0)) {
  2466. return sin((n + 1) * acos(x + x - T(1.0))) / sin(acos(x + x - T(1.0)));
  2467. }
  2468. return (n + 1) * cos((n + 1) * acos(x + x - T(1.0))) / (x + x - T(1.0));
  2469. }
  2470. if (n == 0) {
  2471. return T(1.0);
  2472. }
  2473. if (n == 1) {
  2474. return x + x - T(1.0) + (x + x - T(1.0));
  2475. }
  2476. T p = T(1.0);
  2477. T q = x + x - T(1.0) + (x + x - T(1.0));
  2478. T r;
  2479. for (int64_t k = 2; k <= n; k++) {
  2480. r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
  2481. p = q;
  2482. q = r;
  2483. }
  2484. return r;
  2485. } // shifted_chebyshev_polynomial_u_forward(T x, int64_t n)
  2486. template<typename T>
  2487. T shifted_chebyshev_polynomial_u_forward(T x, T n) {
  2488. return shifted_chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
  2489. } // shifted_chebyshev_polynomial_u_forward(T x, T n)
  2490. ); // shifted_chebyshev_polynomial_u_string
  2491. const auto shifted_chebyshev_polynomial_v_string = jiterator_stringify(
  2492. template<typename T>
  2493. T shifted_chebyshev_polynomial_v_forward(T x, int64_t n) {
  2494. if (n < 0) {
  2495. return T(0.0);
  2496. }
  2497. if (x == T(1.0)) {
  2498. return T(1.0);
  2499. }
  2500. if (x == T(0.0)) {
  2501. if (n % 2 == 0) {
  2502. return (n + n + 1);
  2503. }
  2504. return -(n + n + 1);
  2505. }
  2506. if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) {
  2507. if (sin(acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) {
  2508. return cos(((n) + T(0.5)) * acos(x + x - T(1.0))) / cos(acos(x + x - T(1.0)) / T(2.0));
  2509. }
  2510. if (n % 2 == 0) {
  2511. return n + n + 1;
  2512. }
  2513. return -(n + n + 1);
  2514. }
  2515. if (n == 0) {
  2516. return T(1.0);
  2517. }
  2518. if (n == 1) {
  2519. return x + x - T(1.0) + (x + x - T(1.0)) - T(1.0);
  2520. }
  2521. T p = T(1.0);
  2522. T q = x + x - T(1.0) + (x + x - T(1.0)) - T(1.0);
  2523. T r;
  2524. for (int64_t k = 2; k <= n; k++) {
  2525. r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
  2526. p = q;
  2527. q = r;
  2528. }
  2529. return r;
  2530. } // shifted_chebyshev_polynomial_v_forward(T x, int64_t n)
  2531. template<typename T>
  2532. T shifted_chebyshev_polynomial_v_forward(T x, T n) {
  2533. return shifted_chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
  2534. } // shifted_chebyshev_polynomial_v_forward(T x, T n)
  2535. ); // shifted_chebyshev_polynomial_v_string
  2536. const auto shifted_chebyshev_polynomial_w_string = jiterator_stringify(
  2537. template<typename T>
  2538. T shifted_chebyshev_polynomial_w_forward(T x, int64_t n) {
  2539. if (n < 0) {
  2540. return T(0.0);
  2541. }
  2542. if (x == T(1.0)) {
  2543. return n + n + 1;
  2544. }
  2545. if (x == T(0.0)) {
  2546. if (n % 2 == 0) {
  2547. return T(1.0);
  2548. }
  2549. return T(-1.0);
  2550. }
  2551. if ((n > 4) && (abs(x + x - T(1.0)) < T(1.0))) {
  2552. if (cos(acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) {
  2553. return sin((n + T(0.5)) * acos(x + x - T(1.0))) / sin(acos(x + x - T(1.0)) / T(2.0));
  2554. }
  2555. if (n % 2 == 0) {
  2556. return T(1.0);
  2557. }
  2558. return T(-1.0);
  2559. }
  2560. if (n == 0) {
  2561. return T(1.0);
  2562. }
  2563. if (n == 1) {
  2564. return x + x - T(1.0) + (x + x - T(1.0)) + T(1.0);
  2565. }
  2566. T p = T(1.0);
  2567. T q = x + x - T(1.0) + (x + x - T(1.0)) + T(1.0);
  2568. T r;
  2569. for (int64_t k = 2; k <= n; k++) {
  2570. r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
  2571. p = q;
  2572. q = r;
  2573. }
  2574. return r;
  2575. } // shifted_chebyshev_polynomial_w_forward(T x, int64_t n)
  2576. template<typename T>
  2577. T shifted_chebyshev_polynomial_w_forward(T x, T n) {
  2578. return shifted_chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
  2579. } // shifted_chebyshev_polynomial_w_forward(T x, T n)
  2580. ); // shifted_chebyshev_polynomial_w_string
  2581. const auto spherical_bessel_j0_string = jiterator_stringify(
  2582. template<typename T>
  2583. T spherical_bessel_j0_forward(T x) {
  2584. if (isinf(x)) {
  2585. return T(0.0);
  2586. }
  2587. if (abs(x) < T(0.5)) {
  2588. return T(1.0) + x * x * (T(-1.0) / T(6.0) + x * x * (T(1.0) / T(120.0) + x * x * (T(-1.0) / T(5040.0) + x * x * (T(1.0) / T(362880.0) + x * x * (T(-1.0) / T(39916800.0) + x * x * (T(1.0) / T(6227020800.0)))))));
  2589. }
  2590. return sin(x) / x;
  2591. } // T spherical_bessel_j0_forward(T x)
  2592. ); // spherical_bessel_j0_string
  2593. #else // !AT_USE_JITERATOR() -- kernels must be precompiled
  2594. template <typename scalar_t>
  2595. static inline C10_HOST_DEVICE scalar_t calc_gcd(scalar_t a_in, scalar_t b_in) {
  2596. scalar_t a = ::abs(a_in);
  2597. scalar_t b = ::abs(b_in);
  2598. while (a != 0) {
  2599. scalar_t c = a;
  2600. a = b % a;
  2601. b = c;
  2602. }
  2603. return b;
  2604. }
  2605. /*
  2606. * For licensing information, please refer to the the cpu implementation located in "ATen/native/Math.h".
  2607. */
  2608. template <typename scalar_t>
  2609. static inline C10_HOST_DEVICE scalar_t calc_digamma(scalar_t in) {
  2610. // [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma
  2611. using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/true>;
  2612. static const double PI_f64 = 3.14159265358979323846;
  2613. const accscalar_t PSI_10 = 2.25175258906672110764;
  2614. const accscalar_t A[] = {
  2615. 8.33333333333333333333E-2,
  2616. -2.10927960927960927961E-2,
  2617. 7.57575757575757575758E-3,
  2618. -4.16666666666666666667E-3,
  2619. 3.96825396825396825397E-3,
  2620. -8.33333333333333333333E-3,
  2621. 8.33333333333333333333E-2,
  2622. };
  2623. accscalar_t x = static_cast<accscalar_t>(in);
  2624. if (x == 0) {
  2625. // As per C++ standard for gamma related functions and SciPy,
  2626. // If the argument is ±0, ±∞ is returned
  2627. return std::copysign(static_cast<scalar_t>(INFINITY), -x);
  2628. }
  2629. bool x_is_integer = x == ::trunc(x);
  2630. accscalar_t result = 0;
  2631. if (x < 0) {
  2632. if (x_is_integer) {
  2633. // As per C++ standard for gamma related functions and SciPy,
  2634. // If the argument is a negative integer, NaN is returned
  2635. return static_cast<scalar_t>(NAN);
  2636. }
  2637. // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
  2638. // accurate than tan(pi * x). While these operations are mathematically equivalent
  2639. // since both x and r are in radians and tan() has a periodicity of pi, in practice
  2640. // the computation of pi * x is a source of error (when |x| > 1).
  2641. double q, r;
  2642. r = ::modf(static_cast<double>(x), &q);
  2643. result = static_cast<accscalar_t>(- PI_f64 / ::tan(PI_f64 * r));
  2644. x = 1 - x;
  2645. }
  2646. while (x < 10) {
  2647. result -= 1 / x;
  2648. x += 1;
  2649. }
  2650. if (x == 10) {
  2651. return static_cast<scalar_t>(result + PSI_10);
  2652. }
  2653. accscalar_t y = 0;
  2654. if (x < 1.0e17) {
  2655. accscalar_t z = 1 / (x * x);
  2656. accscalar_t polevl_result = 0;
  2657. for (int i = 0; i <= 6; i++) {
  2658. polevl_result = polevl_result * z + A[i];
  2659. }
  2660. y = z * polevl_result;
  2661. }
  2662. return static_cast<scalar_t>(::log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result);
  2663. }
  2664. template <typename scalar_t>
  2665. static inline C10_HOST_DEVICE scalar_t calc_trigamma(scalar_t in) {
  2666. using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/true>;
  2667. const accscalar_t PI = 3.14159265358979323846;
  2668. accscalar_t x = static_cast<accscalar_t>(in);
  2669. accscalar_t sign = +1;
  2670. accscalar_t result = 0;
  2671. if (x < 0.5f) {
  2672. sign = -1;
  2673. accscalar_t sin_pi_x = ::sin(PI * x);
  2674. result -= (PI * PI) / (sin_pi_x * sin_pi_x);
  2675. x = 1 - x;
  2676. }
  2677. for (int i = 0; i < 6; ++i) {
  2678. result += 1 / (x * x);
  2679. x += 1;
  2680. }
  2681. const accscalar_t one = static_cast<scalar_t>(1);
  2682. const accscalar_t ixx = 1 / (x*x);
  2683. result += (1 + 1 / (2*x) + ixx * (one/6 - ixx * (one/30 - ixx * (one/42)))) / x;
  2684. return static_cast<scalar_t>(sign * result);
  2685. }
  2686. /*
  2687. * For licensing information and documentation, please refer to the the cpu implementation located in "ATen/native/Math.h".
  2688. */
  2689. template <typename scalar_t>
  2690. static inline C10_HOST_DEVICE scalar_t
  2691. chbevl(scalar_t _x, const scalar_t array[], size_t len) {
  2692. static_assert(!std::is_same<scalar_t, Half>() && !std::is_same<scalar_t, BFloat16>(), "don't instantiate with low precision type");
  2693. scalar_t b0, b1, b2;
  2694. b0 = array[0];
  2695. b1 = 0;
  2696. for (size_t i = 1; i < len; ++i) {
  2697. b2 = b1;
  2698. b1 = b0;
  2699. b0 = _x * b1 - b2 + array[i];
  2700. }
  2701. return (0.5 * (b0 - b2));
  2702. }
  2703. /*
  2704. * For licensing information and documentation, please refer to the the cpu implementation located in "ATen/native/Math.h".
  2705. */
  2706. template <typename T>
  2707. C10_HOST_DEVICE inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() {
  2708. /* Chebyshev coefficients for exp(-x) I0(x)
  2709. * in the interval [0,8].
  2710. *
  2711. * lim(x->0){ exp(-x) I0(x) } = 1.
  2712. */
  2713. static const T coefficients[] = {
  2714. -4.41534164647933937950E-18, 3.33079451882223809783E-17,
  2715. -2.43127984654795469359E-16, 1.71539128555513303061E-15,
  2716. -1.16853328779934516808E-14, 7.67618549860493561688E-14,
  2717. -4.85644678311192946090E-13, 2.95505266312963983461E-12,
  2718. -1.72682629144155570723E-11, 9.67580903537323691224E-11,
  2719. -5.18979560163526290666E-10, 2.65982372468238665035E-9,
  2720. -1.30002500998624804212E-8, 6.04699502254191894932E-8,
  2721. -2.67079385394061173391E-7, 1.11738753912010371815E-6,
  2722. -4.41673835845875056359E-6, 1.64484480707288970893E-5,
  2723. -5.75419501008210370398E-5, 1.88502885095841655729E-4,
  2724. -5.76375574538582365885E-4, 1.63947561694133579842E-3,
  2725. -4.32430999505057594430E-3, 1.05464603945949983183E-2,
  2726. -2.37374148058994688156E-2, 4.93052842396707084878E-2,
  2727. -9.49010970480476444210E-2, 1.71620901522208775349E-1,
  2728. -3.04682672343198398683E-1, 6.76795274409476084995E-1};
  2729. return std::make_tuple(coefficients, 30);
  2730. }
  2731. template <typename T>
  2732. C10_HOST_DEVICE inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() {
  2733. /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x)
  2734. * in the inverted interval [8,infinity].
  2735. *
  2736. * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi).
  2737. */
  2738. static const T coefficients[] = {
  2739. -7.23318048787475395456E-18, -4.83050448594418207126E-18,
  2740. 4.46562142029675999901E-17, 3.46122286769746109310E-17,
  2741. -2.82762398051658348494E-16, -3.42548561967721913462E-16,
  2742. 1.77256013305652638360E-15, 3.81168066935262242075E-15,
  2743. -9.55484669882830764870E-15, -4.15056934728722208663E-14,
  2744. 1.54008621752140982691E-14, 3.85277838274214270114E-13,
  2745. 7.18012445138366623367E-13, -1.79417853150680611778E-12,
  2746. -1.32158118404477131188E-11, -3.14991652796324136454E-11,
  2747. 1.18891471078464383424E-11, 4.94060238822496958910E-10,
  2748. 3.39623202570838634515E-9, 2.26666899049817806459E-8,
  2749. 2.04891858946906374183E-7, 2.89137052083475648297E-6,
  2750. 6.88975834691682398426E-5, 3.36911647825569408990E-3,
  2751. 8.04490411014108831608E-1};
  2752. return std::make_tuple(coefficients, 25);
  2753. }
  2754. template <typename scalar_t>
  2755. static inline C10_HOST_DEVICE scalar_t calc_i0(scalar_t _x) {
  2756. static_assert(!std::is_same<scalar_t, Half>() && !std::is_same<scalar_t, BFloat16>(), "don't instantiate with low precision type");
  2757. // Upcast input for numerical accuracy purposes
  2758. // Needed for accurate results if input is bfloat16 or float16
  2759. scalar_t x = ::abs(_x);
  2760. if (x <= scalar_t{8.0}) {
  2761. auto coeff_pair = chebyshev_coefficients_i0e_A<scalar_t>();
  2762. auto A = std::get<0>(coeff_pair);
  2763. auto len = std::get<1>(coeff_pair);
  2764. scalar_t y = (x / scalar_t{2.0}) - scalar_t{2.0};
  2765. return (::exp(x) * chbevl(y, A, len));
  2766. }
  2767. auto coeff_pair = chebyshev_coefficients_i0e_B<scalar_t>();
  2768. auto B = std::get<0>(coeff_pair);
  2769. auto len = std::get<1>(coeff_pair);
  2770. return (::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / ::sqrt(x));
  2771. }
  2772. template <typename T>
  2773. C10_HOST_DEVICE inline
  2774. typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
  2775. chebyshev_coefficients_i1e_A() {
  2776. /* Chebyshev coefficients for exp(-x) I1(x)
  2777. * in the interval [0,8].
  2778. *
  2779. * lim(x->0){ exp(-x) I1(x) / x } = 1/2.
  2780. */
  2781. static const T coefficients[] = {
  2782. 2.77791411276104639959E-18, -2.11142121435816608115E-17,
  2783. 1.55363195773620046921E-16, -1.10559694773538630805E-15,
  2784. 7.60068429473540693410E-15, -5.04218550472791168711E-14,
  2785. 3.22379336594557470981E-13, -1.98397439776494371520E-12,
  2786. 1.17361862988909016308E-11, -6.66348972350202774223E-11,
  2787. 3.62559028155211703701E-10, -1.88724975172282928790E-9,
  2788. 9.38153738649577178388E-9, -4.44505912879632808065E-8,
  2789. 2.00329475355213526229E-7, -8.56872026469545474066E-7,
  2790. 3.47025130813767847674E-6, -1.32731636560394358279E-5,
  2791. 4.78156510755005422638E-5, -1.61760815825896745588E-4,
  2792. 5.12285956168575772895E-4, -1.51357245063125314899E-3,
  2793. 4.15642294431288815669E-3, -1.05640848946261981558E-2,
  2794. 2.47264490306265168283E-2, -5.29459812080949914269E-2,
  2795. 1.02643658689847095384E-1, -1.76416518357834055153E-1,
  2796. 2.52587186443633654823E-1};
  2797. return std::make_tuple(coefficients, 29);
  2798. }
  2799. template <typename T>
  2800. C10_HOST_DEVICE inline
  2801. typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
  2802. chebyshev_coefficients_i1e_A() {
  2803. /* Chebyshev coefficients for exp(-x) I1(x)
  2804. * in the interval [0,8].
  2805. *
  2806. * lim(x->0){ exp(-x) I1(x) / x } = 1/2.
  2807. */
  2808. static const T coeff[] = {
  2809. 9.38153738649577178388E-9f,
  2810. -4.44505912879632808065E-8f,
  2811. 2.00329475355213526229E-7f,
  2812. -8.56872026469545474066E-7f,
  2813. 3.47025130813767847674E-6f,
  2814. -1.32731636560394358279E-5f,
  2815. 4.78156510755005422638E-5f,
  2816. -1.61760815825896745588E-4f,
  2817. 5.12285956168575772895E-4f,
  2818. -1.51357245063125314899E-3f,
  2819. 4.15642294431288815669E-3f,
  2820. -1.05640848946261981558E-2f,
  2821. 2.47264490306265168283E-2f,
  2822. -5.29459812080949914269E-2f,
  2823. 1.02643658689847095384E-1f,
  2824. -1.76416518357834055153E-1f,
  2825. 2.52587186443633654823E-1f};
  2826. return std::make_tuple(coeff, 17);
  2827. };
  2828. template <typename T>
  2829. C10_HOST_DEVICE inline
  2830. typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
  2831. chebyshev_coefficients_i1e_B() {
  2832. /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
  2833. * in the inverted interval [8,infinity].
  2834. *
  2835. * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi).
  2836. */
  2837. static const T coefficients[] = {
  2838. 7.51729631084210481353E-18, 4.41434832307170791151E-18,
  2839. -4.65030536848935832153E-17, -3.20952592199342395980E-17,
  2840. 2.96262899764595013876E-16, 3.30820231092092828324E-16,
  2841. -1.88035477551078244854E-15, -3.81440307243700780478E-15,
  2842. 1.04202769841288027642E-14, 4.27244001671195135429E-14,
  2843. -2.10154184277266431302E-14, -4.08355111109219731823E-13,
  2844. -7.19855177624590851209E-13, 2.03562854414708950722E-12,
  2845. 1.41258074366137813316E-11, 3.25260358301548823856E-11,
  2846. -1.89749581235054123450E-11, -5.58974346219658380687E-10,
  2847. -3.83538038596423702205E-9, -2.63146884688951950684E-8,
  2848. -2.51223623787020892529E-7, -3.88256480887769039346E-6,
  2849. -1.10588938762623716291E-4, -9.76109749136146840777E-3,
  2850. 7.78576235018280120474E-1};
  2851. return std::make_tuple(coefficients, 25);
  2852. }
  2853. template <typename T>
  2854. C10_HOST_DEVICE inline
  2855. typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
  2856. chebyshev_coefficients_i1e_B() {
  2857. /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
  2858. * in the inverted interval [8,infinity].
  2859. *
  2860. * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi).
  2861. */
  2862. static const T coeff[] = {
  2863. -3.83538038596423702205E-9f,
  2864. -2.63146884688951950684E-8f,
  2865. -2.51223623787020892529E-7f,
  2866. -3.88256480887769039346E-6f,
  2867. -1.10588938762623716291E-4f,
  2868. -9.76109749136146840777E-3f,
  2869. 7.78576235018280120474E-1f};
  2870. return std::make_tuple(coeff, 7);
  2871. };
  2872. template <typename scalar_t>
  2873. static inline C10_HOST_DEVICE scalar_t calc_i1(scalar_t _x) {
  2874. const auto x = ::abs(_x);
  2875. if (x <= scalar_t{8.0}) {
  2876. auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>();
  2877. auto A = std::get<0>(coeff_pair);
  2878. auto len = std::get<1>(coeff_pair);
  2879. scalar_t y = x / scalar_t{2.0} - scalar_t{2.0};
  2880. const scalar_t out = ::exp(x) * x * chbevl(y, A, len);
  2881. return (_x < scalar_t{0.0}) ? -out : out;
  2882. }
  2883. auto coeff_pair = chebyshev_coefficients_i1e_B<scalar_t>();
  2884. auto B = std::get<0>(coeff_pair);
  2885. auto len = std::get<1>(coeff_pair);
  2886. const scalar_t out = (::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len)) / ::sqrt(x);
  2887. return (_x < scalar_t{0.0}) ? -out : out;
  2888. }
  2889. template <typename scalar_t>
  2890. static inline C10_HOST_DEVICE scalar_t calc_i1e(scalar_t _x) {
  2891. const auto x = ::abs(_x);
  2892. if (x <= scalar_t{8.0}) {
  2893. auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>();
  2894. auto A = std::get<0>(coeff_pair);
  2895. auto len = std::get<1>(coeff_pair);
  2896. const scalar_t y = x / scalar_t{2.0} - scalar_t{2.0};
  2897. const scalar_t out = chbevl(y, A, len) * x;
  2898. return (_x < scalar_t{0.0}) ? -out : out;
  2899. }
  2900. auto coeff_pair = chebyshev_coefficients_i1e_B<scalar_t>();
  2901. auto B = std::get<0>(coeff_pair);
  2902. auto len = std::get<1>(coeff_pair);
  2903. const scalar_t out = chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / ::sqrt(x);
  2904. return (_x < scalar_t{0.0}) ? -out : out;
  2905. }
  2906. #endif // AT_USE_JITERATOR() (this closes the "else" branch of a if/else preprocessor directive)
  2907. } // namespace native
  2908. } // namespace at