""" Test functions for stats module
WRITTEN BY LOUIS LUANGKESORN <lluang@yahoo.com> FOR THE STATS MODULE
BASED ON WILKINSON'S STATISTICS QUIZ
https://www.stanford.edu/~clint/bench/wilk.txt
Additional tests by a host of SciPy developers.
"""
  7. import os
  8. import re
  9. import warnings
  10. from collections import namedtuple
  11. from itertools import product
  12. from numpy.testing import (assert_, assert_equal,
  13. assert_almost_equal, assert_array_almost_equal,
  14. assert_array_equal, assert_approx_equal,
  15. assert_allclose, assert_warns, suppress_warnings,
  16. assert_array_less)
  17. import pytest
  18. from pytest import raises as assert_raises
  19. import numpy.ma.testutils as mat
  20. from numpy import array, arange, float32, float64, power
  21. import numpy as np
  22. import scipy.stats as stats
  23. import scipy.stats.mstats as mstats
  24. import scipy.stats._mstats_basic as mstats_basic
  25. from scipy.stats._ksstats import kolmogn
  26. from scipy.special._testutils import FuncData
  27. from scipy.special import binom
  28. from scipy import optimize
  29. from .common_tests import check_named_results
  30. from scipy.spatial.distance import cdist
  31. from numpy.lib import NumpyVersion
  32. from scipy.stats._axis_nan_policy import _broadcast_concatenate
  33. from scipy.stats._stats_py import _permutation_distribution_t
  34. """ Numbers in docstrings beginning with 'W' refer to the section numbers
  35. and headings found in the STATISTICS QUIZ of Leland Wilkinson. These are
  36. considered to be essential functionality. True testing and
  37. evaluation of a statistics package requires use of the
38. NIST Statistical test data. See McCullough (1999) Assessing The Reliability
  39. of Statistical Software for a test methodology and its
  40. implementation in testing SAS, SPSS, and S-Plus
  41. """
  42. # Datasets
  43. # These data sets are from the nasty.dat sets used by Wilkinson
  44. # For completeness, I should write the relevant tests and count them as failures
  45. # Somewhat acceptable, since this is still beta software. It would count as a
  46. # good target for 1.0 status
  47. X = array([1,2,3,4,5,6,7,8,9], float)
  48. ZERO = array([0,0,0,0,0,0,0,0,0], float)
  49. BIG = array([99999991,99999992,99999993,99999994,99999995,99999996,99999997,
  50. 99999998,99999999], float)
  51. LITTLE = array([0.99999991,0.99999992,0.99999993,0.99999994,0.99999995,0.99999996,
  52. 0.99999997,0.99999998,0.99999999], float)
  53. HUGE = array([1e+12,2e+12,3e+12,4e+12,5e+12,6e+12,7e+12,8e+12,9e+12], float)
  54. TINY = array([1e-12,2e-12,3e-12,4e-12,5e-12,6e-12,7e-12,8e-12,9e-12], float)
  55. ROUND = array([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5], float)
  56. class TestTrimmedStats:
  57. # TODO: write these tests to handle missing values properly
  58. dprec = np.finfo(np.float64).precision
  59. def test_tmean(self):
  60. y = stats.tmean(X, (2, 8), (True, True))
  61. assert_approx_equal(y, 5.0, significant=self.dprec)
  62. y1 = stats.tmean(X, limits=(2, 8), inclusive=(False, False))
  63. y2 = stats.tmean(X, limits=None)
  64. assert_approx_equal(y1, y2, significant=self.dprec)
  65. x_2d = arange(63, dtype=float64).reshape(9, 7)
  66. y = stats.tmean(x_2d, axis=None)
  67. assert_approx_equal(y, x_2d.mean(), significant=self.dprec)
  68. y = stats.tmean(x_2d, axis=0)
  69. assert_array_almost_equal(y, x_2d.mean(axis=0), decimal=8)
  70. y = stats.tmean(x_2d, axis=1)
  71. assert_array_almost_equal(y, x_2d.mean(axis=1), decimal=8)
  72. y = stats.tmean(x_2d, limits=(2, 61), axis=None)
  73. assert_approx_equal(y, 31.5, significant=self.dprec)
  74. y = stats.tmean(x_2d, limits=(2, 21), axis=0)
  75. y_true = [14, 11.5, 9, 10, 11, 12, 13]
  76. assert_array_almost_equal(y, y_true, decimal=8)
  77. y = stats.tmean(x_2d, limits=(2, 21), inclusive=(True, False), axis=0)
  78. y_true = [10.5, 11.5, 9, 10, 11, 12, 13]
  79. assert_array_almost_equal(y, y_true, decimal=8)
  80. x_2d_with_nan = np.array(x_2d)
  81. x_2d_with_nan[-1, -3:] = np.nan
  82. y = stats.tmean(x_2d_with_nan, limits=(1, 13), axis=0)
  83. y_true = [7, 4.5, 5.5, 6.5, np.nan, np.nan, np.nan]
  84. assert_array_almost_equal(y, y_true, decimal=8)
  85. with suppress_warnings() as sup:
  86. sup.record(RuntimeWarning, "Mean of empty slice")
  87. y = stats.tmean(x_2d, limits=(2, 21), axis=1)
  88. y_true = [4, 10, 17, 21, np.nan, np.nan, np.nan, np.nan, np.nan]
  89. assert_array_almost_equal(y, y_true, decimal=8)
  90. y = stats.tmean(x_2d, limits=(2, 21),
  91. inclusive=(False, True), axis=1)
  92. y_true = [4.5, 10, 17, 21, np.nan, np.nan, np.nan, np.nan, np.nan]
  93. assert_array_almost_equal(y, y_true, decimal=8)
  94. def test_tvar(self):
  95. y = stats.tvar(X, limits=(2, 8), inclusive=(True, True))
  96. assert_approx_equal(y, 4.6666666666666661, significant=self.dprec)
  97. y = stats.tvar(X, limits=None)
  98. assert_approx_equal(y, X.var(ddof=1), significant=self.dprec)
  99. x_2d = arange(63, dtype=float64).reshape((9, 7))
  100. y = stats.tvar(x_2d, axis=None)
  101. assert_approx_equal(y, x_2d.var(ddof=1), significant=self.dprec)
  102. y = stats.tvar(x_2d, axis=0)
  103. assert_array_almost_equal(y[0], np.full((1, 7), 367.50000000), decimal=8)
  104. y = stats.tvar(x_2d, axis=1)
  105. assert_array_almost_equal(y[0], np.full((1, 9), 4.66666667), decimal=8)
  106. y = stats.tvar(x_2d[3, :])
  107. assert_approx_equal(y, 4.666666666666667, significant=self.dprec)
  108. with suppress_warnings() as sup:
  109. sup.record(RuntimeWarning, "Degrees of freedom <= 0 for slice.")
  110. # Limiting some values along one axis
  111. y = stats.tvar(x_2d, limits=(1, 5), axis=1, inclusive=(True, True))
  112. assert_approx_equal(y[0], 2.5, significant=self.dprec)
  113. # Limiting all values along one axis
  114. y = stats.tvar(x_2d, limits=(0, 6), axis=1, inclusive=(True, True))
  115. assert_approx_equal(y[0], 4.666666666666667, significant=self.dprec)
  116. assert_equal(y[1], np.nan)
  117. def test_tstd(self):
  118. y = stats.tstd(X, (2, 8), (True, True))
  119. assert_approx_equal(y, 2.1602468994692865, significant=self.dprec)
  120. y = stats.tstd(X, limits=None)
  121. assert_approx_equal(y, X.std(ddof=1), significant=self.dprec)
  122. def test_tmin(self):
  123. assert_equal(stats.tmin(4), 4)
  124. x = np.arange(10)
  125. assert_equal(stats.tmin(x), 0)
  126. assert_equal(stats.tmin(x, lowerlimit=0), 0)
  127. assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), 1)
  128. x = x.reshape((5, 2))
  129. assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), [2, 1])
  130. assert_equal(stats.tmin(x, axis=1), [0, 2, 4, 6, 8])
  131. assert_equal(stats.tmin(x, axis=None), 0)
  132. x = np.arange(10.)
  133. x[9] = np.nan
  134. with suppress_warnings() as sup:
  135. sup.record(RuntimeWarning, "invalid value*")
  136. assert_equal(stats.tmin(x), np.nan)
  137. assert_equal(stats.tmin(x, nan_policy='omit'), 0.)
  138. assert_raises(ValueError, stats.tmin, x, nan_policy='raise')
  139. assert_raises(ValueError, stats.tmin, x, nan_policy='foobar')
  140. msg = "'propagate', 'raise', 'omit'"
  141. with assert_raises(ValueError, match=msg):
  142. stats.tmin(x, nan_policy='foo')
  143. def test_tmax(self):
  144. assert_equal(stats.tmax(4), 4)
  145. x = np.arange(10)
  146. assert_equal(stats.tmax(x), 9)
  147. assert_equal(stats.tmax(x, upperlimit=9), 9)
  148. assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), 8)
  149. x = x.reshape((5, 2))
  150. assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), [8, 7])
  151. assert_equal(stats.tmax(x, axis=1), [1, 3, 5, 7, 9])
  152. assert_equal(stats.tmax(x, axis=None), 9)
  153. x = np.arange(10.)
  154. x[6] = np.nan
  155. with suppress_warnings() as sup:
  156. sup.record(RuntimeWarning, "invalid value*")
  157. assert_equal(stats.tmax(x), np.nan)
  158. assert_equal(stats.tmax(x, nan_policy='omit'), 9.)
  159. assert_raises(ValueError, stats.tmax, x, nan_policy='raise')
  160. assert_raises(ValueError, stats.tmax, x, nan_policy='foobar')
  161. def test_tsem(self):
  162. y = stats.tsem(X, limits=(3, 8), inclusive=(False, True))
  163. y_ref = np.array([4, 5, 6, 7, 8])
  164. assert_approx_equal(y, y_ref.std(ddof=1) / np.sqrt(y_ref.size),
  165. significant=self.dprec)
  166. assert_approx_equal(stats.tsem(X, limits=[-1, 10]),
  167. stats.tsem(X, limits=None),
  168. significant=self.dprec)
  169. class TestCorrPearsonr:
  170. """ W.II.D. Compute a correlation matrix on all the variables.
  171. All the correlations, except for ZERO and MISS, should be exactly 1.
  172. ZERO and MISS should have undefined or missing correlations with the
  173. other variables. The same should go for SPEARMAN correlations, if
  174. your program has them.
  175. """
  176. def test_pXX(self):
  177. y = stats.pearsonr(X,X)
  178. r = y[0]
  179. assert_approx_equal(r,1.0)
  180. def test_pXBIG(self):
  181. y = stats.pearsonr(X,BIG)
  182. r = y[0]
  183. assert_approx_equal(r,1.0)
  184. def test_pXLITTLE(self):
  185. y = stats.pearsonr(X,LITTLE)
  186. r = y[0]
  187. assert_approx_equal(r,1.0)
  188. def test_pXHUGE(self):
  189. y = stats.pearsonr(X,HUGE)
  190. r = y[0]
  191. assert_approx_equal(r,1.0)
  192. def test_pXTINY(self):
  193. y = stats.pearsonr(X,TINY)
  194. r = y[0]
  195. assert_approx_equal(r,1.0)
  196. def test_pXROUND(self):
  197. y = stats.pearsonr(X,ROUND)
  198. r = y[0]
  199. assert_approx_equal(r,1.0)
  200. def test_pBIGBIG(self):
  201. y = stats.pearsonr(BIG,BIG)
  202. r = y[0]
  203. assert_approx_equal(r,1.0)
  204. def test_pBIGLITTLE(self):
  205. y = stats.pearsonr(BIG,LITTLE)
  206. r = y[0]
  207. assert_approx_equal(r,1.0)
  208. def test_pBIGHUGE(self):
  209. y = stats.pearsonr(BIG,HUGE)
  210. r = y[0]
  211. assert_approx_equal(r,1.0)
  212. def test_pBIGTINY(self):
  213. y = stats.pearsonr(BIG,TINY)
  214. r = y[0]
  215. assert_approx_equal(r,1.0)
  216. def test_pBIGROUND(self):
  217. y = stats.pearsonr(BIG,ROUND)
  218. r = y[0]
  219. assert_approx_equal(r,1.0)
  220. def test_pLITTLELITTLE(self):
  221. y = stats.pearsonr(LITTLE,LITTLE)
  222. r = y[0]
  223. assert_approx_equal(r,1.0)
  224. def test_pLITTLEHUGE(self):
  225. y = stats.pearsonr(LITTLE,HUGE)
  226. r = y[0]
  227. assert_approx_equal(r,1.0)
  228. def test_pLITTLETINY(self):
  229. y = stats.pearsonr(LITTLE,TINY)
  230. r = y[0]
  231. assert_approx_equal(r,1.0)
  232. def test_pLITTLEROUND(self):
  233. y = stats.pearsonr(LITTLE,ROUND)
  234. r = y[0]
  235. assert_approx_equal(r,1.0)
  236. def test_pHUGEHUGE(self):
  237. y = stats.pearsonr(HUGE,HUGE)
  238. r = y[0]
  239. assert_approx_equal(r,1.0)
  240. def test_pHUGETINY(self):
  241. y = stats.pearsonr(HUGE,TINY)
  242. r = y[0]
  243. assert_approx_equal(r,1.0)
  244. def test_pHUGEROUND(self):
  245. y = stats.pearsonr(HUGE,ROUND)
  246. r = y[0]
  247. assert_approx_equal(r,1.0)
  248. def test_pTINYTINY(self):
  249. y = stats.pearsonr(TINY,TINY)
  250. r = y[0]
  251. assert_approx_equal(r,1.0)
  252. def test_pTINYROUND(self):
  253. y = stats.pearsonr(TINY,ROUND)
  254. r = y[0]
  255. assert_approx_equal(r,1.0)
  256. def test_pROUNDROUND(self):
  257. y = stats.pearsonr(ROUND,ROUND)
  258. r = y[0]
  259. assert_approx_equal(r,1.0)
  260. def test_pearsonr_result_attributes(self):
  261. res = stats.pearsonr(X, X)
  262. attributes = ('correlation', 'pvalue')
  263. check_named_results(res, attributes)
  264. assert_equal(res.correlation, res.statistic)
  265. def test_r_almost_exactly_pos1(self):
  266. a = arange(3.0)
  267. r, prob = stats.pearsonr(a, a)
  268. assert_allclose(r, 1.0, atol=1e-15)
  269. # With n = len(a) = 3, the error in prob grows like the
  270. # square root of the error in r.
  271. assert_allclose(prob, 0.0, atol=np.sqrt(2*np.spacing(1.0)))
  272. def test_r_almost_exactly_neg1(self):
  273. a = arange(3.0)
  274. r, prob = stats.pearsonr(a, -a)
  275. assert_allclose(r, -1.0, atol=1e-15)
  276. # With n = len(a) = 3, the error in prob grows like the
  277. # square root of the error in r.
  278. assert_allclose(prob, 0.0, atol=np.sqrt(2*np.spacing(1.0)))
  279. def test_basic(self):
  280. # A basic test, with a correlation coefficient
  281. # that is not 1 or -1.
  282. a = array([-1, 0, 1])
  283. b = array([0, 0, 3])
  284. r, prob = stats.pearsonr(a, b)
  285. assert_approx_equal(r, np.sqrt(3)/2)
  286. assert_approx_equal(prob, 1/3)
  287. def test_constant_input(self):
  288. # Zero variance input
  289. # See https://github.com/scipy/scipy/issues/3728
  290. msg = "An input array is constant"
  291. with assert_warns(stats.ConstantInputWarning, match=msg):
  292. r, p = stats.pearsonr([0.667, 0.667, 0.667], [0.123, 0.456, 0.789])
  293. assert_equal(r, np.nan)
  294. assert_equal(p, np.nan)
  295. def test_near_constant_input(self):
  296. # Near constant input (but not constant):
  297. x = [2, 2, 2 + np.spacing(2)]
  298. y = [3, 3, 3 + 6*np.spacing(3)]
  299. msg = "An input array is nearly constant; the computed"
  300. with assert_warns(stats.NearConstantInputWarning, match=msg):
  301. # r and p are garbage, so don't bother checking them in this case.
  302. # (The exact value of r would be 1.)
  303. r, p = stats.pearsonr(x, y)
  304. def test_very_small_input_values(self):
  305. # Very small values in an input. A naive implementation will
  306. # suffer from underflow.
  307. # See https://github.com/scipy/scipy/issues/9353
  308. x = [0.004434375, 0.004756007, 0.003911996, 0.0038005, 0.003409971]
  309. y = [2.48e-188, 7.41e-181, 4.09e-208, 2.08e-223, 2.66e-245]
  310. r, p = stats.pearsonr(x,y)
  311. # The expected values were computed using mpmath with 80 digits
  312. # of precision.
  313. assert_allclose(r, 0.7272930540750450)
  314. assert_allclose(p, 0.1637805429533202)
  315. def test_very_large_input_values(self):
  316. # Very large values in an input. A naive implementation will
  317. # suffer from overflow.
  318. # See https://github.com/scipy/scipy/issues/8980
  319. x = 1e90*np.array([0, 0, 0, 1, 1, 1, 1])
  320. y = 1e90*np.arange(7)
  321. r, p = stats.pearsonr(x, y)
  322. # The expected values were computed using mpmath with 80 digits
  323. # of precision.
  324. assert_allclose(r, 0.8660254037844386)
  325. assert_allclose(p, 0.011724811003954638)
  326. def test_extremely_large_input_values(self):
  327. # Extremely large values in x and y. These values would cause the
  328. # product sigma_x * sigma_y to overflow if the two factors were
  329. # computed independently.
  330. x = np.array([2.3e200, 4.5e200, 6.7e200, 8e200])
  331. y = np.array([1.2e199, 5.5e200, 3.3e201, 1.0e200])
  332. r, p = stats.pearsonr(x, y)
  333. # The expected values were computed using mpmath with 80 digits
  334. # of precision.
  335. assert_allclose(r, 0.351312332103289)
  336. assert_allclose(p, 0.648687667896711)
  337. def test_length_two_pos1(self):
  338. # Inputs with length 2.
  339. # See https://github.com/scipy/scipy/issues/7730
  340. res = stats.pearsonr([1, 2], [3, 5])
  341. r, p = res
  342. assert_equal(r, 1)
  343. assert_equal(p, 1)
  344. assert_equal(res.confidence_interval(), (-1, 1))
  345. def test_length_two_neg2(self):
  346. # Inputs with length 2.
  347. # See https://github.com/scipy/scipy/issues/7730
  348. r, p = stats.pearsonr([2, 1], [3, 5])
  349. assert_equal(r, -1)
  350. assert_equal(p, 1)
  351. # Expected values computed with R 3.6.2 cor.test, e.g.
  352. # options(digits=16)
  353. # x <- c(1, 2, 3, 4)
  354. # y <- c(0, 1, 0.5, 1)
  355. # cor.test(x, y, method = "pearson", alternative = "g")
  356. # correlation coefficient and p-value for alternative='two-sided'
  357. # calculated with mpmath agree to 16 digits.
  358. @pytest.mark.parametrize('alternative, pval, rlow, rhigh, sign',
  359. [('two-sided', 0.325800137536, -0.814938968841, 0.99230697523, 1), # noqa
  360. ('less', 0.8370999312316, -1, 0.985600937290653, 1),
  361. ('greater', 0.1629000687684, -0.6785654158217636, 1, 1),
  362. ('two-sided', 0.325800137536, -0.992306975236, 0.81493896884, -1),
  363. ('less', 0.1629000687684, -1.0, 0.6785654158217636, -1),
  364. ('greater', 0.8370999312316, -0.985600937290653, 1.0, -1)])
  365. def test_basic_example(self, alternative, pval, rlow, rhigh, sign):
  366. x = [1, 2, 3, 4]
  367. y = np.array([0, 1, 0.5, 1]) * sign
  368. result = stats.pearsonr(x, y, alternative=alternative)
  369. assert_allclose(result.statistic, 0.6741998624632421*sign, rtol=1e-12)
  370. assert_allclose(result.pvalue, pval, rtol=1e-6)
  371. ci = result.confidence_interval()
  372. assert_allclose(ci, (rlow, rhigh), rtol=1e-6)
  373. def test_negative_correlation_pvalue_gh17795(self):
  374. x = np.arange(10)
  375. y = -x
  376. test_greater = stats.pearsonr(x, y, alternative='greater')
  377. test_less = stats.pearsonr(x, y, alternative='less')
  378. assert_allclose(test_greater.pvalue, 1)
  379. assert_allclose(test_less.pvalue, 0, atol=1e-20)
  380. def test_length3_r_exactly_negative_one(self):
  381. x = [1, 2, 3]
  382. y = [5, -4, -13]
  383. res = stats.pearsonr(x, y)
  384. # The expected r and p are exact.
  385. r, p = res
  386. assert_allclose(r, -1.0)
  387. assert_allclose(p, 0.0, atol=1e-7)
  388. assert_equal(res.confidence_interval(), (-1, 1))
  389. def test_unequal_lengths(self):
  390. x = [1, 2, 3]
  391. y = [4, 5]
  392. assert_raises(ValueError, stats.pearsonr, x, y)
  393. def test_len1(self):
  394. x = [1]
  395. y = [2]
  396. assert_raises(ValueError, stats.pearsonr, x, y)
  397. def test_complex_data(self):
  398. x = [-1j, -2j, -3.0j]
  399. y = [-1j, -2j, -3.0j]
  400. message = 'This function does not support complex data'
  401. with pytest.raises(ValueError, match=message):
  402. stats.pearsonr(x, y)
  403. class TestFisherExact:
  404. """Some tests to show that fisher_exact() works correctly.
  405. Note that in SciPy 0.9.0 this was not working well for large numbers due to
  406. inaccuracy of the hypergeom distribution (see #1218). Fixed now.
  407. Also note that R and SciPy have different argument formats for their
  408. hypergeometric distribution functions.
  409. R:
  410. > phyper(18999, 99000, 110000, 39000, lower.tail = FALSE)
  411. [1] 1.701815e-09
  412. """
  413. def test_basic(self):
  414. fisher_exact = stats.fisher_exact
  415. res = fisher_exact([[14500, 20000], [30000, 40000]])[1]
  416. assert_approx_equal(res, 0.01106, significant=4)
  417. res = fisher_exact([[100, 2], [1000, 5]])[1]
  418. assert_approx_equal(res, 0.1301, significant=4)
  419. res = fisher_exact([[2, 7], [8, 2]])[1]
  420. assert_approx_equal(res, 0.0230141, significant=6)
  421. res = fisher_exact([[5, 1], [10, 10]])[1]
  422. assert_approx_equal(res, 0.1973244, significant=6)
  423. res = fisher_exact([[5, 15], [20, 20]])[1]
  424. assert_approx_equal(res, 0.0958044, significant=6)
  425. res = fisher_exact([[5, 16], [20, 25]])[1]
  426. assert_approx_equal(res, 0.1725862, significant=6)
  427. res = fisher_exact([[10, 5], [10, 1]])[1]
  428. assert_approx_equal(res, 0.1973244, significant=6)
  429. res = fisher_exact([[5, 0], [1, 4]])[1]
  430. assert_approx_equal(res, 0.04761904, significant=6)
  431. res = fisher_exact([[0, 1], [3, 2]])[1]
  432. assert_approx_equal(res, 1.0)
  433. res = fisher_exact([[0, 2], [6, 4]])[1]
  434. assert_approx_equal(res, 0.4545454545)
  435. res = fisher_exact([[2, 7], [8, 2]])
  436. assert_approx_equal(res[1], 0.0230141, significant=6)
  437. assert_approx_equal(res[0], 4.0 / 56)
  438. def test_precise(self):
  439. # results from R
  440. #
  441. # R defines oddsratio differently (see Notes section of fisher_exact
  442. # docstring), so those will not match. We leave them in anyway, in
  443. # case they will be useful later on. We test only the p-value.
  444. tablist = [
  445. ([[100, 2], [1000, 5]], (2.505583993422285e-001, 1.300759363430016e-001)),
  446. ([[2, 7], [8, 2]], (8.586235135736206e-002, 2.301413756522114e-002)),
  447. ([[5, 1], [10, 10]], (4.725646047336584e+000, 1.973244147157190e-001)),
  448. ([[5, 15], [20, 20]], (3.394396617440852e-001, 9.580440012477637e-002)),
  449. ([[5, 16], [20, 25]], (3.960558326183334e-001, 1.725864953812994e-001)),
  450. ([[10, 5], [10, 1]], (2.116112781158483e-001, 1.973244147157190e-001)),
  451. ([[10, 5], [10, 0]], (0.000000000000000e+000, 6.126482213438734e-002)),
  452. ([[5, 0], [1, 4]], (np.inf, 4.761904761904762e-002)),
  453. ([[0, 5], [1, 4]], (0.000000000000000e+000, 1.000000000000000e+000)),
  454. ([[5, 1], [0, 4]], (np.inf, 4.761904761904758e-002)),
  455. ([[0, 1], [3, 2]], (0.000000000000000e+000, 1.000000000000000e+000))
  456. ]
  457. for table, res_r in tablist:
  458. res = stats.fisher_exact(np.asarray(table))
  459. np.testing.assert_almost_equal(res[1], res_r[1], decimal=11,
  460. verbose=True)
  461. def test_gh4130(self):
  462. # Previously, a fudge factor used to distinguish between theoeretically
  463. # and numerically different probability masses was 1e-4; it has been
  464. # tightened to fix gh4130. Accuracy checked against R fisher.test.
  465. # options(digits=16)
  466. # table <- matrix(c(6, 108, 37, 200), nrow = 2)
  467. # fisher.test(table, alternative = "t")
  468. x = [[6, 37], [108, 200]]
  469. res = stats.fisher_exact(x)
  470. assert_allclose(res[1], 0.005092697748126)
  471. # case from https://github.com/brentp/fishers_exact_test/issues/27
  472. # That package has an (absolute?) fudge factor of 1e-6; too big
  473. x = [[22, 0], [0, 102]]
  474. res = stats.fisher_exact(x)
  475. assert_allclose(res[1], 7.175066786244549e-25)
  476. # case from https://github.com/brentp/fishers_exact_test/issues/1
  477. x = [[94, 48], [3577, 16988]]
  478. res = stats.fisher_exact(x)
  479. assert_allclose(res[1], 2.069356340993818e-37)
  480. def test_gh9231(self):
  481. # Previously, fisher_exact was extremely slow for this table
  482. # As reported in gh-9231, the p-value should be very nearly zero
  483. x = [[5829225, 5692693], [5760959, 5760959]]
  484. res = stats.fisher_exact(x)
  485. assert_allclose(res[1], 0, atol=1e-170)
  486. @pytest.mark.slow
  487. def test_large_numbers(self):
  488. # Test with some large numbers. Regression test for #1401
  489. pvals = [5.56e-11, 2.666e-11, 1.363e-11] # from R
  490. for pval, num in zip(pvals, [75, 76, 77]):
  491. res = stats.fisher_exact([[17704, 496], [1065, num]])[1]
  492. assert_approx_equal(res, pval, significant=4)
  493. res = stats.fisher_exact([[18000, 80000], [20000, 90000]])[1]
  494. assert_approx_equal(res, 0.2751, significant=4)
  495. def test_raises(self):
  496. # test we raise an error for wrong shape of input.
  497. assert_raises(ValueError, stats.fisher_exact,
  498. np.arange(6).reshape(2, 3))
  499. def test_row_or_col_zero(self):
  500. tables = ([[0, 0], [5, 10]],
  501. [[5, 10], [0, 0]],
  502. [[0, 5], [0, 10]],
  503. [[5, 0], [10, 0]])
  504. for table in tables:
  505. oddsratio, pval = stats.fisher_exact(table)
  506. assert_equal(pval, 1.0)
  507. assert_equal(oddsratio, np.nan)
  508. def test_less_greater(self):
  509. tables = (
  510. # Some tables to compare with R:
  511. [[2, 7], [8, 2]],
  512. [[200, 7], [8, 300]],
  513. [[28, 21], [6, 1957]],
  514. [[190, 800], [200, 900]],
  515. # Some tables with simple exact values
  516. # (includes regression test for ticket #1568):
  517. [[0, 2], [3, 0]],
  518. [[1, 1], [2, 1]],
  519. [[2, 0], [1, 2]],
  520. [[0, 1], [2, 3]],
  521. [[1, 0], [1, 4]],
  522. )
  523. pvals = (
  524. # from R:
  525. [0.018521725952066501, 0.9990149169715733],
  526. [1.0, 2.0056578803889148e-122],
  527. [1.0, 5.7284374608319831e-44],
  528. [0.7416227, 0.2959826],
  529. # Exact:
  530. [0.1, 1.0],
  531. [0.7, 0.9],
  532. [1.0, 0.3],
  533. [2./3, 1.0],
  534. [1.0, 1./3],
  535. )
  536. for table, pval in zip(tables, pvals):
  537. res = []
  538. res.append(stats.fisher_exact(table, alternative="less")[1])
  539. res.append(stats.fisher_exact(table, alternative="greater")[1])
  540. assert_allclose(res, pval, atol=0, rtol=1e-7)
  541. def test_gh3014(self):
  542. # check if issue #3014 has been fixed.
  543. # before, this would have risen a ValueError
  544. odds, pvalue = stats.fisher_exact([[1, 2], [9, 84419233]])
  545. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  546. def test_result(self, alternative):
  547. table = np.array([[14500, 20000], [30000, 40000]])
  548. res = stats.fisher_exact(table, alternative=alternative)
  549. assert_equal((res.statistic, res.pvalue), res)
  550. class TestCorrSpearmanr:
  551. """ W.II.D. Compute a correlation matrix on all the variables.
  552. All the correlations, except for ZERO and MISS, should be exactly 1.
  553. ZERO and MISS should have undefined or missing correlations with the
  554. other variables. The same should go for SPEARMAN correlations, if
  555. your program has them.
  556. """
  557. def test_scalar(self):
  558. y = stats.spearmanr(4., 2.)
  559. assert_(np.isnan(y).all())
  560. def test_uneven_lengths(self):
  561. assert_raises(ValueError, stats.spearmanr, [1, 2, 1], [8, 9])
  562. assert_raises(ValueError, stats.spearmanr, [1, 2, 1], 8)
  563. def test_uneven_2d_shapes(self):
  564. # Different number of columns should work - those just get concatenated.
  565. np.random.seed(232324)
  566. x = np.random.randn(4, 3)
  567. y = np.random.randn(4, 2)
  568. assert stats.spearmanr(x, y).statistic.shape == (5, 5)
  569. assert stats.spearmanr(x.T, y.T, axis=1).pvalue.shape == (5, 5)
  570. assert_raises(ValueError, stats.spearmanr, x, y, axis=1)
  571. assert_raises(ValueError, stats.spearmanr, x.T, y.T)
  572. def test_ndim_too_high(self):
  573. np.random.seed(232324)
  574. x = np.random.randn(4, 3, 2)
  575. assert_raises(ValueError, stats.spearmanr, x)
  576. assert_raises(ValueError, stats.spearmanr, x, x)
  577. assert_raises(ValueError, stats.spearmanr, x, None, None)
  578. # But should work with axis=None (raveling axes) for two input arrays
  579. assert_allclose(stats.spearmanr(x, x, axis=None),
  580. stats.spearmanr(x.flatten(), x.flatten(), axis=0))
  581. def test_nan_policy(self):
  582. x = np.arange(10.)
  583. x[9] = np.nan
  584. assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan))
  585. assert_array_equal(stats.spearmanr(x, x, nan_policy='omit'),
  586. (1.0, 0.0))
  587. assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='raise')
  588. assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='foobar')
  589. def test_nan_policy_bug_12458(self):
  590. np.random.seed(5)
  591. x = np.random.rand(5, 10)
  592. k = 6
  593. x[:, k] = np.nan
  594. y = np.delete(x, k, axis=1)
  595. corx, px = stats.spearmanr(x, nan_policy='omit')
  596. cory, py = stats.spearmanr(y)
  597. corx = np.delete(np.delete(corx, k, axis=1), k, axis=0)
  598. px = np.delete(np.delete(px, k, axis=1), k, axis=0)
  599. assert_allclose(corx, cory, atol=1e-14)
  600. assert_allclose(px, py, atol=1e-14)
  601. def test_nan_policy_bug_12411(self):
  602. np.random.seed(5)
  603. m = 5
  604. n = 10
  605. x = np.random.randn(m, n)
  606. x[1, 0] = np.nan
  607. x[3, -1] = np.nan
  608. corr, pvalue = stats.spearmanr(x, axis=1, nan_policy="propagate")
  609. res = [[stats.spearmanr(x[i, :], x[j, :]).statistic for i in range(m)]
  610. for j in range(m)]
  611. assert_allclose(corr, res)
  612. def test_sXX(self):
  613. y = stats.spearmanr(X,X)
  614. r = y[0]
  615. assert_approx_equal(r,1.0)
  616. def test_sXBIG(self):
  617. y = stats.spearmanr(X,BIG)
  618. r = y[0]
  619. assert_approx_equal(r,1.0)
  620. def test_sXLITTLE(self):
  621. y = stats.spearmanr(X,LITTLE)
  622. r = y[0]
  623. assert_approx_equal(r,1.0)
  624. def test_sXHUGE(self):
  625. y = stats.spearmanr(X,HUGE)
  626. r = y[0]
  627. assert_approx_equal(r,1.0)
  628. def test_sXTINY(self):
  629. y = stats.spearmanr(X,TINY)
  630. r = y[0]
  631. assert_approx_equal(r,1.0)
  632. def test_sXROUND(self):
  633. y = stats.spearmanr(X,ROUND)
  634. r = y[0]
  635. assert_approx_equal(r,1.0)
  636. def test_sBIGBIG(self):
  637. y = stats.spearmanr(BIG,BIG)
  638. r = y[0]
  639. assert_approx_equal(r,1.0)
  640. def test_sBIGLITTLE(self):
  641. y = stats.spearmanr(BIG,LITTLE)
  642. r = y[0]
  643. assert_approx_equal(r,1.0)
  644. def test_sBIGHUGE(self):
  645. y = stats.spearmanr(BIG,HUGE)
  646. r = y[0]
  647. assert_approx_equal(r,1.0)
  648. def test_sBIGTINY(self):
  649. y = stats.spearmanr(BIG,TINY)
  650. r = y[0]
  651. assert_approx_equal(r,1.0)
  652. def test_sBIGROUND(self):
  653. y = stats.spearmanr(BIG,ROUND)
  654. r = y[0]
  655. assert_approx_equal(r,1.0)
  656. def test_sLITTLELITTLE(self):
  657. y = stats.spearmanr(LITTLE,LITTLE)
  658. r = y[0]
  659. assert_approx_equal(r,1.0)
  660. def test_sLITTLEHUGE(self):
  661. y = stats.spearmanr(LITTLE,HUGE)
  662. r = y[0]
  663. assert_approx_equal(r,1.0)
  664. def test_sLITTLETINY(self):
  665. y = stats.spearmanr(LITTLE,TINY)
  666. r = y[0]
  667. assert_approx_equal(r,1.0)
  668. def test_sLITTLEROUND(self):
  669. y = stats.spearmanr(LITTLE,ROUND)
  670. r = y[0]
  671. assert_approx_equal(r,1.0)
  672. def test_sHUGEHUGE(self):
  673. y = stats.spearmanr(HUGE,HUGE)
  674. r = y[0]
  675. assert_approx_equal(r,1.0)
  676. def test_sHUGETINY(self):
  677. y = stats.spearmanr(HUGE,TINY)
  678. r = y[0]
  679. assert_approx_equal(r,1.0)
  680. def test_sHUGEROUND(self):
  681. y = stats.spearmanr(HUGE,ROUND)
  682. r = y[0]
  683. assert_approx_equal(r,1.0)
  684. def test_sTINYTINY(self):
  685. y = stats.spearmanr(TINY,TINY)
  686. r = y[0]
  687. assert_approx_equal(r,1.0)
  688. def test_sTINYROUND(self):
  689. y = stats.spearmanr(TINY,ROUND)
  690. r = y[0]
  691. assert_approx_equal(r,1.0)
  692. def test_sROUNDROUND(self):
  693. y = stats.spearmanr(ROUND,ROUND)
  694. r = y[0]
  695. assert_approx_equal(r,1.0)
  696. def test_spearmanr_result_attributes(self):
  697. res = stats.spearmanr(X, X)
  698. attributes = ('correlation', 'pvalue')
  699. check_named_results(res, attributes)
  700. assert_equal(res.correlation, res.statistic)
  701. def test_1d_vs_2d(self):
  702. x1 = [1, 2, 3, 4, 5, 6]
  703. x2 = [1, 2, 3, 4, 6, 5]
  704. res1 = stats.spearmanr(x1, x2)
  705. res2 = stats.spearmanr(np.asarray([x1, x2]).T)
  706. assert_allclose(res1, res2)
  707. def test_1d_vs_2d_nans(self):
  708. # Now the same with NaNs present. Regression test for gh-9103.
  709. for nan_policy in ['propagate', 'omit']:
  710. x1 = [1, np.nan, 3, 4, 5, 6]
  711. x2 = [1, 2, 3, 4, 6, np.nan]
  712. res1 = stats.spearmanr(x1, x2, nan_policy=nan_policy)
  713. res2 = stats.spearmanr(np.asarray([x1, x2]).T, nan_policy=nan_policy)
  714. assert_allclose(res1, res2)
  715. def test_3cols(self):
  716. x1 = np.arange(6)
  717. x2 = -x1
  718. x3 = np.array([0, 1, 2, 3, 5, 4])
  719. x = np.asarray([x1, x2, x3]).T
  720. actual = stats.spearmanr(x)
  721. expected_corr = np.array([[1, -1, 0.94285714],
  722. [-1, 1, -0.94285714],
  723. [0.94285714, -0.94285714, 1]])
  724. expected_pvalue = np.zeros((3, 3), dtype=float)
  725. expected_pvalue[2, 0:2] = 0.00480466472
  726. expected_pvalue[0:2, 2] = 0.00480466472
  727. assert_allclose(actual.statistic, expected_corr)
  728. assert_allclose(actual.pvalue, expected_pvalue)
  729. def test_gh_9103(self):
  730. # Regression test for gh-9103.
  731. x = np.array([[np.nan, 3.0, 4.0, 5.0, 5.1, 6.0, 9.2],
  732. [5.0, np.nan, 4.1, 4.8, 4.9, 5.0, 4.1],
  733. [0.5, 4.0, 7.1, 3.8, 8.0, 5.1, 7.6]]).T
  734. corr = np.array([[np.nan, np.nan, np.nan],
  735. [np.nan, np.nan, np.nan],
  736. [np.nan, np.nan, 1.]])
  737. assert_allclose(stats.spearmanr(x, nan_policy='propagate').statistic,
  738. corr)
  739. res = stats.spearmanr(x, nan_policy='omit').statistic
  740. assert_allclose((res[0][1], res[0][2], res[1][2]),
  741. (0.2051957, 0.4857143, -0.4707919), rtol=1e-6)
  742. def test_gh_8111(self):
  743. # Regression test for gh-8111 (different result for float/int/bool).
  744. n = 100
  745. np.random.seed(234568)
  746. x = np.random.rand(n)
  747. m = np.random.rand(n) > 0.7
  748. # bool against float, no nans
  749. a = (x > .5)
  750. b = np.array(x)
  751. res1 = stats.spearmanr(a, b, nan_policy='omit').statistic
  752. # bool against float with NaNs
  753. b[m] = np.nan
  754. res2 = stats.spearmanr(a, b, nan_policy='omit').statistic
  755. # int against float with NaNs
  756. a = a.astype(np.int32)
  757. res3 = stats.spearmanr(a, b, nan_policy='omit').statistic
  758. expected = [0.865895477, 0.866100381, 0.866100381]
  759. assert_allclose([res1, res2, res3], expected)
class TestCorrSpearmanr2:
    """Some further tests of the spearmanr function."""

    def test_spearmanr_vs_r(self):
        # Cross-check with R:
        # cor.test(c(1,2,3,4,5),c(5,6,7,8,7),method="spearmanr")
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 7]
        expected = (0.82078268166812329, 0.088587005313543798)
        res = stats.spearmanr(x1, x2)
        assert_approx_equal(res[0], expected[0])
        assert_approx_equal(res[1], expected[1])

    def test_empty_arrays(self):
        # Empty inputs produce NaN statistic and p-value.
        assert_equal(stats.spearmanr([], []), (np.nan, np.nan))

    def test_normal_draws(self):
        # Correlated normal draws, checked against precomputed values.
        np.random.seed(7546)
        x = np.array([np.random.normal(loc=1, scale=1, size=500),
                      np.random.normal(loc=1, scale=1, size=500)])
        corr = [[1.0, 0.3],
                [0.3, 1.0]]
        # Impose the correlation structure via a Cholesky factor.
        x = np.dot(np.linalg.cholesky(corr), x)
        expected = (0.28659685838743354, 6.579862219051161e-11)
        res = stats.spearmanr(x[0], x[1])
        assert_approx_equal(res[0], expected[0])
        assert_approx_equal(res[1], expected[1])

    def test_corr_1(self):
        # Identical inputs (with a tie) are perfectly correlated.
        assert_approx_equal(stats.spearmanr([1, 1, 2], [1, 1, 2])[0], 1.0)

    def test_nan_policies(self):
        # 'propagate' yields NaN, 'omit' drops the NaN pair,
        # 'raise' and unknown policies raise ValueError.
        x = np.arange(10.)
        x[9] = np.nan
        assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan))
        assert_allclose(stats.spearmanr(x, x, nan_policy='omit'),
                        (1.0, 0))
        assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='raise')
        assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='foobar')

    def test_unequal_lengths(self):
        # Mismatched 1-D input lengths are rejected.
        x = np.arange(10.)
        y = np.arange(20.)
        assert_raises(ValueError, stats.spearmanr, x, y)

    def test_omit_paired_value(self):
        # nan_policy='omit' drops the whole pair that contains the NaN.
        x1 = [1, 2, 3, 4]
        x2 = [8, 7, 6, np.nan]
        res1 = stats.spearmanr(x1, x2, nan_policy='omit')
        res2 = stats.spearmanr(x1[:3], x2[:3], nan_policy='omit')
        assert_equal(res1, res2)

    def test_gh_issue_6061_windows_overflow(self):
        # Regression test for gh-6061 (integer overflow on Windows).
        x = list(range(2000))
        y = list(range(2000))
        # Introduce three discordant swaps with known total effect.
        y[0], y[9] = y[9], y[0]
        y[10], y[434] = y[434], y[10]
        y[435], y[1509] = y[1509], y[435]
        # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
        #     = 1 - (1 / 500)
        #     = 0.998
        x.append(np.nan)
        y.append(3.0)
        assert_almost_equal(stats.spearmanr(x, y, nan_policy='omit')[0], 0.998)

    def test_tie0(self):
        # with only ties in one or both inputs
        warn_msg = "An input array is constant"
        with assert_warns(stats.ConstantInputWarning, match=warn_msg):
            r, p = stats.spearmanr([2, 2, 2], [2, 2, 2])
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)
            r, p = stats.spearmanr([2, 0, 2], [2, 2, 2])
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)
            r, p = stats.spearmanr([2, 2, 2], [2, 0, 2])
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)

    def test_tie1(self):
        # Data
        x = [1.0, 2.0, 3.0, 4.0]
        y = [1.0, 2.0, 2.0, 3.0]
        # Ranks of the data, with tie-handling.
        xr = [1.0, 2.0, 3.0, 4.0]
        yr = [1.0, 2.5, 2.5, 4.0]
        # Result of spearmanr should be the same as applying
        # pearsonr to the ranks.
        sr = stats.spearmanr(x, y)
        pr = stats.pearsonr(xr, yr)
        assert_almost_equal(sr, pr)

    def test_tie2(self):
        # Test tie-handling if inputs contain nan's
        # Data without nan's
        x1 = [1, 2, 2.5, 2]
        y1 = [1, 3, 2.5, 4]
        # Same data with nan's
        x2 = [1, 2, 2.5, 2, np.nan]
        y2 = [1, 3, 2.5, 4, np.nan]
        # Results for two data sets should be the same if nan's are ignored
        sr1 = stats.spearmanr(x1, y1)
        sr2 = stats.spearmanr(x2, y2, nan_policy='omit')
        assert_almost_equal(sr1, sr2)

    def test_ties_axis_1(self):
        # Constant rows along axis=1 warn and give NaN results.
        z1 = np.array([[1, 1, 1, 1], [1, 2, 3, 4]])
        z2 = np.array([[1, 2, 3, 4], [1, 1, 1, 1]])
        z3 = np.array([[1, 1, 1, 1], [1, 1, 1, 1]])
        warn_msg = "An input array is constant"
        with assert_warns(stats.ConstantInputWarning, match=warn_msg):
            r, p = stats.spearmanr(z1, axis=1)
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)
            r, p = stats.spearmanr(z2, axis=1)
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)
            r, p = stats.spearmanr(z3, axis=1)
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)

    def test_gh_11111(self):
        # Regression test for gh-11111: constant input warns, result is NaN.
        x = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
        y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587,
                      0.0007535430349118562, 0.0002661781514710257, 0, 0,
                      0.0007835762419683435])
        warn_msg = "An input array is constant"
        with assert_warns(stats.ConstantInputWarning, match=warn_msg):
            r, p = stats.spearmanr(x, y)
            assert_equal(r, np.nan)
            assert_equal(p, np.nan)

    def test_index_error(self):
        # An out-of-range axis raises ValueError.
        x = np.array([1.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
        y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587,
                      0.0007535430349118562, 0.0002661781514710257, 0, 0,
                      0.0007835762419683435])
        assert_raises(ValueError, stats.spearmanr, x, y, axis=2)

    def test_alternative(self):
        # Test alternative parameter
        # Simple test - Based on the above ``test_spearmanr_vs_r``
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 7]
        # strong positive correlation
        expected = (0.82078268166812329, 0.088587005313543798)
        # correlation > 0 -> large "less" p-value
        res = stats.spearmanr(x1, x2, alternative="less")
        assert_approx_equal(res[0], expected[0])
        assert_approx_equal(res[1], 1 - (expected[1] / 2))
        # correlation > 0 -> small "less" p-value
        res = stats.spearmanr(x1, x2, alternative="greater")
        assert_approx_equal(res[0], expected[0])
        assert_approx_equal(res[1], expected[1] / 2)
        with pytest.raises(ValueError, match="alternative must be 'less'..."):
            stats.spearmanr(x1, x2, alternative="ekki-ekki")

    @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater'))
    def test_alternative_nan_policy(self, alternative):
        # Test nan policies
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 7]
        x1nan = x1 + [np.nan]
        x2nan = x2 + [np.nan]
        # test nan_policy="propagate"
        assert_array_equal(stats.spearmanr(x1nan, x2nan), (np.nan, np.nan))
        # test nan_policy="omit"
        res_actual = stats.spearmanr(x1nan, x2nan, nan_policy='omit',
                                     alternative=alternative)
        res_expected = stats.spearmanr(x1, x2, alternative=alternative)
        assert_allclose(res_actual, res_expected)
        # test nan_policy="raise"
        message = 'The input contains nan values'
        with pytest.raises(ValueError, match=message):
            stats.spearmanr(x1nan, x2nan, nan_policy='raise',
                            alternative=alternative)
        # test invalid nan_policy
        message = "nan_policy must be one of..."
        with pytest.raises(ValueError, match=message):
            stats.spearmanr(x1nan, x2nan, nan_policy='ekki-ekki',
                            alternative=alternative)
  925. # W.II.E. Tabulate X against X, using BIG as a case weight. The values
  926. # should appear on the diagonal and the total should be 899999955.
  927. # If the table cannot hold these values, forget about working with
  928. # census data. You can also tabulate HUGE against TINY. There is no
  929. # reason a tabulation program should not be able to distinguish
  930. # different values regardless of their magnitude.
  931. # I need to figure out how to do this one.
  932. def test_kendalltau():
  933. # For the cases without ties, both variants should give the same
  934. # result.
  935. variants = ('b', 'c')
  936. # case without ties, con-dis equal zero
  937. x = [5, 2, 1, 3, 6, 4, 7, 8]
  938. y = [5, 2, 6, 3, 1, 8, 7, 4]
  939. # Cross-check with exact result from R:
  940. # cor.test(x,y,method="kendall",exact=1)
  941. expected = (0.0, 1.0)
  942. for taux in variants:
  943. res = stats.kendalltau(x, y)
  944. assert_approx_equal(res[0], expected[0])
  945. assert_approx_equal(res[1], expected[1])
  946. # case without ties, con-dis equal zero
  947. x = [0, 5, 2, 1, 3, 6, 4, 7, 8]
  948. y = [5, 2, 0, 6, 3, 1, 8, 7, 4]
  949. # Cross-check with exact result from R:
  950. # cor.test(x,y,method="kendall",exact=1)
  951. expected = (0.0, 1.0)
  952. for taux in variants:
  953. res = stats.kendalltau(x, y)
  954. assert_approx_equal(res[0], expected[0])
  955. assert_approx_equal(res[1], expected[1])
  956. # case without ties, con-dis close to zero
  957. x = [5, 2, 1, 3, 6, 4, 7]
  958. y = [5, 2, 6, 3, 1, 7, 4]
  959. # Cross-check with exact result from R:
  960. # cor.test(x,y,method="kendall",exact=1)
  961. expected = (-0.14285714286, 0.77261904762)
  962. for taux in variants:
  963. res = stats.kendalltau(x, y)
  964. assert_approx_equal(res[0], expected[0])
  965. assert_approx_equal(res[1], expected[1])
  966. # case without ties, con-dis close to zero
  967. x = [2, 1, 3, 6, 4, 7, 8]
  968. y = [2, 6, 3, 1, 8, 7, 4]
  969. # Cross-check with exact result from R:
  970. # cor.test(x,y,method="kendall",exact=1)
  971. expected = (0.047619047619, 1.0)
  972. for taux in variants:
  973. res = stats.kendalltau(x, y)
  974. assert_approx_equal(res[0], expected[0])
  975. assert_approx_equal(res[1], expected[1])
  976. # simple case without ties
  977. x = np.arange(10)
  978. y = np.arange(10)
  979. # Cross-check with exact result from R:
  980. # cor.test(x,y,method="kendall",exact=1)
  981. expected = (1.0, 5.511463844797e-07)
  982. for taux in variants:
  983. res = stats.kendalltau(x, y, variant=taux)
  984. assert_approx_equal(res[0], expected[0])
  985. assert_approx_equal(res[1], expected[1])
  986. # swap a couple of values
  987. b = y[1]
  988. y[1] = y[2]
  989. y[2] = b
  990. # Cross-check with exact result from R:
  991. # cor.test(x,y,method="kendall",exact=1)
  992. expected = (0.9555555555555556, 5.511463844797e-06)
  993. for taux in variants:
  994. res = stats.kendalltau(x, y, variant=taux)
  995. assert_approx_equal(res[0], expected[0])
  996. assert_approx_equal(res[1], expected[1])
  997. # swap a couple more
  998. b = y[5]
  999. y[5] = y[6]
  1000. y[6] = b
  1001. # Cross-check with exact result from R:
  1002. # cor.test(x,y,method="kendall",exact=1)
  1003. expected = (0.9111111111111111, 2.976190476190e-05)
  1004. for taux in variants:
  1005. res = stats.kendalltau(x, y, variant=taux)
  1006. assert_approx_equal(res[0], expected[0])
  1007. assert_approx_equal(res[1], expected[1])
  1008. # same in opposite direction
  1009. x = np.arange(10)
  1010. y = np.arange(10)[::-1]
  1011. # Cross-check with exact result from R:
  1012. # cor.test(x,y,method="kendall",exact=1)
  1013. expected = (-1.0, 5.511463844797e-07)
  1014. for taux in variants:
  1015. res = stats.kendalltau(x, y, variant=taux)
  1016. assert_approx_equal(res[0], expected[0])
  1017. assert_approx_equal(res[1], expected[1])
  1018. # swap a couple of values
  1019. b = y[1]
  1020. y[1] = y[2]
  1021. y[2] = b
  1022. # Cross-check with exact result from R:
  1023. # cor.test(x,y,method="kendall",exact=1)
  1024. expected = (-0.9555555555555556, 5.511463844797e-06)
  1025. for taux in variants:
  1026. res = stats.kendalltau(x, y, variant=taux)
  1027. assert_approx_equal(res[0], expected[0])
  1028. assert_approx_equal(res[1], expected[1])
  1029. # swap a couple more
  1030. b = y[5]
  1031. y[5] = y[6]
  1032. y[6] = b
  1033. # Cross-check with exact result from R:
  1034. # cor.test(x,y,method="kendall",exact=1)
  1035. expected = (-0.9111111111111111, 2.976190476190e-05)
  1036. for taux in variants:
  1037. res = stats.kendalltau(x, y, variant=taux)
  1038. assert_approx_equal(res[0], expected[0])
  1039. assert_approx_equal(res[1], expected[1])
  1040. # Check a case where variants are different
  1041. # Example values found from Kendall (1970).
  1042. # P-value is the same for the both variants
  1043. x = array([1, 2, 2, 4, 4, 6, 6, 8, 9, 9])
  1044. y = array([1, 2, 4, 4, 4, 4, 8, 8, 8, 10])
  1045. expected = 0.85895569
  1046. assert_approx_equal(stats.kendalltau(x, y, variant='b')[0], expected)
  1047. expected = 0.825
  1048. assert_approx_equal(stats.kendalltau(x, y, variant='c')[0], expected)
  1049. # check exception in case of ties and method='exact' requested
  1050. y[2] = y[1]
  1051. assert_raises(ValueError, stats.kendalltau, x, y, method='exact')
  1052. # check exception in case of invalid method keyword
  1053. assert_raises(ValueError, stats.kendalltau, x, y, method='banana')
  1054. # check exception in case of invalid variant keyword
  1055. assert_raises(ValueError, stats.kendalltau, x, y, variant='rms')
  1056. # tau-b with some ties
  1057. # Cross-check with R:
  1058. # cor.test(c(12,2,1,12,2),c(1,4,7,1,0),method="kendall",exact=FALSE)
  1059. x1 = [12, 2, 1, 12, 2]
  1060. x2 = [1, 4, 7, 1, 0]
  1061. expected = (-0.47140452079103173, 0.28274545993277478)
  1062. res = stats.kendalltau(x1, x2)
  1063. assert_approx_equal(res[0], expected[0])
  1064. assert_approx_equal(res[1], expected[1])
  1065. # test for namedtuple attribute results
  1066. attributes = ('correlation', 'pvalue')
  1067. for taux in variants:
  1068. res = stats.kendalltau(x1, x2, variant=taux)
  1069. check_named_results(res, attributes)
  1070. assert_equal(res.correlation, res.statistic)
  1071. # with only ties in one or both inputs in tau-b or tau-c
  1072. for taux in variants:
  1073. assert_equal(stats.kendalltau([2, 2, 2], [2, 2, 2], variant=taux),
  1074. (np.nan, np.nan))
  1075. assert_equal(stats.kendalltau([2, 0, 2], [2, 2, 2], variant=taux),
  1076. (np.nan, np.nan))
  1077. assert_equal(stats.kendalltau([2, 2, 2], [2, 0, 2], variant=taux),
  1078. (np.nan, np.nan))
  1079. # empty arrays provided as input
  1080. assert_equal(stats.kendalltau([], []), (np.nan, np.nan))
  1081. # check with larger arrays
  1082. np.random.seed(7546)
  1083. x = np.array([np.random.normal(loc=1, scale=1, size=500),
  1084. np.random.normal(loc=1, scale=1, size=500)])
  1085. corr = [[1.0, 0.3],
  1086. [0.3, 1.0]]
  1087. x = np.dot(np.linalg.cholesky(corr), x)
  1088. expected = (0.19291382765531062, 1.1337095377742629e-10)
  1089. res = stats.kendalltau(x[0], x[1])
  1090. assert_approx_equal(res[0], expected[0])
  1091. assert_approx_equal(res[1], expected[1])
  1092. # this should result in 1 for taub but not tau-c
  1093. assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='b')[0],
  1094. 1.0)
  1095. assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='c')[0],
  1096. 0.88888888)
  1097. # test nan_policy
  1098. x = np.arange(10.)
  1099. x[9] = np.nan
  1100. assert_array_equal(stats.kendalltau(x, x), (np.nan, np.nan))
  1101. assert_allclose(stats.kendalltau(x, x, nan_policy='omit'),
  1102. (1.0, 5.5114638e-6), rtol=1e-06)
  1103. assert_allclose(stats.kendalltau(x, x, nan_policy='omit', method='asymptotic'),
  1104. (1.0, 0.00017455009626808976), rtol=1e-06)
  1105. assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='raise')
  1106. assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='foobar')
  1107. # test unequal length inputs
  1108. x = np.arange(10.)
  1109. y = np.arange(20.)
  1110. assert_raises(ValueError, stats.kendalltau, x, y)
  1111. # test all ties
  1112. tau, p_value = stats.kendalltau([], [])
  1113. assert_equal(np.nan, tau)
  1114. assert_equal(np.nan, p_value)
  1115. tau, p_value = stats.kendalltau([0], [0])
  1116. assert_equal(np.nan, tau)
  1117. assert_equal(np.nan, p_value)
  1118. # Regression test for GitHub issue #6061 - Overflow on Windows
  1119. x = np.arange(2000, dtype=float)
  1120. x = np.ma.masked_greater(x, 1995)
  1121. y = np.arange(2000, dtype=float)
  1122. y = np.concatenate((y[1000:], y[:1000]))
  1123. assert_(np.isfinite(stats.kendalltau(x,y)[1]))
  1124. def test_kendalltau_vs_mstats_basic():
  1125. np.random.seed(42)
  1126. for s in range(2,10):
  1127. a = []
  1128. # Generate rankings with ties
  1129. for i in range(s):
  1130. a += [i]*i
  1131. b = list(a)
  1132. np.random.shuffle(a)
  1133. np.random.shuffle(b)
  1134. expected = mstats_basic.kendalltau(a, b)
  1135. actual = stats.kendalltau(a, b)
  1136. assert_approx_equal(actual[0], expected[0])
  1137. assert_approx_equal(actual[1], expected[1])
  1138. def test_kendalltau_nan_2nd_arg():
  1139. # regression test for gh-6134: nans in the second arg were not handled
  1140. x = [1., 2., 3., 4.]
  1141. y = [np.nan, 2.4, 3.4, 3.4]
  1142. r1 = stats.kendalltau(x, y, nan_policy='omit')
  1143. r2 = stats.kendalltau(x[1:], y[1:])
  1144. assert_allclose(r1.statistic, r2.statistic, atol=1e-15)
  1145. def test_kendalltau_dep_initial_lexsort():
  1146. with pytest.warns(
  1147. DeprecationWarning,
  1148. match="'kendalltau' keyword argument 'initial_lexsort'"
  1149. ):
  1150. stats.kendalltau([], [], initial_lexsort=True)
class TestKendallTauAlternative:
    """Tests of the `alternative` parameter of stats.kendalltau."""

    def test_kendalltau_alternative_asymptotic(self):
        # Test alternative parameter, asymptotic method (due to tie)
        # Based on TestCorrSpearman2::test_alternative
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 7]
        # strong positive correlation
        expected = stats.kendalltau(x1, x2, alternative="two-sided")
        assert expected[0] > 0
        # rank correlation > 0 -> large "less" p-value
        res = stats.kendalltau(x1, x2, alternative="less")
        assert_equal(res[0], expected[0])
        assert_allclose(res[1], 1 - (expected[1] / 2))
        # rank correlation > 0 -> small "greater" p-value
        res = stats.kendalltau(x1, x2, alternative="greater")
        assert_equal(res[0], expected[0])
        assert_allclose(res[1], expected[1] / 2)
        # reverse the direction of rank correlation
        x2.reverse()
        # strong negative correlation
        expected = stats.kendalltau(x1, x2, alternative="two-sided")
        assert expected[0] < 0
        # rank correlation < 0 -> large "greater" p-value
        res = stats.kendalltau(x1, x2, alternative="greater")
        assert_equal(res[0], expected[0])
        assert_allclose(res[1], 1 - (expected[1] / 2))
        # rank correlation < 0 -> small "less" p-value
        res = stats.kendalltau(x1, x2, alternative="less")
        assert_equal(res[0], expected[0])
        assert_allclose(res[1], expected[1] / 2)
        with pytest.raises(ValueError, match="alternative must be 'less'..."):
            stats.kendalltau(x1, x2, alternative="ekki-ekki")

    # There are a lot of special cases considered in the calculation of the
    # exact p-value, so we test each separately. We also need to test
    # separately when the observed statistic is in the left tail vs the right
    # tail because the code leverages symmetry of the null distribution; to
    # do that we use the same test case but negate one of the samples.
    # Reference values computed using R cor.test, e.g.
    # options(digits=16)
    # x <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
    # y <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
    # cor.test(x, y, method = "kendall", alternative = "g")

    alternatives = ('less', 'two-sided', 'greater')
    # Expected p-values for each special case, ordered as `alternatives`.
    p_n1 = [np.nan, np.nan, np.nan]
    p_n2 = [1, 1, 0.5]
    p_c0 = [1, 0.3333333333333, 0.1666666666667]
    p_c1 = [0.9583333333333, 0.3333333333333, 0.1666666666667]
    p_no_correlation = [0.5916666666667, 1, 0.5916666666667]
    p_no_correlationb = [0.5475694444444, 1, 0.5475694444444]
    p_n_lt_171 = [0.9624118165785, 0.1194389329806, 0.0597194664903]
    p_n_lt_171b = [0.246236925303, 0.4924738506059, 0.755634083327]
    p_n_lt_171c = [0.9847475308925, 0.03071385306533, 0.01535692653267]

    def exact_test(self, x, y, alternative, rev, stat_expected, p_expected):
        # Helper: optionally negate `y` (testing the mirrored tail via the
        # symmetry of the null distribution), then check exact results.
        if rev:
            y = -np.asarray(y)
            stat_expected *= -1
        res = stats.kendalltau(x, y, method='exact', alternative=alternative)
        res_expected = stat_expected, p_expected
        assert_allclose(res, res_expected)

    case_R_n1 = (list(zip(alternatives, p_n1, [False]*3))
                 + list(zip(alternatives, reversed(p_n1), [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n1)
    def test_against_R_n1(self, alternative, p_expected, rev):
        # Single observation: statistic and p-value are NaN.
        x, y = [1], [2]
        stat_expected = np.nan
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_n2 = (list(zip(alternatives, p_n2, [False]*3))
                 + list(zip(alternatives, reversed(p_n2), [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n2)
    def test_against_R_n2(self, alternative, p_expected, rev):
        # Two observations.
        x, y = [1, 2], [3, 4]
        stat_expected = 0.9999999999999998
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_c0 = (list(zip(alternatives, p_c0, [False]*3))
                 + list(zip(alternatives, reversed(p_c0), [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c0)
    def test_against_R_c0(self, alternative, p_expected, rev):
        # Perfect agreement (no discordant pairs).
        x, y = [1, 2, 3], [1, 2, 3]
        stat_expected = 1
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_c1 = (list(zip(alternatives, p_c1, [False]*3))
                 + list(zip(alternatives, reversed(p_c1), [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c1)
    def test_against_R_c1(self, alternative, p_expected, rev):
        # One discordant pair.
        x, y = [1, 2, 3, 4], [1, 2, 4, 3]
        stat_expected = 0.6666666666666667
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_no_corr = (list(zip(alternatives, p_no_correlation, [False]*3))
                      + list(zip(alternatives, reversed(p_no_correlation),
                                 [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_no_corr)
    def test_against_R_no_correlation(self, alternative, p_expected, rev):
        # Zero statistic, n = 5.
        x, y = [1, 2, 3, 4, 5], [1, 5, 4, 2, 3]
        stat_expected = 0
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_no_cor_b = (list(zip(alternatives, p_no_correlationb, [False]*3))
                     + list(zip(alternatives, reversed(p_no_correlationb),
                                [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_no_cor_b)
    def test_against_R_no_correlationb(self, alternative, p_expected, rev):
        # Zero statistic, n = 8.
        x, y = [1, 2, 3, 4, 5, 6, 7, 8], [8, 6, 1, 3, 2, 5, 4, 7]
        stat_expected = 0
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_lt_171 = (list(zip(alternatives, p_n_lt_171, [False]*3))
                     + list(zip(alternatives, reversed(p_n_lt_171), [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171)
    def test_against_R_lt_171(self, alternative, p_expected, rev):
        # Data from Hollander & Wolfe (1973), p. 187f.
        # Used from https://rdrr.io/r/stats/cor.test.html
        x = [44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1]
        y = [2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8]
        stat_expected = 0.4444444444444445
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_lt_171b = (list(zip(alternatives, p_n_lt_171b, [False]*3))
                      + list(zip(alternatives, reversed(p_n_lt_171b),
                                 [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171b)
    def test_against_R_lt_171b(self, alternative, p_expected, rev):
        # Random data, n = 100.
        np.random.seed(0)
        x = np.random.rand(100)
        y = np.random.rand(100)
        stat_expected = -0.04686868686868687
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_R_lt_171c = (list(zip(alternatives, p_n_lt_171c, [False]*3))
                      + list(zip(alternatives, reversed(p_n_lt_171c),
                                 [True]*3)))

    @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171c)
    def test_against_R_lt_171c(self, alternative, p_expected, rev):
        # Random data, n = 170.
        np.random.seed(0)
        x = np.random.rand(170)
        y = np.random.rand(170)
        stat_expected = 0.1115906717716673
        self.exact_test(x, y, alternative, rev, stat_expected, p_expected)

    case_gt_171 = (list(zip(alternatives, [False]*3)) +
                   list(zip(alternatives, [True]*3)))

    @pytest.mark.parametrize("alternative, rev", case_gt_171)
    def test_gt_171(self, alternative, rev):
        # n = 400: 'exact' and 'asymptotic' p-values should agree to ~0.1%.
        np.random.seed(0)
        x = np.random.rand(400)
        y = np.random.rand(400)
        res0 = stats.kendalltau(x, y, method='exact',
                                alternative=alternative)
        res1 = stats.kendalltau(x, y, method='asymptotic',
                                alternative=alternative)
        assert_equal(res0[0], res1[0])
        assert_allclose(res0[1], res1[1], rtol=1e-3)

    @pytest.mark.parametrize("method", ('exact', 'asymptotic'))
    @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater'))
    def test_nan_policy(self, method, alternative):
        # Test nan policies
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 9]
        x1nan = x1 + [np.nan]
        x2nan = x2 + [np.nan]
        # test nan_policy="propagate"
        res_actual = stats.kendalltau(x1nan, x2nan,
                                      method=method, alternative=alternative)
        res_expected = (np.nan, np.nan)
        assert_allclose(res_actual, res_expected)
        # test nan_policy="omit"
        res_actual = stats.kendalltau(x1nan, x2nan, nan_policy='omit',
                                      method=method, alternative=alternative)
        res_expected = stats.kendalltau(x1, x2, method=method,
                                        alternative=alternative)
        assert_allclose(res_actual, res_expected)
        # test nan_policy="raise"
        message = 'The input contains nan values'
        with pytest.raises(ValueError, match=message):
            stats.kendalltau(x1nan, x2nan, nan_policy='raise',
                             method=method, alternative=alternative)
        # test invalid nan_policy
        message = "nan_policy must be one of..."
        with pytest.raises(ValueError, match=message):
            stats.kendalltau(x1nan, x2nan, nan_policy='ekki-ekki',
                             method=method, alternative=alternative)
def test_weightedtau():
    """Battery of checks for stats.weightedtau: additive vs multiplicative
    weighting, the `rank` options, argument dtype conversion, ties and NaNs.

    Note: every returned p-value is asserted to be NaN in these cases.
    """
    x = [12, 2, 1, 12, 2]
    y = [1, 4, 7, 1, 0]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.56694968153682723)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(x, y, additive=False)
    assert_approx_equal(tau, -0.62205716951801038)
    assert_equal(np.nan, p_value)
    # This must be exactly Kendall's tau
    tau, p_value = stats.weightedtau(x, y, weigher=lambda x: 1)
    assert_approx_equal(tau, -0.47140452079103173)
    assert_equal(np.nan, p_value)
    # test for namedtuple attribute results
    res = stats.weightedtau(x, y)
    attributes = ('correlation', 'pvalue')
    check_named_results(res, attributes)
    assert_equal(res.correlation, res.statistic)
    # Asymmetric, ranked version
    tau, p_value = stats.weightedtau(x, y, rank=None)
    assert_approx_equal(tau, -0.4157652301037516)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(y, x, rank=None)
    assert_approx_equal(tau, -0.7181341329699029)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(x, y, rank=None, additive=False)
    assert_approx_equal(tau, -0.40644850966246893)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(y, x, rank=None, additive=False)
    assert_approx_equal(tau, -0.83766582937355172)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(x, y, rank=False)
    assert_approx_equal(tau, -0.51604397940261848)
    assert_equal(np.nan, p_value)
    # This must be exactly Kendall's tau
    tau, p_value = stats.weightedtau(x, y, rank=True, weigher=lambda x: 1)
    assert_approx_equal(tau, -0.47140452079103173)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau(y, x, rank=True, weigher=lambda x: 1)
    assert_approx_equal(tau, -0.47140452079103173)
    assert_equal(np.nan, p_value)
    # Test argument conversion
    tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64), y)
    assert_approx_equal(tau, -0.56694968153682723)
    tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.int16), y)
    assert_approx_equal(tau, -0.56694968153682723)
    tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64), np.asarray(y, dtype=np.float64))
    assert_approx_equal(tau, -0.56694968153682723)
    # All ties
    tau, p_value = stats.weightedtau([], [])
    assert_equal(np.nan, tau)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.weightedtau([0], [0])
    assert_equal(np.nan, tau)
    assert_equal(np.nan, p_value)
    # Size mismatches
    assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1, 2])
    assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1], [0])
    # NaNs
    x = [12, 2, 1, 12, 2]
    y = [1, 4, 7, 1, np.nan]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.56694968153682723)
    x = [12, 2, np.nan, 12, 2]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.56694968153682723)
    # NaNs when the dtype of x and y are all np.float64
    x = [12.0, 2.0, 1.0, 12.0, 2.0]
    y = [1.0, 4.0, 7.0, 1.0, np.nan]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.56694968153682723)
    x = [12.0, 2.0, np.nan, 12.0, 2.0]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.56694968153682723)
    # NaNs when there are more than one NaN in x or y
    x = [12.0, 2.0, 1.0, 12.0, 1.0]
    y = [1.0, 4.0, 7.0, 1.0, 1.0]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.6615242347139803)
    x = [12.0, 2.0, np.nan, 12.0, np.nan]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.6615242347139803)
    y = [np.nan, 4.0, 7.0, np.nan, np.nan]
    tau, p_value = stats.weightedtau(x, y)
    assert_approx_equal(tau, -0.6615242347139803)
  1411. def test_segfault_issue_9710():
  1412. # https://github.com/scipy/scipy/issues/9710
  1413. # This test was created to check segfault
  1414. # In issue SEGFAULT only repros in optimized builds after calling the function twice
  1415. stats.weightedtau([1], [1.0])
  1416. stats.weightedtau([1], [1.0])
  1417. # The code below also caused SEGFAULT
  1418. stats.weightedtau([np.nan], [52])
  1419. def test_kendall_tau_large():
  1420. n = 172
  1421. # Test omit policy
  1422. x = np.arange(n + 1).astype(float)
  1423. y = np.arange(n + 1).astype(float)
  1424. y[-1] = np.nan
  1425. _, pval = stats.kendalltau(x, y, method='exact', nan_policy='omit')
  1426. assert_equal(pval, 0.0)
def test_weightedtau_vs_quadratic():
    # Cross-check the fast `stats.weightedtau` implementation against a
    # brute-force O(n^2) reference on shuffled rankings that contain ties.
    # NOTE: the sequence of np.random.shuffle calls below is significant —
    # it determines the data seen by both implementations.

    # Trivial quadratic implementation, all parameters mandatory
    def wkq(x, y, rank, weigher, add):
        # Accumulate total / tied-in-x / tied-in-y / concordant / discordant
        # weight over all ordered pairs (i, j).
        tot = conc = disc = u = v = 0
        for (i, j) in product(range(len(x)), range(len(x))):
            # Pair weight: additive or multiplicative combination of the
            # individual rank weights, selected by `add`.
            w = weigher(rank[i]) + weigher(rank[j]) if add \
                else weigher(rank[i]) * weigher(rank[j])
            tot += w
            if x[i] == x[j]:
                u += w
            if y[i] == y[j]:
                v += w
            if x[i] < x[j] and y[i] < y[j] or x[i] > x[j] and y[i] > y[j]:
                conc += w
            elif x[i] < x[j] and y[i] > y[j] or x[i] > x[j] and y[i] < y[j]:
                disc += w
        # Normalized difference of concordant and discordant weight.
        return (conc - disc) / np.sqrt(tot - u) / np.sqrt(tot - v)

    def weigher(x):
        # Hyperbolic weighing: rank 0 is the heaviest.
        return 1. / (x + 1)

    np.random.seed(42)
    for s in range(3,10):
        a = []
        # Generate rankings with ties
        for i in range(s):
            a += [i]*i
        b = list(a)
        np.random.shuffle(a)
        np.random.shuffle(b)
        # First pass: use element indices as ranks
        rank = np.arange(len(a), dtype=np.intp)
        for _ in range(2):
            for add in [True, False]:
                expected = wkq(a, b, rank, weigher, add)
                actual = stats.weightedtau(a, b, rank, weigher, add).statistic
                assert_approx_equal(expected, actual)
            # Second pass: use a random rank
            np.random.shuffle(rank)
  1464. class TestFindRepeats:
  1465. def test_basic(self):
  1466. a = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 5]
  1467. res, nums = stats.find_repeats(a)
  1468. assert_array_equal(res, [1, 2, 3, 4])
  1469. assert_array_equal(nums, [3, 3, 2, 2])
  1470. def test_empty_result(self):
  1471. # Check that empty arrays are returned when there are no repeats.
  1472. for a in [[10, 20, 50, 30, 40], []]:
  1473. repeated, counts = stats.find_repeats(a)
  1474. assert_array_equal(repeated, [])
  1475. assert_array_equal(counts, [])
class TestRegression:
    """Tests for `stats.linregress`.

    The W.xx.x labels refer to cases from the Wilkinson statistics-package
    validation suite; `X`, `BIG`, `ZERO` are module-level fixture arrays
    defined elsewhere in this file.
    """

    def test_linregressBIGX(self):
        # W.II.F. Regress BIG on X.
        result = stats.linregress(X, BIG)
        assert_almost_equal(result.intercept, 99999990)
        assert_almost_equal(result.rvalue, 1.0)
        # The uncertainty ought to be almost zero
        # since all points lie on a line
        assert_almost_equal(result.stderr, 0.0)
        assert_almost_equal(result.intercept_stderr, 0.0)

    def test_regressXX(self):
        # W.IV.B. Regress X on X.
        # The constant should be exactly 0 and the regression coefficient
        # should be 1. This is a perfectly valid regression and the
        # program should not complain.
        result = stats.linregress(X, X)
        assert_almost_equal(result.intercept, 0.0)
        assert_almost_equal(result.rvalue, 1.0)
        # The uncertainty of a perfect fit ought to be 0
        assert_almost_equal(result.stderr, 0.0)
        assert_almost_equal(result.intercept_stderr, 0.0)
        # W.IV.C. Regress X on BIG and LITTLE (two predictors). The program
        # should tell you that this model is "singular" because BIG and
        # LITTLE are linear combinations of each other. Cryptic error
        # messages are unacceptable here. Singularity is the most
        # fundamental regression error.
        #
        # Need to figure out how to handle multiple linear regression.
        # This is not obvious

    def test_regressZEROX(self):
        # W.IV.D. Regress ZERO on X.
        # The program should inform you that ZERO has no variance or it should
        # go ahead and compute the regression and report a correlation and
        # total sum of squares of exactly 0.
        result = stats.linregress(X, ZERO)
        assert_almost_equal(result.intercept, 0.0)
        assert_almost_equal(result.rvalue, 0.0)

    def test_regress_simple(self):
        # Regress a line with sinusoidal noise.
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10
        y += np.sin(np.linspace(0, 20, 100))

        result = stats.linregress(x, y)
        # Result must be an instance of the LinregressResult bunch type.
        lr = stats._stats_mstats_common.LinregressResult
        assert_(isinstance(result, lr))
        assert_almost_equal(result.stderr, 2.3957814497838803e-3)

    def test_regress_alternative(self):
        # test alternative parameter
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10  # slope is greater than zero
        y += np.sin(np.linspace(0, 20, 100))

        # An unrecognized alternative must raise.
        with pytest.raises(ValueError, match="alternative must be 'less'..."):
            stats.linregress(x, y, alternative="ekki-ekki")

        res1 = stats.linregress(x, y, alternative="two-sided")

        # slope is greater than zero, so "less" p-value should be large
        res2 = stats.linregress(x, y, alternative="less")
        assert_allclose(res2.pvalue, 1 - (res1.pvalue / 2))

        # slope is greater than zero, so "greater" p-value should be small
        res3 = stats.linregress(x, y, alternative="greater")
        assert_allclose(res3.pvalue, res1.pvalue / 2)

        assert res1.rvalue == res2.rvalue == res3.rvalue

    def test_regress_against_R(self):
        # test against R `lm`:
        # options(digits=16)
        # x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
        # y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
        # relation <- lm(y~x)
        # print(summary(relation))
        x = [151, 174, 138, 186, 128, 136, 179, 163, 152, 131]
        y = [63, 81, 56, 91, 47, 57, 76, 72, 62, 48]
        res = stats.linregress(x, y, alternative="two-sided")
        # expected values from R's `lm` above
        assert_allclose(res.slope, 0.6746104491292)
        assert_allclose(res.intercept, -38.4550870760770)
        assert_allclose(res.rvalue, np.sqrt(0.95478224775))
        assert_allclose(res.pvalue, 1.16440531074e-06)
        assert_allclose(res.stderr, 0.0519051424731)
        assert_allclose(res.intercept_stderr, 8.0490133029927)

    def test_regress_simple_onearg_rows(self):
        # Regress a line w sinusoidal noise,
        # with a single input of shape (2, N)
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10
        y += np.sin(np.linspace(0, 20, 100))
        rows = np.vstack((x, y))

        result = stats.linregress(rows)
        assert_almost_equal(result.stderr, 2.3957814497838803e-3)
        assert_almost_equal(result.intercept_stderr, 1.3866936078570702e-1)

    def test_regress_simple_onearg_cols(self):
        # Same data as above, but passed as a single (N, 2) input.
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10
        y += np.sin(np.linspace(0, 20, 100))
        columns = np.hstack((np.expand_dims(x, 1), np.expand_dims(y, 1)))

        result = stats.linregress(columns)
        assert_almost_equal(result.stderr, 2.3957814497838803e-3)
        assert_almost_equal(result.intercept_stderr, 1.3866936078570702e-1)

    def test_regress_shape_error(self):
        # Check that a single input argument to linregress with wrong shape
        # results in a ValueError.
        assert_raises(ValueError, stats.linregress, np.ones((3, 3)))

    def test_linregress(self):
        # compared with multivariate ols with pinv
        x = np.arange(11)
        y = np.arange(5, 16)
        y[[(1), (-2)]] -= 1
        y[[(0), (-1)]] += 1

        result = stats.linregress(x, y)

        # This test used to use 'assert_array_almost_equal' but its
        # formulation got confusing since LinregressResult became
        # _lib._bunch._make_tuple_bunch instead of namedtuple
        # (for backwards compatibility, see PR #12983)
        assert_ae = lambda x, y: assert_almost_equal(x, y, decimal=14)
        assert_ae(result.slope, 1.0)
        assert_ae(result.intercept, 5.0)
        assert_ae(result.rvalue, 0.98229948625750)
        assert_ae(result.pvalue, 7.45259691e-008)
        assert_ae(result.stderr, 0.063564172616372733)
        assert_ae(result.intercept_stderr, 0.37605071654517686)

    def test_regress_simple_negative_cor(self):
        # If the slope of the regression is negative the factor R tend
        # to -1 not 1. Sometimes rounding errors makes it < -1
        # leading to stderr being NaN.
        a, n = 1e-71, 100000
        x = np.linspace(a, 2 * a, n)
        y = np.linspace(2 * a, a, n)

        result = stats.linregress(x, y)

        # Make sure propagated numerical errors
        # did not bring rvalue below -1 (or were coerced)
        assert_(result.rvalue >= -1)
        assert_almost_equal(result.rvalue, -1)

        # slope and intercept stderror should stay numeric
        assert_(not np.isnan(result.stderr))
        assert_(not np.isnan(result.intercept_stderr))

    def test_linregress_result_attributes(self):
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10
        y += np.sin(np.linspace(0, 20, 100))

        result = stats.linregress(x, y)

        # Result is of a correct class
        lr = stats._stats_mstats_common.LinregressResult
        assert_(isinstance(result, lr))

        # LinregressResult elements have correct names
        attributes = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
        check_named_results(result, attributes)
        # Also check that the extra attribute (intercept_stderr) is present
        assert 'intercept_stderr' in dir(result)

    def test_regress_two_inputs(self):
        # Regress a simple line formed by two points.
        x = np.arange(2)
        y = np.arange(3, 5)

        result = stats.linregress(x, y)

        # Non-horizontal line
        assert_almost_equal(result.pvalue, 0.0)

        # Zero error through two points
        assert_almost_equal(result.stderr, 0.0)
        assert_almost_equal(result.intercept_stderr, 0.0)

    def test_regress_two_inputs_horizontal_line(self):
        # Regress a horizontal line formed by two points.
        x = np.arange(2)
        y = np.ones(2)

        result = stats.linregress(x, y)

        # Horizontal line
        assert_almost_equal(result.pvalue, 1.0)

        # Zero error through two points
        assert_almost_equal(result.stderr, 0.0)
        assert_almost_equal(result.intercept_stderr, 0.0)

    def test_nist_norris(self):
        # Certified linear-regression dataset; the expected values below are
        # the reference values shipped with it.
        x = [0.2, 337.4, 118.2, 884.6, 10.1, 226.5, 666.3, 996.3, 448.6, 777.0,
             558.2, 0.4, 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, 556.0, 228.1,
             995.8, 887.6, 120.2, 0.3, 0.3, 556.8, 339.1, 887.2, 999.0, 779.0,
             11.1, 118.3, 229.2, 669.1, 448.9, 0.5]
        y = [0.1, 338.8, 118.1, 888.0, 9.2, 228.1, 668.5, 998.5, 449.1, 778.9,
             559.2, 0.3, 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, 557.7, 228.3,
             998.0, 888.8, 119.6, 0.3, 0.6, 557.6, 339.3, 888.0, 998.5, 778.9,
             10.2, 117.6, 228.9, 668.4, 449.2, 0.2]

        result = stats.linregress(x, y)

        assert_almost_equal(result.slope, 1.00211681802045)
        assert_almost_equal(result.intercept, -0.262323073774029)
        assert_almost_equal(result.rvalue**2, 0.999993745883712)
        assert_almost_equal(result.pvalue, 0.0)
        assert_almost_equal(result.stderr, 0.00042979684820)
        assert_almost_equal(result.intercept_stderr, 0.23281823430153)

    def test_compare_to_polyfit(self):
        x = np.linspace(0, 100, 100)
        y = 0.2 * np.linspace(0, 100, 100) + 10
        y += np.sin(np.linspace(0, 20, 100))

        result = stats.linregress(x, y)
        poly = np.polyfit(x, y, 1)  # Fit 1st degree polynomial

        # Make sure linear regression slope and intercept
        # match with results from numpy polyfit
        assert_almost_equal(result.slope, poly[0])
        assert_almost_equal(result.intercept, poly[1])

    def test_empty_input(self):
        assert_raises(ValueError, stats.linregress, [], [])

    def test_nan_input(self):
        x = np.arange(10.)
        x[9] = np.nan

        with np.errstate(invalid="ignore"):
            result = stats.linregress(x, x)

        # Make sure the result still comes back as `LinregressResult`
        lr = stats._stats_mstats_common.LinregressResult
        assert_(isinstance(result, lr))

        assert_array_equal(result, (np.nan,)*5)
        assert_equal(result.intercept_stderr, np.nan)

    def test_identical_x(self):
        # A constant x has no variance; the regression must be rejected.
        x = np.zeros(10)
        y = np.random.random(10)
        msg = "Cannot calculate a linear regression"
        with assert_raises(ValueError, match=msg):
            stats.linregress(x, y)
  1686. def test_theilslopes():
  1687. # Basic slope test.
  1688. slope, intercept, lower, upper = stats.theilslopes([0,1,1])
  1689. assert_almost_equal(slope, 0.5)
  1690. assert_almost_equal(intercept, 0.5)
  1691. msg = ("method must be either 'joint' or 'separate'."
  1692. "'joint_separate' is invalid.")
  1693. with pytest.raises(ValueError, match=msg):
  1694. stats.theilslopes([0, 1, 1], method='joint_separate')
  1695. slope, intercept, lower, upper = stats.theilslopes([0, 1, 1],
  1696. method='joint')
  1697. assert_almost_equal(slope, 0.5)
  1698. assert_almost_equal(intercept, 0.0)
  1699. # Test of confidence intervals.
  1700. x = [1, 2, 3, 4, 10, 12, 18]
  1701. y = [9, 15, 19, 20, 45, 55, 78]
  1702. slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07,
  1703. method='separate')
  1704. assert_almost_equal(slope, 4)
  1705. assert_almost_equal(intercept, 4.0)
  1706. assert_almost_equal(upper, 4.38, decimal=2)
  1707. assert_almost_equal(lower, 3.71, decimal=2)
  1708. slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07,
  1709. method='joint')
  1710. assert_almost_equal(slope, 4)
  1711. assert_almost_equal(intercept, 6.0)
  1712. assert_almost_equal(upper, 4.38, decimal=2)
  1713. assert_almost_equal(lower, 3.71, decimal=2)
  1714. def test_cumfreq():
  1715. x = [1, 4, 2, 1, 3, 1]
  1716. cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4)
  1717. assert_array_almost_equal(cumfreqs, np.array([3., 4., 5., 6.]))
  1718. cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4,
  1719. defaultreallimits=(1.5, 5))
  1720. assert_(extrapoints == 3)
  1721. # test for namedtuple attribute results
  1722. attributes = ('cumcount', 'lowerlimit', 'binsize', 'extrapoints')
  1723. res = stats.cumfreq(x, numbins=4, defaultreallimits=(1.5, 5))
  1724. check_named_results(res, attributes)
  1725. def test_relfreq():
  1726. a = np.array([1, 4, 2, 1, 3, 1])
  1727. relfreqs, lowlim, binsize, extrapoints = stats.relfreq(a, numbins=4)
  1728. assert_array_almost_equal(relfreqs,
  1729. array([0.5, 0.16666667, 0.16666667, 0.16666667]))
  1730. # test for namedtuple attribute results
  1731. attributes = ('frequency', 'lowerlimit', 'binsize', 'extrapoints')
  1732. res = stats.relfreq(a, numbins=4)
  1733. check_named_results(res, attributes)
  1734. # check array_like input is accepted
  1735. relfreqs2, lowlim, binsize, extrapoints = stats.relfreq([1, 4, 2, 1, 3, 1],
  1736. numbins=4)
  1737. assert_array_almost_equal(relfreqs, relfreqs2)
  1738. class TestScoreatpercentile:
  1739. def setup_method(self):
  1740. self.a1 = [3, 4, 5, 10, -3, -5, 6]
  1741. self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
  1742. self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]
  1743. def test_basic(self):
  1744. x = arange(8) * 0.5
  1745. assert_equal(stats.scoreatpercentile(x, 0), 0.)
  1746. assert_equal(stats.scoreatpercentile(x, 100), 3.5)
  1747. assert_equal(stats.scoreatpercentile(x, 50), 1.75)
  1748. def test_fraction(self):
  1749. scoreatperc = stats.scoreatpercentile
  1750. # Test defaults
  1751. assert_equal(scoreatperc(list(range(10)), 50), 4.5)
  1752. assert_equal(scoreatperc(list(range(10)), 50, (2,7)), 4.5)
  1753. assert_equal(scoreatperc(list(range(100)), 50, limit=(1, 8)), 4.5)
  1754. assert_equal(scoreatperc(np.array([1, 10,100]), 50, (10,100)), 55)
  1755. assert_equal(scoreatperc(np.array([1, 10,100]), 50, (1,10)), 5.5)
  1756. # explicitly specify interpolation_method 'fraction' (the default)
  1757. assert_equal(scoreatperc(list(range(10)), 50, interpolation_method='fraction'),
  1758. 4.5)
  1759. assert_equal(scoreatperc(list(range(10)), 50, limit=(2, 7),
  1760. interpolation_method='fraction'),
  1761. 4.5)
  1762. assert_equal(scoreatperc(list(range(100)), 50, limit=(1, 8),
  1763. interpolation_method='fraction'),
  1764. 4.5)
  1765. assert_equal(scoreatperc(np.array([1, 10,100]), 50, (10, 100),
  1766. interpolation_method='fraction'),
  1767. 55)
  1768. assert_equal(scoreatperc(np.array([1, 10,100]), 50, (1,10),
  1769. interpolation_method='fraction'),
  1770. 5.5)
  1771. def test_lower_higher(self):
  1772. scoreatperc = stats.scoreatpercentile
  1773. # interpolation_method 'lower'/'higher'
  1774. assert_equal(scoreatperc(list(range(10)), 50,
  1775. interpolation_method='lower'), 4)
  1776. assert_equal(scoreatperc(list(range(10)), 50,
  1777. interpolation_method='higher'), 5)
  1778. assert_equal(scoreatperc(list(range(10)), 50, (2,7),
  1779. interpolation_method='lower'), 4)
  1780. assert_equal(scoreatperc(list(range(10)), 50, limit=(2,7),
  1781. interpolation_method='higher'), 5)
  1782. assert_equal(scoreatperc(list(range(100)), 50, (1,8),
  1783. interpolation_method='lower'), 4)
  1784. assert_equal(scoreatperc(list(range(100)), 50, (1,8),
  1785. interpolation_method='higher'), 5)
  1786. assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (10, 100),
  1787. interpolation_method='lower'), 10)
  1788. assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(10, 100),
  1789. interpolation_method='higher'), 100)
  1790. assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (1, 10),
  1791. interpolation_method='lower'), 1)
  1792. assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(1, 10),
  1793. interpolation_method='higher'), 10)
  1794. def test_sequence_per(self):
  1795. x = arange(8) * 0.5
  1796. expected = np.array([0, 3.5, 1.75])
  1797. res = stats.scoreatpercentile(x, [0, 100, 50])
  1798. assert_allclose(res, expected)
  1799. assert_(isinstance(res, np.ndarray))
  1800. # Test with ndarray. Regression test for gh-2861
  1801. assert_allclose(stats.scoreatpercentile(x, np.array([0, 100, 50])),
  1802. expected)
  1803. # Also test combination of 2-D array, axis not None and array-like per
  1804. res2 = stats.scoreatpercentile(np.arange(12).reshape((3,4)),
  1805. np.array([0, 1, 100, 100]), axis=1)
  1806. expected2 = array([[0, 4, 8],
  1807. [0.03, 4.03, 8.03],
  1808. [3, 7, 11],
  1809. [3, 7, 11]])
  1810. assert_allclose(res2, expected2)
  1811. def test_axis(self):
  1812. scoreatperc = stats.scoreatpercentile
  1813. x = arange(12).reshape(3, 4)
  1814. assert_equal(scoreatperc(x, (25, 50, 100)), [2.75, 5.5, 11.0])
  1815. r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]]
  1816. assert_equal(scoreatperc(x, (25, 50, 100), axis=0), r0)
  1817. r1 = [[0.75, 4.75, 8.75], [1.5, 5.5, 9.5], [3, 7, 11]]
  1818. assert_equal(scoreatperc(x, (25, 50, 100), axis=1), r1)
  1819. x = array([[1, 1, 1],
  1820. [1, 1, 1],
  1821. [4, 4, 3],
  1822. [1, 1, 1],
  1823. [1, 1, 1]])
  1824. score = stats.scoreatpercentile(x, 50)
  1825. assert_equal(score.shape, ())
  1826. assert_equal(score, 1.0)
  1827. score = stats.scoreatpercentile(x, 50, axis=0)
  1828. assert_equal(score.shape, (3,))
  1829. assert_equal(score, [1, 1, 1])
  1830. def test_exception(self):
  1831. assert_raises(ValueError, stats.scoreatpercentile, [1, 2], 56,
  1832. interpolation_method='foobar')
  1833. assert_raises(ValueError, stats.scoreatpercentile, [1], 101)
  1834. assert_raises(ValueError, stats.scoreatpercentile, [1], -1)
  1835. def test_empty(self):
  1836. assert_equal(stats.scoreatpercentile([], 50), np.nan)
  1837. assert_equal(stats.scoreatpercentile(np.array([[], []]), 50), np.nan)
  1838. assert_equal(stats.scoreatpercentile([], [50, 99]), [np.nan, np.nan])
@pytest.mark.filterwarnings('ignore::FutureWarning')
class TestMode:
    """Tests for `stats.mode` (most frequent value along an axis)."""

    # Expected message for the deprecation of non-numeric input support.
    deprecation_msg = r"Support for non-numeric arrays has been deprecated"

    def test_empty(self):
        # An empty input yields empty mode/count arrays.
        vals, counts = stats.mode([])
        assert_equal(vals, np.array([]))
        assert_equal(counts, np.array([]))

    def test_scalar(self):
        vals, counts = stats.mode(4.)
        assert_equal(vals, np.array([4.]))
        assert_equal(counts, np.array([1]))

    def test_basic(self):
        data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
        vals = stats.mode(data1)
        assert_equal(vals[0][0], 6)
        assert_equal(vals[1][0], 3)

    def test_axes(self):
        data1 = [10, 10, 30, 40]
        data2 = [10, 10, 10, 10]
        data3 = [20, 10, 20, 20]
        data4 = [30, 30, 30, 30]
        data5 = [40, 30, 30, 30]
        arr = np.array([data1, data2, data3, data4, data5])

        # axis=None flattens the input first.
        vals = stats.mode(arr, axis=None)
        assert_equal(vals[0], np.array([30]))
        assert_equal(vals[1], np.array([8]))

        vals = stats.mode(arr, axis=0)
        assert_equal(vals[0], np.array([[10, 10, 30, 30]]))
        assert_equal(vals[1], np.array([[2, 3, 3, 2]]))

        vals = stats.mode(arr, axis=1)
        assert_equal(vals[0], np.array([[10], [10], [20], [30], [30]]))
        assert_equal(vals[1], np.array([[2], [4], [3], [4], [3]]))

    @pytest.mark.parametrize('axis', np.arange(-4, 0))
    def test_negative_axes_gh_15375(self, axis):
        # Negative axis values must match their positive equivalents.
        np.random.seed(984213899)
        a = np.random.rand(10, 11, 12, 13)
        res0 = stats.mode(a, axis=a.ndim+axis)
        res1 = stats.mode(a, axis=axis)
        np.testing.assert_array_equal(res0, res1)

    def test_strings(self):
        # String input is deprecated but must still work.
        data1 = ['rain', 'showers', 'showers']
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            vals = stats.mode(data1)
        assert_equal(vals[0][0], 'showers')
        assert_equal(vals[1][0], 2)

    def test_mixed_objects(self):
        # Heterogeneous object arrays are deprecated but must still work.
        objects = [10, True, np.nan, 'hello', 10]
        arr = np.empty((5,), dtype=object)
        arr[:] = objects
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            vals = stats.mode(arr)
        assert_equal(vals[0][0], 10)
        assert_equal(vals[1][0], 2)

    def test_objects(self):
        # Python objects must be sortable (le + eq) and have ne defined
        # for np.unique to work. hash is for set.
        class Point:
            def __init__(self, x):
                self.x = x

            def __eq__(self, other):
                return self.x == other.x

            def __ne__(self, other):
                return self.x != other.x

            def __lt__(self, other):
                return self.x < other.x

            def __hash__(self):
                return hash(self.x)

        points = [Point(x) for x in [1, 2, 3, 4, 3, 2, 2, 2]]
        arr = np.empty((8,), dtype=object)
        arr[:] = points
        assert_(len(set(points)) == 4)
        assert_equal(np.unique(arr).shape, (4,))
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            vals = stats.mode(arr)
        assert_equal(vals[0][0], Point(2))
        assert_equal(vals[1][0], 4)

    def test_mode_result_attributes(self):
        # The result is a namedtuple-like bunch with 'mode' and 'count',
        # also for empty input.
        data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
        data2 = []
        actual = stats.mode(data1)
        attributes = ('mode', 'count')
        check_named_results(actual, attributes)
        actual2 = stats.mode(data2)
        check_named_results(actual2, attributes)

    def test_mode_nan(self):
        # With 'propagate' (default) and 'omit' the NaN does not change the
        # winner here; 'raise' rejects NaNs, unknown policies are an error.
        data1 = [3, np.nan, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
        actual = stats.mode(data1)
        assert_equal(actual, (6, 3))

        actual = stats.mode(data1, nan_policy='omit')
        assert_equal(actual, (6, 3))
        assert_raises(ValueError, stats.mode, data1, nan_policy='raise')
        assert_raises(ValueError, stats.mode, data1, nan_policy='foobar')

    @pytest.mark.parametrize("data", [
        [3, 5, 1, 1, 3],
        [3, np.nan, 5, 1, 1, 3],
        [3, 5, 1],
        [3, np.nan, 5, 1],
    ])
    def test_smallest_equal(self, data):
        # Ties are broken by returning the smallest candidate.
        result = stats.mode(data, nan_policy='omit')
        assert_equal(result[0][0], 1)

    def test_obj_arrays_ndim(self):
        # regression test for gh-9645: `mode` fails for object arrays w/ndim > 1
        data = [['Oxidation'], ['Oxidation'], ['Polymerization'], ['Reduction']]
        ar = np.array(data, dtype=object)
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            m = stats.mode(ar, axis=0)
        assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1, 1)
        assert np.all(m.count == 2) and m.count.shape == (1, 1)

        # The same must hold when a NaN row is appended.
        data1 = data + [[np.nan]]
        ar1 = np.array(data1, dtype=object)
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            m = stats.mode(ar1, axis=0)
        assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1, 1)
        assert np.all(m.count == 2) and m.count.shape == (1, 1)

    @pytest.mark.parametrize('axis', np.arange(-3, 3))
    @pytest.mark.parametrize('dtype', [np.float64, 'object'])
    def test_mode_shape_gh_9955(self, axis, dtype):
        # With keepdims=False the result shape equals the input shape with
        # `axis` removed.
        rng = np.random.default_rng(984213899)
        a = rng.uniform(size=(3, 4, 5)).astype(dtype)
        if dtype == 'object':
            with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
                res = stats.mode(a, axis=axis, keepdims=False)
        else:
            res = stats.mode(a, axis=axis, keepdims=False)
        reference_shape = list(a.shape)
        reference_shape.pop(axis)
        np.testing.assert_array_equal(res.mode.shape, reference_shape)
        np.testing.assert_array_equal(res.count.shape, reference_shape)

    def test_nan_policy_propagate_gh_9815(self):
        # mode should treat np.nan as it would any other object when
        # nan_policy='propagate'
        a = [2, np.nan, 1, np.nan]
        if NumpyVersion(np.__version__) >= '1.21.0':
            res = stats.mode(a)
            assert np.isnan(res.mode[0]) and res.count[0] == 2

        # mode should work on object arrays. There were issues when
        # objects do not support comparison operations.
        a = np.array(a, dtype='object')
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            res = stats.mode(a)
        assert np.isnan(res.mode[0]) and res.count[0] == 2

        a = np.array([10, True, 'hello', 10], dtype='object')
        with pytest.warns(DeprecationWarning, match=self.deprecation_msg):
            res = stats.mode(a)
        assert_array_equal(res, [[10], [2]])

    def test_keepdims(self):
        # test empty arrays (handled by `np.mean`)
        a = np.zeros((1, 2, 3, 0))

        res = stats.mode(a, axis=1, keepdims=False)
        assert res.mode.shape == res.count.shape == (1, 3, 0)

        res = stats.mode(a, axis=1, keepdims=True)
        assert res.mode.shape == res.count.shape == (1, 1, 3, 0)

        # test nan_policy='propagate'
        a = [[1, 3, 3, np.nan], [1, 1, np.nan, 1]]

        res = stats.mode(a, axis=1, keepdims=False)
        assert_array_equal(res.mode, [3, 1])
        assert_array_equal(res.count, [2, 3])

        res = stats.mode(a, axis=1, keepdims=True)
        assert_array_equal(res.mode, [[3], [1]])
        assert_array_equal(res.count, [[2], [3]])

        # axis=None must agree with the flattened 1-D computation.
        a = np.array(a)
        res = stats.mode(a, axis=None, keepdims=False)
        ref = stats.mode(a.ravel(), keepdims=False)
        assert_array_equal(res, ref)
        assert res.mode.shape == ref.mode.shape == ()

        res = stats.mode(a, axis=None, keepdims=True)
        ref = stats.mode(a.ravel(), keepdims=True)
        assert_array_equal(res, ref)
        assert res.mode.shape == ref.mode.shape == (1,)

        # test nan_policy='omit'
        a = [[1, np.nan, np.nan, np.nan, 1],
             [np.nan, np.nan, np.nan, np.nan, 2],
             [1, 2, np.nan, 5, 5]]

        res = stats.mode(a, axis=1, keepdims=False, nan_policy='omit')
        assert_array_equal(res.mode, [1, 2, 5])
        assert_array_equal(res.count, [2, 1, 2])

        res = stats.mode(a, axis=1, keepdims=True, nan_policy='omit')
        assert_array_equal(res.mode, [[1], [2], [5]])
        assert_array_equal(res.count, [[2], [1], [2]])

        a = np.array(a)
        res = stats.mode(a, axis=None, keepdims=False, nan_policy='omit')
        ref = stats.mode(a.ravel(), keepdims=False, nan_policy='omit')
        assert_array_equal(res, ref)
        assert res.mode.shape == ref.mode.shape == ()

        res = stats.mode(a, axis=None, keepdims=True, nan_policy='omit')
        ref = stats.mode(a.ravel(), keepdims=True, nan_policy='omit')
        assert_array_equal(res, ref)
        assert res.mode.shape == ref.mode.shape == (1,)

    def test_gh16952(self):
        # Check that bug reported in gh-16952 is resolved
        shape = (4, 3)
        data = np.ones(shape)
        data[0, 0] = np.nan
        res = stats.mode(a=data, axis=1, keepdims=False, nan_policy="omit")
        assert_array_equal(res.mode, [1, 1, 1, 1])
        assert_array_equal(res.count, [2, 3, 3, 3])
  2036. def test_mode_futurewarning():
  2037. a = [1, 2, 5, 3, 5]
  2038. future_msg = "Unlike other reduction functions..."
  2039. with pytest.warns(FutureWarning, match=future_msg):
  2040. res = stats.mode(a)
  2041. assert_array_equal(res, ([5], [2]))
  2042. # no FutureWarning if `keepdims` is specified
  2043. res = stats.mode(a, keepdims=True)
  2044. assert_array_equal(res, ([5], [2]))
  2045. res = stats.mode(a, keepdims=False)
  2046. assert_array_equal(res, [5, 2])
  2047. class TestSEM:
  2048. testcase = [1, 2, 3, 4]
  2049. scalar_testcase = 4.
  2050. def test_sem(self):
  2051. # This is not in R, so used:
  2052. # sqrt(var(testcase)*3/4)/sqrt(3)
  2053. # y = stats.sem(self.shoes[0])
  2054. # assert_approx_equal(y,0.775177399)
  2055. with suppress_warnings() as sup, np.errstate(invalid="ignore"):
  2056. sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
  2057. y = stats.sem(self.scalar_testcase)
  2058. assert_(np.isnan(y))
  2059. y = stats.sem(self.testcase)
  2060. assert_approx_equal(y, 0.6454972244)
  2061. n = len(self.testcase)
  2062. assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
  2063. stats.sem(self.testcase, ddof=2))
  2064. x = np.arange(10.)
  2065. x[9] = np.nan
  2066. assert_equal(stats.sem(x), np.nan)
  2067. assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
  2068. assert_raises(ValueError, stats.sem, x, nan_policy='raise')
  2069. assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
class TestZmapZscore:
    """Tests for `stats.zmap`, `stats.zscore`, and `stats.gzscore`.

    Covers the `axis`, `ddof`, and `nan_policy` keywords, plus degenerate
    inputs (constant rows, all-nan slices, empty arrays).
    """

    @pytest.mark.parametrize(
        'x, y',
        [([1, 2, 3, 4], [1, 2, 3, 4]),
         ([1, 2, 3], [0, 1, 2, 3, 4])]
    )
    def test_zmap(self, x, y):
        z = stats.zmap(x, y)
        # For these simple cases, calculate the expected result directly
        # by using the formula for the z-score.
        expected = (x - np.mean(y))/np.std(y)
        assert_allclose(z, expected, rtol=1e-12)

    def test_zmap_axis(self):
        # Test use of 'axis' keyword in zmap.
        x = np.array([[0.0, 0.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 2.0],
                      [2.0, 0.0, 2.0, 0.0]])

        # Hand-computed z values for columns (t1, t3) and rows (t2, t3).
        t1 = 1.0/np.sqrt(2.0/3)
        t2 = np.sqrt(3.)/3
        t3 = np.sqrt(2.)

        z0 = stats.zmap(x, x, axis=0)
        z1 = stats.zmap(x, x, axis=1)

        z0_expected = [[-t1, -t3/2, -t3/2, 0.0],
                       [0.0, t3, -t3/2, t1],
                       [t1, -t3/2, t3, -t1]]
        z1_expected = [[-1.0, -1.0, 1.0, 1.0],
                       [-t2, -t2, -t2, np.sqrt(3.)],
                       [1.0, -1.0, 1.0, -1.0]]

        assert_array_almost_equal(z0, z0_expected)
        assert_array_almost_equal(z1, z1_expected)

    def test_zmap_ddof(self):
        # Test use of 'ddof' keyword in zmap.
        x = np.array([[0.0, 0.0, 1.0, 1.0],
                      [0.0, 1.0, 2.0, 3.0]])

        z = stats.zmap(x, x, axis=1, ddof=1)

        # Expected values use the sample (ddof=1) standard deviation.
        z0_expected = np.array([-0.5, -0.5, 0.5, 0.5])/(1.0/np.sqrt(3))
        z1_expected = np.array([-1.5, -0.5, 0.5, 1.5])/(np.sqrt(5./3))
        assert_array_almost_equal(z[0], z0_expected)
        assert_array_almost_equal(z[1], z1_expected)

    @pytest.mark.parametrize('ddof', [0, 2])
    def test_zmap_nan_policy_omit(self, ddof):
        # nans in `scores` are propagated, regardless of `nan_policy`.
        # `nan_policy` only affects how nans in `compare` are handled.
        scores = np.array([-3, -1, 2, np.nan])
        compare = np.array([-8, -3, 2, 7, 12, np.nan])
        z = stats.zmap(scores, compare, ddof=ddof, nan_policy='omit')
        # Equivalent to dropping the nans from `compare` by hand.
        assert_allclose(z, stats.zmap(scores, compare[~np.isnan(compare)],
                                      ddof=ddof))

    @pytest.mark.parametrize('ddof', [0, 2])
    def test_zmap_nan_policy_omit_with_axis(self, ddof):
        scores = np.arange(-5.0, 9.0).reshape(2, -1)
        compare = np.linspace(-8, 6, 24).reshape(2, -1)
        compare[0, 4] = np.nan
        compare[0, 6] = np.nan
        compare[1, 1] = np.nan
        z = stats.zmap(scores, compare, nan_policy='omit', axis=1, ddof=ddof)
        # Expected result is built row by row with the nans removed manually.
        expected = np.array([stats.zmap(scores[0],
                                        compare[0][~np.isnan(compare[0])],
                                        ddof=ddof),
                             stats.zmap(scores[1],
                                        compare[1][~np.isnan(compare[1])],
                                        ddof=ddof)])
        assert_allclose(z, expected, rtol=1e-14)

    def test_zmap_nan_policy_raise(self):
        # A nan in `compare` with nan_policy='raise' must raise ValueError.
        scores = np.array([1, 2, 3])
        compare = np.array([-8, -3, 2, 7, 12, np.nan])
        with pytest.raises(ValueError, match='input contains nan'):
            stats.zmap(scores, compare, nan_policy='raise')

    def test_zscore(self):
        # not in R, so tested by using:
        #    (testcase[i] - mean(testcase, axis=0)) / sqrt(var(testcase) * 3/4)
        y = stats.zscore([1, 2, 3, 4])
        desired = ([-1.3416407864999, -0.44721359549996, 0.44721359549996,
                    1.3416407864999])
        assert_array_almost_equal(desired, y, decimal=12)

    def test_zscore_axis(self):
        # Test use of 'axis' keyword in zscore.
        x = np.array([[0.0, 0.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 2.0],
                      [2.0, 0.0, 2.0, 0.0]])

        t1 = 1.0/np.sqrt(2.0/3)
        t2 = np.sqrt(3.)/3
        t3 = np.sqrt(2.)

        z0 = stats.zscore(x, axis=0)
        z1 = stats.zscore(x, axis=1)

        z0_expected = [[-t1, -t3/2, -t3/2, 0.0],
                       [0.0, t3, -t3/2, t1],
                       [t1, -t3/2, t3, -t1]]
        z1_expected = [[-1.0, -1.0, 1.0, 1.0],
                       [-t2, -t2, -t2, np.sqrt(3.)],
                       [1.0, -1.0, 1.0, -1.0]]

        assert_array_almost_equal(z0, z0_expected)
        assert_array_almost_equal(z1, z1_expected)

    def test_zscore_ddof(self):
        # Test use of 'ddof' keyword in zscore.
        x = np.array([[0.0, 0.0, 1.0, 1.0],
                      [0.0, 1.0, 2.0, 3.0]])

        z = stats.zscore(x, axis=1, ddof=1)

        z0_expected = np.array([-0.5, -0.5, 0.5, 0.5])/(1.0/np.sqrt(3))
        z1_expected = np.array([-1.5, -0.5, 0.5, 1.5])/(np.sqrt(5./3))
        assert_array_almost_equal(z[0], z0_expected)
        assert_array_almost_equal(z[1], z1_expected)

    def test_zscore_nan_propagate(self):
        # With 'propagate', a single nan poisons every output element.
        x = np.array([1, 2, np.nan, 4, 5])
        z = stats.zscore(x, nan_policy='propagate')
        assert all(np.isnan(z))

    def test_zscore_nan_omit(self):
        # With 'omit', the nan position stays nan but the other values are
        # standardized using only the non-nan data.
        x = np.array([1, 2, np.nan, 4, 5])

        z = stats.zscore(x, nan_policy='omit')

        expected = np.array([-1.2649110640673518,
                             -0.6324555320336759,
                             np.nan,
                             0.6324555320336759,
                             1.2649110640673518
                             ])
        assert_array_almost_equal(z, expected)

    def test_zscore_nan_omit_with_ddof(self):
        x = np.array([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0])
        z = stats.zscore(x, ddof=1, nan_policy='omit')
        expected = np.r_[np.nan, stats.zscore(x[1:], ddof=1)]
        assert_allclose(z, expected, rtol=1e-13)

    def test_zscore_nan_raise(self):
        x = np.array([1, 2, np.nan, 4, 5])

        assert_raises(ValueError, stats.zscore, x, nan_policy='raise')

    def test_zscore_constant_input_1d(self):
        # Constant input has zero std; the result is all nan.
        x = [-0.087] * 3
        z = stats.zscore(x)
        assert_equal(z, np.full(len(x), np.nan))

    def test_zscore_constant_input_2d(self):
        x = np.array([[10.0, 10.0, 10.0, 10.0],
                      [10.0, 11.0, 12.0, 13.0]])
        z0 = stats.zscore(x, axis=0)
        # Only the constant column (index 0) becomes nan.
        assert_equal(z0, np.array([[np.nan, -1.0, -1.0, -1.0],
                                   [np.nan, 1.0, 1.0, 1.0]]))
        z1 = stats.zscore(x, axis=1)
        assert_equal(z1, np.array([[np.nan, np.nan, np.nan, np.nan],
                                   stats.zscore(x[1])]))
        z = stats.zscore(x, axis=None)
        assert_equal(z, stats.zscore(x.ravel()).reshape(x.shape))
        y = np.ones((3, 6))
        z = stats.zscore(y, axis=None)
        assert_equal(z, np.full(y.shape, np.nan))

    def test_zscore_constant_input_2d_nan_policy_omit(self):
        x = np.array([[10.0, 10.0, 10.0, 10.0],
                      [10.0, 11.0, 12.0, np.nan],
                      [10.0, 12.0, np.nan, 10.0]])
        z0 = stats.zscore(x, nan_policy='omit', axis=0)
        # Columns that are constant after omitting nans are nan in the result.
        s = np.sqrt(3/2)
        s2 = np.sqrt(2)
        assert_allclose(z0, np.array([[np.nan, -s, -1.0, np.nan],
                                      [np.nan, 0, 1.0, np.nan],
                                      [np.nan, s, np.nan, np.nan]]))
        z1 = stats.zscore(x, nan_policy='omit', axis=1)
        assert_allclose(z1, np.array([[np.nan, np.nan, np.nan, np.nan],
                                      [-s, 0, s, np.nan],
                                      [-s2/2, s2, np.nan, -s2/2]]))

    def test_zscore_2d_all_nan_row(self):
        # A row is all nan, and we use axis=1.
        x = np.array([[np.nan, np.nan, np.nan, np.nan],
                      [10.0, 10.0, 12.0, 12.0]])
        z = stats.zscore(x, nan_policy='omit', axis=1)
        assert_equal(z, np.array([[np.nan, np.nan, np.nan, np.nan],
                                  [-1.0, -1.0, 1.0, 1.0]]))

    def test_zscore_2d_all_nan(self):
        # The entire 2d array is nan, and we use axis=None.
        y = np.full((2, 3), np.nan)
        z = stats.zscore(y, nan_policy='omit', axis=None)
        assert_equal(z, y)

    @pytest.mark.parametrize('x', [np.array([]), np.zeros((3, 0, 5))])
    def test_zscore_empty_input(self, x):
        # Empty input is returned unchanged (same shape, no error).
        z = stats.zscore(x)
        assert_equal(z, x)

    def test_gzscore_normal_array(self):
        z = stats.gzscore([1, 2, 3, 4])
        desired = ([-1.526072095151, -0.194700599824, 0.584101799472,
                    1.136670895503])
        assert_allclose(desired, z)

    def test_gzscore_masked_array(self):
        x = np.array([1, 2, -1, 3, 4])
        mx = np.ma.masked_array(x, mask=[0, 0, 1, 0, 0])
        z = stats.gzscore(mx)
        # The masked (negative) entry surfaces as inf in the filled result.
        desired = ([-1.526072095151, -0.194700599824, np.inf, 0.584101799472,
                    1.136670895503])
        assert_allclose(desired, z)
  2254. class TestMedianAbsDeviation:
  2255. def setup_class(self):
  2256. self.dat_nan = np.array([2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9,
  2257. 3.03, 3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5,
  2258. 3.6, 3.7, 3.7, 3.7, 3.7, 3.77, 5.28, np.nan])
  2259. self.dat = np.array([2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9, 3.03,
  2260. 3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5, 3.6, 3.7,
  2261. 3.7, 3.7, 3.7, 3.77, 5.28, 28.95])
  2262. def test_median_abs_deviation(self):
  2263. assert_almost_equal(stats.median_abs_deviation(self.dat, axis=None),
  2264. 0.355)
  2265. dat = self.dat.reshape(6, 4)
  2266. mad = stats.median_abs_deviation(dat, axis=0)
  2267. mad_expected = np.asarray([0.435, 0.5, 0.45, 0.4])
  2268. assert_array_almost_equal(mad, mad_expected)
  2269. def test_mad_nan_omit(self):
  2270. mad = stats.median_abs_deviation(self.dat_nan, nan_policy='omit')
  2271. assert_almost_equal(mad, 0.34)
  2272. def test_axis_and_nan(self):
  2273. x = np.array([[1.0, 2.0, 3.0, 4.0, np.nan],
  2274. [1.0, 4.0, 5.0, 8.0, 9.0]])
  2275. mad = stats.median_abs_deviation(x, axis=1)
  2276. assert_equal(mad, np.array([np.nan, 3.0]))
  2277. def test_nan_policy_omit_with_inf(sef):
  2278. z = np.array([1, 3, 4, 6, 99, np.nan, np.inf])
  2279. mad = stats.median_abs_deviation(z, nan_policy='omit')
  2280. assert_equal(mad, 3.0)
  2281. @pytest.mark.parametrize('axis', [0, 1, 2, None])
  2282. def test_size_zero_with_axis(self, axis):
  2283. x = np.zeros((3, 0, 4))
  2284. mad = stats.median_abs_deviation(x, axis=axis)
  2285. assert_equal(mad, np.full_like(x.sum(axis=axis), fill_value=np.nan))
  2286. @pytest.mark.parametrize('nan_policy, expected',
  2287. [('omit', np.array([np.nan, 1.5, 1.5])),
  2288. ('propagate', np.array([np.nan, np.nan, 1.5]))])
  2289. def test_nan_policy_with_axis(self, nan_policy, expected):
  2290. x = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  2291. [1, 5, 3, 6, np.nan, np.nan],
  2292. [5, 6, 7, 9, 9, 10]])
  2293. mad = stats.median_abs_deviation(x, nan_policy=nan_policy, axis=1)
  2294. assert_equal(mad, expected)
  2295. @pytest.mark.parametrize('axis, expected',
  2296. [(1, [2.5, 2.0, 12.0]), (None, 4.5)])
  2297. def test_center_mean_with_nan(self, axis, expected):
  2298. x = np.array([[1, 2, 4, 9, np.nan],
  2299. [0, 1, 1, 1, 12],
  2300. [-10, -10, -10, 20, 20]])
  2301. mad = stats.median_abs_deviation(x, center=np.mean, nan_policy='omit',
  2302. axis=axis)
  2303. assert_allclose(mad, expected, rtol=1e-15, atol=1e-15)
  2304. def test_center_not_callable(self):
  2305. with pytest.raises(TypeError, match='callable'):
  2306. stats.median_abs_deviation([1, 2, 3, 5], center=99)
  2307. def _check_warnings(warn_list, expected_type, expected_len):
  2308. """
  2309. Checks that all of the warnings from a list returned by
  2310. `warnings.catch_all(record=True)` are of the required type and that the list
  2311. contains expected number of warnings.
  2312. """
  2313. assert_equal(len(warn_list), expected_len, "number of warnings")
  2314. for warn_ in warn_list:
  2315. assert_(warn_.category is expected_type)
class TestIQR:
    """Tests for `stats.iqr`: axis handling, rng/interpolation/scale keywords,
    keepdims, and nan_policy behavior."""

    def test_basic(self):
        x = np.arange(8) * 0.5
        np.random.shuffle(x)
        assert_equal(stats.iqr(x), 1.75)

    def test_api(self):
        # Smoke test: all documented positional argument combinations are
        # accepted without error.
        d = np.ones((5, 5))
        stats.iqr(d)
        stats.iqr(d, None)
        stats.iqr(d, 1)
        stats.iqr(d, (0, 1))
        stats.iqr(d, None, (10, 90))
        stats.iqr(d, None, (30, 20), 1.0)
        stats.iqr(d, None, (25, 75), 1.5, 'propagate')
        stats.iqr(d, None, (50, 50), 'normal', 'raise', 'linear')
        stats.iqr(d, None, (25, 75), -0.4, 'omit', 'lower', True)

    def test_empty(self):
        # Empty input yields nan, not an exception.
        assert_equal(stats.iqr([]), np.nan)
        assert_equal(stats.iqr(np.arange(0)), np.nan)

    def test_constant(self):
        # Constant array always gives 0
        x = np.ones((7, 4))
        assert_equal(stats.iqr(x), 0.0)
        assert_array_equal(stats.iqr(x, axis=0), np.zeros(4))
        assert_array_equal(stats.iqr(x, axis=1), np.zeros(7))
        assert_equal(stats.iqr(x, interpolation='linear'), 0.0)
        assert_equal(stats.iqr(x, interpolation='midpoint'), 0.0)
        assert_equal(stats.iqr(x, interpolation='nearest'), 0.0)
        assert_equal(stats.iqr(x, interpolation='lower'), 0.0)
        assert_equal(stats.iqr(x, interpolation='higher'), 0.0)

        # 0 only along constant dimensions
        # This also tests much of `axis`
        y = np.ones((4, 5, 6)) * np.arange(6)
        assert_array_equal(stats.iqr(y, axis=0), np.zeros((5, 6)))
        assert_array_equal(stats.iqr(y, axis=1), np.zeros((4, 6)))
        assert_array_equal(stats.iqr(y, axis=2), np.full((4, 5), 2.5))
        assert_array_equal(stats.iqr(y, axis=(0, 1)), np.zeros(6))
        assert_array_equal(stats.iqr(y, axis=(0, 2)), np.full(5, 3.))
        assert_array_equal(stats.iqr(y, axis=(1, 2)), np.full(4, 3.))

    def test_scalarlike(self):
        # A 0-d / single-element input has zero spread.
        x = np.arange(1) + 7.0
        assert_equal(stats.iqr(x[0]), 0.0)
        assert_equal(stats.iqr(x), 0.0)
        assert_array_equal(stats.iqr(x, keepdims=True), [0.0])

    def test_2D(self):
        x = np.arange(15).reshape((3, 5))
        assert_equal(stats.iqr(x), 7.0)
        assert_array_equal(stats.iqr(x, axis=0), np.full(5, 5.))
        assert_array_equal(stats.iqr(x, axis=1), np.full(3, 2.))
        assert_array_equal(stats.iqr(x, axis=(0, 1)), 7.0)
        assert_array_equal(stats.iqr(x, axis=(1, 0)), 7.0)

    def test_axis(self):
        # The `axis` keyword is also put through its paces in `test_keepdims`.
        o = np.random.normal(size=(71, 23))
        x = np.dstack([o] * 10)  # x.shape = (71, 23, 10)
        q = stats.iqr(o)

        # Reducing over the replicated axes must reproduce the 2-d result.
        assert_equal(stats.iqr(x, axis=(0, 1)), q)
        x = np.moveaxis(x, -1, 0)  # x.shape = (10, 71, 23)
        assert_equal(stats.iqr(x, axis=(2, 1)), q)
        x = x.swapaxes(0, 1)  # x.shape = (71, 10, 23)
        assert_equal(stats.iqr(x, axis=(0, 2)), q)
        x = x.swapaxes(0, 1)  # x.shape = (10, 71, 23)

        assert_equal(stats.iqr(x, axis=(0, 1, 2)),
                     stats.iqr(x, axis=None))
        assert_equal(stats.iqr(x, axis=(0,)),
                     stats.iqr(x, axis=0))

        d = np.arange(3 * 5 * 7 * 11)
        # Older versions of numpy only shuffle along axis=0.
        # Not sure about newer, don't care.
        np.random.shuffle(d)
        d = d.reshape((3, 5, 7, 11))
        # Cross-check multi-axis reductions against explicit raveled slices.
        assert_equal(stats.iqr(d, axis=(0, 1, 2))[0],
                     stats.iqr(d[:,:,:, 0].ravel()))
        assert_equal(stats.iqr(d, axis=(0, 1, 3))[1],
                     stats.iqr(d[:,:, 1,:].ravel()))
        assert_equal(stats.iqr(d, axis=(3, 1, -4))[2],
                     stats.iqr(d[:,:, 2,:].ravel()))
        assert_equal(stats.iqr(d, axis=(3, 1, 2))[2],
                     stats.iqr(d[2,:,:,:].ravel()))
        assert_equal(stats.iqr(d, axis=(3, 2))[2, 1],
                     stats.iqr(d[2, 1,:,:].ravel()))
        assert_equal(stats.iqr(d, axis=(1, -2))[2, 1],
                     stats.iqr(d[2, :, :, 1].ravel()))
        assert_equal(stats.iqr(d, axis=(1, 3))[2, 2],
                     stats.iqr(d[2, :, 2,:].ravel()))

        # NOTE(review): `np.AxisError` was removed from the top-level numpy
        # namespace in NumPy 2.0 (now `np.exceptions.AxisError`) — confirm
        # against the supported NumPy range.
        assert_raises(np.AxisError, stats.iqr, d, axis=4)
        assert_raises(ValueError, stats.iqr, d, axis=(0, 0))

    def test_rng(self):
        # `rng` selects the percentile pair defining the range.
        x = np.arange(5)
        assert_equal(stats.iqr(x), 2)
        assert_equal(stats.iqr(x, rng=(25, 87.5)), 2.5)
        assert_equal(stats.iqr(x, rng=(12.5, 75)), 2.5)
        assert_almost_equal(stats.iqr(x, rng=(10, 50)), 1.6)  # 3-1.4

        assert_raises(ValueError, stats.iqr, x, rng=(0, 101))
        assert_raises(ValueError, stats.iqr, x, rng=(np.nan, 25))
        assert_raises(TypeError, stats.iqr, x, rng=(0, 50, 60))

    def test_interpolation(self):
        x = np.arange(5)
        y = np.arange(4)

        # Default
        assert_equal(stats.iqr(x), 2)
        assert_equal(stats.iqr(y), 1.5)

        # Linear
        assert_equal(stats.iqr(x, interpolation='linear'), 2)
        assert_equal(stats.iqr(y, interpolation='linear'), 1.5)

        # Higher
        assert_equal(stats.iqr(x, interpolation='higher'), 2)
        assert_equal(stats.iqr(x, rng=(25, 80), interpolation='higher'), 3)
        assert_equal(stats.iqr(y, interpolation='higher'), 2)

        # Lower (will generally, but not always be the same as higher)
        assert_equal(stats.iqr(x, interpolation='lower'), 2)
        assert_equal(stats.iqr(x, rng=(25, 80), interpolation='lower'), 2)
        assert_equal(stats.iqr(y, interpolation='lower'), 2)

        # Nearest
        assert_equal(stats.iqr(x, interpolation='nearest'), 2)
        assert_equal(stats.iqr(y, interpolation='nearest'), 1)

        # Midpoint
        assert_equal(stats.iqr(x, interpolation='midpoint'), 2)
        assert_equal(stats.iqr(x, rng=(25, 80), interpolation='midpoint'), 2.5)
        assert_equal(stats.iqr(y, interpolation='midpoint'), 2)

        # Check all method= values new in numpy 1.22.0 are accepted
        if NumpyVersion(np.__version__) >= '1.22.0':
            for method in ('inverted_cdf', 'averaged_inverted_cdf',
                           'closest_observation', 'interpolated_inverted_cdf',
                           'hazen', 'weibull', 'median_unbiased',
                           'normal_unbiased'):
                stats.iqr(y, interpolation=method)

        assert_raises(ValueError, stats.iqr, x, interpolation='foobar')

    def test_keepdims(self):
        # Also tests most of `axis`
        x = np.ones((3, 5, 7, 11))
        assert_equal(stats.iqr(x, axis=None, keepdims=False).shape, ())
        assert_equal(stats.iqr(x, axis=2, keepdims=False).shape, (3, 5, 11))
        assert_equal(stats.iqr(x, axis=(0, 1), keepdims=False).shape, (7, 11))
        assert_equal(stats.iqr(x, axis=(0, 3), keepdims=False).shape, (5, 7))
        assert_equal(stats.iqr(x, axis=(1,), keepdims=False).shape, (3, 7, 11))
        assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=False).shape, ())
        assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=False).shape, (7,))

        # With keepdims=True, reduced axes remain with length 1.
        assert_equal(stats.iqr(x, axis=None, keepdims=True).shape, (1, 1, 1, 1))
        assert_equal(stats.iqr(x, axis=2, keepdims=True).shape, (3, 5, 1, 11))
        assert_equal(stats.iqr(x, axis=(0, 1), keepdims=True).shape, (1, 1, 7, 11))
        assert_equal(stats.iqr(x, axis=(0, 3), keepdims=True).shape, (1, 5, 7, 1))
        assert_equal(stats.iqr(x, axis=(1,), keepdims=True).shape, (3, 1, 7, 11))
        assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=True).shape, (1, 1, 1, 1))
        assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=True).shape, (1, 1, 7, 1))

    def test_nanpolicy(self):
        x = np.arange(15.0).reshape((3, 5))

        # No NaNs
        assert_equal(stats.iqr(x, nan_policy='propagate'), 7)
        assert_equal(stats.iqr(x, nan_policy='omit'), 7)
        assert_equal(stats.iqr(x, nan_policy='raise'), 7)

        # Yes NaNs
        x[1, 2] = np.nan
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            assert_equal(stats.iqr(x, nan_policy='propagate'), np.nan)
            assert_equal(stats.iqr(x, axis=0, nan_policy='propagate'), [5, 5, np.nan, 5, 5])
            assert_equal(stats.iqr(x, axis=1, nan_policy='propagate'), [2, np.nan, 2])

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            assert_equal(stats.iqr(x, nan_policy='omit'), 7.5)
            assert_equal(stats.iqr(x, axis=0, nan_policy='omit'), np.full(5, 5))
            assert_equal(stats.iqr(x, axis=1, nan_policy='omit'), [2, 2.5, 2])

        assert_raises(ValueError, stats.iqr, x, nan_policy='raise')
        assert_raises(ValueError, stats.iqr, x, axis=0, nan_policy='raise')
        assert_raises(ValueError, stats.iqr, x, axis=1, nan_policy='raise')

        # Bad policy
        assert_raises(ValueError, stats.iqr, x, nan_policy='barfood')

    def test_scale(self):
        x = np.arange(15.0).reshape((3, 5))

        # No NaNs
        assert_equal(stats.iqr(x, scale=1.0), 7)
        # 1.3489795 is the normal-distribution IQR width (scale='normal').
        assert_almost_equal(stats.iqr(x, scale='normal'), 7 / 1.3489795)
        assert_equal(stats.iqr(x, scale=2.0), 3.5)

        # Yes NaNs
        x[1, 2] = np.nan
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            assert_equal(stats.iqr(x, scale=1.0, nan_policy='propagate'), np.nan)
            assert_equal(stats.iqr(x, scale='normal', nan_policy='propagate'), np.nan)
            assert_equal(stats.iqr(x, scale=2.0, nan_policy='propagate'), np.nan)
            # axis=1 chosen to show behavior with both nans and without
            assert_equal(stats.iqr(x, axis=1, scale=1.0,
                                   nan_policy='propagate'), [2, np.nan, 2])
            assert_almost_equal(stats.iqr(x, axis=1, scale='normal',
                                          nan_policy='propagate'),
                                np.array([2, np.nan, 2]) / 1.3489795)
            assert_equal(stats.iqr(x, axis=1, scale=2.0, nan_policy='propagate'),
                         [1, np.nan, 1])
            # Since NumPy 1.17.0.dev, warnings are no longer emitted by
            # np.percentile with nans, so we don't check the number of
            # warnings here. See https://github.com/numpy/numpy/pull/12679.

        assert_equal(stats.iqr(x, scale=1.0, nan_policy='omit'), 7.5)
        assert_almost_equal(stats.iqr(x, scale='normal', nan_policy='omit'),
                            7.5 / 1.3489795)
        assert_equal(stats.iqr(x, scale=2.0, nan_policy='omit'), 3.75)

        # Bad scale
        assert_raises(ValueError, stats.iqr, x, scale='foobar')

        with pytest.warns(
            DeprecationWarning,
            match="The use of 'scale=\"raw\"'"
        ):
            stats.iqr([1], scale='raw')
class TestMoments:
    """
    Comparison numbers are found using R v.1.5.1
    note that length(testcase) = 4
    testmathworks comes from documentation for the
    Statistics Toolbox for Matlab and can be found at both
    https://www.mathworks.com/help/stats/kurtosis.html
    https://www.mathworks.com/help/stats/skewness.html
    Note that both test cases came from here.
    """
    testcase = [1,2,3,4]
    scalar_testcase = 4.
    # Seed the global RNG at class-definition time so that
    # `testcase_moment_accuracy` is reproducible across runs.
    np.random.seed(1234)
    testcase_moment_accuracy = np.random.rand(42)
    testmathworks = [1.165, 0.6268, 0.0751, 0.3516, -0.6965]

    def _assert_equal(self, actual, expect, *, shape=None, dtype=None):
        # Helper: check values and (optionally broadcast) shape, and verify
        # the result dtype matches `dtype` (defaults to expect's dtype).
        expect = np.asarray(expect)
        if shape is not None:
            expect = np.broadcast_to(expect, shape)
        assert_array_equal(actual, expect)
        if dtype is None:
            dtype = expect.dtype
        assert actual.dtype == dtype

    def test_moment(self):
        # mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))
        y = stats.moment(self.scalar_testcase)
        assert_approx_equal(y, 0.0)
        y = stats.moment(self.testcase, 0)
        assert_approx_equal(y, 1.0)
        y = stats.moment(self.testcase, 1)
        assert_approx_equal(y, 0.0, 10)
        y = stats.moment(self.testcase, 2)
        assert_approx_equal(y, 1.25)
        y = stats.moment(self.testcase, 3)
        assert_approx_equal(y, 0.0)
        y = stats.moment(self.testcase, 4)
        assert_approx_equal(y, 2.5625)

        # check array_like input for moment
        y = stats.moment(self.testcase, [1, 2, 3, 4])
        assert_allclose(y, [0, 1.25, 0, 2.5625])

        # check moment input consists only of integers
        y = stats.moment(self.testcase, 0.0)
        assert_approx_equal(y, 1.0)
        assert_raises(ValueError, stats.moment, self.testcase, 1.2)
        y = stats.moment(self.testcase, [1.0, 2, 3, 4.0])
        assert_allclose(y, [0, 1.25, 0, 2.5625])

        # test empty input
        message = "Mean of empty slice."
        with pytest.warns(RuntimeWarning, match=message):
            y = stats.moment([])
            self._assert_equal(y, np.nan, dtype=np.float64)
            y = stats.moment(np.array([], dtype=np.float32))
            self._assert_equal(y, np.nan, dtype=np.float32)
            y = stats.moment(np.zeros((1, 0)), axis=0)
            self._assert_equal(y, [], shape=(0,), dtype=np.float64)
            y = stats.moment([[]], axis=1)
            self._assert_equal(y, np.nan, shape=(1,), dtype=np.float64)
            y = stats.moment([[]], moment=[0, 1], axis=0)
            self._assert_equal(y, [], shape=(2, 0))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.moment(x, 2), np.nan)
        assert_almost_equal(stats.moment(x, nan_policy='omit'), 0.0)
        assert_raises(ValueError, stats.moment, x, nan_policy='raise')
        assert_raises(ValueError, stats.moment, x, nan_policy='foobar')

    @pytest.mark.parametrize('dtype', [np.float32, np.float64, np.complex128])
    @pytest.mark.parametrize('expect, moment', [(0, 1), (1, 0)])
    def test_constant_moments(self, dtype, expect, moment):
        # moment 1 is always 0, moment 0 is always 1; dtype is preserved.
        x = np.random.rand(5).astype(dtype)
        y = stats.moment(x, moment=moment)
        self._assert_equal(y, expect, dtype=dtype)

        y = stats.moment(np.broadcast_to(x, (6, 5)), axis=0, moment=moment)
        self._assert_equal(y, expect, shape=(5,), dtype=dtype)

        y = stats.moment(np.broadcast_to(x, (1, 2, 3, 4, 5)), axis=2,
                         moment=moment)
        self._assert_equal(y, expect, shape=(1, 2, 4, 5), dtype=dtype)

        y = stats.moment(np.broadcast_to(x, (1, 2, 3, 4, 5)), axis=None,
                         moment=moment)
        self._assert_equal(y, expect, shape=(), dtype=dtype)

    def test_moment_propagate_nan(self):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = np.arange(8).reshape(2, -1).astype(float)
        a[1, 0] = np.nan
        mm = stats.moment(a, 2, axis=1, nan_policy="propagate")
        np.testing.assert_allclose(mm, [1.25, np.nan], atol=1e-15)

    def test_moment_empty_moment(self):
        # tests moment with empty `moment` list
        with pytest.raises(ValueError, match=r"'moment' must be a scalar or a"
                                             r" non-empty 1D list/array."):
            stats.moment([1, 2, 3, 4], moment=[])

    def test_skewness(self):
        # Scalar test case
        with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            y = stats.skew(self.scalar_testcase)
            assert np.isnan(y)
        # sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0) /
        #     ((sqrt(var(testmathworks)*4/5))**3)/5
        y = stats.skew(self.testmathworks)
        assert_approx_equal(y, -0.29322304336607, 10)
        y = stats.skew(self.testmathworks, bias=0)
        assert_approx_equal(y, -0.437111105023940, 10)
        y = stats.skew(self.testcase)
        assert_approx_equal(y, 0.0, 10)

        x = np.arange(10.)
        x[9] = np.nan
        with np.errstate(invalid='ignore'):
            assert_equal(stats.skew(x), np.nan)
        assert_equal(stats.skew(x, nan_policy='omit'), 0.)
        assert_raises(ValueError, stats.skew, x, nan_policy='raise')
        assert_raises(ValueError, stats.skew, x, nan_policy='foobar')

    def test_skewness_scalar(self):
        # `skew` must return a scalar for 1-dim input
        assert_equal(stats.skew(arange(10)), 0.0)

    def test_skew_propagate_nan(self):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = np.arange(8).reshape(2, -1).astype(float)
        a[1, 0] = np.nan
        with np.errstate(invalid='ignore'):
            s = stats.skew(a, axis=1, nan_policy="propagate")
        np.testing.assert_allclose(s, [0, np.nan], atol=1e-15)

    def test_skew_constant_value(self):
        # Skewness of a constant input should be zero even when the mean is not
        # exact (gh-13245)
        with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            a = np.repeat(-0.27829495, 10)
            assert np.isnan(stats.skew(a))
            assert np.isnan(stats.skew(a * float(2**50)))
            assert np.isnan(stats.skew(a / float(2**50)))
            assert np.isnan(stats.skew(a, bias=False))

            # similarly, from gh-11086:
            assert np.isnan(stats.skew([14.3]*7))
            assert np.isnan(stats.skew(1 + np.arange(-3, 4)*1e-16))

    def test_kurtosis(self):
        # Scalar test case
        with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            y = stats.kurtosis(self.scalar_testcase)
            assert np.isnan(y)
        #   sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
        #   sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
        #   Set flags for axis = 0 and
        #   fisher=0 (Pearson's defn of kurtosis for compatibility with Matlab)
        y = stats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
        assert_approx_equal(y, 2.1658856802973, 10)

        # Note that MATLAB has confusing docs for the following case
        #  kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
        #  kurtosis(x)  gives a biased estimate of Fisher's skewness (Pearson-3)
        #  The MATLAB docs imply that both should give Fisher's
        y = stats.kurtosis(self.testmathworks, fisher=0, bias=0)
        assert_approx_equal(y, 3.663542721189047, 10)
        y = stats.kurtosis(self.testcase, 0, 0)
        assert_approx_equal(y, 1.64)

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.kurtosis(x), np.nan)
        assert_almost_equal(stats.kurtosis(x, nan_policy='omit'), -1.230000)
        assert_raises(ValueError, stats.kurtosis, x, nan_policy='raise')
        assert_raises(ValueError, stats.kurtosis, x, nan_policy='foobar')

    def test_kurtosis_array_scalar(self):
        # kurtosis of a 1-d input must be a plain Python float.
        assert_equal(type(stats.kurtosis([1,2,3])), float)

    def test_kurtosis_propagate_nan(self):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = np.arange(8).reshape(2, -1).astype(float)
        a[1, 0] = np.nan
        k = stats.kurtosis(a, axis=1, nan_policy="propagate")
        np.testing.assert_allclose(k, [-1.36, np.nan], atol=1e-15)

    def test_kurtosis_constant_value(self):
        # Kurtosis of a constant input should be zero, even when the mean is not
        # exact (gh-13245)
        a = np.repeat(-0.27829495, 10)
        with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            assert np.isnan(stats.kurtosis(a, fisher=False))
            assert np.isnan(stats.kurtosis(a * float(2**50), fisher=False))
            assert np.isnan(stats.kurtosis(a / float(2**50), fisher=False))
            assert np.isnan(stats.kurtosis(a, fisher=False, bias=False))

    def test_moment_accuracy(self):
        # 'moment' must have a small enough error compared to the slower
        #  but very accurate numpy.power() implementation.
        tc_no_mean = self.testcase_moment_accuracy - \
                     np.mean(self.testcase_moment_accuracy)
        assert_allclose(np.power(tc_no_mean, 42).mean(),
                        stats.moment(self.testcase_moment_accuracy, 42))

    def test_precision_loss_gh15554(self):
        # gh-15554 was one of several issues that have reported problems with
        # constant or near-constant input. We can't always fix these, but
        # make sure there's a warning.
        with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            rng = np.random.default_rng(34095309370)
            a = rng.random(size=(100, 10))
            a[:, 0] = 1.01
            stats.skew(a)[0]

    def test_empty_1d(self):
        # Empty input warns (mean of empty slice) rather than raising.
        message = "Mean of empty slice."
        with pytest.warns(RuntimeWarning, match=message):
            stats.skew([])
        with pytest.warns(RuntimeWarning, match=message):
            stats.kurtosis([])
class TestStudentTest:
    """Tests for `stats.ttest_1samp`.

    The T*_*/P*_* class attributes are expected (statistic, p-value) pairs:
    e.g. T1_1/P1_1 is the result of ttest_1samp(X1, 1). The *_l/*_g values
    are one-sided p-values derived from the two-sided P1_1.
    """
    X1 = np.array([-1, 0, 1])
    X2 = np.array([0, 1, 2])
    T1_0 = 0
    P1_0 = 1
    T1_1 = -1.7320508075
    P1_1 = 0.22540333075
    T1_2 = -3.464102
    P1_2 = 0.0741799
    T2_0 = 1.732051
    P2_0 = 0.2254033
    P1_1_l = P1_1 / 2
    P1_1_g = 1 - (P1_1 / 2)

    def test_onesample(self):
        # A scalar sample has no variance: statistic and p-value are nan,
        # and a precision-loss RuntimeWarning is expected.
        with suppress_warnings() as sup, np.errstate(invalid="ignore"), \
                pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            t, p = stats.ttest_1samp(4., 3.)
        assert_(np.isnan(t))
        assert_(np.isnan(p))

        t, p = stats.ttest_1samp(self.X1, 0)

        assert_array_almost_equal(t, self.T1_0)
        assert_array_almost_equal(p, self.P1_0)
        # Result is a named tuple with 'statistic' and 'pvalue' fields.
        res = stats.ttest_1samp(self.X1, 0)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

        t, p = stats.ttest_1samp(self.X2, 0)

        assert_array_almost_equal(t, self.T2_0)
        assert_array_almost_equal(p, self.P2_0)

        t, p = stats.ttest_1samp(self.X1, 1)

        assert_array_almost_equal(t, self.T1_1)
        assert_array_almost_equal(p, self.P1_1)

        t, p = stats.ttest_1samp(self.X1, 2)

        assert_array_almost_equal(t, self.T1_2)
        assert_array_almost_equal(p, self.P1_2)

        # check nan policy
        x = stats.norm.rvs(loc=5, scale=10, size=51, random_state=7654567)
        x[50] = np.nan
        with np.errstate(invalid="ignore"):
            assert_array_equal(stats.ttest_1samp(x, 5.0), (np.nan, np.nan))

            assert_array_almost_equal(stats.ttest_1samp(x, 5.0,
                                                        nan_policy='omit'),
                                      (-1.6412624074367159,
                                       0.107147027334048005))
            assert_raises(ValueError, stats.ttest_1samp, x, 5.0,
                          nan_policy='raise')
            assert_raises(ValueError, stats.ttest_1samp, x, 5.0,
                          nan_policy='foobar')

    def test_1samp_alternative(self):
        # Invalid `alternative` raises; 'less'/'greater' halve the two-sided
        # p-value while leaving the statistic unchanged.
        assert_raises(ValueError, stats.ttest_1samp, self.X1, 0,
                      alternative="error")

        t, p = stats.ttest_1samp(self.X1, 1, alternative="less")
        assert_allclose(p, self.P1_1_l)
        assert_allclose(t, self.T1_1)

        t, p = stats.ttest_1samp(self.X1, 1, alternative="greater")
        assert_allclose(p, self.P1_1_g)
        assert_allclose(t, self.T1_1)

    @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
    def test_1samp_ci_1d(self, alternative):
        # test confidence interval method against reference values
        rng = np.random.default_rng(8066178009154342972)
        n = 10
        x = rng.normal(size=n, loc=1.5, scale=2)
        popmean = rng.normal()  # this shouldn't affect confidence interval

        # Reference values generated with R t.test:
        # options(digits=16)
        # x = c(2.75532884,  0.93892217,  0.94835861,  1.49489446, -0.62396595,
        #      -1.88019867, -1.55684465,  4.88777104,  5.15310979,  4.34656348)
        # t.test(x, conf.level=0.85, alternative='l')

        ref = {'two-sided': [0.3594423211709136, 2.9333455028290860],
               'greater': [0.7470806207371626, np.inf],
               'less': [-np.inf, 2.545707203262837]}
        res = stats.ttest_1samp(x, popmean=popmean, alternative=alternative)
        ci = res.confidence_interval(confidence_level=0.85)
        assert_allclose(ci, ref[alternative])
        assert_equal(res.df, n-1)

    def test_1samp_ci_iv(self):
        # test `confidence_interval` method input validation
        res = stats.ttest_1samp(np.arange(10), 0)
        message = '`confidence_level` must be a number between 0 and 1.'
        with pytest.raises(ValueError, match=message):
            res.confidence_interval(confidence_level=10)
class TestPercentileOfScore:
    """Tests for `stats.percentileofscore` across the four `kind` rules."""

    def f(self, *args, **kwargs):
        # Single indirection point so every test exercises the same callable.
        return stats.percentileofscore(*args, **kwargs)

    @pytest.mark.parametrize("kind, result", [("rank", 40),
                                              ("mean", 35),
                                              ("strict", 30),
                                              ("weak", 40)])
    def test_unique(self, kind, result):
        # The score (4) occurs exactly once in the data.
        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        assert_equal(self.f(a, 4, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 45),
                                              ("mean", 40),
                                              ("strict", 30),
                                              ("weak", 50)])
    def test_multiple2(self, kind, result):
        # The score (4) occurs twice.
        a = [1, 2, 3, 4, 4, 5, 6, 7, 8, 9]
        assert_equal(self.f(a, 4, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 50),
                                              ("mean", 45),
                                              ("strict", 30),
                                              ("weak", 60)])
    def test_multiple3(self, kind, result):
        # The score (4) occurs three times.
        a = [1, 2, 3, 4, 4, 4, 5, 6, 7, 8]
        assert_equal(self.f(a, 4, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 30),
                                              ("mean", 30),
                                              ("strict", 30),
                                              ("weak", 30)])
    def test_missing(self, kind, result):
        # The score (4) does not occur in the data; all kinds agree.
        a = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11]
        assert_equal(self.f(a, 4, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 40),
                                              ("mean", 35),
                                              ("strict", 30),
                                              ("weak", 40)])
    def test_large_numbers(self, kind, result):
        # Same layout as test_unique, scaled by 10.
        a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        assert_equal(self.f(a, 40, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 50),
                                              ("mean", 45),
                                              ("strict", 30),
                                              ("weak", 60)])
    def test_large_numbers_multiple3(self, kind, result):
        # Same layout as test_multiple3, scaled by 10.
        a = [10, 20, 30, 40, 40, 40, 50, 60, 70, 80]
        assert_equal(self.f(a, 40, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", 30),
                                              ("mean", 30),
                                              ("strict", 30),
                                              ("weak", 30)])
    def test_large_numbers_missing(self, kind, result):
        # Same layout as test_missing, scaled by 10.
        a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110]
        assert_equal(self.f(a, 40, kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100, 100]),
                                              ("mean", [0, 5, 95, 100]),
                                              ("strict", [0, 0, 90, 100]),
                                              ("weak", [0, 10, 100, 100])])
    def test_boundaries(self, kind, result):
        # Scores below the minimum, at the extremes, and above the maximum.
        a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110]
        assert_equal(self.f(a, [0, 10, 110, 200], kind=kind), result)

    @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100]),
                                              ("mean", [0, 5, 95]),
                                              ("strict", [0, 0, 90]),
                                              ("weak", [0, 10, 100])])
    def test_inf(self, kind, result):
        # Infinite values both in the data and among the scores.
        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, +np.inf]
        assert_equal(self.f(a, [-np.inf, 1, +np.inf], kind=kind), result)

    # nan handling: (nan_policy, data, score(s), expected percentile(s))
    cases = [("propagate", [], 1, np.nan),
             ("propagate", [np.nan], 1, np.nan),
             ("propagate", [np.nan], [0, 1, 2], [np.nan, np.nan, np.nan]),
             ("propagate", [1, 2], [1, 2, np.nan], [50, 100, np.nan]),
             ("omit", [1, 2, np.nan], [0, 1, 2], [0, 50, 100]),
             ("omit", [1, 2], [0, 1, np.nan], [0, 50, np.nan]),
             ("omit", [np.nan, np.nan], [0, 1, 2], [np.nan, np.nan, np.nan])]

    @pytest.mark.parametrize("policy, a, score, result", cases)
    def test_nans_ok(self, policy, a, score, result):
        assert_equal(self.f(a, score, nan_policy=policy), result)

    # nan handling cases that must raise
    cases = [
        ("raise", [1, 2, 3, np.nan], [1, 2, 3],
         "The input contains nan values"),
        ("raise", [1, 2, 3], [1, 2, 3, np.nan],
         "The input contains nan values"),
    ]

    @pytest.mark.parametrize("policy, a, score, message", cases)
    def test_nans_fail(self, policy, a, score, message):
        with assert_raises(ValueError, match=message):
            self.f(a, score, nan_policy=policy)

    @pytest.mark.parametrize("shape", [
        (6, ),
        (2, 3),
        (2, 1, 3),
        (2, 1, 1, 3),
    ])
    def test_nd(self, shape):
        # The shape of the score array is preserved in the output.
        a = np.array([0, 1, 2, 3, 4, 5])
        scores = a.reshape(shape)
        results = scores*10
        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        assert_equal(self.f(a, scores, kind="rank"), results)
# Reference-case container for the power-divergence tests below: inputs
# (f_obs, f_exp, ddof, axis) plus the expected statistic for each lambda_.
PowerDivCase = namedtuple('Case',  # type: ignore[name-match]
                          ['f_obs', 'f_exp', 'ddof', 'axis',
                           'chi2',     # Pearson's
                           'log',      # G-test (log-likelihood)
                           'mod_log',  # Modified log-likelihood
                           'cr',       # Cressie-Read (lambda=2/3)
                           ])

# The details of the first two elements in power_div_1d_cases are used
# in a test in TestPowerDivergence.  Check that code before making
# any changes here.
power_div_1d_cases = [
    # Use the default f_exp.
    PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=None, ddof=0, axis=None,
                 chi2=4,
                 log=2*(4*np.log(4/8) + 12*np.log(12/8)),
                 mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)),
                 cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)),
    # Give a non-uniform f_exp.
    PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=[2, 16, 12, 2], ddof=0, axis=None,
                 chi2=24,
                 log=2*(4*np.log(4/2) + 8*np.log(8/16) + 8*np.log(8/2)),
                 mod_log=2*(2*np.log(2/4) + 16*np.log(16/8) + 2*np.log(2/8)),
                 cr=(4*((4/2)**(2/3) - 1) + 8*((8/16)**(2/3) - 1) +
                     8*((8/2)**(2/3) - 1))/(5/9)),
    # f_exp is a scalar.
    PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=8, ddof=0, axis=None,
                 chi2=4,
                 log=2*(4*np.log(4/8) + 12*np.log(12/8)),
                 mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)),
                 cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)),
    # f_exp equal to f_obs.
    PowerDivCase(f_obs=[3, 5, 7, 9], f_exp=[3, 5, 7, 9], ddof=0, axis=0,
                 chi2=0, log=0, mod_log=0, cr=0),
    ]

# Degenerate shapes, exercised by TestPowerDivergence.test_empty_cases.
power_div_empty_cases = [
    # Shape is (0,)--a data set with length 0.  The computed
    # test statistic should be 0.
    PowerDivCase(f_obs=[],
                 f_exp=None, ddof=0, axis=0,
                 chi2=0, log=0, mod_log=0, cr=0),
    # Shape is (0, 3).  This is 3 data sets, but each data set has
    # length 0, so the computed test statistic should be [0, 0, 0].
    PowerDivCase(f_obs=np.array([[], [], []]).T,
                 f_exp=None, ddof=0, axis=0,
                 chi2=[0, 0, 0],
                 log=[0, 0, 0],
                 mod_log=[0, 0, 0],
                 cr=[0, 0, 0]),
    # Shape is (3, 0).  This represents an empty collection of
    # data sets in which each data set has length 3.  The test
    # statistic should be an empty array.
    PowerDivCase(f_obs=np.array([[], [], []]),
                 f_exp=None, ddof=0, axis=0,
                 chi2=[],
                 log=[],
                 mod_log=[],
                 cr=[]),
    ]
class TestPowerDivergence:
    """Tests for `stats.power_divergence` (and `chisquare` where lambda_=1)."""

    def check_power_divergence(self, f_obs, f_exp, ddof, axis, lambda_,
                               expected_stat):
        # Shared helper: run power_divergence and compare the statistic and
        # the chi2-based p-value against expectations.
        f_obs = np.asarray(f_obs)
        if axis is None:
            num_obs = f_obs.size
        else:
            # Broadcast obs against exp to find the length of the tested axis.
            b = np.broadcast(f_obs, f_exp)
            num_obs = b.shape[axis]

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "Mean of empty slice")
            stat, p = stats.power_divergence(
                f_obs=f_obs, f_exp=f_exp, ddof=ddof,
                axis=axis, lambda_=lambda_)
            assert_allclose(stat, expected_stat)

            if lambda_ == 1 or lambda_ == "pearson":
                # Also test stats.chisquare.
                stat, p = stats.chisquare(f_obs=f_obs, f_exp=f_exp, ddof=ddof,
                                          axis=axis)
                assert_allclose(stat, expected_stat)

        # p-value computed from the chi2 survival function with
        # num_obs - 1 - ddof degrees of freedom.
        ddof = np.asarray(ddof)
        expected_p = stats.distributions.chi2.sf(expected_stat,
                                                 num_obs - 1 - ddof)
        assert_allclose(p, expected_p)

    def test_basic(self):
        # Every 1-d reference case, for every way of spelling each lambda_.
        for case in power_div_1d_cases:
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                None, case.chi2)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                "pearson", case.chi2)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                1, case.chi2)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                "log-likelihood", case.log)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                "mod-log-likelihood", case.mod_log)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                "cressie-read", case.cr)
            self.check_power_divergence(
                case.f_obs, case.f_exp, case.ddof, case.axis,
                2/3, case.cr)

    def test_basic_masked(self):
        # Same as test_basic, but with f_obs passed as a masked array.
        for case in power_div_1d_cases:
            mobs = np.ma.array(case.f_obs)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                None, case.chi2)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                "pearson", case.chi2)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                1, case.chi2)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                "log-likelihood", case.log)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                "mod-log-likelihood", case.mod_log)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                "cressie-read", case.cr)
            self.check_power_divergence(
                mobs, case.f_exp, case.ddof, case.axis,
                2/3, case.cr)

    def test_axis(self):
        # Stack the first two 1-d cases into a 2-d problem along axis=1.
        case0 = power_div_1d_cases[0]
        case1 = power_div_1d_cases[1]
        f_obs = np.vstack((case0.f_obs, case1.f_obs))
        f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs),
                           case1.f_exp))
        # Check the four computational code paths in power_divergence
        # using a 2D array with axis=1.
        self.check_power_divergence(
            f_obs, f_exp, 0, 1,
            "pearson", [case0.chi2, case1.chi2])
        self.check_power_divergence(
            f_obs, f_exp, 0, 1,
            "log-likelihood", [case0.log, case1.log])
        self.check_power_divergence(
            f_obs, f_exp, 0, 1,
            "mod-log-likelihood", [case0.mod_log, case1.mod_log])
        self.check_power_divergence(
            f_obs, f_exp, 0, 1,
            "cressie-read", [case0.cr, case1.cr])
        # Reshape case0.f_obs to shape (2,2), and use axis=None.
        # The result should be the same.
        self.check_power_divergence(
            np.array(case0.f_obs).reshape(2, 2), None, 0, None,
            "pearson", case0.chi2)

    def test_ddof_broadcasting(self):
        # Test that ddof broadcasts correctly.
        # ddof does not affect the test statistic.  It is broadcast
        # with the computed test statistic for the computation of
        # the p value.
        case0 = power_div_1d_cases[0]
        case1 = power_div_1d_cases[1]
        # Create 4x2 arrays of observed and expected frequencies.
        f_obs = np.vstack((case0.f_obs, case1.f_obs)).T
        f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs),
                           case1.f_exp)).T

        expected_chi2 = [case0.chi2, case1.chi2]

        # ddof has shape (2, 1).  This is broadcast with the computed
        # statistic, so p will have shape (2,2).
        ddof = np.array([[0], [1]])

        stat, p = stats.power_divergence(f_obs, f_exp, ddof=ddof)
        assert_allclose(stat, expected_chi2)

        # Compute the p values separately, passing in scalars for ddof.
        stat0, p0 = stats.power_divergence(f_obs, f_exp, ddof=ddof[0,0])
        stat1, p1 = stats.power_divergence(f_obs, f_exp, ddof=ddof[1,0])

        assert_array_equal(p, np.vstack((p0, p1)))

    def test_empty_cases(self):
        # Degenerate shapes must not blow up; expected values are in
        # power_div_empty_cases.
        with warnings.catch_warnings():
            for case in power_div_empty_cases:
                self.check_power_divergence(
                    case.f_obs, case.f_exp, case.ddof, case.axis,
                    "pearson", case.chi2)
                self.check_power_divergence(
                    case.f_obs, case.f_exp, case.ddof, case.axis,
                    "log-likelihood", case.log)
                self.check_power_divergence(
                    case.f_obs, case.f_exp, case.ddof, case.axis,
                    "mod-log-likelihood", case.mod_log)
                self.check_power_divergence(
                    case.f_obs, case.f_exp, case.ddof, case.axis,
                    "cressie-read", case.cr)

    def test_power_divergence_result_attributes(self):
        # test for namedtuple attribute results
        f_obs = power_div_1d_cases[0].f_obs
        f_exp = power_div_1d_cases[0].f_exp
        ddof = power_div_1d_cases[0].ddof
        axis = power_div_1d_cases[0].axis

        res = stats.power_divergence(f_obs=f_obs, f_exp=f_exp, ddof=ddof,
                                     axis=axis, lambda_="pearson")
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    def test_power_divergence_gh_12282(self):
        # The sums of observed and expected frequencies must match
        f_obs = np.array([[10, 20], [30, 20]])
        f_exp = np.array([[5, 15], [35, 25]])
        with assert_raises(ValueError, match='For each axis slice...'):
            stats.power_divergence(f_obs=[10, 20], f_exp=[30, 60])
        with assert_raises(ValueError, match='For each axis slice...'):
            stats.power_divergence(f_obs=f_obs, f_exp=f_exp, axis=1)
        # Along axis=0 (the default) the column sums do agree.
        stat, pval = stats.power_divergence(f_obs=f_obs, f_exp=f_exp)
        assert_allclose(stat, [5.71428571, 2.66666667])
        assert_allclose(pval, [0.01682741, 0.10247043])
  3105. def test_gh_chisquare_12282():
  3106. # Currently `chisquare` is implemented via power_divergence
  3107. # in case that ever changes, perform a basic test like
  3108. # test_power_divergence_gh_12282
  3109. with assert_raises(ValueError, match='For each axis slice...'):
  3110. stats.chisquare(f_obs=[10, 20], f_exp=[30, 60])
  3111. @pytest.mark.parametrize("n, dtype", [(200, np.uint8), (1000000, np.int32)])
  3112. def test_chiquare_data_types(n, dtype):
  3113. # Regression test for gh-10159.
  3114. obs = np.array([n, 0], dtype=dtype)
  3115. exp = np.array([n // 2, n // 2], dtype=dtype)
  3116. stat, p = stats.chisquare(obs, exp)
  3117. assert_allclose(stat, n, rtol=1e-13)
def test_chisquare_masked_arrays():
    # Test masked arrays.
    # Two data sets as columns; the trailing -1 entries are masked out.
    obs = np.array([[8, 8, 16, 32, -1], [-1, -1, 3, 4, 5]]).T
    mask = np.array([[0, 0, 0, 0, 1], [1, 1, 0, 0, 0]]).T
    mobs = np.ma.masked_array(obs, mask)
    expected_chisq = np.array([24.0, 0.5])
    expected_g = np.array([2*(2*8*np.log(0.5) + 32*np.log(2.0)),
                           2*(3*np.log(0.75) + 5*np.log(1.25))])

    chi2 = stats.distributions.chi2

    # Degrees of freedom use the per-column unmasked counts.
    chisq, p = stats.chisquare(mobs)
    mat.assert_array_equal(chisq, expected_chisq)
    mat.assert_array_almost_equal(p, chi2.sf(expected_chisq,
                                             mobs.count(axis=0) - 1))

    g, p = stats.power_divergence(mobs, lambda_='log-likelihood')
    mat.assert_array_almost_equal(g, expected_g, decimal=15)
    mat.assert_array_almost_equal(p, chi2.sf(expected_g,
                                             mobs.count(axis=0) - 1))

    # The same data transposed, tested along axis=1, must agree.
    chisq, p = stats.chisquare(mobs.T, axis=1)
    mat.assert_array_equal(chisq, expected_chisq)
    mat.assert_array_almost_equal(p, chi2.sf(expected_chisq,
                                             mobs.T.count(axis=1) - 1))

    g, p = stats.power_divergence(mobs.T, axis=1, lambda_="log-likelihood")
    mat.assert_array_almost_equal(g, expected_g, decimal=15)
    mat.assert_array_almost_equal(p, chi2.sf(expected_g,
                                             mobs.count(axis=0) - 1))

    obs1 = np.ma.array([3, 5, 6, 99, 10], mask=[0, 0, 0, 1, 0])
    exp1 = np.ma.array([2, 4, 8, 10, 99], mask=[0, 0, 0, 0, 1])
    # NOTE: this rebinds `chi2` (previously the chi2 distribution); the
    # code below uses stats.distributions.chi2 directly instead.
    chi2, p = stats.chisquare(obs1, f_exp=exp1)
    # Because of the mask at index 3 of obs1 and at index 4 of exp1,
    # only the first three elements are included in the calculation
    # of the statistic.
    mat.assert_array_equal(chi2, 1/2 + 1/4 + 4/8)

    # When axis=None, the two values should have type np.float64.
    chisq, p = stats.chisquare(np.ma.array([1, 2, 3]), axis=None)
    assert_(isinstance(chisq, np.float64))
    assert_(isinstance(p, np.float64))
    assert_equal(chisq, 1.0)
    assert_almost_equal(p, stats.distributions.chi2.sf(1.0, 2))

    # Empty arrays:
    # A data set with length 0 returns a masked scalar.
    with np.errstate(invalid='ignore'):
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "Mean of empty slice")
            chisq, p = stats.chisquare(np.ma.array([]))
    assert_(isinstance(chisq, np.ma.MaskedArray))
    assert_equal(chisq.shape, ())
    assert_(chisq.mask)

    empty3 = np.ma.array([[], [], []])

    # empty3 is a collection of 0 data sets (whose lengths would be 3, if
    # there were any), so the return value is an array with length 0.
    chisq, p = stats.chisquare(empty3)
    assert_(isinstance(chisq, np.ma.MaskedArray))
    mat.assert_array_equal(chisq, [])

    # empty3.T is an array containing 3 data sets, each with length 0,
    # so an array of size (3,) is returned, with all values masked.
    with np.errstate(invalid='ignore'):
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "Mean of empty slice")
            chisq, p = stats.chisquare(empty3.T)
    assert_(isinstance(chisq, np.ma.MaskedArray))
    assert_equal(chisq.shape, (3,))
    assert_(np.all(chisq.mask))
  3180. def test_power_divergence_against_cressie_read_data():
  3181. # Test stats.power_divergence against tables 4 and 5 from
  3182. # Cressie and Read, "Multimonial Goodness-of-Fit Tests",
  3183. # J. R. Statist. Soc. B (1984), Vol 46, No. 3, pp. 440-464.
  3184. # This tests the calculation for several values of lambda.
  3185. # Table 4 data recalculated for greater precision according to:
  3186. # Shelby J. Haberman, Analysis of Qualitative Data: Volume 1
  3187. # Introductory Topics, Academic Press, New York, USA (1978).
  3188. obs = np.array([15, 11, 14, 17, 5, 11, 10, 4, 8,
  3189. 10, 7, 9, 11, 3, 6, 1, 1, 4])
  3190. beta = -0.083769 # Haberman (1978), p. 15
  3191. i = np.arange(1, len(obs) + 1)
  3192. alpha = np.log(obs.sum() / np.exp(beta*i).sum())
  3193. expected_counts = np.exp(alpha + beta*i)
  3194. # `table4` holds just the second and third columns from Table 4.
  3195. table4 = np.vstack((obs, expected_counts)).T
  3196. table5 = np.array([
  3197. # lambda, statistic
  3198. -10.0, 72.2e3,
  3199. -5.0, 28.9e1,
  3200. -3.0, 65.6,
  3201. -2.0, 40.6,
  3202. -1.5, 34.0,
  3203. -1.0, 29.5,
  3204. -0.5, 26.5,
  3205. 0.0, 24.6,
  3206. 0.5, 23.4,
  3207. 0.67, 23.1,
  3208. 1.0, 22.7,
  3209. 1.5, 22.6,
  3210. 2.0, 22.9,
  3211. 3.0, 24.8,
  3212. 5.0, 35.5,
  3213. 10.0, 21.4e1,
  3214. ]).reshape(-1, 2)
  3215. for lambda_, expected_stat in table5:
  3216. stat, p = stats.power_divergence(table4[:,0], table4[:,1],
  3217. lambda_=lambda_)
  3218. assert_allclose(stat, expected_stat, rtol=5e-3)
def test_friedmanchisquare():
    # see ticket:113
    # verified with matlab and R
    # From Demsar "Statistical Comparisons of Classifiers over Multiple Data Sets"
    # 2006, Xf=9.28 (no tie handling, tie corrected Xf >=9.28)
    x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661, 0.583,
                 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]),
          array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668, 0.583,
                 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]),
          array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609, 0.563,
                 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]),
          array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685, 0.625,
                 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])]

    # From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
    x2 = [array([4,3,5,3,5,3,2,5,4,4,4,3]),
          array([2,2,1,2,3,1,2,3,2,1,1,3]),
          array([2,4,3,3,4,3,3,4,4,1,2,1]),
          array([3,5,4,3,4,4,3,3,3,4,4,4])]

    # From Jerrold H. Zar, "Biostatistical Analysis" (example 12.6),
    # Xf=10.68, 0.005 < p < 0.01:
    # Probability from this example is inexact
    # using Chisquare approximation of Friedman Chisquare.
    x3 = [array([7.0,9.9,8.5,5.1,10.3]),
          array([5.3,5.7,4.7,3.5,7.7]),
          array([4.9,7.6,5.5,2.8,8.4]),
          array([8.8,8.9,8.1,3.3,9.1])]

    assert_array_almost_equal(stats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),
                              (10.2283464566929, 0.0167215803284414))
    assert_array_almost_equal(stats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),
                              (18.9428571428571, 0.000280938375189499))
    assert_array_almost_equal(stats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),
                              (10.68, 0.0135882729582176))
    # only two samples -> ValueError
    assert_raises(ValueError, stats.friedmanchisquare,x3[0],x3[1])

    # test for namedtuple attribute results
    attributes = ('statistic', 'pvalue')
    res = stats.friedmanchisquare(*x1)
    check_named_results(res, attributes)

    # test using mstats
    assert_array_almost_equal(mstats.friedmanchisquare(x1[0], x1[1],
                                                       x1[2], x1[3]),
                              (10.2283464566929, 0.0167215803284414))
    # the following fails
    # assert_array_almost_equal(mstats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),
    #                           (18.9428571428571, 0.000280938375189499))
    assert_array_almost_equal(mstats.friedmanchisquare(x3[0], x3[1],
                                                       x3[2], x3[3]),
                              (10.68, 0.0135882729582176))
    assert_raises(ValueError, mstats.friedmanchisquare,x3[0],x3[1])
  3265. class TestKSTest:
  3266. """Tests kstest and ks_1samp agree with K-S various sizes, alternatives, modes."""
  3267. def _testOne(self, x, alternative, expected_statistic, expected_prob, mode='auto', decimal=14):
  3268. result = stats.kstest(x, 'norm', alternative=alternative, mode=mode)
  3269. expected = np.array([expected_statistic, expected_prob])
  3270. assert_array_almost_equal(np.array(result), expected, decimal=decimal)
  3271. def _test_kstest_and_ks1samp(self, x, alternative, mode='auto', decimal=14):
  3272. result = stats.kstest(x, 'norm', alternative=alternative, mode=mode)
  3273. result_1samp = stats.ks_1samp(x, stats.norm.cdf, alternative=alternative, mode=mode)
  3274. assert_array_almost_equal(np.array(result), result_1samp, decimal=decimal)
  3275. def test_namedtuple_attributes(self):
  3276. x = np.linspace(-1, 1, 9)
  3277. # test for namedtuple attribute results
  3278. attributes = ('statistic', 'pvalue')
  3279. res = stats.kstest(x, 'norm')
  3280. check_named_results(res, attributes)
  3281. def test_agree_with_ks_1samp(self):
  3282. x = np.linspace(-1, 1, 9)
  3283. self._test_kstest_and_ks1samp(x, 'two-sided')
  3284. x = np.linspace(-15, 15, 9)
  3285. self._test_kstest_and_ks1samp(x, 'two-sided')
  3286. x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99]
  3287. self._test_kstest_and_ks1samp(x, 'two-sided')
  3288. self._test_kstest_and_ks1samp(x, 'greater', mode='exact')
  3289. self._test_kstest_and_ks1samp(x, 'less', mode='exact')
  3290. # missing: no test that uses *args
class TestKSOneSample:
    """Tests kstest and ks_samp 1-samples with K-S various sizes, alternatives, modes."""

    def _testOne(self, x, alternative, expected_statistic, expected_prob,
                 mode='auto', decimal=14):
        # Run ks_1samp against the standard normal cdf and compare the
        # (statistic, pvalue) pair with the expected values.
        result = stats.ks_1samp(x, stats.norm.cdf, alternative=alternative, mode=mode)
        expected = np.array([expected_statistic, expected_prob])
        assert_array_almost_equal(np.array(result), expected, decimal=decimal)

    def test_namedtuple_attributes(self):
        x = np.linspace(-1, 1, 9)
        # test for namedtuple attribute results
        attributes = ('statistic', 'pvalue')
        res = stats.ks_1samp(x, stats.norm.cdf)
        check_named_results(res, attributes)

    def test_agree_with_r(self):
        # comparing with some values from R
        x = np.linspace(-1, 1, 9)
        self._testOne(x, 'two-sided', 0.15865525393145705, 0.95164069201518386)

        x = np.linspace(-15, 15, 9)
        self._testOne(x, 'two-sided', 0.44435602715924361, 0.038850140086788665)

        x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99]
        self._testOne(x, 'two-sided', 0.293580126801961, 0.293408463684361)
        self._testOne(x, 'greater', 0.293580126801961, 0.146988835042376, mode='exact')
        self._testOne(x, 'less', 0.109348552425692, 0.732768892470675, mode='exact')

    def test_known_examples(self):
        # the following tests rely on deterministically replicated rvs
        x = stats.norm.rvs(loc=0.2, size=100, random_state=987654321)
        self._testOne(x, 'two-sided', 0.12464329735846891, 0.089444888711820769,
                      mode='asymp')
        self._testOne(x, 'less', 0.12464329735846891, 0.040989164077641749)
        self._testOne(x, 'greater', 0.0072115233216310994, 0.98531158590396228)

    def test_ks1samp_allpaths(self):
        # Check NaN input, output.
        assert_(np.isnan(kolmogn(np.nan, 1, True)))
        with assert_raises(ValueError, match='n is not integral: 1.5'):
            kolmogn(1.5, 1, True)
        assert_(np.isnan(kolmogn(-1, 1, True)))

        # Rows are (n, x, cdf?, expected); the comments name the internal
        # computational path each row is intended to exercise.
        dataset = np.asarray([
            # Check x out of range
            (101, 1, True, 1.0),
            (101, 1.1, True, 1.0),
            (101, 0, True, 0.0),
            (101, -0.1, True, 0.0),

            (32, 1.0 / 64, True, 0.0),  # Ruben-Gambino
            (32, 1.0 / 64, False, 1.0),  # Ruben-Gambino

            (32, 0.5, True, 0.9999999363163307),  # Miller
            (32, 0.5, False, 6.368366937916623e-08),  # Miller 2 * special.smirnov(32, 0.5)

            # Check some other paths
            (32, 1.0 / 8, True, 0.34624229979775223),
            (32, 1.0 / 4, True, 0.9699508336558085),
            (1600, 0.49, False, 0.0),
            (1600, 1 / 16.0, False, 7.0837876229702195e-06),  # 2 * special.smirnov(1600, 1/16.0)
            (1600, 14 / 1600, False, 0.99962357317602),  # _kolmogn_DMTW
            (1600, 1 / 32, False, 0.08603386296651416),  # _kolmogn_PelzGood
        ])
        FuncData(kolmogn, dataset, (0, 1, 2), 3).check(dtypes=[int, float, bool])

    @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_1samp])
    @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign",
                             [('greater', 6, 6, +1),
                              ('less', 7, 7, -1),
                              ('two-sided', 6, 6, +1),
                              ('two-sided', 7, 7, -1)])
    def test_location_sign(self, ksfunc, alternative,
                           x6val, ref_location, ref_sign):
        # Test that location and sign corresponding with statistic are as
        # expected. (Test is designed to be easy to predict.)
        x = np.arange(10) + 0.5
        x[6] = x6val
        cdf = stats.uniform(scale=10).cdf
        res = ksfunc(x, cdf, alternative=alternative)
        assert_allclose(res.statistic, 0.1, rtol=1e-15)
        assert res.statistic_location == ref_location
        assert res.statistic_sign == ref_sign

    # missing: no test that uses *args
  3362. class TestKSTwoSamples:
  3363. """Tests 2-samples with K-S various sizes, alternatives, modes."""
  3364. def _testOne(self, x1, x2, alternative, expected_statistic, expected_prob, mode='auto'):
  3365. result = stats.ks_2samp(x1, x2, alternative, mode=mode)
  3366. expected = np.array([expected_statistic, expected_prob])
  3367. assert_array_almost_equal(np.array(result), expected)
  3368. def testSmall(self):
  3369. self._testOne([0], [1], 'two-sided', 1.0/1, 1.0)
  3370. self._testOne([0], [1], 'greater', 1.0/1, 0.5)
  3371. self._testOne([0], [1], 'less', 0.0/1, 1.0)
  3372. self._testOne([1], [0], 'two-sided', 1.0/1, 1.0)
  3373. self._testOne([1], [0], 'greater', 0.0/1, 1.0)
  3374. self._testOne([1], [0], 'less', 1.0/1, 0.5)
    def testTwoVsThree(self):
        # 2-vs-3 samples, nudged just above/below exactly-matching values.
        data1 = np.array([1.0, 2.0])
        data1p = data1 + 0.01
        data1m = data1 - 0.01
        data2 = np.array([1.0, 2.0, 3.0])
        self._testOne(data1p, data2, 'two-sided', 1.0 / 3, 1.0)
        self._testOne(data1p, data2, 'greater', 1.0 / 3, 0.7)
        self._testOne(data1p, data2, 'less', 1.0 / 3, 0.7)
        self._testOne(data1m, data2, 'two-sided', 2.0 / 3, 0.6)
        self._testOne(data1m, data2, 'greater', 2.0 / 3, 0.3)
        self._testOne(data1m, data2, 'less', 0, 1.0)
    def testTwoVsFour(self):
        # 2-vs-4 samples, nudged just above/below exactly-matching values.
        data1 = np.array([1.0, 2.0])
        data1p = data1 + 0.01
        data1m = data1 - 0.01
        data2 = np.array([1.0, 2.0, 3.0, 4.0])
        self._testOne(data1p, data2, 'two-sided', 2.0 / 4, 14.0/15)
        self._testOne(data1p, data2, 'greater', 2.0 / 4, 8.0/15)
        self._testOne(data1p, data2, 'less', 1.0 / 4, 12.0/15)
        self._testOne(data1m, data2, 'two-sided', 3.0 / 4, 6.0/15)
        self._testOne(data1m, data2, 'greater', 3.0 / 4, 3.0/15)
        self._testOne(data1m, data2, 'less', 0, 1.0)
    def test100_100(self):
        # 100-vs-100 samples; the second sample is shifted by 2 +/- 0.1.
        x100 = np.linspace(1, 100, 100)
        x100_2_p1 = x100 + 2 + 0.1
        x100_2_m1 = x100 + 2 - 0.1
        self._testOne(x100, x100_2_p1, 'two-sided', 3.0 / 100, 0.9999999999962055)
        self._testOne(x100, x100_2_p1, 'greater', 3.0 / 100, 0.9143290114276248)
        self._testOne(x100, x100_2_p1, 'less', 0, 1.0)
        self._testOne(x100, x100_2_m1, 'two-sided', 2.0 / 100, 1.0)
        self._testOne(x100, x100_2_m1, 'greater', 2.0 / 100, 0.960978450786184)
        self._testOne(x100, x100_2_m1, 'less', 0, 1.0)
    def test100_110(self):
        # Unequal sizes; the second sample is shifted by 20 +/- 0.1.
        x100 = np.linspace(1, 100, 100)
        x110 = np.linspace(1, 100, 110)
        x110_20_p1 = x110 + 20 + 0.1
        x110_20_m1 = x110 + 20 - 0.1
        # 100, 110
        self._testOne(x100, x110_20_p1, 'two-sided', 232.0 / 1100, 0.015739183865607353)
        self._testOne(x100, x110_20_p1, 'greater', 232.0 / 1100, 0.007869594319053203)
        self._testOne(x100, x110_20_p1, 'less', 0, 1)
        self._testOne(x100, x110_20_m1, 'two-sided', 229.0 / 1100, 0.017803803861026313)
        self._testOne(x100, x110_20_m1, 'greater', 229.0 / 1100, 0.008901905958245056)
        self._testOne(x100, x110_20_m1, 'less', 0.0, 1.0)
    def testRepeatedValues(self):
        # Samples containing many ties (repeated values).
        x2233 = np.array([2] * 3 + [3] * 4 + [5] * 5 + [6] * 4, dtype=int)
        x3344 = x2233 + 1
        x2356 = np.array([2] * 3 + [3] * 4 + [5] * 10 + [6] * 4, dtype=int)
        x3467 = np.array([3] * 10 + [4] * 2 + [6] * 10 + [7] * 4, dtype=int)
        self._testOne(x2233, x3344, 'two-sided', 5.0/16, 0.4262934613454952)
        self._testOne(x2233, x3344, 'greater', 5.0/16, 0.21465428276573786)
        self._testOne(x2233, x3344, 'less', 0.0/16, 1.0)
        self._testOne(x2356, x3467, 'two-sided', 190.0/21/26, 0.0919245790168125)
        self._testOne(x2356, x3467, 'greater', 190.0/21/26, 0.0459633806858544)
        self._testOne(x2356, x3467, 'less', 70.0/21/26, 0.6121593130022775)
  3430. def testEqualSizes(self):
  3431. data2 = np.array([1.0, 2.0, 3.0])
  3432. self._testOne(data2, data2+1, 'two-sided', 1.0/3, 1.0)
  3433. self._testOne(data2, data2+1, 'greater', 1.0/3, 0.75)
  3434. self._testOne(data2, data2+1, 'less', 0.0/3, 1.)
  3435. self._testOne(data2, data2+0.5, 'two-sided', 1.0/3, 1.0)
  3436. self._testOne(data2, data2+0.5, 'greater', 1.0/3, 0.75)
  3437. self._testOne(data2, data2+0.5, 'less', 0.0/3, 1.)
  3438. self._testOne(data2, data2-0.5, 'two-sided', 1.0/3, 1.0)
  3439. self._testOne(data2, data2-0.5, 'greater', 0.0/3, 1.0)
  3440. self._testOne(data2, data2-0.5, 'less', 1.0/3, 0.75)
@pytest.mark.slow
def testMiddlingBoth(self):
    # Middling sample sizes where the exact calculation is expected to
    # fail and fall back to the asymptotic result with a RuntimeWarning.
    # 500, 600
    n1, n2 = 500, 600
    # delta nudges x off the lattice so no exact ties with y occur
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0, mode='auto')
    self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0, mode='asymp')
    self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929, mode='asymp')
    self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, mode='asymp')
    # mode='exact' should give the same numbers (after falling back),
    # but emits a warning we silence here.
    with suppress_warnings() as sup:
        message = "ks_2samp: Exact calculation unsuccessful."
        sup.filter(RuntimeWarning, message)
        self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929, mode='exact')
        self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, mode='exact')
    # Verify that exactly one RuntimeWarning is emitted on fallback.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, mode='exact')
        _check_warnings(w, RuntimeWarning, 1)
@pytest.mark.slow
def testMediumBoth(self):
    # Like testMiddlingBoth, but with larger samples (1000 vs 1100);
    # exact mode again falls back to asymptotic with a RuntimeWarning.
    # 1000, 1100
    n1, n2 = 1000, 1100
    # delta nudges x off the lattice so no exact ties with y occur
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0, mode='asymp')
    self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0, mode='auto')
    self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622, mode='asymp')
    self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, mode='asymp')
    # exact mode matches the asymptotic numbers after fallback; the
    # fallback warning is silenced here.
    with suppress_warnings() as sup:
        message = "ks_2samp: Exact calculation unsuccessful."
        sup.filter(RuntimeWarning, message)
        self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622, mode='exact')
        self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, mode='exact')
    # Verify that exactly one RuntimeWarning is emitted on fallback.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, mode='exact')
        _check_warnings(w, RuntimeWarning, 1)
  3481. def testLarge(self):
  3482. # 10000, 110
  3483. n1, n2 = 10000, 110
  3484. lcm = n1*11.0
  3485. delta = 1.0/n1/n2/2/2
  3486. x = np.linspace(1, 200, n1) - delta
  3487. y = np.linspace(2, 100, n2)
  3488. self._testOne(x, y, 'two-sided', 55275.0 / lcm, 4.2188474935755949e-15)
  3489. self._testOne(x, y, 'greater', 561.0 / lcm, 0.99115454582047591)
  3490. self._testOne(x, y, 'less', 55275.0 / lcm, 3.1317328311518713e-26)
def test_gh11184(self):
    # gh-11184: exact two-sided computation for moderately large samples.
    # 3000, 3001, exact two-sided
    np.random.seed(123456)
    x = np.random.normal(size=3000)
    y = np.random.normal(size=3001) * 1.5
    # asymp and exact modes are expected to agree here
    self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15, mode='asymp')
    self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15, mode='exact')
@pytest.mark.xslow
def test_gh11184_bigger(self):
    # gh-11184 with much larger samples; exact two-sided still succeeds.
    # 10000, 10001, exact two-sided
    np.random.seed(123456)
    x = np.random.normal(size=10000)
    y = np.random.normal(size=10001) * 1.5
    # NOTE(review): asymp and exact two-sided p-values differ greatly in
    # magnitude here (1e-49 vs 1e-15); both are pinned as regressions.
    self._testOne(x, y, 'two-sided', 0.10597913208679133, 3.3149311398483503e-49, mode='asymp')
    self._testOne(x, y, 'two-sided', 0.10597913208679133, 2.7755575615628914e-15, mode='exact')
    self._testOne(x, y, 'greater', 0.10597913208679133, 2.7947433906389253e-41, mode='asymp')
    self._testOne(x, y, 'less', 0.09658002199780022, 2.7947433906389253e-41, mode='asymp')
  3508. @pytest.mark.xslow
  3509. def test_gh12999(self):
  3510. np.random.seed(123456)
  3511. for x in range(1000, 12000, 1000):
  3512. vals1 = np.random.normal(size=(x))
  3513. vals2 = np.random.normal(size=(x + 10), loc=0.5)
  3514. exact = stats.ks_2samp(vals1, vals2, mode='exact').pvalue
  3515. asymp = stats.ks_2samp(vals1, vals2, mode='asymp').pvalue
  3516. # these two p-values should be in line with each other
  3517. assert_array_less(exact, 3 * asymp)
  3518. assert_array_less(asymp, 3 * exact)
@pytest.mark.slow
def testLargeBoth(self):
    # Both samples large (10000 vs 11000); two-sided exact succeeds,
    # one-sided exact falls back (warning silenced below).
    # 10000, 11000
    n1, n2 = 10000, 11000
    lcm = n1*11.0
    # delta nudges x off the lattice so no exact ties with y occur
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576, mode='asymp')
    self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990456491488628, mode='exact')
    self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576, mode='auto')
    self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673)
    self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724)
    with suppress_warnings() as sup:
        message = "ks_2samp: Exact calculation unsuccessful."
        sup.filter(RuntimeWarning, message)
        self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673, mode='exact')
        self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724, mode='exact')
  3537. def testNamedAttributes(self):
  3538. # test for namedtuple attribute results
  3539. attributes = ('statistic', 'pvalue')
  3540. res = stats.ks_2samp([1, 2], [3])
  3541. check_named_results(res, attributes)
@pytest.mark.slow
def test_some_code_paths(self):
    # Check that some code paths are executed
    # Exercise the private exact-computation helpers directly, including
    # the overflow paths that raise FloatingPointError under errstate.
    from scipy.stats._stats_py import (
        _count_paths_outside_method,
        _compute_outer_prob_inside_method
    )
    # smoke calls: just verify these run without error
    _compute_outer_prob_inside_method(1, 1, 1, 1)
    _count_paths_outside_method(1000, 1, 1, 1001)
    # with invalid='raise', overflowing inputs must raise rather than
    # silently produce inf/nan
    with np.errstate(invalid='raise'):
        assert_raises(FloatingPointError, _count_paths_outside_method,
                      1100, 1099, 1, 1)
        assert_raises(FloatingPointError, _count_paths_outside_method,
                      2000, 1000, 1, 1)
  3556. def test_argument_checking(self):
  3557. # Check that an empty array causes a ValueError
  3558. assert_raises(ValueError, stats.ks_2samp, [], [1])
  3559. assert_raises(ValueError, stats.ks_2samp, [1], [])
  3560. assert_raises(ValueError, stats.ks_2samp, [], [])
@pytest.mark.slow
def test_gh12218(self):
    """Ensure gh-12218 is fixed."""
    # gh-12218 triggered a TypeError calculating sqrt(n1*n2*(n1+n2)).
    # n1, n2 both large integers, the product exceeded 2^64
    np.random.seed(12345678)
    n1 = 2097152  # 2**21
    rvs1 = stats.uniform.rvs(size=n1, loc=0., scale=1)
    rvs2 = rvs1 + 1  # Exact value of rvs2 doesn't matter.
    # smoke tests: these calls must not raise for any alternative
    stats.ks_2samp(rvs1, rvs2, alternative='greater', mode='asymp')
    stats.ks_2samp(rvs1, rvs2, alternative='less', mode='asymp')
    stats.ks_2samp(rvs1, rvs2, alternative='two-sided', mode='asymp')
def test_warnings_gh_14019(self):
    # Check that RuntimeWarning is raised when method='auto' and exact
    # p-value calculation fails. See gh-14019.
    rng = np.random.default_rng(abs(hash('test_warnings_gh_14019')))
    # random samples of the same size as in the issue
    data1 = rng.random(size=881) + 0.5
    data2 = rng.random(size=369)
    message = "ks_2samp: Exact calculation unsuccessful"
    with pytest.warns(RuntimeWarning, match=message):
        res = stats.ks_2samp(data1, data2, alternative='less')
        # the fallback p-value should still be essentially zero
        assert_allclose(res.pvalue, 0, atol=1e-14)
  3584. @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_2samp])
  3585. @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign",
  3586. [('greater', 5.9, 5.9, +1),
  3587. ('less', 6.1, 6.0, -1),
  3588. ('two-sided', 5.9, 5.9, +1),
  3589. ('two-sided', 6.1, 6.0, -1)])
  3590. def test_location_sign(self, ksfunc, alternative,
  3591. x6val, ref_location, ref_sign):
  3592. # Test that location and sign corresponding with statistic are as
  3593. # expected. (Test is designed to be easy to predict.)
  3594. x = np.arange(10, dtype=np.float64)
  3595. y = x.copy()
  3596. x[6] = x6val
  3597. res = stats.ks_2samp(x, y, alternative=alternative)
  3598. assert res.statistic == 0.1
  3599. assert res.statistic_location == ref_location
  3600. assert res.statistic_sign == ref_sign
def test_ttest_rel():
    """Regression and behavior tests for `stats.ttest_rel` (paired t-test):
    1-D/2-D/3-D inputs, scalar inputs, `alternative`, `nan_policy`,
    zero-variance inputs, and shape validation."""
    # regression test
    tr,pr = 0.81248591389165692, 0.41846234511362157
    tpr = ([tr,-tr],[pr,pr])
    rvs1 = np.linspace(1,100,100)
    rvs2 = np.linspace(1.01,99.989,100)
    rvs1_2D = np.array([np.linspace(1,100,100), np.linspace(1.01,99.989,100)])
    rvs2_2D = np.array([np.linspace(1.01,99.989,100), np.linspace(1,100,100)])
    t,p = stats.ttest_rel(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    # the 2-D rows are the two samples in swapped order, so the statistic
    # along axis 0 of the transposed arrays is (tr, -tr)
    t,p = stats.ttest_rel(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)
    # test scalars
    # scalar inputs have zero degrees of freedom -> nan result plus a
    # precision-loss warning
    with suppress_warnings() as sup, np.errstate(invalid="ignore"), \
            pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
        t, p = stats.ttest_rel(4., 3.)
    assert_(np.isnan(t))
    assert_(np.isnan(p))
    # test for namedtuple attribute results
    attributes = ('statistic', 'pvalue')
    res = stats.ttest_rel(rvs1, rvs2, axis=0)
    check_named_results(res, attributes)
    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))
    t, p = stats.ttest_rel(np.moveaxis(rvs1_3D, 2, 0),
                           np.moveaxis(rvs2_3D, 2, 0),
                           axis=2)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))
    # test alternative parameter
    assert_raises(ValueError, stats.ttest_rel, rvs1, rvs2, alternative="error")
    # one-sided p-values relate to the two-sided p-value pr via pr/2
    t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="less")
    assert_allclose(p, 1 - pr/2)
    assert_allclose(t, tr)
    t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="greater")
    assert_allclose(p, pr/2)
    assert_allclose(t, tr)
    # check nan policy
    rng = np.random.RandomState(12345678)
    x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng)
    x[500] = np.nan
    y = (stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng) +
         stats.norm.rvs(scale=0.2, size=501, random_state=rng))
    y[500] = np.nan
    with np.errstate(invalid="ignore"):
        assert_array_equal(stats.ttest_rel(x, x), (np.nan, np.nan))
    assert_array_almost_equal(stats.ttest_rel(x, y, nan_policy='omit'),
                              (0.25299925303978066, 0.8003729814201519))
    assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='raise')
    assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='foobar')
    # test zero division problem
    with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        t, p = stats.ttest_rel([0, 0, 0], [1, 1, 1])
    assert_equal((np.abs(t), p), (np.inf, 0))
    with np.errstate(invalid="ignore"):
        assert_equal(stats.ttest_rel([0, 0, 0], [0, 0, 0]), (np.nan, np.nan))
    # check that nan in input array result in nan output
    anan = np.array([[1, np.nan], [-1, 1]])
    assert_equal(stats.ttest_rel(anan, np.zeros((2, 2))),
                 ([0, np.nan], [1, np.nan]))
    # test incorrect input shape raise an error
    x = np.arange(24)
    assert_raises(ValueError, stats.ttest_rel, x.reshape((8, 3)),
                  x.reshape((2, 3, 4)))
    # Convert from two-sided p-values to one sided using T result data.
    def convert(t, p, alt):
        if (t < 0 and alt == "less") or (t > 0 and alt == "greater"):
            return p / 2
        return 1 - (p / 2)
    converter = np.vectorize(convert)
    # check that one-sided p-values under nan_policy='omit' are consistent
    # with the converted two-sided p-values
    rvs1_2D[:, 20:30] = np.nan
    rvs2_2D[:, 15:25] = np.nan
    tr, pr = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, nan_policy='omit')
    t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, nan_policy='omit',
                           alternative='less')
    assert_allclose(t, tr, rtol=1e-14)
    with np.errstate(invalid='ignore'):
        assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14)
    t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, nan_policy='omit',
                           alternative='greater')
    assert_allclose(t, tr, rtol=1e-14)
    with np.errstate(invalid='ignore'):
        assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14)
  3693. def test_ttest_rel_nan_2nd_arg():
  3694. # regression test for gh-6134: nans in the second arg were not handled
  3695. x = [np.nan, 2.0, 3.0, 4.0]
  3696. y = [1.0, 2.0, 1.0, 2.0]
  3697. r1 = stats.ttest_rel(x, y, nan_policy='omit')
  3698. r2 = stats.ttest_rel(y, x, nan_policy='omit')
  3699. assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
  3700. assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)
  3701. # NB: arguments are paired when NaNs are dropped
  3702. r3 = stats.ttest_rel(y[1:], x[1:])
  3703. assert_allclose(r2, r3, atol=1e-15)
  3704. # .. and this is consistent with R. R code:
  3705. # x = c(NA, 2.0, 3.0, 4.0)
  3706. # y = c(1.0, 2.0, 1.0, 2.0)
  3707. # t.test(x, y, paired=TRUE)
  3708. assert_allclose(r2, (-2, 0.1835), atol=1e-4)
  3709. def test_ttest_rel_empty_1d_returns_nan():
  3710. # Two empty inputs should return a TtestResult containing nan
  3711. # for both values.
  3712. result = stats.ttest_rel([], [])
  3713. assert isinstance(result, stats._stats_py.TtestResult)
  3714. assert_equal(result, (np.nan, np.nan))
  3715. @pytest.mark.parametrize('b, expected_shape',
  3716. [(np.empty((1, 5, 0)), (3, 5)),
  3717. (np.empty((1, 0, 0)), (3, 0))])
  3718. def test_ttest_rel_axis_size_zero(b, expected_shape):
  3719. # In this test, the length of the axis dimension is zero.
  3720. # The results should be arrays containing nan with shape
  3721. # given by the broadcast nonaxis dimensions.
  3722. a = np.empty((3, 1, 0))
  3723. result = stats.ttest_rel(a, b, axis=-1)
  3724. assert isinstance(result, stats._stats_py.TtestResult)
  3725. expected_value = np.full(expected_shape, fill_value=np.nan)
  3726. assert_equal(result.statistic, expected_value)
  3727. assert_equal(result.pvalue, expected_value)
  3728. def test_ttest_rel_nonaxis_size_zero():
  3729. # In this test, the length of the axis dimension is nonzero,
  3730. # but one of the nonaxis dimensions has length 0. Check that
  3731. # we still get the correctly broadcast shape, which is (5, 0)
  3732. # in this case.
  3733. a = np.empty((1, 8, 0))
  3734. b = np.empty((5, 8, 1))
  3735. result = stats.ttest_rel(a, b, axis=1)
  3736. assert isinstance(result, stats._stats_py.TtestResult)
  3737. assert_equal(result.statistic.shape, (5, 0))
  3738. assert_equal(result.pvalue.shape, (5, 0))
  3739. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  3740. def test_ttest_rel_ci_1d(alternative):
  3741. # test confidence interval method against reference values
  3742. rng = np.random.default_rng(3749065329432213059)
  3743. n = 10
  3744. x = rng.normal(size=n, loc=1.5, scale=2)
  3745. y = rng.normal(size=n, loc=2, scale=2)
  3746. # Reference values generated with R t.test:
  3747. # options(digits=16)
  3748. # x = c(1.22825792, 1.63950485, 4.39025641, 0.68609437, 2.03813481,
  3749. # -1.20040109, 1.81997937, 1.86854636, 2.94694282, 3.94291373)
  3750. # y = c(3.49961496, 1.53192536, 5.53620083, 2.91687718, 0.04858043,
  3751. # 3.78505943, 3.3077496 , 2.30468892, 3.42168074, 0.56797592)
  3752. # t.test(x, y, paired=TRUE, conf.level=0.85, alternative='l')
  3753. ref = {'two-sided': [-1.912194489914035, 0.400169725914035],
  3754. 'greater': [-1.563944820311475, np.inf],
  3755. 'less': [-np.inf, 0.05192005631147523]}
  3756. res = stats.ttest_rel(x, y, alternative=alternative)
  3757. ci = res.confidence_interval(confidence_level=0.85)
  3758. assert_allclose(ci, ref[alternative])
  3759. assert_equal(res.df, n-1)
  3760. @pytest.mark.parametrize("test_fun, args",
  3761. [(stats.ttest_1samp, (np.arange(10), 0)),
  3762. (stats.ttest_rel, (np.arange(10), np.arange(10)))])
  3763. def test_ttest_ci_iv(test_fun, args):
  3764. # test `confidence_interval` method input validation
  3765. res = test_fun(*args)
  3766. message = '`confidence_level` must be a number between 0 and 1.'
  3767. with pytest.raises(ValueError, match=message):
  3768. res.confidence_interval(confidence_level=10)
  3769. def _desc_stats(x1, x2, axis=0):
  3770. def _stats(x, axis=0):
  3771. x = np.asarray(x)
  3772. mu = np.mean(x, axis=axis)
  3773. std = np.std(x, axis=axis, ddof=1)
  3774. nobs = x.shape[axis]
  3775. return mu, std, nobs
  3776. return _stats(x1, axis) + _stats(x2, axis)
def test_ttest_ind():
    """Regression and behavior tests for `stats.ttest_ind` and
    `stats.ttest_ind_from_stats`: multi-dimensional inputs, scalar inputs,
    `alternative`, `nan_policy`, and zero-variance inputs."""
    # regression test
    tr = 1.0912746897927283
    pr = 0.27647818616351882
    tpr = ([tr,-tr],[pr,pr])
    rvs2 = np.linspace(1,100,100)
    rvs1 = np.linspace(5,105,100)
    rvs1_2D = np.array([rvs1, rvs2])
    rvs2_2D = np.array([rvs2, rvs1])
    t,p = stats.ttest_ind(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    # test from_stats API
    # ttest_ind_from_stats must reproduce ttest_ind given summary stats
    assert_array_almost_equal(stats.ttest_ind_from_stats(*_desc_stats(rvs1,
                                                                      rvs2)),
                              [t, p])
    t,p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
    assert_array_almost_equal(stats.ttest_ind_from_stats(*args),
                              [t, p])
    t,p = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)
    args = _desc_stats(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal(stats.ttest_ind_from_stats(*args),
                              [t, p])
    # test scalars
    # scalar inputs have zero degrees of freedom -> nan result plus a
    # precision-loss warning
    with suppress_warnings() as sup, np.errstate(invalid="ignore"), \
            pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
        t, p = stats.ttest_ind(4., 3.)
    assert_(np.isnan(t))
    assert_(np.isnan(p))
    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=1)
    assert_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))
    t, p = stats.ttest_ind(np.moveaxis(rvs1_3D, 2, 0),
                           np.moveaxis(rvs2_3D, 2, 0),
                           axis=2)
    assert_array_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))
    # test alternative parameter
    assert_raises(ValueError, stats.ttest_ind, rvs1, rvs2, alternative="error")
    assert_raises(ValueError, stats.ttest_ind_from_stats,
                  *_desc_stats(rvs1_2D.T, rvs2_2D.T), alternative="error")
    # one-sided p-values relate to the two-sided p-value pr via pr/2
    t, p = stats.ttest_ind(rvs1, rvs2, alternative="less")
    assert_allclose(p, 1 - (pr/2))
    assert_allclose(t, tr)
    t, p = stats.ttest_ind(rvs1, rvs2, alternative="greater")
    assert_allclose(p, pr/2)
    assert_allclose(t, tr)
    # Below makes sure ttest_ind_from_stats p-val functions identically to
    # ttest_ind
    t, p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="less")
    args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
    assert_allclose(
        stats.ttest_ind_from_stats(*args, alternative="less"), [t, p])
    t, p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="greater")
    args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
    assert_allclose(
        stats.ttest_ind_from_stats(*args, alternative="greater"), [t, p])
    # check nan policy
    rng = np.random.RandomState(12345678)
    x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng)
    x[500] = np.nan
    y = stats.norm.rvs(loc=5, scale=10, size=500, random_state=rng)
    with np.errstate(invalid="ignore"):
        assert_array_equal(stats.ttest_ind(x, y), (np.nan, np.nan))
    assert_array_almost_equal(stats.ttest_ind(x, y, nan_policy='omit'),
                              (0.24779670949091914, 0.80434267337517906))
    assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='raise')
    assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='foobar')
    # test zero division problem
    with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        t, p = stats.ttest_ind([0, 0, 0], [1, 1, 1])
    assert_equal((np.abs(t), p), (np.inf, 0))
    with np.errstate(invalid="ignore"):
        assert_equal(stats.ttest_ind([0, 0, 0], [0, 0, 0]), (np.nan, np.nan))
    # check that nan in input array result in nan output
    anan = np.array([[1, np.nan], [-1, 1]])
    assert_equal(stats.ttest_ind(anan, np.zeros((2, 2))),
                 ([0, np.nan], [1, np.nan]))
    rvs1_3D[:, :, 10:15] = np.nan
    rvs2_3D[:, :, 6:12] = np.nan
    # Convert from two-sided p-values to one sided using T result data.
    def convert(t, p, alt):
        if (t < 0 and alt == "less") or (t > 0 and alt == "greater"):
            return p / 2
        return 1 - (p / 2)
    converter = np.vectorize(convert)
    # one-sided p-values under nan_policy='omit' must be consistent with
    # the converted two-sided p-values
    tr, pr = stats.ttest_ind(rvs1_3D, rvs2_3D, 0, nan_policy='omit')
    t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, 0, nan_policy='omit',
                           alternative='less')
    assert_allclose(t, tr, rtol=1e-14)
    assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14)
    t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, 0, nan_policy='omit',
                           alternative='greater')
    assert_allclose(t, tr, rtol=1e-14)
    assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14)
  3880. class Test_ttest_ind_permutations():
  3881. N = 20
  3882. # data for most tests
  3883. np.random.seed(0)
  3884. a = np.vstack((np.arange(3*N//4), np.random.random(3*N//4)))
  3885. b = np.vstack((np.arange(N//4) + 100, np.random.random(N//4)))
  3886. # data for equal variance tests
  3887. a2 = np.arange(10)
  3888. b2 = np.arange(10) + 100
  3889. # data for exact test
  3890. a3 = [1, 2]
  3891. b3 = [3, 4]
  3892. # data for bigger test
  3893. np.random.seed(0)
  3894. rvs1 = stats.norm.rvs(loc=5, scale=10, # type: ignore
  3895. size=500).reshape(100, 5).T
  3896. rvs2 = stats.norm.rvs(loc=8, scale=20, size=100) # type: ignore
  3897. p_d = [1/1001, (676+1)/1001] # desired pvalues
  3898. p_d_gen = [1/1001, (672 + 1)/1001] # desired pvalues for Generator seed
  3899. p_d_big = [(993+1)/1001, (685+1)/1001, (840+1)/1001,
  3900. (955+1)/1001, (255+1)/1001]
  3901. params = [
  3902. (a, b, {"axis": 1}, p_d), # basic test
  3903. (a.T, b.T, {'axis': 0}, p_d), # along axis 0
  3904. (a[0, :], b[0, :], {'axis': None}, p_d[0]), # 1d data
  3905. (a[0, :].tolist(), b[0, :].tolist(), {'axis': None}, p_d[0]),
  3906. # different seeds
  3907. (a, b, {'random_state': 0, "axis": 1}, p_d),
  3908. (a, b, {'random_state': np.random.RandomState(0), "axis": 1}, p_d),
  3909. (a2, b2, {'equal_var': True}, 1/1001), # equal variances
  3910. (rvs1, rvs2, {'axis': -1, 'random_state': 0}, p_d_big), # bigger test
  3911. (a3, b3, {}, 1/3), # exact test
  3912. (a, b, {'random_state': np.random.default_rng(0), "axis": 1}, p_d_gen),
  3913. ]
  3914. @pytest.mark.parametrize("a,b,update,p_d", params)
  3915. def test_ttest_ind_permutations(self, a, b, update, p_d):
  3916. options_a = {'axis': None, 'equal_var': False}
  3917. options_p = {'axis': None, 'equal_var': False,
  3918. 'permutations': 1000, 'random_state': 0}
  3919. options_a.update(update)
  3920. options_p.update(update)
  3921. stat_a, _ = stats.ttest_ind(a, b, **options_a)
  3922. stat_p, pvalue = stats.ttest_ind(a, b, **options_p)
  3923. assert_array_almost_equal(stat_a, stat_p, 5)
  3924. assert_array_almost_equal(pvalue, p_d)
  3925. def test_ttest_ind_exact_alternative(self):
  3926. np.random.seed(0)
  3927. N = 3
  3928. a = np.random.rand(2, N, 2)
  3929. b = np.random.rand(2, N, 2)
  3930. options_p = {'axis': 1, 'permutations': 1000}
  3931. options_p.update(alternative="greater")
  3932. res_g_ab = stats.ttest_ind(a, b, **options_p)
  3933. res_g_ba = stats.ttest_ind(b, a, **options_p)
  3934. options_p.update(alternative="less")
  3935. res_l_ab = stats.ttest_ind(a, b, **options_p)
  3936. res_l_ba = stats.ttest_ind(b, a, **options_p)
  3937. options_p.update(alternative="two-sided")
  3938. res_2_ab = stats.ttest_ind(a, b, **options_p)
  3939. res_2_ba = stats.ttest_ind(b, a, **options_p)
  3940. # Alternative doesn't affect the statistic
  3941. assert_equal(res_g_ab.statistic, res_l_ab.statistic)
  3942. assert_equal(res_g_ab.statistic, res_2_ab.statistic)
  3943. # Reversing order of inputs negates statistic
  3944. assert_equal(res_g_ab.statistic, -res_g_ba.statistic)
  3945. assert_equal(res_l_ab.statistic, -res_l_ba.statistic)
  3946. assert_equal(res_2_ab.statistic, -res_2_ba.statistic)
  3947. # Reversing order of inputs does not affect p-value of 2-sided test
  3948. assert_equal(res_2_ab.pvalue, res_2_ba.pvalue)
  3949. # In exact test, distribution is perfectly symmetric, so these
  3950. # identities are exactly satisfied.
  3951. assert_equal(res_g_ab.pvalue, res_l_ba.pvalue)
  3952. assert_equal(res_l_ab.pvalue, res_g_ba.pvalue)
  3953. mask = res_g_ab.pvalue <= 0.5
  3954. assert_equal(res_g_ab.pvalue[mask] + res_l_ba.pvalue[mask],
  3955. res_2_ab.pvalue[mask])
  3956. assert_equal(res_l_ab.pvalue[~mask] + res_g_ba.pvalue[~mask],
  3957. res_2_ab.pvalue[~mask])
  3958. def test_ttest_ind_exact_selection(self):
  3959. # test the various ways of activating the exact test
  3960. np.random.seed(0)
  3961. N = 3
  3962. a = np.random.rand(N)
  3963. b = np.random.rand(N)
  3964. res0 = stats.ttest_ind(a, b)
  3965. res1 = stats.ttest_ind(a, b, permutations=1000)
  3966. res2 = stats.ttest_ind(a, b, permutations=0)
  3967. res3 = stats.ttest_ind(a, b, permutations=np.inf)
  3968. assert res1.pvalue != res0.pvalue
  3969. assert res2.pvalue == res0.pvalue
  3970. assert res3.pvalue == res1.pvalue
  3971. def test_ttest_ind_exact_distribution(self):
  3972. # the exact distribution of the test statistic should have
  3973. # binom(na + nb, na) elements, all unique. This was not always true
  3974. # in gh-4824; fixed by gh-13661.
  3975. np.random.seed(0)
  3976. a = np.random.rand(3)
  3977. b = np.random.rand(4)
  3978. data = np.concatenate((a, b))
  3979. na, nb = len(a), len(b)
  3980. permutations = 100000
  3981. t_stat, _, _ = _permutation_distribution_t(data, permutations, na,
  3982. True)
  3983. n_unique = len(set(t_stat))
  3984. assert n_unique == binom(na + nb, na)
  3985. assert len(t_stat) == n_unique
  3986. def test_ttest_ind_randperm_alternative(self):
  3987. np.random.seed(0)
  3988. N = 50
  3989. a = np.random.rand(2, 3, N)
  3990. b = np.random.rand(3, N)
  3991. options_p = {'axis': -1, 'permutations': 1000, "random_state": 0}
  3992. options_p.update(alternative="greater")
  3993. res_g_ab = stats.ttest_ind(a, b, **options_p)
  3994. res_g_ba = stats.ttest_ind(b, a, **options_p)
  3995. options_p.update(alternative="less")
  3996. res_l_ab = stats.ttest_ind(a, b, **options_p)
  3997. res_l_ba = stats.ttest_ind(b, a, **options_p)
  3998. # Alternative doesn't affect the statistic
  3999. assert_equal(res_g_ab.statistic, res_l_ab.statistic)
  4000. # Reversing order of inputs negates statistic
  4001. assert_equal(res_g_ab.statistic, -res_g_ba.statistic)
  4002. assert_equal(res_l_ab.statistic, -res_l_ba.statistic)
  4003. # For random permutations, the chance of ties between the observed
  4004. # test statistic and the population is small, so:
  4005. assert_equal(res_g_ab.pvalue + res_l_ab.pvalue,
  4006. 1 + 1/(options_p['permutations'] + 1))
  4007. assert_equal(res_g_ba.pvalue + res_l_ba.pvalue,
  4008. 1 + 1/(options_p['permutations'] + 1))
  4009. @pytest.mark.slow()
  4010. def test_ttest_ind_randperm_alternative2(self):
  4011. np.random.seed(0)
  4012. N = 50
  4013. a = np.random.rand(N, 4)
  4014. b = np.random.rand(N, 4)
  4015. options_p = {'permutations': 20000, "random_state": 0}
  4016. options_p.update(alternative="greater")
  4017. res_g_ab = stats.ttest_ind(a, b, **options_p)
  4018. options_p.update(alternative="less")
  4019. res_l_ab = stats.ttest_ind(a, b, **options_p)
  4020. options_p.update(alternative="two-sided")
  4021. res_2_ab = stats.ttest_ind(a, b, **options_p)
  4022. # For random permutations, the chance of ties between the observed
  4023. # test statistic and the population is small, so:
  4024. assert_equal(res_g_ab.pvalue + res_l_ab.pvalue,
  4025. 1 + 1/(options_p['permutations'] + 1))
  4026. # For for large sample sizes, the distribution should be approximately
  4027. # symmetric, so these identities should be approximately satisfied
  4028. mask = res_g_ab.pvalue <= 0.5
  4029. assert_allclose(2 * res_g_ab.pvalue[mask],
  4030. res_2_ab.pvalue[mask], atol=2e-2)
  4031. assert_allclose(2 * (1-res_g_ab.pvalue[~mask]),
  4032. res_2_ab.pvalue[~mask], atol=2e-2)
  4033. assert_allclose(2 * res_l_ab.pvalue[~mask],
  4034. res_2_ab.pvalue[~mask], atol=2e-2)
  4035. assert_allclose(2 * (1-res_l_ab.pvalue[mask]),
  4036. res_2_ab.pvalue[mask], atol=2e-2)
  4037. def test_ttest_ind_permutation_nanpolicy(self):
  4038. np.random.seed(0)
  4039. N = 50
  4040. a = np.random.rand(N, 5)
  4041. b = np.random.rand(N, 5)
  4042. a[5, 1] = np.nan
  4043. b[8, 2] = np.nan
  4044. a[9, 3] = np.nan
  4045. b[9, 3] = np.nan
  4046. options_p = {'permutations': 1000, "random_state": 0}
  4047. # Raise
  4048. options_p.update(nan_policy="raise")
  4049. with assert_raises(ValueError, match="The input contains nan values"):
  4050. res = stats.ttest_ind(a, b, **options_p)
  4051. # Propagate
  4052. with suppress_warnings() as sup:
  4053. sup.record(RuntimeWarning, "invalid value*")
  4054. options_p.update(nan_policy="propagate")
  4055. res = stats.ttest_ind(a, b, **options_p)
  4056. mask = np.isnan(a).any(axis=0) | np.isnan(b).any(axis=0)
  4057. res2 = stats.ttest_ind(a[:, ~mask], b[:, ~mask], **options_p)
  4058. assert_equal(res.pvalue[mask], np.nan)
  4059. assert_equal(res.statistic[mask], np.nan)
  4060. assert_allclose(res.pvalue[~mask], res2.pvalue)
  4061. assert_allclose(res.statistic[~mask], res2.statistic)
  4062. # Propagate 1d
  4063. res = stats.ttest_ind(a.ravel(), b.ravel(), **options_p)
  4064. assert np.isnan(res.pvalue) # assert makes sure it's a scalar
  4065. assert np.isnan(res.statistic)
  4066. def test_ttest_ind_permutation_check_inputs(self):
  4067. with assert_raises(ValueError, match="Permutations must be"):
  4068. stats.ttest_ind(self.a2, self.b2, permutations=-3)
  4069. with assert_raises(ValueError, match="Permutations must be"):
  4070. stats.ttest_ind(self.a2, self.b2, permutations=1.5)
  4071. with assert_raises(ValueError, match="'hello' cannot be used"):
  4072. stats.ttest_ind(self.a, self.b, permutations=1,
  4073. random_state='hello')
  4074. def test_ttest_ind_permutation_check_p_values(self):
  4075. # p-values should never be exactly zero
  4076. N = 10
  4077. a = np.random.rand(N, 20)
  4078. b = np.random.rand(N, 20)
  4079. p_values = stats.ttest_ind(a, b, permutations=1).pvalue
  4080. print(0.0 not in p_values)
  4081. assert 0.0 not in p_values
  4082. class Test_ttest_ind_common:
  4083. # for tests that are performed on variations of the t-test such as
  4084. # permutations and trimming
  4085. @pytest.mark.slow()
  4086. @pytest.mark.parametrize("kwds", [{'permutations': 200, 'random_state': 0},
  4087. {'trim': .2}, {}],
  4088. ids=["permutations", "trim", "basic"])
  4089. @pytest.mark.parametrize('equal_var', [True, False],
  4090. ids=['equal_var', 'unequal_var'])
  4091. def test_ttest_many_dims(self, kwds, equal_var):
  4092. # Test that test works on many-dimensional arrays
  4093. np.random.seed(0)
  4094. a = np.random.rand(5, 4, 4, 7, 1, 6)
  4095. b = np.random.rand(4, 1, 8, 2, 6)
  4096. res = stats.ttest_ind(a, b, axis=-3, **kwds)
  4097. # compare fully-vectorized t-test against t-test on smaller slice
  4098. i, j, k = 2, 3, 1
  4099. a2 = a[i, :, j, :, 0, :]
  4100. b2 = b[:, 0, :, k, :]
  4101. res2 = stats.ttest_ind(a2, b2, axis=-2, **kwds)
  4102. assert_equal(res.statistic[i, :, j, k, :],
  4103. res2.statistic)
  4104. assert_equal(res.pvalue[i, :, j, k, :],
  4105. res2.pvalue)
  4106. # compare against t-test on one axis-slice at a time
  4107. # manually broadcast with tile; move axis to end to simplify
  4108. x = np.moveaxis(np.tile(a, (1, 1, 1, 1, 2, 1)), -3, -1)
  4109. y = np.moveaxis(np.tile(b, (5, 1, 4, 1, 1, 1)), -3, -1)
  4110. shape = x.shape[:-1]
  4111. statistics = np.zeros(shape)
  4112. pvalues = np.zeros(shape)
  4113. for indices in product(*(range(i) for i in shape)):
  4114. xi = x[indices] # use tuple to index single axis slice
  4115. yi = y[indices]
  4116. res3 = stats.ttest_ind(xi, yi, axis=-1, **kwds)
  4117. statistics[indices] = res3.statistic
  4118. pvalues[indices] = res3.pvalue
  4119. assert_allclose(statistics, res.statistic)
  4120. assert_allclose(pvalues, res.pvalue)
  4121. @pytest.mark.parametrize("kwds", [{'permutations': 200, 'random_state': 0},
  4122. {'trim': .2}, {}],
  4123. ids=["trim", "permutations", "basic"])
  4124. @pytest.mark.parametrize("axis", [-1, 0])
  4125. def test_nans_on_axis(self, kwds, axis):
  4126. # confirm that with `nan_policy='propagate'`, NaN results are returned
  4127. # on the correct location
  4128. a = np.random.randint(10, size=(5, 3, 10)).astype('float')
  4129. b = np.random.randint(10, size=(5, 3, 10)).astype('float')
  4130. # set some indices in `a` and `b` to be `np.nan`.
  4131. a[0][2][3] = np.nan
  4132. b[2][0][6] = np.nan
  4133. # arbitrarily use `np.sum` as a baseline for which indices should be
  4134. # NaNs
  4135. expected = np.isnan(np.sum(a + b, axis=axis))
  4136. # multidimensional inputs to `t.sf(np.abs(t), df)` with NaNs on some
  4137. # indices throws an warning. See issue gh-13844
  4138. with suppress_warnings() as sup, np.errstate(invalid="ignore"):
  4139. sup.filter(RuntimeWarning,
  4140. "invalid value encountered in less_equal")
  4141. sup.filter(RuntimeWarning, "Precision loss occurred")
  4142. res = stats.ttest_ind(a, b, axis=axis, **kwds)
  4143. p_nans = np.isnan(res.pvalue)
  4144. assert_array_equal(p_nans, expected)
  4145. statistic_nans = np.isnan(res.statistic)
  4146. assert_array_equal(statistic_nans, expected)
class Test_ttest_trim:
    # Reference cases for the trimmed (Yuen) t-test.
    # Each row: [a, b, expected p-value, expected statistic, trim fraction].
    params = [
        [[1, 2, 3], [1.1, 2.9, 4.2], 0.53619490753126731, -0.6864951273557258,
         .2],
        [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2],
         0.00998909252078421, 4.591598691181999, .2],
        [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2],
         0.10512380092302633, 2.832256715395378, .32],
        [[2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9],
         [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1],
         0.002878909511344, -4.2461168970325, .2],
        [[-0.84504783, 0.13366078, 3.53601757, -0.62908581, 0.54119466,
          -1.16511574, -0.08836614, 1.18495416, 2.48028757, -1.58925028,
          -1.6706357, 0.3090472, -2.12258305, 0.3697304, -1.0415207,
          -0.57783497, -0.90997008, 1.09850192, 0.41270579, -1.4927376],
         [1.2725522, 1.1657899, 2.7509041, 1.2389013, -0.9490494, -1.0752459,
          1.1038576, 2.9912821, 3.5349111, 0.4171922, 1.0168959, -0.7625041,
          -0.4300008, 3.0431921, 1.6035947, 0.5285634, -0.7649405, 1.5575896,
          1.3670797, 1.1726023], 0.005293305834235, -3.0983317739483, .2]]

    @pytest.mark.parametrize("a,b,pr,tr,trim", params)
    def test_ttest_compare_r(self, a, b, pr, tr, trim):
        '''
        Using PairedData's yuen.t.test method. Something to note is that there
        are at least 3 R packages that come with a trimmed t-test method, and
        comparisons were made between them. It was found that PairedData's
        method's results match this method, SAS, and one of the other R
        methods. A notable discrepancy was the DescTools implementation of the
        function, which only sometimes agreed with SAS, WRS2, PairedData and
        this implementation. For this reason, most comparisons in R are made
        against PairedData's method.

        Rather than providing the input and output for all evaluations, here is
        a representative example:
        > library(PairedData)
        > a <- c(1, 2, 3)
        > b <- c(1.1, 2.9, 4.2)
        > options(digits=16)
        > yuen.t.test(a, b, tr=.2)

            Two-sample Yuen test, trim=0.2
        data: x and y
        t = -0.68649512735573, df = 3.4104431643464, p-value = 0.5361949075313
        alternative hypothesis: true difference in trimmed means is not equal
        to 0
        95 percent confidence interval:
        -3.912777195645217 2.446110528978550
        sample estimates:
        trimmed mean of x trimmed mean of y
        2.000000000000000 2.73333333333333
        '''
        statistic, pvalue = stats.ttest_ind(a, b, trim=trim, equal_var=False)
        assert_allclose(statistic, tr, atol=1e-15)
        assert_allclose(pvalue, pr, atol=1e-15)

    def test_compare_SAS(self):
        # Source of the data used in this test:
        # https://support.sas.com/resources/papers/proceedings14/1660-2014.pdf
        a = [12, 14, 18, 25, 32, 44, 12, 14, 18, 25, 32, 44]
        b = [17, 22, 14, 12, 30, 29, 19, 17, 22, 14, 12, 30, 29, 19]
        # In this paper, a trimming percentage of 5% is used. However,
        # in their implementation, the number of values trimmed is rounded to
        # the nearest whole number. However, consistent with
        # `scipy.stats.trimmed_mean`, this test truncates to the lower
        # whole number. In this example, the paper notes that 1 value is
        # trimmed off of each side. 9% replicates this amount of trimming.
        statistic, pvalue = stats.ttest_ind(a, b, trim=.09, equal_var=False)
        assert_allclose(pvalue, 0.514522, atol=1e-6)
        assert_allclose(statistic, 0.669169, atol=1e-6)

    def test_equal_var(self):
        '''
        The PairedData library only supports unequal variances. To compare
        samples with equal variances, the multicon library is used.
        > library(multicon)
        > a <- c(2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9)
        > b <- c(6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1)
        > dv = c(a,b)
        > iv = c(rep('a', length(a)), rep('b', length(b)))
        > yuenContrast(dv~ iv, EQVAR = TRUE)
        $Ms
           N                 M wgt
        a 11 2.442857142857143   1
        b 11 5.385714285714286  -1

        $test
                              stat df              crit                   p
        results -4.246116897032513 12 2.178812829667228 0.00113508833897713
        '''
        a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9]
        b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1]
        # `equal_var=True` is default
        statistic, pvalue = stats.ttest_ind(a, b, trim=.2)
        assert_allclose(pvalue, 0.00113508833897713, atol=1e-10)
        assert_allclose(statistic, -4.246116897032513, atol=1e-10)

    @pytest.mark.parametrize('alt,pr,tr',
                             (('greater', 0.9985605452443, -4.2461168970325),
                              ('less', 0.001439454755672, -4.2461168970325),),
                             )
    def test_alternatives(self, alt, pr, tr):
        '''
        > library(PairedData)
        > a <- c(2.7,2.7,1.1,3.0,1.9,3.0,3.8,3.8,0.3,1.9,1.9)
        > b <- c(6.5,5.4,8.1,3.5,0.5,3.8,6.8,4.9,9.5,6.2,4.1)
        > options(digits=16)
        > yuen.t.test(a, b, alternative = 'greater')
        '''
        a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9]
        b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1]

        statistic, pvalue = stats.ttest_ind(a, b, trim=.2, equal_var=False,
                                            alternative=alt)
        assert_allclose(pvalue, pr, atol=1e-10)
        assert_allclose(statistic, tr, atol=1e-10)

    def test_errors_unsupported(self):
        # confirm that attempting to trim with NaNs or permutations raises an
        # error
        match = "Permutations are currently not supported with trimming."
        with assert_raises(ValueError, match=match):
            stats.ttest_ind([1, 2], [2, 3], trim=.2, permutations=2)

        match = ("not supported by permutation tests or trimmed tests.")
        with assert_raises(ValueError, match=match):
            stats.ttest_ind([1, 2], [2, np.nan, 3], trim=.2, nan_policy='omit')

    @pytest.mark.parametrize("trim", [-.2, .5, 1])
    def test_trim_bounds_error(self, trim):
        # trim must lie in the half-open interval [0, .5)
        match = "Trimming percentage should be 0 <= `trim` < .5."
        with assert_raises(ValueError, match=match):
            stats.ttest_ind([1, 2], [2, 1], trim=trim)
  4268. def test__broadcast_concatenate():
  4269. # test that _broadcast_concatenate properly broadcasts arrays along all
  4270. # axes except `axis`, then concatenates along axis
  4271. np.random.seed(0)
  4272. a = np.random.rand(5, 4, 4, 3, 1, 6)
  4273. b = np.random.rand(4, 1, 8, 2, 6)
  4274. c = _broadcast_concatenate((a, b), axis=-3)
  4275. # broadcast manually as an independent check
  4276. a = np.tile(a, (1, 1, 1, 1, 2, 1))
  4277. b = np.tile(b[None, ...], (5, 1, 4, 1, 1, 1))
  4278. for index in product(*(range(i) for i in c.shape)):
  4279. i, j, k, l, m, n = index
  4280. if l < a.shape[-3]:
  4281. assert a[i, j, k, l, m, n] == c[i, j, k, l, m, n]
  4282. else:
  4283. assert b[i, j, k, l - a.shape[-3], m, n] == c[i, j, k, l, m, n]
def test_ttest_ind_with_uneq_var():
    # Welch's t-test (`equal_var=False`) against reference values, plus
    # consistency between `ttest_ind` and `ttest_ind_from_stats`.

    # check vs. R
    a = (1, 2, 3)
    b = (1.1, 2.9, 4.2)
    pr = 0.53619490753126731
    tr = -0.68649512735572582
    t, p = stats.ttest_ind(a, b, equal_var=False)
    assert_array_almost_equal([t,p], [tr, pr])
    # test from desc stats API
    assert_array_almost_equal(stats.ttest_ind_from_stats(*_desc_stats(a, b),
                                                         equal_var=False),
                              [t, p])

    a = (1, 2, 3, 4)
    pr = 0.84354139131608286
    tr = -0.2108663315950719
    t, p = stats.ttest_ind(a, b, equal_var=False)
    assert_array_almost_equal([t,p], [tr, pr])
    assert_array_almost_equal(stats.ttest_ind_from_stats(*_desc_stats(a, b),
                                                         equal_var=False),
                              [t, p])

    # regression test
    tr = 1.0912746897927283
    tr_uneq_n = 0.66745638708050492
    pr = 0.27647831993021388
    pr_uneq_n = 0.50873585065616544
    tpr = ([tr,-tr],[pr,pr])

    rvs3 = np.linspace(1,100, 25)
    rvs2 = np.linspace(1,100,100)
    rvs1 = np.linspace(5,105,100)
    rvs1_2D = np.array([rvs1, rvs2])
    rvs2_2D = np.array([rvs2, rvs1])

    t,p = stats.ttest_ind(rvs1, rvs2, axis=0, equal_var=False)
    assert_array_almost_equal([t,p],(tr,pr))
    assert_array_almost_equal(stats.ttest_ind_from_stats(*_desc_stats(rvs1,
                                                                      rvs2),
                                                         equal_var=False),
                              (t, p))

    # samples of unequal length
    t,p = stats.ttest_ind(rvs1, rvs3, axis=0, equal_var=False)
    assert_array_almost_equal([t,p], (tr_uneq_n, pr_uneq_n))
    assert_array_almost_equal(stats.ttest_ind_from_stats(*_desc_stats(rvs1,
                                                                      rvs3),
                                                         equal_var=False),
                              (t, p))

    # 2-D inputs along both axes
    t,p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, equal_var=False)
    assert_array_almost_equal([t,p],tpr)
    args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
    assert_array_almost_equal(stats.ttest_ind_from_stats(*args,
                                                         equal_var=False),
                              (t, p))

    t,p = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1, equal_var=False)
    assert_array_almost_equal([t,p],tpr)
    args = _desc_stats(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal(stats.ttest_ind_from_stats(*args,
                                                         equal_var=False),
                              (t, p))

    # test for namedtuple attribute results
    attributes = ('statistic', 'pvalue')
    res = stats.ttest_ind(rvs1, rvs2, axis=0, equal_var=False)
    check_named_results(res, attributes)

    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=1, equal_var=False)
    assert_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    args = _desc_stats(rvs1_3D, rvs2_3D, axis=1)
    t, p = stats.ttest_ind_from_stats(*args, equal_var=False)
    assert_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    # same data, axis moved to the end
    t, p = stats.ttest_ind(np.moveaxis(rvs1_3D, 2, 0),
                           np.moveaxis(rvs2_3D, 2, 0),
                           axis=2, equal_var=False)
    assert_array_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    args = _desc_stats(np.moveaxis(rvs1_3D, 2, 0),
                       np.moveaxis(rvs2_3D, 2, 0), axis=2)
    t, p = stats.ttest_ind_from_stats(*args, equal_var=False)
    assert_array_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    # test zero division problem
    with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        t, p = stats.ttest_ind([0, 0, 0], [1, 1, 1], equal_var=False)
    assert_equal((np.abs(t), p), (np.inf, 0))

    with np.errstate(all='ignore'):
        # identical zero-variance samples: undefined result (nan, nan)
        assert_equal(stats.ttest_ind([0, 0, 0], [0, 0, 0], equal_var=False),
                     (np.nan, np.nan))

    # check that nan in input array result in nan output
    anan = np.array([[1, np.nan], [-1, 1]])
    assert_equal(stats.ttest_ind(anan, np.zeros((2, 2)), equal_var=False),
                 ([0, np.nan], [1, np.nan]))
  4378. def test_ttest_ind_nan_2nd_arg():
  4379. # regression test for gh-6134: nans in the second arg were not handled
  4380. x = [np.nan, 2.0, 3.0, 4.0]
  4381. y = [1.0, 2.0, 1.0, 2.0]
  4382. r1 = stats.ttest_ind(x, y, nan_policy='omit')
  4383. r2 = stats.ttest_ind(y, x, nan_policy='omit')
  4384. assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
  4385. assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)
  4386. # NB: arguments are not paired when NaNs are dropped
  4387. r3 = stats.ttest_ind(y, x[1:])
  4388. assert_allclose(r2, r3, atol=1e-15)
  4389. # .. and this is consistent with R. R code:
  4390. # x = c(NA, 2.0, 3.0, 4.0)
  4391. # y = c(1.0, 2.0, 1.0, 2.0)
  4392. # t.test(x, y, var.equal=TRUE)
  4393. assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901),
  4394. atol=1e-15)
  4395. def test_ttest_ind_empty_1d_returns_nan():
  4396. # Two empty inputs should return a Ttest_indResult containing nan
  4397. # for both values.
  4398. result = stats.ttest_ind([], [])
  4399. assert isinstance(result, stats._stats_py.Ttest_indResult)
  4400. assert_equal(result, (np.nan, np.nan))
  4401. @pytest.mark.parametrize('b, expected_shape',
  4402. [(np.empty((1, 5, 0)), (3, 5)),
  4403. (np.empty((1, 0, 0)), (3, 0))])
  4404. def test_ttest_ind_axis_size_zero(b, expected_shape):
  4405. # In this test, the length of the axis dimension is zero.
  4406. # The results should be arrays containing nan with shape
  4407. # given by the broadcast nonaxis dimensions.
  4408. a = np.empty((3, 1, 0))
  4409. result = stats.ttest_ind(a, b, axis=-1)
  4410. assert isinstance(result, stats._stats_py.Ttest_indResult)
  4411. expected_value = np.full(expected_shape, fill_value=np.nan)
  4412. assert_equal(result.statistic, expected_value)
  4413. assert_equal(result.pvalue, expected_value)
  4414. def test_ttest_ind_nonaxis_size_zero():
  4415. # In this test, the length of the axis dimension is nonzero,
  4416. # but one of the nonaxis dimensions has length 0. Check that
  4417. # we still get the correctly broadcast shape, which is (5, 0)
  4418. # in this case.
  4419. a = np.empty((1, 8, 0))
  4420. b = np.empty((5, 8, 1))
  4421. result = stats.ttest_ind(a, b, axis=1)
  4422. assert isinstance(result, stats._stats_py.Ttest_indResult)
  4423. assert_equal(result.statistic.shape, (5, 0))
  4424. assert_equal(result.pvalue.shape, (5, 0))
  4425. def test_ttest_ind_nonaxis_size_zero_different_lengths():
  4426. # In this test, the length of the axis dimension is nonzero,
  4427. # and that size is different in the two inputs,
  4428. # and one of the nonaxis dimensions has length 0. Check that
  4429. # we still get the correctly broadcast shape, which is (5, 0)
  4430. # in this case.
  4431. a = np.empty((1, 7, 0))
  4432. b = np.empty((5, 8, 1))
  4433. result = stats.ttest_ind(a, b, axis=1)
  4434. assert isinstance(result, stats._stats_py.Ttest_indResult)
  4435. assert_equal(result.statistic.shape, (5, 0))
  4436. assert_equal(result.pvalue.shape, (5, 0))
  4437. def test_gh5686():
  4438. mean1, mean2 = np.array([1, 2]), np.array([3, 4])
  4439. std1, std2 = np.array([5, 3]), np.array([4, 5])
  4440. nobs1, nobs2 = np.array([130, 140]), np.array([100, 150])
  4441. # This will raise a TypeError unless gh-5686 is fixed.
  4442. stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
  4443. def test_ttest_ind_from_stats_inputs_zero():
  4444. # Regression test for gh-6409.
  4445. result = stats.ttest_ind_from_stats(0, 0, 6, 0, 0, 6, equal_var=False)
  4446. assert_equal(result, [np.nan, np.nan])
def test_ttest_1samp_new():
    # Multidimensional input, axis handling, alternatives, zero division
    # and nan handling for the one-sample t-test.
    n1, n2, n3 = (10,15,20)
    rvn1 = stats.norm.rvs(loc=5,scale=10,size=(n1,n2,n3))

    # check multidimensional array and correct axis handling
    # deterministic rvn1 and rvn2 would be better as in test_ttest_rel
    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n2,n3)),axis=0)
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=0)
    t3,p3 = stats.ttest_1samp(rvn1[:,0,0], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n2,n3))

    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1, 1, n3)),axis=1)  # noqa
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=1)
    t3,p3 = stats.ttest_1samp(rvn1[0,:,0], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n1,n3))

    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1,n2,1)),axis=2)  # noqa
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=2)
    t3,p3 = stats.ttest_1samp(rvn1[0,0,:], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n1,n2))

    # test zero division problem
    t, p = stats.ttest_1samp([0, 0, 0], 1)
    assert_equal((np.abs(t), p), (np.inf, 0))

    # test alternative parameter
    # Convert from two-sided p-values to one sided using T result data.
    def convert(t, p, alt):
        # One-sided p is p/2 on the side favored by the statistic's sign,
        # 1 - p/2 on the other side.
        if (t < 0 and alt == "less") or (t > 0 and alt == "greater"):
            return p / 2
        return 1 - (p / 2)
    converter = np.vectorize(convert)

    tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1)

    t, p = stats.ttest_1samp(rvn1[:, :, :], 1, alternative="greater")
    pc = converter(tr, pr, "greater")
    assert_allclose(p, pc)
    assert_allclose(t, tr)

    t, p = stats.ttest_1samp(rvn1[:, :, :], 1, alternative="less")
    pc = converter(tr, pr, "less")
    assert_allclose(p, pc)
    assert_allclose(t, tr)

    with np.errstate(all='ignore'):
        # zero-variance sample at its own mean: undefined (nan, nan)
        assert_equal(stats.ttest_1samp([0, 0, 0], 0), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1, np.nan],[-1, 1]])
        assert_equal(stats.ttest_1samp(anan, 0), ([0, np.nan], [1, np.nan]))

    # nan_policy='omit' combined with one-sided alternatives
    rvn1[0:2, 1:3, 4:8] = np.nan

    tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1, nan_policy='omit')

    t, p = stats.ttest_1samp(rvn1[:, :, :], 1, nan_policy='omit',
                             alternative="greater")
    pc = converter(tr, pr, "greater")
    assert_allclose(p, pc)
    assert_allclose(t, tr)

    t, p = stats.ttest_1samp(rvn1[:, :, :], 1, nan_policy='omit',
                             alternative="less")
    pc = converter(tr, pr, "less")
    assert_allclose(p, pc)
    assert_allclose(t, tr)
  4506. def test_ttest_1samp_popmean_array():
  4507. # when popmean.shape[axis] != 1, raise an error
  4508. # if the user wants to test multiple null hypotheses simultaneously,
  4509. # use standard broadcasting rules
  4510. rng = np.random.default_rng(2913300596553337193)
  4511. x = rng.random(size=(1, 15, 20))
  4512. message = r"`popmean.shape\[axis\]` must equal 1."
  4513. popmean = rng.random(size=(5, 2, 20))
  4514. with pytest.raises(ValueError, match=message):
  4515. stats.ttest_1samp(x, popmean=popmean, axis=-2)
  4516. popmean = rng.random(size=(5, 1, 20))
  4517. res = stats.ttest_1samp(x, popmean=popmean, axis=-2)
  4518. assert res.statistic.shape == (5, 20)
  4519. ci = np.expand_dims(res.confidence_interval(), axis=-2)
  4520. res = stats.ttest_1samp(x, popmean=ci, axis=-2)
  4521. assert_allclose(res.pvalue, 0.05)
class TestDescribe:
    def test_describe_scalar(self):
        # A scalar input: nobs is 1, minmax collapses to the value itself,
        # and variance/skewness/kurtosis are undefined (nan).
        with suppress_warnings() as sup, np.errstate(invalid="ignore"), \
             pytest.warns(RuntimeWarning, match="Precision loss occurred"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            n, mm, m, v, sk, kurt = stats.describe(4.)
        assert_equal(n, 1)
        assert_equal(mm, (4.0, 4.0))
        assert_equal(m, 4.0)
        assert np.isnan(v)
        assert np.isnan(sk)
        assert np.isnan(kurt)

    def test_describe_numbers(self):
        # Column-wise statistics of a 5x4 block (three rows of ones,
        # two rows of twos), checked on axis=0 and via the transpose
        # on axis=1.
        x = np.vstack((np.ones((3,4)), np.full((2, 4), 2)))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.3, 0.3, 0.3, 0.3])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
        n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        # nan_policy='omit' drops the trailing nan; 'raise' and an
        # unrecognized policy are rejected.
        x = np.arange(10.)
        x[9] = np.nan

        nc, mmc = (9, (0.0, 8.0))
        mc = 4.0
        vc = 7.5
        skc = 0.0
        kurtc = -1.2300000000000002
        n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit')
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        assert_raises(ValueError, stats.describe, x, nan_policy='raise')
        assert_raises(ValueError, stats.describe, x, nan_policy='foobar')

    def test_describe_result_attributes(self):
        # The result is a namedtuple with the documented field names.
        actual = stats.describe(np.arange(5))
        attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                      'kurtosis')
        check_named_results(actual, attributes)

    def test_describe_ddof(self):
        # ddof=0 gives the biased (population) variance 0.24 instead of
        # the default ddof=1 value 0.3.
        x = np.vstack((np.ones((3, 4)), np.full((2, 4), 2)))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.24, 0.24, 0.24, 0.24])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x, ddof=0)
        assert_equal(n, nc)
        assert_allclose(mm, mmc, rtol=1e-15)
        assert_allclose(m, mc, rtol=1e-15)
        assert_allclose(v, vc, rtol=1e-15)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

    def test_describe_axis_none(self):
        # axis=None flattens the input before computing the statistics.
        x = np.vstack((np.ones((3, 4)), np.full((2, 4), 2)))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13)

    def test_describe_empty(self):
        # An empty sample is rejected.
        assert_raises(ValueError, stats.describe, [])
def test_normalitytests():
    # skewtest/kurtosistest/normaltest: reference values, one-sided
    # alternatives, nan handling, and input validation.
    with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
        assert_raises(ValueError, stats.skewtest, 4.)
        assert_raises(ValueError, stats.kurtosistest, 4.)
        assert_raises(ValueError, stats.normaltest, 4.)

    # numbers verified with R: dagoTest in package fBasics
    st_normal, st_skew, st_kurt = (3.92371918, 1.98078826, -0.01403734)
    pv_normal, pv_skew, pv_kurt = (0.14059673, 0.04761502, 0.98880019)
    # One-sided p-values derived from the two-sided references.
    pv_skew_less, pv_kurt_less = 1 - pv_skew / 2, pv_kurt / 2
    pv_skew_greater, pv_kurt_greater = pv_skew / 2, 1 - pv_kurt / 2
    x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
    attributes = ('statistic', 'pvalue')

    assert_array_almost_equal(stats.normaltest(x), (st_normal, pv_normal))
    check_named_results(stats.normaltest(x), attributes)
    assert_array_almost_equal(stats.skewtest(x), (st_skew, pv_skew))
    assert_array_almost_equal(stats.skewtest(x, alternative='less'),
                              (st_skew, pv_skew_less))
    assert_array_almost_equal(stats.skewtest(x, alternative='greater'),
                              (st_skew, pv_skew_greater))
    check_named_results(stats.skewtest(x), attributes)
    assert_array_almost_equal(stats.kurtosistest(x), (st_kurt, pv_kurt))
    assert_array_almost_equal(stats.kurtosistest(x, alternative='less'),
                              (st_kurt, pv_kurt_less))
    assert_array_almost_equal(stats.kurtosistest(x, alternative='greater'),
                              (st_kurt, pv_kurt_greater))
    check_named_results(stats.kurtosistest(x), attributes)

    # some more intuitive tests for kurtosistest and skewtest.
    # see gh-13549.
    # skew parameter is 1 > 0
    a1 = stats.skewnorm.rvs(a=1, size=10000, random_state=123)
    pval = stats.skewtest(a1, alternative='greater').pvalue
    assert_almost_equal(pval, 0.0, decimal=5)
    # excess kurtosis of laplace is 3 > 0
    a2 = stats.laplace.rvs(size=10000, random_state=123)
    pval = stats.kurtosistest(a2, alternative='greater').pvalue
    assert_almost_equal(pval, 0.0)

    # Test axis=None (equal to axis=0 for 1-D input)
    assert_array_almost_equal(stats.normaltest(x, axis=None),
                              (st_normal, pv_normal))
    assert_array_almost_equal(stats.skewtest(x, axis=None),
                              (st_skew, pv_skew))
    assert_array_almost_equal(stats.kurtosistest(x, axis=None),
                              (st_kurt, pv_kurt))

    # nan propagation (default) and nan_policy='omit' for skewtest
    x = np.arange(10.)
    x[9] = np.nan
    with np.errstate(invalid="ignore"):
        assert_array_equal(stats.skewtest(x), (np.nan, np.nan))

    expected = (1.0184643553962129, 0.30845733195153502)
    assert_array_almost_equal(stats.skewtest(x, nan_policy='omit'), expected)

    # test alternative with nan_policy='omit'
    a1[10:100] = np.nan
    z, p = stats.skewtest(a1, nan_policy='omit')
    zl, pl = stats.skewtest(a1, nan_policy='omit', alternative='less')
    zg, pg = stats.skewtest(a1, nan_policy='omit', alternative='greater')
    assert_allclose(zl, z, atol=1e-15)
    assert_allclose(zg, z, atol=1e-15)
    assert_allclose(pl, 1 - p/2, atol=1e-15)
    assert_allclose(pg, p/2, atol=1e-15)

    with np.errstate(all='ignore'):
        assert_raises(ValueError, stats.skewtest, x, nan_policy='raise')
    assert_raises(ValueError, stats.skewtest, x, nan_policy='foobar')
    assert_raises(ValueError, stats.skewtest, list(range(8)),
                  alternative='foobar')

    # nan propagation and nan_policy='omit' for kurtosistest
    x = np.arange(30.)
    x[29] = np.nan
    with np.errstate(all='ignore'):
        assert_array_equal(stats.kurtosistest(x), (np.nan, np.nan))

    expected = (-2.2683547379505273, 0.023307594135872967)
    assert_array_almost_equal(stats.kurtosistest(x, nan_policy='omit'),
                              expected)

    # test alternative with nan_policy='omit'
    a2[10:20] = np.nan
    z, p = stats.kurtosistest(a2[:100], nan_policy='omit')
    zl, pl = stats.kurtosistest(a2[:100], nan_policy='omit',
                                alternative='less')
    zg, pg = stats.kurtosistest(a2[:100], nan_policy='omit',
                                alternative='greater')
    assert_allclose(zl, z, atol=1e-15)
    assert_allclose(zg, z, atol=1e-15)
    assert_allclose(pl, 1 - p/2, atol=1e-15)
    assert_allclose(pg, p/2, atol=1e-15)

    assert_raises(ValueError, stats.kurtosistest, x, nan_policy='raise')
    assert_raises(ValueError, stats.kurtosistest, x, nan_policy='foobar')
    assert_raises(ValueError, stats.kurtosistest, list(range(20)),
                  alternative='foobar')

    # nan propagation and nan_policy='omit' for normaltest
    with np.errstate(all='ignore'):
        assert_array_equal(stats.normaltest(x), (np.nan, np.nan))

    expected = (6.2260409514287449, 0.04446644248650191)
    assert_array_almost_equal(stats.normaltest(x, nan_policy='omit'), expected)

    assert_raises(ValueError, stats.normaltest, x, nan_policy='raise')
    assert_raises(ValueError, stats.normaltest, x, nan_policy='foobar')

    # regression test for issue gh-9033: x clearly non-normal but power of
    # negative denom needs to be handled correctly to reject normality
    counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167]
    x = np.hstack([np.full(c, i) for i, c in enumerate(counts)])
    assert_equal(stats.kurtosistest(x)[1] < 0.01, True)
  4704. class TestRankSums:
  4705. np.random.seed(0)
  4706. x, y = np.random.rand(2, 10)
  4707. @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
  4708. def test_ranksums_result_attributes(self, alternative):
  4709. # ranksums pval = mannwhitneyu pval w/out continuity or tie correction
  4710. res1 = stats.ranksums(self.x, self.y,
  4711. alternative=alternative).pvalue
  4712. res2 = stats.mannwhitneyu(self.x, self.y, use_continuity=False,
  4713. alternative=alternative).pvalue
  4714. assert_allclose(res1, res2)
  4715. def test_ranksums_named_results(self):
  4716. res = stats.ranksums(self.x, self.y)
  4717. check_named_results(res, ('statistic', 'pvalue'))
  4718. def test_input_validation(self):
  4719. with assert_raises(ValueError, match="alternative must be 'less'"):
  4720. stats.ranksums(self.x, self.y, alternative='foobar')
  4721. class TestJarqueBera:
  4722. def test_jarque_bera_stats(self):
  4723. np.random.seed(987654321)
  4724. x = np.random.normal(0, 1, 100000)
  4725. y = np.random.chisquare(10000, 100000)
  4726. z = np.random.rayleigh(1, 100000)
  4727. assert_equal(stats.jarque_bera(x)[0], stats.jarque_bera(x).statistic)
  4728. assert_equal(stats.jarque_bera(x)[1], stats.jarque_bera(x).pvalue)
  4729. assert_equal(stats.jarque_bera(y)[0], stats.jarque_bera(y).statistic)
  4730. assert_equal(stats.jarque_bera(y)[1], stats.jarque_bera(y).pvalue)
  4731. assert_equal(stats.jarque_bera(z)[0], stats.jarque_bera(z).statistic)
  4732. assert_equal(stats.jarque_bera(z)[1], stats.jarque_bera(z).pvalue)
  4733. assert_(stats.jarque_bera(x)[1] > stats.jarque_bera(y)[1])
  4734. assert_(stats.jarque_bera(x).pvalue > stats.jarque_bera(y).pvalue)
  4735. assert_(stats.jarque_bera(x)[1] > stats.jarque_bera(z)[1])
  4736. assert_(stats.jarque_bera(x).pvalue > stats.jarque_bera(z).pvalue)
  4737. assert_(stats.jarque_bera(y)[1] > stats.jarque_bera(z)[1])
  4738. assert_(stats.jarque_bera(y).pvalue > stats.jarque_bera(z).pvalue)
  4739. def test_jarque_bera_array_like(self):
  4740. np.random.seed(987654321)
  4741. x = np.random.normal(0, 1, 100000)
  4742. jb_test1 = JB1, p1 = stats.jarque_bera(list(x))
  4743. jb_test2 = JB2, p2 = stats.jarque_bera(tuple(x))
  4744. jb_test3 = JB3, p3 = stats.jarque_bera(x.reshape(2, 50000))
  4745. assert_(JB1 == JB2 == JB3 == jb_test1.statistic == jb_test2.statistic == jb_test3.statistic)
  4746. assert_(p1 == p2 == p3 == jb_test1.pvalue == jb_test2.pvalue == jb_test3.pvalue)
  4747. def test_jarque_bera_size(self):
  4748. assert_raises(ValueError, stats.jarque_bera, [])
  4749. def test_axis(self):
  4750. rng = np.random.default_rng(abs(hash('JarqueBera')))
  4751. x = rng.random(size=(2, 45))
  4752. assert_equal(stats.jarque_bera(x, axis=None),
  4753. stats.jarque_bera(x.ravel()))
  4754. res = stats.jarque_bera(x, axis=1)
  4755. s0, p0 = stats.jarque_bera(x[0, :])
  4756. s1, p1 = stats.jarque_bera(x[1, :])
  4757. assert_allclose(res.statistic, [s0, s1])
  4758. assert_allclose(res.pvalue, [p0, p1])
  4759. resT = stats.jarque_bera(x.T, axis=0)
  4760. assert_allclose(res, resT)
  4761. def test_skewtest_too_few_samples():
  4762. # Regression test for ticket #1492.
  4763. # skewtest requires at least 8 samples; 7 should raise a ValueError.
  4764. x = np.arange(7.0)
  4765. assert_raises(ValueError, stats.skewtest, x)
  4766. def test_kurtosistest_too_few_samples():
  4767. # Regression test for ticket #1425.
  4768. # kurtosistest requires at least 5 samples; 4 should raise a ValueError.
  4769. x = np.arange(4.0)
  4770. assert_raises(ValueError, stats.kurtosistest, x)
class TestMannWhitneyU:
    """Tests for stats.mannwhitneyu.

    The expected U statistics and p-values below are precomputed reference
    values; the ones in ``test_mannwhitneyu_ones`` were checked against R's
    ``wilcox.test`` (see comments there).
    """

    # Fixed samples shared by the one-/two-sided tests below.
    X = [19.8958398126694, 19.5452691647182, 19.0577309166425, 21.716543054589,
         20.3269502208702, 20.0009273294025, 19.3440043632957, 20.4216806548105,
         19.0649894736528, 18.7808043120398, 19.3680942943298, 19.4848044069953,
         20.7514611265663, 19.0894948874598, 19.4975522356628, 18.9971170734274,
         20.3239606288208, 20.6921298083835, 19.0724259532507, 18.9825187935021,
         19.5144462609601, 19.8256857844223, 20.5174677102032, 21.1122407995892,
         17.9490854922535, 18.2847521114727, 20.1072217648826, 18.6439891962179,
         20.4970638083542, 19.5567594734914]
    Y = [19.2790668029091, 16.993808441865, 18.5416338448258, 17.2634018833575,
         19.1577183624616, 18.5119655377495, 18.6068455037221, 18.8358343362655,
         19.0366413269742, 18.1135025515417, 19.2201873866958, 17.8344909022841,
         18.2894380745856, 18.6661374133922, 19.9688601693252, 16.0672254617636,
         19.00596360572, 19.201561539032, 19.0487501090183, 19.0847908674356]
    # Number of significant digits used for all p-value comparisons.
    significant = 14

    def test_mannwhitneyu_one_sided(self):
        # Swapping the samples while flipping the alternative must give the
        # same p-value; the two opposite alternatives must differ.
        u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='less')
        u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='greater')
        u3, p3 = stats.mannwhitneyu(self.X, self.Y, alternative='greater')
        u4, p4 = stats.mannwhitneyu(self.Y, self.X, alternative='less')

        assert_equal(p1, p2)
        assert_equal(p3, p4)
        assert_(p1 != p3)
        # U statistics are pinned to precomputed reference values.
        assert_equal(u1, 498)
        assert_equal(u2, 102)
        assert_equal(u3, 498)
        assert_equal(u4, 102)
        assert_approx_equal(p1, 0.999957683256589, significant=self.significant)
        assert_approx_equal(p3, 4.5941632666275e-05, significant=self.significant)

    def test_mannwhitneyu_two_sided(self):
        # Two-sided p-value must be symmetric in the sample order.
        u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='two-sided')
        u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='two-sided')

        assert_equal(p1, p2)
        assert_equal(u1, 498)
        assert_equal(u2, 102)
        assert_approx_equal(p1, 9.188326533255e-05,
                            significant=self.significant)

    def test_mannwhitneyu_no_correct_one_sided(self):
        # Same as test_mannwhitneyu_one_sided but with the continuity
        # correction disabled (third positional argument: use_continuity).
        u1, p1 = stats.mannwhitneyu(self.X, self.Y, False,
                                    alternative='less')
        u2, p2 = stats.mannwhitneyu(self.Y, self.X, False,
                                    alternative='greater')
        u3, p3 = stats.mannwhitneyu(self.X, self.Y, False,
                                    alternative='greater')
        u4, p4 = stats.mannwhitneyu(self.Y, self.X, False,
                                    alternative='less')

        assert_equal(p1, p2)
        assert_equal(p3, p4)
        assert_(p1 != p3)
        assert_equal(u1, 498)
        assert_equal(u2, 102)
        assert_equal(u3, 498)
        assert_equal(u4, 102)
        assert_approx_equal(p1, 0.999955905990004, significant=self.significant)
        assert_approx_equal(p3, 4.40940099958089e-05, significant=self.significant)

    def test_mannwhitneyu_no_correct_two_sided(self):
        # Two-sided variant without the continuity correction.
        u1, p1 = stats.mannwhitneyu(self.X, self.Y, False,
                                    alternative='two-sided')
        u2, p2 = stats.mannwhitneyu(self.Y, self.X, False,
                                    alternative='two-sided')

        assert_equal(p1, p2)
        assert_equal(u1, 498)
        assert_equal(u2, 102)
        assert_approx_equal(p1, 8.81880199916178e-05,
                            significant=self.significant)

    def test_mannwhitneyu_ones(self):
        # test for gh-1428
        # Heavily tied data (mostly ones); checks the tie handling.
        x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1.])
        y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
                      2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
                      1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
                      2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
                      2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
                      1., 1., 1., 1.])

        # checked against R wilcox.test
        assert_allclose(stats.mannwhitneyu(x, y, alternative='less'),
                        (16980.5, 2.8214327656317373e-005))
        # p-value from R, e.g. wilcox.test(x, y, alternative="g")
        assert_allclose(stats.mannwhitneyu(x, y, alternative='greater'),
                        (16980.5, 0.9999719954296))
        assert_allclose(stats.mannwhitneyu(x, y, alternative='two-sided'),
                        (16980.5, 5.642865531266e-05))

    def test_mannwhitneyu_result_attributes(self):
        # test for namedtuple attribute results
        attributes = ('statistic', 'pvalue')
        res = stats.mannwhitneyu(self.X, self.Y, alternative="less")
        check_named_results(res, attributes)
  4881. def test_pointbiserial():
  4882. # same as mstats test except for the nan
  4883. # Test data: https://web.archive.org/web/20060504220742/https://support.sas.com/ctx/samples/index.jsp?sid=490&tab=output
  4884. x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
  4885. 0,0,0,0,1]
  4886. y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
  4887. 2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
  4888. 0.8,0.7,0.6,0.5,0.2,0.2,0.1]
  4889. assert_almost_equal(stats.pointbiserialr(x, y)[0], 0.36149, 5)
  4890. # test for namedtuple attribute results
  4891. attributes = ('correlation', 'pvalue')
  4892. res = stats.pointbiserialr(x, y)
  4893. check_named_results(res, attributes)
  4894. assert_equal(res.correlation, res.statistic)
  4895. def test_obrientransform():
  4896. # A couple tests calculated by hand.
  4897. x1 = np.array([0, 2, 4])
  4898. t1 = stats.obrientransform(x1)
  4899. expected = [7, -2, 7]
  4900. assert_allclose(t1[0], expected)
  4901. x2 = np.array([0, 3, 6, 9])
  4902. t2 = stats.obrientransform(x2)
  4903. expected = np.array([30, 0, 0, 30])
  4904. assert_allclose(t2[0], expected)
  4905. # Test two arguments.
  4906. a, b = stats.obrientransform(x1, x2)
  4907. assert_equal(a, t1[0])
  4908. assert_equal(b, t2[0])
  4909. # Test three arguments.
  4910. a, b, c = stats.obrientransform(x1, x2, x1)
  4911. assert_equal(a, t1[0])
  4912. assert_equal(b, t2[0])
  4913. assert_equal(c, t1[0])
  4914. # This is a regression test to check np.var replacement.
  4915. # The author of this test didn't separately verify the numbers.
  4916. x1 = np.arange(5)
  4917. result = np.array(
  4918. [[5.41666667, 1.04166667, -0.41666667, 1.04166667, 5.41666667],
  4919. [21.66666667, 4.16666667, -1.66666667, 4.16666667, 21.66666667]])
  4920. assert_array_almost_equal(stats.obrientransform(x1, 2*x1), result, decimal=8)
  4921. # Example from "O'Brien Test for Homogeneity of Variance"
  4922. # by Herve Abdi.
  4923. values = range(5, 11)
  4924. reps = np.array([5, 11, 9, 3, 2, 2])
  4925. data = np.repeat(values, reps)
  4926. transformed_values = np.array([3.1828, 0.5591, 0.0344,
  4927. 1.6086, 5.2817, 11.0538])
  4928. expected = np.repeat(transformed_values, reps)
  4929. result = stats.obrientransform(data)
  4930. assert_array_almost_equal(result[0], expected, decimal=4)
  4931. def check_equal_gmean(array_like, desired, axis=None, dtype=None, rtol=1e-7,
  4932. weights=None):
  4933. # Note this doesn't test when axis is not specified
  4934. x = stats.gmean(array_like, axis=axis, dtype=dtype, weights=weights)
  4935. assert_allclose(x, desired, rtol=rtol)
  4936. assert_equal(x.dtype, dtype)
  4937. def check_equal_hmean(array_like, desired, axis=None, dtype=None, rtol=1e-7,
  4938. weights=None):
  4939. x = stats.hmean(array_like, axis=axis, dtype=dtype, weights=weights)
  4940. assert_allclose(x, desired, rtol=rtol)
  4941. assert_equal(x.dtype, dtype)
  4942. def check_equal_pmean(array_like, exp, desired, axis=None, dtype=None,
  4943. rtol=1e-7, weights=None):
  4944. x = stats.pmean(array_like, exp, axis=axis, dtype=dtype, weights=weights)
  4945. assert_allclose(x, desired, rtol=rtol)
  4946. assert_equal(x.dtype, dtype)
class TestHarMean:
    """Tests for stats.hmean (harmonic mean), via the check_equal_hmean helper."""

    def test_0(self):
        # A zero in the data makes the harmonic mean 0.
        a = [1, 0, 2]
        desired = 0
        check_equal_hmean(a, desired)

    def test_1d_list(self):
        # Test a 1d list
        a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        desired = 34.1417152147
        check_equal_hmean(a, desired)

        # Compare against the harmonic mean written out explicitly.
        a = [1, 2, 3, 4]
        desired = 4. / (1. / 1 + 1. / 2 + 1. / 3 + 1. / 4)
        check_equal_hmean(a, desired)

    def test_1d_array(self):
        # Test a 1d array
        a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
        desired = 34.1417152147
        check_equal_hmean(a, desired)

    def test_1d_array_with_zero(self):
        a = np.array([1, 0])
        desired = 0.0
        assert_equal(stats.hmean(a), desired)

    def test_1d_array_with_negative_value(self):
        # Negative values are rejected with ValueError.
        a = np.array([1, 0, -1])
        assert_raises(ValueError, stats.hmean, a)

    # Note the next tests use axis=None as default, not axis=0
    def test_2d_list(self):
        # Test a 2d list
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = 38.6696271841
        check_equal_hmean(a, desired)

    def test_2d_array(self):
        # Test a 2d array
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = 38.6696271841
        check_equal_hmean(np.array(a), desired)

    def test_2d_axis0(self):
        # Test a 2d list with axis=0
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([22.88135593, 39.13043478, 52.90076336, 65.45454545])
        check_equal_hmean(a, desired, axis=0)

    def test_2d_axis0_with_zero(self):
        # A zero zeroes out only its own column.
        a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([22.88135593, 0.0, 52.90076336, 65.45454545])
        assert_allclose(stats.hmean(a, axis=0), desired)

    def test_2d_axis1(self):
        # Test a 2d list with axis=1
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([19.2, 63.03939962, 103.80078637])
        check_equal_hmean(a, desired, axis=1)

    def test_2d_axis1_with_zero(self):
        # A zero zeroes out only its own row.
        a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([0.0, 63.03939962, 103.80078637])
        assert_allclose(stats.hmean(a, axis=1), desired)

    def test_weights_1d_list(self):
        # Desired result from:
        # https://www.hackmath.net/en/math-problem/35871
        a = [2, 10, 6]
        weights = [10, 5, 3]
        desired = 3
        check_equal_hmean(a, desired, weights=weights, rtol=1e-5)

    def test_weights_2d_array_axis0(self):
        # Desired result from:
        # https://www.hackmath.net/en/math-problem/35871
        a = np.array([[2, 5], [10, 5], [6, 5]])
        weights = np.array([[10, 1], [5, 1], [3, 1]])
        desired = np.array([3, 5])
        check_equal_hmean(a, desired, axis=0, weights=weights, rtol=1e-5)

    def test_weights_2d_array_axis1(self):
        # Desired result from:
        # https://www.hackmath.net/en/math-problem/35871
        a = np.array([[2, 10, 6], [7, 7, 7]])
        weights = np.array([[10, 5, 3], [1, 1, 1]])
        desired = np.array([3, 7])
        check_equal_hmean(a, desired, axis=1, weights=weights, rtol=1e-5)

    def test_weights_masked_1d_array(self):
        # Masked weights must exclude the corresponding data points.
        # Desired result from:
        # https://www.hackmath.net/en/math-problem/35871
        a = np.array([2, 10, 6, 42])
        weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1])
        desired = 3
        check_equal_hmean(a, desired, weights=weights, rtol=1e-5)
class TestGeoMean:
    """Tests for stats.gmean (geometric mean), via the check_equal_gmean helper."""

    def test_0(self):
        # A zero in the data makes the geometric mean 0.
        a = [1, 0, 2]
        desired = 0
        check_equal_gmean(a, desired)

    def test_1d_list(self):
        # Test a 1d list
        a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        desired = 45.2872868812
        check_equal_gmean(a, desired)

        # Compare against the geometric mean written out explicitly.
        a = [1, 2, 3, 4]
        desired = power(1 * 2 * 3 * 4, 1. / 4.)
        check_equal_gmean(a, desired, rtol=1e-14)

    def test_1d_array(self):
        # Test a 1d array
        a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
        desired = 45.2872868812
        check_equal_gmean(a, desired)

        # float32 input/dtype should be honoured.
        a = array([1, 2, 3, 4], float32)
        desired = power(1 * 2 * 3 * 4, 1. / 4.)
        check_equal_gmean(a, desired, dtype=float32)

    # Note the next tests use axis=None as default, not axis=0
    def test_2d_list(self):
        # Test a 2d list
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = 52.8885199
        check_equal_gmean(a, desired)

    def test_2d_array(self):
        # Test a 2d array
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = 52.8885199
        check_equal_gmean(array(a), desired)

    def test_2d_axis0(self):
        # Test a 2d list with axis=0
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([35.56893304, 49.32424149, 61.3579244, 72.68482371])
        check_equal_gmean(a, desired, axis=0)

        # Identical rows: column-wise gmean equals any row.
        a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
        desired = array([1, 2, 3, 4])
        check_equal_gmean(a, desired, axis=0, rtol=1e-14)

    def test_2d_axis1(self):
        # Test a 2d list with axis=1
        a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
        desired = np.array([22.13363839, 64.02171746, 104.40086817])
        check_equal_gmean(a, desired, axis=1)

        # Identical rows: row-wise gmean is the same scalar per row.
        a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
        v = power(1 * 2 * 3 * 4, 1. / 4.)
        desired = array([v, v, v])
        check_equal_gmean(a, desired, axis=1, rtol=1e-14)

    def test_large_values(self):
        # Log-based computation avoids overflow for huge inputs.
        a = array([1e100, 1e200, 1e300])
        desired = 1e200
        check_equal_gmean(a, desired, rtol=1e-13)

    def test_1d_list0(self):
        # Test a 1d list with zero element
        a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 0]
        desired = 0.0  # due to exp(-inf)=0
        with np.errstate(all='ignore'):
            check_equal_gmean(a, desired)

    def test_1d_array0(self):
        # Test a 1d array with zero element
        a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 0])
        desired = 0.0  # due to exp(-inf)=0
        with np.errstate(divide='ignore'):
            check_equal_gmean(a, desired)

    def test_1d_list_neg(self):
        # Test a 1d list with negative element
        a = [10, 20, 30, 40, 50, 60, 70, 80, 90, -1]
        desired = np.nan  # due to log(-1) = nan
        with np.errstate(invalid='ignore'):
            check_equal_gmean(a, desired)

    def test_weights_1d_list(self):
        # Desired result from:
        # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
        a = [1, 2, 3, 4, 5]
        weights = [2, 5, 6, 4, 3]
        desired = 2.77748
        check_equal_gmean(a, desired, weights=weights, rtol=1e-5)

    def test_weights_1d_array(self):
        # Desired result from:
        # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
        a = np.array([1, 2, 3, 4, 5])
        weights = np.array([2, 5, 6, 4, 3])
        desired = 2.77748
        check_equal_gmean(a, desired, weights=weights, rtol=1e-5)

    def test_weights_masked_1d_array(self):
        # Masked weights must exclude the corresponding data points.
        # Desired result from:
        # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
        a = np.array([1, 2, 3, 4, 5, 6])
        weights = np.ma.array([2, 5, 6, 4, 3, 5], mask=[0, 0, 0, 0, 0, 1])
        desired = 2.77748
        check_equal_gmean(a, desired, weights=weights, rtol=1e-5)
  5121. class TestPowMean:
  5122. def pmean_reference(a, p):
  5123. return (np.sum(a**p) / a.size)**(1/p)
  5124. def wpmean_reference(a, p, weights):
  5125. return (np.sum(weights * a**p) / np.sum(weights))**(1/p)
  5126. def test_bad_exponent(self):
  5127. with pytest.raises(ValueError, match='Power mean only defined for'):
  5128. stats.pmean([1, 2, 3], [0])
  5129. with pytest.raises(ValueError, match='Power mean only defined for'):
  5130. stats.pmean([1, 2, 3], np.array([0]))
  5131. def test_1d_list(self):
  5132. a, p = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 3.5
  5133. desired = TestPowMean.pmean_reference(np.array(a), p)
  5134. check_equal_pmean(a, p, desired)
  5135. a, p = [1, 2, 3, 4], 2
  5136. desired = np.sqrt((1**2 + 2**2 + 3**2 + 4**2) / 4)
  5137. check_equal_pmean(a, p, desired)
  5138. def test_1d_array(self):
  5139. a, p = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]), -2.5
  5140. desired = TestPowMean.pmean_reference(a, p)
  5141. check_equal_pmean(a, p, desired)
  5142. def test_1d_array_with_zero(self):
  5143. a, p = np.array([1, 0]), -1
  5144. desired = 0.0
  5145. assert_equal(stats.pmean(a, p), desired)
  5146. def test_1d_array_with_negative_value(self):
  5147. a, p = np.array([1, 0, -1]), 1.23
  5148. with pytest.raises(ValueError, match='Power mean only defined if all'):
  5149. stats.pmean(a, p)
  5150. @pytest.mark.parametrize(
  5151. ("a", "p"),
  5152. [([[10, 20], [50, 60], [90, 100]], -0.5),
  5153. (np.array([[10, 20], [50, 60], [90, 100]]), 0.5)]
  5154. )
  5155. def test_2d_axisnone(self, a, p):
  5156. desired = TestPowMean.pmean_reference(np.array(a), p)
  5157. check_equal_pmean(a, p, desired)
  5158. @pytest.mark.parametrize(
  5159. ("a", "p"),
  5160. [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5),
  5161. ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)]
  5162. )
  5163. def test_2d_list_axis0(self, a, p):
  5164. desired = [
  5165. TestPowMean.pmean_reference(
  5166. np.array([a[i][j] for i in range(len(a))]), p
  5167. )
  5168. for j in range(len(a[0]))
  5169. ]
  5170. check_equal_pmean(a, p, desired, axis=0)
  5171. @pytest.mark.parametrize(
  5172. ("a", "p"),
  5173. [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5),
  5174. ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)]
  5175. )
  5176. def test_2d_list_axis1(self, a, p):
  5177. desired = [TestPowMean.pmean_reference(np.array(a_), p) for a_ in a]
  5178. check_equal_pmean(a, p, desired, axis=1)
  5179. def test_weights_1d_list(self):
  5180. a, p = [2, 10, 6], -1.23456789
  5181. weights = [10, 5, 3]
  5182. desired = TestPowMean.wpmean_reference(np.array(a), p, weights)
  5183. check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5)
  5184. def test_weights_masked_1d_array(self):
  5185. a, p = np.array([2, 10, 6, 42]), 1
  5186. weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1])
  5187. desired = np.average(a, weights=weights)
  5188. check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5)
  5189. @pytest.mark.parametrize(
  5190. ("axis", "fun_name", "p"),
  5191. [(None, "wpmean_reference", 9.87654321),
  5192. (0, "gmean", 0),
  5193. (1, "hmean", -1)]
  5194. )
  5195. def test_weights_2d_array(self, axis, fun_name, p):
  5196. if fun_name == 'wpmean_reference':
  5197. def fun(a, axis, weights):
  5198. return TestPowMean.wpmean_reference(a, p, weights)
  5199. else:
  5200. fun = getattr(stats, fun_name)
  5201. a = np.array([[2, 5], [10, 5], [6, 5]])
  5202. weights = np.array([[10, 1], [5, 1], [3, 1]])
  5203. desired = fun(a, axis=axis, weights=weights)
  5204. check_equal_pmean(a, p, desired, axis=axis, weights=weights, rtol=1e-5)
class TestGeometricStandardDeviation:
    """Tests for stats.gstd (geometric standard deviation)."""

    # must add 1 as `gstd` is only defined for positive values
    array_1d = np.arange(2 * 3 * 4) + 1
    # Precomputed gstd of `array_1d`; also the axis=None result for array_3d.
    gstd_array_1d = 2.294407613602
    array_3d = array_1d.reshape(2, 3, 4)

    def test_1d_array(self):
        gstd_actual = stats.gstd(self.array_1d)
        assert_allclose(gstd_actual, self.gstd_array_1d)

    def test_1d_numeric_array_like_input(self):
        # A tuple input must behave the same as an array.
        gstd_actual = stats.gstd(tuple(self.array_1d))
        assert_allclose(gstd_actual, self.gstd_array_1d)

    def test_raises_value_error_non_array_like_input(self):
        with pytest.raises(ValueError, match='Invalid array input'):
            stats.gstd('This should fail as it can not be cast to an array.')

    def test_raises_value_error_zero_entry(self):
        with pytest.raises(ValueError, match='Non positive value'):
            stats.gstd(np.append(self.array_1d, [0]))

    def test_raises_value_error_negative_entry(self):
        with pytest.raises(ValueError, match='Non positive value'):
            stats.gstd(np.append(self.array_1d, [-1]))

    def test_raises_value_error_inf_entry(self):
        with pytest.raises(ValueError, match='Infinite value'):
            stats.gstd(np.append(self.array_1d, [np.inf]))

    def test_propagates_nan_values(self):
        # A NaN should yield NaN only for its own row.
        a = array([[1, 1, 1, 16], [np.nan, 1, 2, 3]])
        gstd_actual = stats.gstd(a, axis=1)
        assert_allclose(gstd_actual, np.array([4, np.nan]))

    def test_ddof_equal_to_number_of_observations(self):
        with pytest.raises(ValueError, match='Degrees of freedom <= 0'):
            stats.gstd(self.array_1d, ddof=self.array_1d.size)

    def test_3d_array(self):
        # axis=None flattens, so this matches the 1d result.
        gstd_actual = stats.gstd(self.array_3d, axis=None)
        assert_allclose(gstd_actual, self.gstd_array_1d)

    def test_3d_array_axis_type_tuple(self):
        gstd_actual = stats.gstd(self.array_3d, axis=(1,2))
        assert_allclose(gstd_actual, [2.12939215, 1.22120169])

    def test_3d_array_axis_0(self):
        gstd_actual = stats.gstd(self.array_3d, axis=0)
        gstd_desired = np.array([
            [6.1330555493918, 3.958900210120, 3.1206598248344, 2.6651441426902],
            [2.3758135028411, 2.174581428192, 2.0260062829505, 1.9115518327308],
            [1.8205343606803, 1.746342404566, 1.6846557065742, 1.6325269194382]
        ])
        assert_allclose(gstd_actual, gstd_desired)

    def test_3d_array_axis_1(self):
        gstd_actual = stats.gstd(self.array_3d, axis=1)
        gstd_desired = np.array([
            [3.118993630946, 2.275985934063, 1.933995977619, 1.742896469724],
            [1.271693593916, 1.254158641801, 1.238774141609, 1.225164057869]
        ])
        assert_allclose(gstd_actual, gstd_desired)

    def test_3d_array_axis_2(self):
        gstd_actual = stats.gstd(self.array_3d, axis=2)
        gstd_desired = np.array([
            [1.8242475707664, 1.2243686572447, 1.1318311657788],
            [1.0934830582351, 1.0724479791887, 1.0591498540749]
        ])
        assert_allclose(gstd_actual, gstd_desired)

    def test_masked_3d_array(self):
        # Masked input: values agree with the unmasked computation and the
        # mask is propagated to the result.
        ma = np.ma.masked_where(self.array_3d > 16, self.array_3d)
        gstd_actual = stats.gstd(ma, axis=2)
        gstd_desired = stats.gstd(self.array_3d, axis=2)
        mask = [[0, 0, 0], [0, 1, 1]]
        assert_allclose(gstd_actual, gstd_desired)
        assert_equal(gstd_actual.mask, mask)
  5270. @pytest.mark.parametrize('alternative', ['two-sided', 'greater', 'less'])
  5271. def test_binom_test_deprecation(alternative):
  5272. deprecation_msg = ("'binom_test' is deprecated in favour of"
  5273. " 'binomtest' from version 1.7.0 and will"
  5274. " be removed in Scipy 1.12.0.")
  5275. num = 10
  5276. rng = np.random.default_rng(156114182869662948677852568516310985853)
  5277. X = rng.integers(10, 100, (num,))
  5278. N = X + rng.integers(0, 100, (num,))
  5279. P = rng.uniform(0, 1, (num,))
  5280. for x, n, p in zip(X, N, P):
  5281. with pytest.warns(DeprecationWarning, match=deprecation_msg):
  5282. res = stats.binom_test(x, n, p, alternative=alternative)
  5283. assert res == stats.binomtest(x, n, p, alternative=alternative).pvalue
  5284. def test_binomtest():
  5285. # precision tests compared to R for ticket:986
  5286. pp = np.concatenate((np.linspace(0.1, 0.2, 5),
  5287. np.linspace(0.45, 0.65, 5),
  5288. np.linspace(0.85, 0.95, 5)))
  5289. n = 501
  5290. x = 450
  5291. results = [0.0, 0.0, 1.0159969301994141e-304,
  5292. 2.9752418572150531e-275, 7.7668382922535275e-250,
  5293. 2.3381250925167094e-099, 7.8284591587323951e-081,
  5294. 9.9155947819961383e-065, 2.8729390725176308e-050,
  5295. 1.7175066298388421e-037, 0.0021070691951093692,
  5296. 0.12044570587262322, 0.88154763174802508, 0.027120993063129286,
  5297. 2.6102587134694721e-006]
  5298. for p, res in zip(pp, results):
  5299. assert_approx_equal(stats.binomtest(x, n, p).pvalue, res,
  5300. significant=12, err_msg='fail forp=%f' % p)
  5301. assert_approx_equal(stats.binomtest(50, 100, 0.1).pvalue,
  5302. 5.8320387857343647e-024,
  5303. significant=12)
  5304. def test_binomtest2():
  5305. # test added for issue #2384
  5306. res2 = [
  5307. [1.0, 1.0],
  5308. [0.5, 1.0, 0.5],
  5309. [0.25, 1.00, 1.00, 0.25],
  5310. [0.125, 0.625, 1.000, 0.625, 0.125],
  5311. [0.0625, 0.3750, 1.0000, 1.0000, 0.3750, 0.0625],
  5312. [0.03125, 0.21875, 0.68750, 1.00000, 0.68750, 0.21875, 0.03125],
  5313. [0.015625, 0.125000, 0.453125, 1.000000, 1.000000, 0.453125, 0.125000,
  5314. 0.015625],
  5315. [0.0078125, 0.0703125, 0.2890625, 0.7265625, 1.0000000, 0.7265625,
  5316. 0.2890625, 0.0703125, 0.0078125],
  5317. [0.00390625, 0.03906250, 0.17968750, 0.50781250, 1.00000000,
  5318. 1.00000000, 0.50781250, 0.17968750, 0.03906250, 0.00390625],
  5319. [0.001953125, 0.021484375, 0.109375000, 0.343750000, 0.753906250,
  5320. 1.000000000, 0.753906250, 0.343750000, 0.109375000, 0.021484375,
  5321. 0.001953125]
  5322. ]
  5323. for k in range(1, 11):
  5324. res1 = [stats.binomtest(v, k, 0.5).pvalue for v in range(k + 1)]
  5325. assert_almost_equal(res1, res2[k-1], decimal=10)
  5326. def test_binomtest3():
  5327. # test added for issue #2384
  5328. # test when x == n*p and neighbors
  5329. res3 = [stats.binomtest(v, v*k, 1./k).pvalue
  5330. for v in range(1, 11) for k in range(2, 11)]
  5331. assert_equal(res3, np.ones(len(res3), int))
  5332. # > bt=c()
  5333. # > for(i in as.single(1:10)) {
  5334. # + for(k in as.single(2:10)) {
  5335. # + bt = c(bt, binom.test(i-1, k*i,(1/k))$p.value);
  5336. # + print(c(i+1, k*i,(1/k)))
  5337. # + }
  5338. # + }
  5339. binom_testm1 = np.array([
  5340. 0.5, 0.5555555555555556, 0.578125, 0.5904000000000003,
  5341. 0.5981224279835393, 0.603430543396034, 0.607304096221924,
  5342. 0.610255656871054, 0.612579511000001, 0.625, 0.670781893004115,
  5343. 0.68853759765625, 0.6980101120000006, 0.703906431368616,
  5344. 0.70793209416498, 0.7108561134173507, 0.713076544331419,
  5345. 0.714820192935702, 0.6875, 0.7268709038256367, 0.7418963909149174,
  5346. 0.74986110468096, 0.7548015520398076, 0.7581671424768577,
  5347. 0.760607984787832, 0.762459425024199, 0.7639120677676575, 0.7265625,
  5348. 0.761553963657302, 0.774800934828818, 0.7818005980538996,
  5349. 0.78613491480358, 0.789084353140195, 0.7912217659828884,
  5350. 0.79284214559524, 0.794112956558801, 0.75390625, 0.7856929451142176,
  5351. 0.7976688481430754, 0.8039848974727624, 0.807891868948366,
  5352. 0.8105487660137676, 0.812473307174702, 0.8139318233591120,
  5353. 0.815075399104785, 0.7744140625, 0.8037322594985427,
  5354. 0.814742863657656, 0.8205425178645808, 0.8241275984172285,
  5355. 0.8265645374416, 0.8283292196088257, 0.829666291102775,
  5356. 0.8307144686362666, 0.7905273437499996, 0.8178712053954738,
  5357. 0.828116983756619, 0.833508948940494, 0.8368403871552892,
  5358. 0.839104213210105, 0.840743186196171, 0.84198481438049,
  5359. 0.8429580531563676, 0.803619384765625, 0.829338573944648,
  5360. 0.8389591907548646, 0.84401876783902, 0.84714369697889,
  5361. 0.8492667010581667, 0.850803474598719, 0.851967542858308,
  5362. 0.8528799045949524, 0.8145294189453126, 0.838881732845347,
  5363. 0.847979024541911, 0.852760894015685, 0.8557134656773457,
  5364. 0.8577190131799202, 0.85917058278431, 0.860270010472127,
  5365. 0.861131648404582, 0.823802947998047, 0.846984756807511,
  5366. 0.855635653643743, 0.860180994825685, 0.86298688573253,
  5367. 0.864892525675245, 0.866271647085603, 0.867316125625004,
  5368. 0.8681346531755114
  5369. ])
  5370. # > bt=c()
  5371. # > for(i in as.single(1:10)) {
  5372. # + for(k in as.single(2:10)) {
  5373. # + bt = c(bt, binom.test(i+1, k*i,(1/k))$p.value);
  5374. # + print(c(i+1, k*i,(1/k)))
  5375. # + }
  5376. # + }
  5377. binom_testp1 = np.array([
  5378. 0.5, 0.259259259259259, 0.26171875, 0.26272, 0.2632244513031551,
  5379. 0.2635138663069203, 0.2636951804161073, 0.2638162407564354,
  5380. 0.2639010709000002, 0.625, 0.4074074074074074, 0.42156982421875,
  5381. 0.4295746560000003, 0.43473045988554, 0.4383309503172684,
  5382. 0.4409884859402103, 0.4430309389962837, 0.444649849401104, 0.6875,
  5383. 0.4927602499618962, 0.5096031427383425, 0.5189636628480,
  5384. 0.5249280070771274, 0.5290623300865124, 0.5320974248125793,
  5385. 0.5344204730474308, 0.536255847400756, 0.7265625, 0.5496019313526808,
  5386. 0.5669248746708034, 0.576436455045805, 0.5824538812831795,
  5387. 0.5866053321547824, 0.589642781414643, 0.5919618019300193,
  5388. 0.593790427805202, 0.75390625, 0.590868349763505, 0.607983393277209,
  5389. 0.617303847446822, 0.623172512167948, 0.627208862156123,
  5390. 0.6301556891501057, 0.632401894928977, 0.6341708982290303,
  5391. 0.7744140625, 0.622562037497196, 0.639236102912278, 0.648263335014579,
  5392. 0.65392850011132, 0.657816519817211, 0.660650782947676,
  5393. 0.662808780346311, 0.6645068560246006, 0.7905273437499996,
  5394. 0.6478843304312477, 0.6640468318879372, 0.6727589686071775,
  5395. 0.6782129857784873, 0.681950188903695, 0.684671508668418,
  5396. 0.686741824999918, 0.688369886732168, 0.803619384765625,
  5397. 0.668716055304315, 0.684360013879534, 0.6927642396829181,
  5398. 0.6980155964704895, 0.701609591890657, 0.7042244320992127,
  5399. 0.7062125081341817, 0.707775152962577, 0.8145294189453126,
  5400. 0.686243374488305, 0.7013873696358975, 0.709501223328243,
  5401. 0.714563595144314, 0.718024953392931, 0.7205416252126137,
  5402. 0.722454130389843, 0.723956813292035, 0.823802947998047,
  5403. 0.701255953767043, 0.715928221686075, 0.723772209289768,
  5404. 0.7286603031173616, 0.7319999279787631, 0.7344267920995765,
  5405. 0.736270323773157, 0.737718376096348
  5406. ])
  5407. res4_p1 = [stats.binomtest(v+1, v*k, 1./k).pvalue
  5408. for v in range(1, 11) for k in range(2, 11)]
  5409. res4_m1 = [stats.binomtest(v-1, v*k, 1./k).pvalue
  5410. for v in range(1, 11) for k in range(2, 11)]
  5411. assert_almost_equal(res4_p1, binom_testp1, decimal=13)
  5412. assert_almost_equal(res4_m1, binom_testm1, decimal=13)
  5413. class TestTrim:
  5414. # test trim functions
  5415. def test_trim1(self):
  5416. a = np.arange(11)
  5417. assert_equal(np.sort(stats.trim1(a, 0.1)), np.arange(10))
  5418. assert_equal(np.sort(stats.trim1(a, 0.2)), np.arange(9))
  5419. assert_equal(np.sort(stats.trim1(a, 0.2, tail='left')),
  5420. np.arange(2, 11))
  5421. assert_equal(np.sort(stats.trim1(a, 3/11., tail='left')),
  5422. np.arange(3, 11))
  5423. assert_equal(stats.trim1(a, 1.0), [])
  5424. assert_equal(stats.trim1(a, 1.0, tail='left'), [])
  5425. # empty input
  5426. assert_equal(stats.trim1([], 0.1), [])
  5427. assert_equal(stats.trim1([], 3/11., tail='left'), [])
  5428. assert_equal(stats.trim1([], 4/6.), [])
  5429. # test axis
  5430. a = np.arange(24).reshape(6, 4)
  5431. ref = np.arange(4, 24).reshape(5, 4) # first row trimmed
  5432. axis = 0
  5433. trimmed = stats.trim1(a, 0.2, tail='left', axis=axis)
  5434. assert_equal(np.sort(trimmed, axis=axis), ref)
  5435. axis = 1
  5436. trimmed = stats.trim1(a.T, 0.2, tail='left', axis=axis)
  5437. assert_equal(np.sort(trimmed, axis=axis), ref.T)
  5438. def test_trimboth(self):
  5439. a = np.arange(11)
  5440. assert_equal(np.sort(stats.trimboth(a, 3/11.)), np.arange(3, 8))
  5441. assert_equal(np.sort(stats.trimboth(a, 0.2)),
  5442. np.array([2, 3, 4, 5, 6, 7, 8]))
  5443. assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(6, 4), 0.2)),
  5444. np.arange(4, 20).reshape(4, 4))
  5445. assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(4, 6).T,
  5446. 2/6.)),
  5447. np.array([[2, 8, 14, 20], [3, 9, 15, 21]]))
  5448. assert_raises(ValueError, stats.trimboth,
  5449. np.arange(24).reshape(4, 6).T, 4/6.)
  5450. # empty input
  5451. assert_equal(stats.trimboth([], 0.1), [])
  5452. assert_equal(stats.trimboth([], 3/11.), [])
  5453. assert_equal(stats.trimboth([], 4/6.), [])
  5454. def test_trim_mean(self):
  5455. # don't use pre-sorted arrays
  5456. a = np.array([4, 8, 2, 0, 9, 5, 10, 1, 7, 3, 6])
  5457. idx = np.array([3, 5, 0, 1, 2, 4])
  5458. a2 = np.arange(24).reshape(6, 4)[idx, :]
  5459. a3 = np.arange(24).reshape(6, 4, order='F')[idx, :]
  5460. assert_equal(stats.trim_mean(a3, 2/6.),
  5461. np.array([2.5, 8.5, 14.5, 20.5]))
  5462. assert_equal(stats.trim_mean(a2, 2/6.),
  5463. np.array([10., 11., 12., 13.]))
  5464. idx4 = np.array([1, 0, 3, 2])
  5465. a4 = np.arange(24).reshape(4, 6)[idx4, :]
  5466. assert_equal(stats.trim_mean(a4, 2/6.),
  5467. np.array([9., 10., 11., 12., 13., 14.]))
  5468. # shuffled arange(24) as array_like
  5469. a = [7, 11, 12, 21, 16, 6, 22, 1, 5, 0, 18, 10, 17, 9, 19, 15, 23,
  5470. 20, 2, 14, 4, 13, 8, 3]
  5471. assert_equal(stats.trim_mean(a, 2/6.), 11.5)
  5472. assert_equal(stats.trim_mean([5,4,3,1,2,0], 2/6.), 2.5)
  5473. # check axis argument
  5474. np.random.seed(1234)
  5475. a = np.random.randint(20, size=(5, 6, 4, 7))
  5476. for axis in [0, 1, 2, 3, -1]:
  5477. res1 = stats.trim_mean(a, 2/6., axis=axis)
  5478. res2 = stats.trim_mean(np.moveaxis(a, axis, 0), 2/6.)
  5479. assert_equal(res1, res2)
  5480. res1 = stats.trim_mean(a, 2/6., axis=None)
  5481. res2 = stats.trim_mean(a.ravel(), 2/6.)
  5482. assert_equal(res1, res2)
  5483. assert_raises(ValueError, stats.trim_mean, a, 0.6)
  5484. # empty input
  5485. assert_equal(stats.trim_mean([], 0.0), np.nan)
  5486. assert_equal(stats.trim_mean([], 0.6), np.nan)
  5487. class TestSigmaClip:
  5488. def test_sigmaclip1(self):
  5489. a = np.concatenate((np.linspace(9.5, 10.5, 31), np.linspace(0, 20, 5)))
  5490. fact = 4 # default
  5491. c, low, upp = stats.sigmaclip(a)
  5492. assert_(c.min() > low)
  5493. assert_(c.max() < upp)
  5494. assert_equal(low, c.mean() - fact*c.std())
  5495. assert_equal(upp, c.mean() + fact*c.std())
  5496. assert_equal(c.size, a.size)
  5497. def test_sigmaclip2(self):
  5498. a = np.concatenate((np.linspace(9.5, 10.5, 31), np.linspace(0, 20, 5)))
  5499. fact = 1.5
  5500. c, low, upp = stats.sigmaclip(a, fact, fact)
  5501. assert_(c.min() > low)
  5502. assert_(c.max() < upp)
  5503. assert_equal(low, c.mean() - fact*c.std())
  5504. assert_equal(upp, c.mean() + fact*c.std())
  5505. assert_equal(c.size, 4)
  5506. assert_equal(a.size, 36) # check original array unchanged
  5507. def test_sigmaclip3(self):
  5508. a = np.concatenate((np.linspace(9.5, 10.5, 11),
  5509. np.linspace(-100, -50, 3)))
  5510. fact = 1.8
  5511. c, low, upp = stats.sigmaclip(a, fact, fact)
  5512. assert_(c.min() > low)
  5513. assert_(c.max() < upp)
  5514. assert_equal(low, c.mean() - fact*c.std())
  5515. assert_equal(upp, c.mean() + fact*c.std())
  5516. assert_equal(c, np.linspace(9.5, 10.5, 11))
  5517. def test_sigmaclip_result_attributes(self):
  5518. a = np.concatenate((np.linspace(9.5, 10.5, 11),
  5519. np.linspace(-100, -50, 3)))
  5520. fact = 1.8
  5521. res = stats.sigmaclip(a, fact, fact)
  5522. attributes = ('clipped', 'lower', 'upper')
  5523. check_named_results(res, attributes)
  5524. def test_std_zero(self):
  5525. # regression test #8632
  5526. x = np.ones(10)
  5527. assert_equal(stats.sigmaclip(x)[0], x)
class TestAlexanderGovern:
    # Tests for stats.alexandergovern, a k-sample test for equality of
    # means that does not assume equal variances.  Reference values below
    # were generated with the R package "onewaytests" (ag.test), as shown
    # in the embedded R transcripts.

    def test_compare_dtypes(self):
        # The result must not depend on the input dtype: the same data as
        # int16 / int32 / uint8 / float64 must give identical results.
        args = [[13, 13, 13, 13, 13, 13, 13, 12, 12],
                [14, 13, 12, 12, 12, 12, 12, 11, 11],
                [14, 14, 13, 13, 13, 13, 13, 12, 12],
                [15, 14, 13, 13, 13, 12, 12, 12, 11]]
        args_int16 = np.array(args, dtype=np.int16)
        args_int32 = np.array(args, dtype=np.int32)
        args_uint8 = np.array(args, dtype=np.uint8)
        args_float64 = np.array(args, dtype=np.float64)
        res_int16 = stats.alexandergovern(*args_int16)
        res_int32 = stats.alexandergovern(*args_int32)
        # (name is a typo for "uint8", kept as-is)
        res_unit8 = stats.alexandergovern(*args_uint8)
        res_float64 = stats.alexandergovern(*args_float64)
        assert (res_int16.pvalue == res_int32.pvalue ==
                res_unit8.pvalue == res_float64.pvalue)
        assert (res_int16.statistic == res_int32.statistic ==
                res_unit8.statistic == res_float64.statistic)

    def test_bad_inputs(self):
        # Each invalid input must raise ValueError with a specific message.
        # input array is of size zero
        with assert_raises(ValueError, match="Input sample size must be"
                           " greater than one."):
            stats.alexandergovern([1, 2], [])
        # input is a singular non list element
        with assert_raises(ValueError, match="Input sample size must be"
                           " greater than one."):
            stats.alexandergovern([1, 2], 2)
        # input list is of size 1
        with assert_raises(ValueError, match="Input sample size must be"
                           " greater than one."):
            stats.alexandergovern([1, 2], [2])
        # inputs are not finite (infinity)
        with assert_raises(ValueError, match="Input samples must be finite."):
            stats.alexandergovern([1, 2], [np.inf, np.inf])
        # inputs are multidimensional
        with assert_raises(ValueError, match="Input samples must be one"
                           "-dimensional"):
            stats.alexandergovern([1, 2], [[1, 2], [3, 4]])

    def test_compare_r(self):
        '''
        Data generated in R with
        > set.seed(1)
        > library("onewaytests")
        > library("tibble")
        > y <- c(rnorm(40, sd=10),
        +        rnorm(30, sd=15),
        +        rnorm(20, sd=20))
        > x <- c(rep("one", times=40),
        +        rep("two", times=30),
        +        rep("eight", times=20))
        > x <- factor(x)
        > ag.test(y ~ x, tibble(y,x))
        Alexander-Govern Test (alpha = 0.05)
        -------------------------------------------------------------
        data : y and x

        statistic  : 1.359941

        parameter  : 2

        p.value    : 0.5066321

        Result     : Difference is not statistically significant.
        -------------------------------------------------------------
        Example adapted from:
        https://eval-serv2.metpsy.uni-jena.de/wiki-metheval-hp/index.php/R_FUN_Alexander-Govern

        '''
        one = [-6.264538107423324, 1.8364332422208225, -8.356286124100471,
               15.952808021377916, 3.295077718153605, -8.204683841180152,
               4.874290524284853, 7.383247051292173, 5.757813516534923,
               -3.0538838715635603, 15.11781168450848, 3.898432364114311,
               -6.2124058054180376, -22.146998871774997, 11.249309181431082,
               -0.4493360901523085, -0.16190263098946087, 9.438362106852992,
               8.212211950980885, 5.939013212175088, 9.189773716082183,
               7.821363007310671, 0.745649833651906, -19.89351695863373,
               6.198257478947102, -0.5612873952900078, -1.557955067053293,
               -14.707523838992744, -4.781500551086204, 4.179415601997024,
               13.58679551529044, -1.0278772734299553, 3.876716115593691,
               -0.5380504058290512, -13.770595568286065, -4.149945632996798,
               -3.942899537103493, -0.5931339671118566, 11.000253719838831,
               7.631757484575442]

        two = [-2.4678539438038034, -3.8004252020476135, 10.454450631071062,
               8.34994798010486, -10.331335418242798, -10.612427354431794,
               5.468729432052455, 11.527993867731237, -1.6851931822534207,
               13.216615896813222, 5.971588205506021, -9.180395898761569,
               5.116795371366372, -16.94044644121189, 21.495355525515556,
               29.7059984775879, -5.508322146997636, -15.662019394747961,
               8.545794411636193, -2.0258190582123654, 36.024266407571645,
               -0.5886000409975387, 10.346090436761651, 0.4200323817099909,
               -11.14909813323608, 2.8318844927151434, -27.074379433365568,
               21.98332292344329, 2.2988000731784655, 32.58917505543229]

        eight = [9.510190577993251, -14.198928618436291, 12.214527069781099,
                 -18.68195263288503, -25.07266800478204, 5.828924710349257,
                 -8.86583746436866, 0.02210703263248262, 1.4868264830332811,
                 -11.79041892376144, -11.37337465637004, -2.7035723024766414,
                 23.56173993146409, -30.47133600859524, 11.878923752568431,
                 6.659007424270365, 21.261996745527256, -6.083678472686013,
                 7.400376198325763, 5.341975815444621]
        soln = stats.alexandergovern(one, two, eight)
        assert_allclose(soln.statistic, 1.3599405447999450836)
        assert_allclose(soln.pvalue, 0.50663205309676440091)

    def test_compare_scholar(self):
        '''
        Data taken from 'The Modification and Evaluation of the
        Alexander-Govern Test in Terms of Power' by Kingsley Ochuko, T.,
        Abdullah, S., Binti Zain, Z., & Soaad Syed Yahaya, S. (2015).
        '''
        young = [482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62,
                 518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1, 584.68,
                 609.09, 609.53, 666.63, 676.4]
        middle = [335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85,
                  487.3, 493.08, 494.31, 499.1, 886.41]
        old = [519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24, 558.61,
               558.95, 565.43, 586.39, 594.69, 629.22, 645.69, 691.84]
        soln = stats.alexandergovern(young, middle, old)
        # Values quoted in the paper (lower precision than the R output
        # below, hence the looser tolerances here).
        assert_allclose(soln.statistic, 5.3237, atol=1e-3)
        assert_allclose(soln.pvalue, 0.06982, atol=1e-4)

        # verify with ag.test in r
        '''
        > library("onewaytests")
        > library("tibble")
        > young <- c(482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62,
        +                518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1,
        +                 584.68, 609.09, 609.53, 666.63, 676.4)
        > middle <- c(335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85,
        +              487.3, 493.08, 494.31, 499.1, 886.41)
        > old <- c(519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24,
        +           558.61, 558.95, 565.43, 586.39, 594.69, 629.22,
        +           645.69, 691.84)
        > young_fct <- c(rep("young", times=19))
        > middle_fct <-c(rep("middle", times=12))
        > old_fct <- c(rep("old", times=15))
        > ag.test(a ~ b, tibble(a=c(young, middle, old), b=factor(c(young_fct,
        +                                          middle_fct, old_fct))))

        Alexander-Govern Test (alpha = 0.05)
        -------------------------------------------------------------
        data : a and b

        statistic  : 5.324629

        parameter  : 2

        p.value    : 0.06978651

        Result     : Difference is not statistically significant.
        -------------------------------------------------------------

        '''
        assert_allclose(soln.statistic, 5.324629)
        assert_allclose(soln.pvalue, 0.06978651)

    def test_compare_scholar3(self):
        '''
        Data taken from 'Robustness And Comparative Power Of WelchAspin,
        Alexander-Govern And Yuen Tests Under Non-Normality And Variance
        Heteroscedasticity', by Ayed A. Almoied. 2017. Page 34-37.
        https://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2775&context=oa_dissertations
        '''
        x1 = [-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152,
              0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555,
              0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442,
              0.999554, 1.642958]
        x2 = [-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253,
              -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824,
              0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475,
              1.33964, 1.576766]
        soln = stats.alexandergovern(x1, x2)
        assert_allclose(soln.statistic, 0.713526, atol=1e-5)
        assert_allclose(soln.pvalue, 0.398276, atol=1e-5)

        '''
        tested in ag.test in R:
        > library("onewaytests")
        > library("tibble")
        > x1 <- c(-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152,
        +          0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555,
        +          0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442,
        +          0.999554, 1.642958)
        > x2 <- c(-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253,
        +         -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824,
        +         0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475,
        +         1.33964, 1.576766)
        > x1_fact <- c(rep("x1", times=20))
        > x2_fact <- c(rep("x2", times=20))
        > a <- c(x1, x2)
        > b <- factor(c(x1_fact, x2_fact))
        > ag.test(a ~ b, tibble(a, b))

        Alexander-Govern Test (alpha = 0.05)
        -------------------------------------------------------------
        data : a and b

        statistic  : 0.7135182

        parameter  : 1

        p.value    : 0.3982783

        Result     : Difference is not statistically significant.
        -------------------------------------------------------------
        '''
        assert_allclose(soln.statistic, 0.7135182)
        assert_allclose(soln.pvalue, 0.3982783)

    def test_nan_policy_propogate(self):
        # (method name keeps the historical "propogate" typo)
        args = [[1, 2, 3, 4], [1, np.nan]]
        # default nan_policy is 'propagate': nan in -> nan out
        res = stats.alexandergovern(*args)
        assert_equal(res.pvalue, np.nan)
        assert_equal(res.statistic, np.nan)

    def test_nan_policy_raise(self):
        args = [[1, 2, 3, 4], [1, np.nan]]
        with assert_raises(ValueError, match="The input contains nan values"):
            stats.alexandergovern(*args, nan_policy='raise')

    def test_nan_policy_omit(self):
        # With 'omit', nan (and None) entries are dropped; the result must
        # match running the test on the data with those entries removed.
        args_nan = [[1, 2, 3, None, 4], [1, np.nan, 19, 25]]
        args_no_nan = [[1, 2, 3, 4], [1, 19, 25]]
        res_nan = stats.alexandergovern(*args_nan, nan_policy='omit')
        res_no_nan = stats.alexandergovern(*args_no_nan)
        assert_equal(res_nan.pvalue, res_no_nan.pvalue)
        assert_equal(res_nan.statistic, res_no_nan.statistic)

    def test_constant_input(self):
        # Zero variance input, consistent with `stats.pearsonr`
        msg = "An input array is constant; the statistic is not defined."
        with assert_warns(stats.ConstantInputWarning, match=msg):
            res = stats.alexandergovern([0.667, 0.667, 0.667],
                                        [0.123, 0.456, 0.789])
            assert_equal(res.statistic, np.nan)
            assert_equal(res.pvalue, np.nan)
  5740. class TestFOneWay:
  5741. def test_trivial(self):
  5742. # A trivial test of stats.f_oneway, with F=0.
  5743. F, p = stats.f_oneway([0, 2], [0, 2])
  5744. assert_equal(F, 0.0)
  5745. assert_equal(p, 1.0)
  5746. def test_basic(self):
  5747. # Despite being a floating point calculation, this data should
  5748. # result in F being exactly 2.0.
  5749. F, p = stats.f_oneway([0, 2], [2, 4])
  5750. assert_equal(F, 2.0)
  5751. assert_allclose(p, 1 - np.sqrt(0.5), rtol=1e-14)
  5752. def test_known_exact(self):
  5753. # Another trivial dataset for which the exact F and p can be
  5754. # calculated.
  5755. F, p = stats.f_oneway([2], [2], [2, 3, 4])
  5756. # The use of assert_equal might be too optimistic, but the calculation
  5757. # in this case is trivial enough that it is likely to go through with
  5758. # no loss of precision.
  5759. assert_equal(F, 3/5)
  5760. assert_equal(p, 5/8)
  5761. def test_large_integer_array(self):
  5762. a = np.array([655, 788], dtype=np.uint16)
  5763. b = np.array([789, 772], dtype=np.uint16)
  5764. F, p = stats.f_oneway(a, b)
  5765. # The expected value was verified by computing it with mpmath with
  5766. # 40 digits of precision.
  5767. assert_allclose(F, 0.77450216931805540, rtol=1e-14)
  5768. def test_result_attributes(self):
  5769. a = np.array([655, 788], dtype=np.uint16)
  5770. b = np.array([789, 772], dtype=np.uint16)
  5771. res = stats.f_oneway(a, b)
  5772. attributes = ('statistic', 'pvalue')
  5773. check_named_results(res, attributes)
  5774. def test_nist(self):
  5775. # These are the nist ANOVA files. They can be found at:
  5776. # https://www.itl.nist.gov/div898/strd/anova/anova.html
  5777. filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
  5778. 'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
  5779. 'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
  5780. for test_case in filenames:
  5781. rtol = 1e-7
  5782. fname = os.path.abspath(os.path.join(os.path.dirname(__file__),
  5783. 'data/nist_anova', test_case))
  5784. with open(fname, 'r') as f:
  5785. content = f.read().split('\n')
  5786. certified = [line.split() for line in content[40:48]
  5787. if line.strip()]
  5788. dataf = np.loadtxt(fname, skiprows=60)
  5789. y, x = dataf.T
  5790. y = y.astype(int)
  5791. caty = np.unique(y)
  5792. f = float(certified[0][-1])
  5793. xlist = [x[y == i] for i in caty]
  5794. res = stats.f_oneway(*xlist)
  5795. # With the hard test cases we relax the tolerance a bit.
  5796. hard_tc = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
  5797. if test_case in hard_tc:
  5798. rtol = 1e-4
  5799. assert_allclose(res[0], f, rtol=rtol,
  5800. err_msg='Failing testcase: %s' % test_case)
  5801. @pytest.mark.parametrize("a, b, expected", [
  5802. (np.array([42, 42, 42]), np.array([7, 7, 7]), (np.inf, 0)),
  5803. (np.array([42, 42, 42]), np.array([42, 42, 42]), (np.nan, np.nan))
  5804. ])
  5805. def test_constant_input(self, a, b, expected):
  5806. # For more details, look on https://github.com/scipy/scipy/issues/11669
  5807. msg = "Each of the input arrays is constant;"
  5808. with assert_warns(stats.ConstantInputWarning, match=msg):
  5809. f, p = stats.f_oneway(a, b)
  5810. assert f, p == expected
  5811. @pytest.mark.parametrize('axis', [-2, -1, 0, 1])
  5812. def test_2d_inputs(self, axis):
  5813. a = np.array([[1, 4, 3, 3],
  5814. [2, 5, 3, 3],
  5815. [3, 6, 3, 3],
  5816. [2, 3, 3, 3],
  5817. [1, 4, 3, 3]])
  5818. b = np.array([[3, 1, 5, 3],
  5819. [4, 6, 5, 3],
  5820. [4, 3, 5, 3],
  5821. [1, 5, 5, 3],
  5822. [5, 5, 5, 3],
  5823. [2, 3, 5, 3],
  5824. [8, 2, 5, 3],
  5825. [2, 2, 5, 3]])
  5826. c = np.array([[4, 3, 4, 3],
  5827. [4, 2, 4, 3],
  5828. [5, 4, 4, 3],
  5829. [5, 4, 4, 3]])
  5830. if axis in [-1, 1]:
  5831. a = a.T
  5832. b = b.T
  5833. c = c.T
  5834. take_axis = 0
  5835. else:
  5836. take_axis = 1
  5837. warn_msg = "Each of the input arrays is constant;"
  5838. with assert_warns(stats.ConstantInputWarning, match=warn_msg):
  5839. f, p = stats.f_oneway(a, b, c, axis=axis)
  5840. # Verify that the result computed with the 2d arrays matches
  5841. # the result of calling f_oneway individually on each slice.
  5842. for j in [0, 1]:
  5843. fj, pj = stats.f_oneway(np.take(a, j, take_axis),
  5844. np.take(b, j, take_axis),
  5845. np.take(c, j, take_axis))
  5846. assert_allclose(f[j], fj, rtol=1e-14)
  5847. assert_allclose(p[j], pj, rtol=1e-14)
  5848. for j in [2, 3]:
  5849. with assert_warns(stats.ConstantInputWarning, match=warn_msg):
  5850. fj, pj = stats.f_oneway(np.take(a, j, take_axis),
  5851. np.take(b, j, take_axis),
  5852. np.take(c, j, take_axis))
  5853. assert_equal(f[j], fj)
  5854. assert_equal(p[j], pj)
  5855. def test_3d_inputs(self):
  5856. # Some 3-d arrays. (There is nothing special about the values.)
  5857. a = 1/np.arange(1.0, 4*5*7 + 1).reshape(4, 5, 7)
  5858. b = 2/np.arange(1.0, 4*8*7 + 1).reshape(4, 8, 7)
  5859. c = np.cos(1/np.arange(1.0, 4*4*7 + 1).reshape(4, 4, 7))
  5860. f, p = stats.f_oneway(a, b, c, axis=1)
  5861. assert f.shape == (4, 7)
  5862. assert p.shape == (4, 7)
  5863. for i in range(a.shape[0]):
  5864. for j in range(a.shape[2]):
  5865. fij, pij = stats.f_oneway(a[i, :, j], b[i, :, j], c[i, :, j])
  5866. assert_allclose(fij, f[i, j])
  5867. assert_allclose(pij, p[i, j])
  5868. def test_length0_1d_error(self):
  5869. # Require at least one value in each group.
  5870. msg = 'all input arrays have length 1.'
  5871. with assert_warns(stats.DegenerateDataWarning, match=msg):
  5872. result = stats.f_oneway([1, 2, 3], [], [4, 5, 6, 7])
  5873. assert_equal(result, (np.nan, np.nan))
  5874. def test_length0_2d_error(self):
  5875. msg = 'all input arrays have length 1.'
  5876. with assert_warns(stats.DegenerateDataWarning, match=msg):
  5877. ncols = 3
  5878. a = np.ones((4, ncols))
  5879. b = np.ones((0, ncols))
  5880. c = np.ones((5, ncols))
  5881. f, p = stats.f_oneway(a, b, c)
  5882. nans = np.full((ncols,), fill_value=np.nan)
  5883. assert_equal(f, nans)
  5884. assert_equal(p, nans)
  5885. def test_all_length_one(self):
  5886. msg = 'all input arrays have length 1.'
  5887. with assert_warns(stats.DegenerateDataWarning, match=msg):
  5888. result = stats.f_oneway([10], [11], [12], [13])
  5889. assert_equal(result, (np.nan, np.nan))
  5890. @pytest.mark.parametrize('args', [(), ([1, 2, 3],)])
  5891. def test_too_few_inputs(self, args):
  5892. with assert_raises(TypeError):
  5893. stats.f_oneway(*args)
  5894. def test_axis_error(self):
  5895. a = np.ones((3, 4))
  5896. b = np.ones((5, 4))
  5897. with assert_raises(np.AxisError):
  5898. stats.f_oneway(a, b, axis=2)
  5899. def test_bad_shapes(self):
  5900. a = np.ones((3, 4))
  5901. b = np.ones((5, 4))
  5902. with assert_raises(ValueError):
  5903. stats.f_oneway(a, b, axis=1)
  5904. class TestKruskal:
  5905. def test_simple(self):
  5906. x = [1]
  5907. y = [2]
  5908. h, p = stats.kruskal(x, y)
  5909. assert_equal(h, 1.0)
  5910. assert_approx_equal(p, stats.distributions.chi2.sf(h, 1))
  5911. h, p = stats.kruskal(np.array(x), np.array(y))
  5912. assert_equal(h, 1.0)
  5913. assert_approx_equal(p, stats.distributions.chi2.sf(h, 1))
  5914. def test_basic(self):
  5915. x = [1, 3, 5, 7, 9]
  5916. y = [2, 4, 6, 8, 10]
  5917. h, p = stats.kruskal(x, y)
  5918. assert_approx_equal(h, 3./11, significant=10)
  5919. assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1))
  5920. h, p = stats.kruskal(np.array(x), np.array(y))
  5921. assert_approx_equal(h, 3./11, significant=10)
  5922. assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1))
  5923. def test_simple_tie(self):
  5924. x = [1]
  5925. y = [1, 2]
  5926. h_uncorr = 1.5**2 + 2*2.25**2 - 12
  5927. corr = 0.75
  5928. expected = h_uncorr / corr # 0.5
  5929. h, p = stats.kruskal(x, y)
  5930. # Since the expression is simple and the exact answer is 0.5, it
  5931. # should be safe to use assert_equal().
  5932. assert_equal(h, expected)
  5933. def test_another_tie(self):
  5934. x = [1, 1, 1, 2]
  5935. y = [2, 2, 2, 2]
  5936. h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9
  5937. corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
  5938. expected = h_uncorr / corr
  5939. h, p = stats.kruskal(x, y)
  5940. assert_approx_equal(h, expected)
  5941. def test_three_groups(self):
  5942. # A test of stats.kruskal with three groups, with ties.
  5943. x = [1, 1, 1]
  5944. y = [2, 2, 2]
  5945. z = [2, 2]
  5946. h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 # 5.0
  5947. corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
  5948. expected = h_uncorr / corr # 7.0
  5949. h, p = stats.kruskal(x, y, z)
  5950. assert_approx_equal(h, expected)
  5951. assert_approx_equal(p, stats.distributions.chi2.sf(h, 2))
  5952. def test_empty(self):
  5953. # A test of stats.kruskal with three groups, with ties.
  5954. x = [1, 1, 1]
  5955. y = [2, 2, 2]
  5956. z = []
  5957. assert_equal(stats.kruskal(x, y, z), (np.nan, np.nan))
  5958. def test_kruskal_result_attributes(self):
  5959. x = [1, 3, 5, 7, 9]
  5960. y = [2, 4, 6, 8, 10]
  5961. res = stats.kruskal(x, y)
  5962. attributes = ('statistic', 'pvalue')
  5963. check_named_results(res, attributes)
  5964. def test_nan_policy(self):
  5965. x = np.arange(10.)
  5966. x[9] = np.nan
  5967. assert_equal(stats.kruskal(x, x), (np.nan, np.nan))
  5968. assert_almost_equal(stats.kruskal(x, x, nan_policy='omit'), (0.0, 1.0))
  5969. assert_raises(ValueError, stats.kruskal, x, x, nan_policy='raise')
  5970. assert_raises(ValueError, stats.kruskal, x, x, nan_policy='foobar')
  5971. def test_large_no_samples(self):
  5972. # Test to see if large samples are handled correctly.
  5973. n = 50000
  5974. x = np.random.randn(n)
  5975. y = np.random.randn(n) + 50
  5976. h, p = stats.kruskal(x, y)
  5977. expected = 0
  5978. assert_approx_equal(p, expected)
  5979. class TestCombinePvalues:
  5980. def test_fisher(self):
  5981. # Example taken from https://en.wikipedia.org/wiki/Fisher%27s_exact_test#Example
  5982. xsq, p = stats.combine_pvalues([.01, .2, .3], method='fisher')
  5983. assert_approx_equal(p, 0.02156, significant=4)
  5984. def test_stouffer(self):
  5985. Z, p = stats.combine_pvalues([.01, .2, .3], method='stouffer')
  5986. assert_approx_equal(p, 0.01651, significant=4)
  5987. def test_stouffer2(self):
  5988. Z, p = stats.combine_pvalues([.5, .5, .5], method='stouffer')
  5989. assert_approx_equal(p, 0.5, significant=4)
  5990. def test_weighted_stouffer(self):
  5991. Z, p = stats.combine_pvalues([.01, .2, .3], method='stouffer',
  5992. weights=np.ones(3))
  5993. assert_approx_equal(p, 0.01651, significant=4)
  5994. def test_weighted_stouffer2(self):
  5995. Z, p = stats.combine_pvalues([.01, .2, .3], method='stouffer',
  5996. weights=np.array((1, 4, 9)))
  5997. assert_approx_equal(p, 0.1464, significant=4)
  5998. def test_pearson(self):
  5999. Z, p = stats.combine_pvalues([.01, .2, .3], method='pearson')
  6000. assert_approx_equal(p, 0.02213, significant=4)
  6001. def test_tippett(self):
  6002. Z, p = stats.combine_pvalues([.01, .2, .3], method='tippett')
  6003. assert_approx_equal(p, 0.0297, significant=4)
  6004. def test_mudholkar_george(self):
  6005. Z, p = stats.combine_pvalues([.1, .1, .1], method='mudholkar_george')
  6006. assert_approx_equal(p, 0.019462, significant=4)
  6007. def test_mudholkar_george_equal_fisher_pearson_average(self):
  6008. Z, p = stats.combine_pvalues([.01, .2, .3], method='mudholkar_george')
  6009. Z_f, p_f = stats.combine_pvalues([.01, .2, .3], method='fisher')
  6010. Z_p, p_p = stats.combine_pvalues([.01, .2, .3], method='pearson')
  6011. assert_approx_equal(0.5 * (Z_f+Z_p), Z, significant=4)
  6012. methods = ["fisher", "pearson", "tippett", "stouffer", "mudholkar_george"]
  6013. @pytest.mark.parametrize("variant", ["single", "all", "random"])
  6014. @pytest.mark.parametrize("method", methods)
  6015. def test_monotonicity(self, variant, method):
  6016. # Test that result increases monotonically with respect to input.
  6017. m, n = 10, 7
  6018. rng = np.random.default_rng(278448169958891062669391462690811630763)
  6019. # `pvaluess` is an m × n array of p values. Each row corresponds to
  6020. # a set of p values to be combined with p values increasing
  6021. # monotonically down one column (single), simultaneously down each
  6022. # column (all), or independently down each column (random).
  6023. if variant == "single":
  6024. pvaluess = np.full((m, n), rng.random(n))
  6025. pvaluess[:, 0] = np.linspace(0.1, 0.9, m)
  6026. elif variant == "all":
  6027. pvaluess = np.full((n, m), np.linspace(0.1, 0.9, m)).T
  6028. elif variant == "random":
  6029. pvaluess = np.sort(rng.uniform(0, 1, size=(m, n)), axis=0)
  6030. combined_pvalues = [
  6031. stats.combine_pvalues(pvalues, method=method)[1]
  6032. for pvalues in pvaluess
  6033. ]
  6034. assert np.all(np.diff(combined_pvalues) >= 0)
  6035. @pytest.mark.parametrize("method", methods)
  6036. def test_result(self, method):
  6037. res = stats.combine_pvalues([.01, .2, .3], method=method)
  6038. assert_equal((res.statistic, res.pvalue), res)
  6039. class TestCdfDistanceValidation:
  6040. """
  6041. Test that _cdf_distance() (via wasserstein_distance()) raises ValueErrors
  6042. for bad inputs.
  6043. """
  6044. def test_distinct_value_and_weight_lengths(self):
  6045. # When the number of weights does not match the number of values,
  6046. # a ValueError should be raised.
  6047. assert_raises(ValueError, stats.wasserstein_distance,
  6048. [1], [2], [4], [3, 1])
  6049. assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [1, 0])
  6050. def test_zero_weight(self):
  6051. # When a distribution is given zero weight, a ValueError should be
  6052. # raised.
  6053. assert_raises(ValueError, stats.wasserstein_distance,
  6054. [0, 1], [2], [0, 0])
  6055. assert_raises(ValueError, stats.wasserstein_distance,
  6056. [0, 1], [2], [3, 1], [0])
  6057. def test_negative_weights(self):
  6058. # A ValueError should be raised if there are any negative weights.
  6059. assert_raises(ValueError, stats.wasserstein_distance,
  6060. [0, 1], [2, 2], [1, 1], [3, -1])
  6061. def test_empty_distribution(self):
  6062. # A ValueError should be raised when trying to measure the distance
  6063. # between something and nothing.
  6064. assert_raises(ValueError, stats.wasserstein_distance, [], [2, 2])
  6065. assert_raises(ValueError, stats.wasserstein_distance, [1], [])
  6066. def test_inf_weight(self):
  6067. # An inf weight is not valid.
  6068. assert_raises(ValueError, stats.wasserstein_distance,
  6069. [1, 2, 1], [1, 1], [1, np.inf, 1], [1, 1])
  6070. class TestWassersteinDistance:
  6071. """ Tests for wasserstein_distance() output values.
  6072. """
  6073. def test_simple(self):
  6074. # For basic distributions, the value of the Wasserstein distance is
  6075. # straightforward.
  6076. assert_almost_equal(
  6077. stats.wasserstein_distance([0, 1], [0], [1, 1], [1]),
  6078. .5)
  6079. assert_almost_equal(stats.wasserstein_distance(
  6080. [0, 1], [0], [3, 1], [1]),
  6081. .25)
  6082. assert_almost_equal(stats.wasserstein_distance(
  6083. [0, 2], [0], [1, 1], [1]),
  6084. 1)
  6085. assert_almost_equal(stats.wasserstein_distance(
  6086. [0, 1, 2], [1, 2, 3]),
  6087. 1)
  6088. def test_same_distribution(self):
  6089. # Any distribution moved to itself should have a Wasserstein distance of
  6090. # zero.
  6091. assert_equal(stats.wasserstein_distance([1, 2, 3], [2, 1, 3]), 0)
  6092. assert_equal(
  6093. stats.wasserstein_distance([1, 1, 1, 4], [4, 1],
  6094. [1, 1, 1, 1], [1, 3]),
  6095. 0)
  6096. def test_shift(self):
  6097. # If the whole distribution is shifted by x, then the Wasserstein
  6098. # distance should be x.
  6099. assert_almost_equal(stats.wasserstein_distance([0], [1]), 1)
  6100. assert_almost_equal(stats.wasserstein_distance([-5], [5]), 10)
  6101. assert_almost_equal(
  6102. stats.wasserstein_distance([1, 2, 3, 4, 5], [11, 12, 13, 14, 15]),
  6103. 10)
  6104. assert_almost_equal(
  6105. stats.wasserstein_distance([4.5, 6.7, 2.1], [4.6, 7, 9.2],
  6106. [3, 1, 1], [1, 3, 1]),
  6107. 2.5)
  6108. def test_combine_weights(self):
  6109. # Assigning a weight w to a value is equivalent to including that value
  6110. # w times in the value array with weight of 1.
  6111. assert_almost_equal(
  6112. stats.wasserstein_distance(
  6113. [0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4],
  6114. [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]),
  6115. stats.wasserstein_distance([5, 0, 1], [0, 4, 3],
  6116. [1, 2, 4], [1, 2, 4]))
  6117. def test_collapse(self):
  6118. # Collapsing a distribution to a point distribution at zero is
  6119. # equivalent to taking the average of the absolute values of the values.
  6120. u = np.arange(-10, 30, 0.3)
  6121. v = np.zeros_like(u)
  6122. assert_almost_equal(
  6123. stats.wasserstein_distance(u, v),
  6124. np.mean(np.abs(u)))
  6125. u_weights = np.arange(len(u))
  6126. v_weights = u_weights[::-1]
  6127. assert_almost_equal(
  6128. stats.wasserstein_distance(u, v, u_weights, v_weights),
  6129. np.average(np.abs(u), weights=u_weights))
  6130. def test_zero_weight(self):
  6131. # Values with zero weight have no impact on the Wasserstein distance.
  6132. assert_almost_equal(
  6133. stats.wasserstein_distance([1, 2, 100000], [1, 1],
  6134. [1, 1, 0], [1, 1]),
  6135. stats.wasserstein_distance([1, 2], [1, 1], [1, 1], [1, 1]))
  6136. def test_inf_values(self):
  6137. # Inf values can lead to an inf distance or trigger a RuntimeWarning
  6138. # (and return NaN) if the distance is undefined.
  6139. assert_equal(
  6140. stats.wasserstein_distance([1, 2, np.inf], [1, 1]),
  6141. np.inf)
  6142. assert_equal(
  6143. stats.wasserstein_distance([1, 2, np.inf], [-np.inf, 1]),
  6144. np.inf)
  6145. assert_equal(
  6146. stats.wasserstein_distance([1, -np.inf, np.inf], [1, 1]),
  6147. np.inf)
  6148. with suppress_warnings() as sup:
  6149. sup.record(RuntimeWarning, "invalid value*")
  6150. assert_equal(
  6151. stats.wasserstein_distance([1, 2, np.inf], [np.inf, 1]),
  6152. np.nan)
  6153. class TestEnergyDistance:
  6154. """ Tests for energy_distance() output values.
  6155. """
  6156. def test_simple(self):
  6157. # For basic distributions, the value of the energy distance is
  6158. # straightforward.
  6159. assert_almost_equal(
  6160. stats.energy_distance([0, 1], [0], [1, 1], [1]),
  6161. np.sqrt(2) * .5)
  6162. assert_almost_equal(stats.energy_distance(
  6163. [0, 1], [0], [3, 1], [1]),
  6164. np.sqrt(2) * .25)
  6165. assert_almost_equal(stats.energy_distance(
  6166. [0, 2], [0], [1, 1], [1]),
  6167. 2 * .5)
  6168. assert_almost_equal(
  6169. stats.energy_distance([0, 1, 2], [1, 2, 3]),
  6170. np.sqrt(2) * (3*(1./3**2))**.5)
  6171. def test_same_distribution(self):
  6172. # Any distribution moved to itself should have a energy distance of
  6173. # zero.
  6174. assert_equal(stats.energy_distance([1, 2, 3], [2, 1, 3]), 0)
  6175. assert_equal(
  6176. stats.energy_distance([1, 1, 1, 4], [4, 1], [1, 1, 1, 1], [1, 3]),
  6177. 0)
  6178. def test_shift(self):
  6179. # If a single-point distribution is shifted by x, then the energy
  6180. # distance should be sqrt(2) * sqrt(x).
  6181. assert_almost_equal(stats.energy_distance([0], [1]), np.sqrt(2))
  6182. assert_almost_equal(
  6183. stats.energy_distance([-5], [5]),
  6184. np.sqrt(2) * 10**.5)
  6185. def test_combine_weights(self):
  6186. # Assigning a weight w to a value is equivalent to including that value
  6187. # w times in the value array with weight of 1.
  6188. assert_almost_equal(
  6189. stats.energy_distance([0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4],
  6190. [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]),
  6191. stats.energy_distance([5, 0, 1], [0, 4, 3], [1, 2, 4], [1, 2, 4]))
  6192. def test_zero_weight(self):
  6193. # Values with zero weight have no impact on the energy distance.
  6194. assert_almost_equal(
  6195. stats.energy_distance([1, 2, 100000], [1, 1], [1, 1, 0], [1, 1]),
  6196. stats.energy_distance([1, 2], [1, 1], [1, 1], [1, 1]))
  6197. def test_inf_values(self):
  6198. # Inf values can lead to an inf distance or trigger a RuntimeWarning
  6199. # (and return NaN) if the distance is undefined.
  6200. assert_equal(stats.energy_distance([1, 2, np.inf], [1, 1]), np.inf)
  6201. assert_equal(
  6202. stats.energy_distance([1, 2, np.inf], [-np.inf, 1]),
  6203. np.inf)
  6204. assert_equal(
  6205. stats.energy_distance([1, -np.inf, np.inf], [1, 1]),
  6206. np.inf)
  6207. with suppress_warnings() as sup:
  6208. sup.record(RuntimeWarning, "invalid value*")
  6209. assert_equal(
  6210. stats.energy_distance([1, 2, np.inf], [np.inf, 1]),
  6211. np.nan)
  6212. class TestBrunnerMunzel:
  6213. # Data from (Lumley, 1996)
  6214. X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
  6215. Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
  6216. significant = 13
  6217. def test_brunnermunzel_one_sided(self):
  6218. # Results are compared with R's lawstat package.
  6219. u1, p1 = stats.brunnermunzel(self.X, self.Y, alternative='less')
  6220. u2, p2 = stats.brunnermunzel(self.Y, self.X, alternative='greater')
  6221. u3, p3 = stats.brunnermunzel(self.X, self.Y, alternative='greater')
  6222. u4, p4 = stats.brunnermunzel(self.Y, self.X, alternative='less')
  6223. assert_approx_equal(p1, p2, significant=self.significant)
  6224. assert_approx_equal(p3, p4, significant=self.significant)
  6225. assert_(p1 != p3)
  6226. assert_approx_equal(u1, 3.1374674823029505,
  6227. significant=self.significant)
  6228. assert_approx_equal(u2, -3.1374674823029505,
  6229. significant=self.significant)
  6230. assert_approx_equal(u3, 3.1374674823029505,
  6231. significant=self.significant)
  6232. assert_approx_equal(u4, -3.1374674823029505,
  6233. significant=self.significant)
  6234. assert_approx_equal(p1, 0.0028931043330757342,
  6235. significant=self.significant)
  6236. assert_approx_equal(p3, 0.99710689566692423,
  6237. significant=self.significant)
  6238. def test_brunnermunzel_two_sided(self):
  6239. # Results are compared with R's lawstat package.
  6240. u1, p1 = stats.brunnermunzel(self.X, self.Y, alternative='two-sided')
  6241. u2, p2 = stats.brunnermunzel(self.Y, self.X, alternative='two-sided')
  6242. assert_approx_equal(p1, p2, significant=self.significant)
  6243. assert_approx_equal(u1, 3.1374674823029505,
  6244. significant=self.significant)
  6245. assert_approx_equal(u2, -3.1374674823029505,
  6246. significant=self.significant)
  6247. assert_approx_equal(p1, 0.0057862086661515377,
  6248. significant=self.significant)
  6249. def test_brunnermunzel_default(self):
  6250. # The default value for alternative is two-sided
  6251. u1, p1 = stats.brunnermunzel(self.X, self.Y)
  6252. u2, p2 = stats.brunnermunzel(self.Y, self.X)
  6253. assert_approx_equal(p1, p2, significant=self.significant)
  6254. assert_approx_equal(u1, 3.1374674823029505,
  6255. significant=self.significant)
  6256. assert_approx_equal(u2, -3.1374674823029505,
  6257. significant=self.significant)
  6258. assert_approx_equal(p1, 0.0057862086661515377,
  6259. significant=self.significant)
  6260. def test_brunnermunzel_alternative_error(self):
  6261. alternative = "error"
  6262. distribution = "t"
  6263. nan_policy = "propagate"
  6264. assert_(alternative not in ["two-sided", "greater", "less"])
  6265. assert_raises(ValueError,
  6266. stats.brunnermunzel,
  6267. self.X,
  6268. self.Y,
  6269. alternative,
  6270. distribution,
  6271. nan_policy)
  6272. def test_brunnermunzel_distribution_norm(self):
  6273. u1, p1 = stats.brunnermunzel(self.X, self.Y, distribution="normal")
  6274. u2, p2 = stats.brunnermunzel(self.Y, self.X, distribution="normal")
  6275. assert_approx_equal(p1, p2, significant=self.significant)
  6276. assert_approx_equal(u1, 3.1374674823029505,
  6277. significant=self.significant)
  6278. assert_approx_equal(u2, -3.1374674823029505,
  6279. significant=self.significant)
  6280. assert_approx_equal(p1, 0.0017041417600383024,
  6281. significant=self.significant)
  6282. def test_brunnermunzel_distribution_error(self):
  6283. alternative = "two-sided"
  6284. distribution = "error"
  6285. nan_policy = "propagate"
  6286. assert_(alternative not in ["t", "normal"])
  6287. assert_raises(ValueError,
  6288. stats.brunnermunzel,
  6289. self.X,
  6290. self.Y,
  6291. alternative,
  6292. distribution,
  6293. nan_policy)
  6294. def test_brunnermunzel_empty_imput(self):
  6295. u1, p1 = stats.brunnermunzel(self.X, [])
  6296. u2, p2 = stats.brunnermunzel([], self.Y)
  6297. u3, p3 = stats.brunnermunzel([], [])
  6298. assert_equal(u1, np.nan)
  6299. assert_equal(p1, np.nan)
  6300. assert_equal(u2, np.nan)
  6301. assert_equal(p2, np.nan)
  6302. assert_equal(u3, np.nan)
  6303. assert_equal(p3, np.nan)
  6304. def test_brunnermunzel_nan_input_propagate(self):
  6305. X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan]
  6306. Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
  6307. u1, p1 = stats.brunnermunzel(X, Y, nan_policy="propagate")
  6308. u2, p2 = stats.brunnermunzel(Y, X, nan_policy="propagate")
  6309. assert_equal(u1, np.nan)
  6310. assert_equal(p1, np.nan)
  6311. assert_equal(u2, np.nan)
  6312. assert_equal(p2, np.nan)
  6313. def test_brunnermunzel_nan_input_raise(self):
  6314. X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan]
  6315. Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
  6316. alternative = "two-sided"
  6317. distribution = "t"
  6318. nan_policy = "raise"
  6319. assert_raises(ValueError,
  6320. stats.brunnermunzel,
  6321. X,
  6322. Y,
  6323. alternative,
  6324. distribution,
  6325. nan_policy)
  6326. assert_raises(ValueError,
  6327. stats.brunnermunzel,
  6328. Y,
  6329. X,
  6330. alternative,
  6331. distribution,
  6332. nan_policy)
  6333. def test_brunnermunzel_nan_input_omit(self):
  6334. X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan]
  6335. Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
  6336. u1, p1 = stats.brunnermunzel(X, Y, nan_policy="omit")
  6337. u2, p2 = stats.brunnermunzel(Y, X, nan_policy="omit")
  6338. assert_approx_equal(p1, p2, significant=self.significant)
  6339. assert_approx_equal(u1, 3.1374674823029505,
  6340. significant=self.significant)
  6341. assert_approx_equal(u2, -3.1374674823029505,
  6342. significant=self.significant)
  6343. assert_approx_equal(p1, 0.0057862086661515377,
  6344. significant=self.significant)
  6345. def test_brunnermunzel_return_nan(self):
  6346. """ tests that a warning is emitted when p is nan
  6347. p-value with t-distributions can be nan (0/0) (see gh-15843)
  6348. """
  6349. x = [1, 2, 3]
  6350. y = [5, 6, 7, 8, 9]
  6351. with pytest.warns(RuntimeWarning, match='p-value cannot be estimated'):
  6352. stats.brunnermunzel(x, y, distribution="t")
  6353. def test_brunnermunzel_normal_dist(self):
  6354. """ tests that a p is 0 for datasets that cause p->nan
  6355. when t-distribution is used (see gh-15843)
  6356. """
  6357. x = [1, 2, 3]
  6358. y = [5, 6, 7, 8, 9]
  6359. with pytest.warns(RuntimeWarning, match='divide by zero'):
  6360. _, p = stats.brunnermunzel(x, y, distribution="normal")
  6361. assert_equal(p, 0)
class TestRatioUniforms:
    """ Tests for rvs_ratio_uniforms.
    """
    # NOTE(review): `stats.rvs_ratio_uniforms` may be deprecated or removed
    # in newer SciPy releases — confirm against the installed version.

    def test_rv_generation(self):
        # use KS test to check distribution of rvs
        # normal distribution
        f = stats.norm.pdf
        # (umax, vmin, vmax) bound the acceptance region for the ratio-of-
        # uniforms method applied to the standard normal pdf.
        v_bound = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
        umax, vmin, vmax = np.sqrt(f(0)), -v_bound, v_bound
        rvs = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=2500,
                                       random_state=12345)
        # KS p-value > 0.25 indicates no evidence against normality.
        assert_equal(stats.kstest(rvs, 'norm')[1] > 0.25, True)
        # exponential distribution
        rvs = stats.rvs_ratio_uniforms(lambda x: np.exp(-x), umax=1,
                                       vmin=0, vmax=2*np.exp(-1),
                                       size=1000, random_state=12345)
        assert_equal(stats.kstest(rvs, 'expon')[1] > 0.25, True)

    def test_shape(self):
        # test shape of return value depending on size parameter
        f = stats.norm.pdf
        v_bound = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
        umax, vmin, vmax = np.sqrt(f(0)), -v_bound, v_bound
        # size=3, size=(3,) and size=(3, 1) must draw the same values.
        r1 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=3,
                                      random_state=1234)
        r2 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(3,),
                                      random_state=1234)
        r3 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(3, 1),
                                      random_state=1234)
        assert_equal(r1, r2)
        assert_equal(r2, r3.flatten())
        assert_equal(r1.shape, (3,))
        assert_equal(r3.shape, (3, 1))
        # a multi-dimensional size draws the same stream as the flat size.
        r4 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(3, 3, 3),
                                      random_state=12)
        r5 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=27,
                                      random_state=12)
        assert_equal(r4.flatten(), r5)
        assert_equal(r4.shape, (3, 3, 3))
        # default size, size=1 and size=(1,) are equivalent.
        r6 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, random_state=1234)
        r7 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=1,
                                      random_state=1234)
        r8 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(1, ),
                                      random_state=1234)
        assert_equal(r6, r7)
        assert_equal(r7, r8)

    def test_random_state(self):
        # seeding the global RNG or passing random_state must agree.
        f = stats.norm.pdf
        v_bound = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
        umax, vmin, vmax = np.sqrt(f(0)), -v_bound, v_bound
        np.random.seed(1234)
        r1 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(3, 4))
        r2 = stats.rvs_ratio_uniforms(f, umax, vmin, vmax, size=(3, 4),
                                      random_state=1234)
        assert_equal(r1, r2)

    def test_exceptions(self):
        f = stats.norm.pdf
        # need vmin < vmax
        assert_raises(ValueError,
                      stats.rvs_ratio_uniforms, pdf=f, umax=1, vmin=3, vmax=1)
        assert_raises(ValueError,
                      stats.rvs_ratio_uniforms, pdf=f, umax=1, vmin=1, vmax=1)
        # need umax > 0
        assert_raises(ValueError,
                      stats.rvs_ratio_uniforms, pdf=f, umax=-1, vmin=1, vmax=1)
        assert_raises(ValueError,
                      stats.rvs_ratio_uniforms, pdf=f, umax=0, vmin=1, vmax=1)
  6428. class TestMGCErrorWarnings:
  6429. """ Tests errors and warnings derived from MGC.
  6430. """
  6431. def test_error_notndarray(self):
  6432. # raises error if x or y is not a ndarray
  6433. x = np.arange(20)
  6434. y = [5] * 20
  6435. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  6436. assert_raises(ValueError, stats.multiscale_graphcorr, y, x)
  6437. def test_error_shape(self):
  6438. # raises error if number of samples different (n)
  6439. x = np.arange(100).reshape(25, 4)
  6440. y = x.reshape(10, 10)
  6441. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  6442. def test_error_lowsamples(self):
  6443. # raises error if samples are low (< 3)
  6444. x = np.arange(3)
  6445. y = np.arange(3)
  6446. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  6447. def test_error_nans(self):
  6448. # raises error if inputs contain NaNs
  6449. x = np.arange(20, dtype=float)
  6450. x[0] = np.nan
  6451. assert_raises(ValueError, stats.multiscale_graphcorr, x, x)
  6452. y = np.arange(20)
  6453. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  6454. def test_error_wrongdisttype(self):
  6455. # raises error if metric is not a function
  6456. x = np.arange(20)
  6457. compute_distance = 0
  6458. assert_raises(ValueError, stats.multiscale_graphcorr, x, x,
  6459. compute_distance=compute_distance)
  6460. @pytest.mark.parametrize("reps", [
  6461. -1, # reps is negative
  6462. '1', # reps is not integer
  6463. ])
  6464. def test_error_reps(self, reps):
  6465. # raises error if reps is negative
  6466. x = np.arange(20)
  6467. assert_raises(ValueError, stats.multiscale_graphcorr, x, x, reps=reps)
  6468. def test_warns_reps(self):
  6469. # raises warning when reps is less than 1000
  6470. x = np.arange(20)
  6471. reps = 100
  6472. assert_warns(RuntimeWarning, stats.multiscale_graphcorr, x, x, reps=reps)
  6473. def test_error_infty(self):
  6474. # raises error if input contains infinities
  6475. x = np.arange(20)
  6476. y = np.ones(20) * np.inf
  6477. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
class TestMGCStat:
    """ Test validity of MGC test statistic
    """
    # NOTE: these tests seed the *global* NumPy RNG; the expected statistics
    # depend on the exact order of the np.random draws below.

    def _simulations(self, samps=100, dims=1, sim_type=""):
        # Generate an (x, y) pair of dependent (or independent) samples for
        # the requested simulation type.  Raises ValueError for an unknown
        # `sim_type`.
        # linear simulation
        if sim_type == "linear":
            x = np.random.uniform(-1, 1, size=(samps, 1))
            y = x + 0.3 * np.random.random_sample(size=(x.size, 1))
        # spiral simulation
        elif sim_type == "nonlinear":
            unif = np.array(np.random.uniform(0, 5, size=(samps, 1)))
            x = unif * np.cos(np.pi * unif)
            y = unif * np.sin(np.pi * unif) + (0.4
                * np.random.random_sample(size=(x.size, 1)))
        # independence (tests type I simulation)
        elif sim_type == "independence":
            u = np.random.normal(0, 1, size=(samps, 1))
            v = np.random.normal(0, 1, size=(samps, 1))
            u_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            v_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            x = u/3 + 2*u_2 - 1
            y = v/3 + 2*v_2 - 1
        # raises error if not approved sim_type
        else:
            raise ValueError("sim_type must be linear, nonlinear, or "
                             "independence")
        # add dimensions of noise for higher dimensions
        if dims > 1:
            dims_noise = np.random.normal(0, 1, size=(samps, dims-1))
            x = np.concatenate((x, dims_noise), axis=1)
        return x, y

    @pytest.mark.slow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.97, 1/1000),  # test linear simulation
        ("nonlinear", 0.163, 1/1000),  # test spiral simulation
        ("independence", -0.0094, 0.78)  # test independence simulation
    ])
    def test_oned(self, sim_type, obs_stat, obs_pvalue):
        # One-dimensional simulations against reference statistics.
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type=sim_type)
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.slow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.184, 1/1000),  # test linear simulation
        ("nonlinear", 0.0190, 0.117),  # test spiral simulation
    ])
    def test_fived(self, sim_type, obs_stat, obs_pvalue):
        # Five-dimensional simulations (noise dimensions added).
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=5, sim_type=sim_type)
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
    def test_twosamp(self):
        # Two-sample mode, both implicit (unequal sample sizes) and via
        # the explicit is_twosamp flag.
        np.random.seed(12345678)
        # generate x and y
        x = np.random.binomial(100, 0.5, size=(100, 5))
        y = np.random.normal(0, 1, size=(80, 5))
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)
        # generate x and y
        y = np.random.normal(0, 1, size=(100, 5))
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, is_twosamp=True)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.slow
    def test_workers(self):
        # Parallel permutation (workers=2) matches the serial result.
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, workers=2)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.slow
    def test_random_state(self):
        # Results are reproducible with an explicit random_state.
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")
        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.slow
    def test_dist_perm(self):
        # Precomputed distance matrices (compute_distance=None) work too.
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="nonlinear")
        distx = cdist(x, x, metric="euclidean")
        disty = cdist(y, y, metric="euclidean")
        stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(distx, disty,
                                                               compute_distance=None,
                                                               random_state=1)
        assert_approx_equal(stat_dist, 0.163, significant=1)
        assert_approx_equal(pvalue_dist, 0.001, significant=1)

    @pytest.mark.slow
    def test_pvalue_literature(self):
        # With reps permutations the smallest p-value is 1/(reps+1).
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")
        # test stat and pvalue
        _, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_allclose(pvalue, 1/1001)

    @pytest.mark.slow
    def test_alias(self):
        # `stat` is an alias for the `statistic` attribute.
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")
        res = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_equal(res.stat, res.statistic)
  6596. class TestPageTrendTest:
  6597. # expected statistic and p-values generated using R at
  6598. # https://rdrr.io/cran/cultevo/, e.g.
  6599. # library(cultevo)
  6600. # data = rbind(c(72, 47, 73, 35, 47, 96, 30, 59, 41, 36, 56, 49, 81, 43,
  6601. # 70, 47, 28, 28, 62, 20, 61, 20, 80, 24, 50),
  6602. # c(68, 52, 60, 34, 44, 20, 65, 88, 21, 81, 48, 31, 31, 67,
  6603. # 69, 94, 30, 24, 40, 87, 70, 43, 50, 96, 43),
  6604. # c(81, 13, 85, 35, 79, 12, 92, 86, 21, 64, 16, 64, 68, 17,
  6605. # 16, 89, 71, 43, 43, 36, 54, 13, 66, 51, 55))
  6606. # result = page.test(data, verbose=FALSE)
  6607. # Most test cases generated to achieve common critical p-values so that
  6608. # results could be checked (to limited precision) against tables in
  6609. # scipy.stats.page_trend_test reference [1]
  6610. np.random.seed(0)
  6611. data_3_25 = np.random.rand(3, 25)
  6612. data_10_26 = np.random.rand(10, 26)
  6613. ts = [
  6614. (12805, 0.3886487053947608, False, 'asymptotic', data_3_25),
  6615. (49140, 0.02888978556179862, False, 'asymptotic', data_10_26),
  6616. (12332, 0.7722477197436702, False, 'asymptotic',
  6617. [[72, 47, 73, 35, 47, 96, 30, 59, 41, 36, 56, 49, 81,
  6618. 43, 70, 47, 28, 28, 62, 20, 61, 20, 80, 24, 50],
  6619. [68, 52, 60, 34, 44, 20, 65, 88, 21, 81, 48, 31, 31,
  6620. 67, 69, 94, 30, 24, 40, 87, 70, 43, 50, 96, 43],
  6621. [81, 13, 85, 35, 79, 12, 92, 86, 21, 64, 16, 64, 68,
  6622. 17, 16, 89, 71, 43, 43, 36, 54, 13, 66, 51, 55]]),
  6623. (266, 4.121656378600823e-05, False, 'exact',
  6624. [[1.5, 4., 8.3, 5, 19, 11],
  6625. [5, 4, 3.5, 10, 20, 21],
  6626. [8.4, 3.2, 10, 12, 14, 15]]),
  6627. (332, 0.9566400920502488, True, 'exact',
  6628. [[4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1],
  6629. [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1],
  6630. [3, 4, 1, 2], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4],
  6631. [1, 2, 3, 4], [1, 2, 3, 4]]),
  6632. (241, 0.9622210164861476, True, 'exact',
  6633. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  6634. [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  6635. [3, 2, 1], [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3],
  6636. [1, 2, 3], [1, 2, 3], [1, 2, 3]]),
  6637. (197, 0.9619432897162209, True, 'exact',
  6638. [[6, 5, 4, 3, 2, 1], [6, 5, 4, 3, 2, 1], [1, 3, 4, 5, 2, 6]]),
  6639. (423, 0.9590458306880073, True, 'exact',
  6640. [[5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1],
  6641. [5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1],
  6642. [4, 1, 3, 2, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5],
  6643. [1, 2, 3, 4, 5]]),
  6644. (217, 0.9693058575034678, True, 'exact',
  6645. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  6646. [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  6647. [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3],
  6648. [1, 2, 3]]),
  6649. (395, 0.991530289351305, True, 'exact',
  6650. [[7, 6, 5, 4, 3, 2, 1], [7, 6, 5, 4, 3, 2, 1],
  6651. [6, 5, 7, 4, 3, 2, 1], [1, 2, 3, 4, 5, 6, 7]]),
  6652. (117, 0.9997817843373017, True, 'exact',
  6653. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  6654. [3, 2, 1], [3, 2, 1], [3, 2, 1], [2, 1, 3], [1, 2, 3]]),
  6655. ]
  6656. @pytest.mark.parametrize("L, p, ranked, method, data", ts)
  6657. def test_accuracy(self, L, p, ranked, method, data):
  6658. np.random.seed(42)
  6659. res = stats.page_trend_test(data, ranked=ranked, method=method)
  6660. assert_equal(L, res.statistic)
  6661. assert_allclose(p, res.pvalue)
  6662. assert_equal(method, res.method)
  6663. ts2 = [
  6664. (542, 0.9481266260876332, True, 'exact',
  6665. [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
  6666. [1, 8, 4, 7, 6, 5, 9, 3, 2, 10]]),
  6667. (1322, 0.9993113928199309, True, 'exact',
  6668. [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
  6669. [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 2, 8, 7, 6, 5, 4, 3, 10, 1],
  6670. [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]),
  6671. (2286, 0.9908688345484833, True, 'exact',
  6672. [[8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  6673. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  6674. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  6675. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  6676. [8, 7, 6, 5, 4, 3, 2, 1], [1, 3, 5, 6, 4, 7, 2, 8],
  6677. [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8],
  6678. [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8],
  6679. [1, 2, 3, 4, 5, 6, 7, 8]]),
  6680. ]
  6681. # only the first of these appears slow because intermediate data are
  6682. # cached and used on the rest
  6683. @pytest.mark.parametrize("L, p, ranked, method, data", ts)
  6684. @pytest.mark.slow()
  6685. def test_accuracy2(self, L, p, ranked, method, data):
  6686. np.random.seed(42)
  6687. res = stats.page_trend_test(data, ranked=ranked, method=method)
  6688. assert_equal(L, res.statistic)
  6689. assert_allclose(p, res.pvalue)
  6690. assert_equal(method, res.method)
  6691. def test_options(self):
  6692. np.random.seed(42)
  6693. m, n = 10, 20
  6694. predicted_ranks = np.arange(1, n+1)
  6695. perm = np.random.permutation(np.arange(n))
  6696. data = np.random.rand(m, n)
  6697. ranks = stats.rankdata(data, axis=1)
  6698. res1 = stats.page_trend_test(ranks)
  6699. res2 = stats.page_trend_test(ranks, ranked=True)
  6700. res3 = stats.page_trend_test(data, ranked=False)
  6701. res4 = stats.page_trend_test(ranks, predicted_ranks=predicted_ranks)
  6702. res5 = stats.page_trend_test(ranks[:, perm],
  6703. predicted_ranks=predicted_ranks[perm])
  6704. assert_equal(res1.statistic, res2.statistic)
  6705. assert_equal(res1.statistic, res3.statistic)
  6706. assert_equal(res1.statistic, res4.statistic)
  6707. assert_equal(res1.statistic, res5.statistic)
  6708. def test_Ames_assay(self):
  6709. # test from _page_trend_test.py [2] page 151; data on page 144
  6710. np.random.seed(42)
  6711. data = [[101, 117, 111], [91, 90, 107], [103, 133, 121],
  6712. [136, 140, 144], [190, 161, 201], [146, 120, 116]]
  6713. data = np.array(data).T
  6714. predicted_ranks = np.arange(1, 7)
  6715. res = stats.page_trend_test(data, ranked=False,
  6716. predicted_ranks=predicted_ranks,
  6717. method="asymptotic")
  6718. assert_equal(res.statistic, 257)
  6719. assert_almost_equal(res.pvalue, 0.0035, decimal=4)
  6720. res = stats.page_trend_test(data, ranked=False,
  6721. predicted_ranks=predicted_ranks,
  6722. method="exact")
  6723. assert_equal(res.statistic, 257)
  6724. assert_almost_equal(res.pvalue, 0.0023, decimal=4)
  6725. def test_input_validation(self):
  6726. # test data not a 2d array
  6727. with assert_raises(ValueError, match="`data` must be a 2d array."):
  6728. stats.page_trend_test(None)
  6729. with assert_raises(ValueError, match="`data` must be a 2d array."):
  6730. stats.page_trend_test([])
  6731. with assert_raises(ValueError, match="`data` must be a 2d array."):
  6732. stats.page_trend_test([1, 2])
  6733. with assert_raises(ValueError, match="`data` must be a 2d array."):
  6734. stats.page_trend_test([[[1]]])
  6735. # test invalid dimensions
  6736. with assert_raises(ValueError, match="Page's L is only appropriate"):
  6737. stats.page_trend_test(np.random.rand(1, 3))
  6738. with assert_raises(ValueError, match="Page's L is only appropriate"):
  6739. stats.page_trend_test(np.random.rand(2, 2))
  6740. # predicted ranks must include each integer [1, 2, 3] exactly once
  6741. message = "`predicted_ranks` must include each integer"
  6742. with assert_raises(ValueError, match=message):
  6743. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6744. predicted_ranks=[0, 1, 2])
  6745. with assert_raises(ValueError, match=message):
  6746. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6747. predicted_ranks=[1.1, 2, 3])
  6748. with assert_raises(ValueError, match=message):
  6749. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6750. predicted_ranks=[1, 2, 3, 3])
  6751. with assert_raises(ValueError, match=message):
  6752. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6753. predicted_ranks="invalid")
  6754. # test improperly ranked data
  6755. with assert_raises(ValueError, match="`data` is not properly ranked"):
  6756. stats.page_trend_test([[0, 2, 3], [1, 2, 3]], True)
  6757. with assert_raises(ValueError, match="`data` is not properly ranked"):
  6758. stats.page_trend_test([[1, 2, 3], [1, 2, 4]], True)
  6759. # various
  6760. with assert_raises(ValueError, match="`data` contains NaNs"):
  6761. stats.page_trend_test([[1, 2, 3], [1, 2, np.nan]],
  6762. ranked=False)
  6763. with assert_raises(ValueError, match="`method` must be in"):
  6764. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6765. method="ekki")
  6766. with assert_raises(TypeError, match="`ranked` must be boolean."):
  6767. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  6768. ranked="ekki")
  6769. rng = np.random.default_rng(902340982)
  6770. x = rng.random(10)
  6771. y = rng.random(10)
  6772. @pytest.mark.parametrize("fun, args",
  6773. [(stats.wilcoxon, (x,)),
  6774. (stats.ks_1samp, (x, stats.norm.cdf)), # type: ignore[attr-defined] # noqa
  6775. (stats.ks_2samp, (x, y)),
  6776. (stats.kstest, (x, y)),
  6777. ])
  6778. def test_rename_mode_method(fun, args):
  6779. res = fun(*args, method='exact')
  6780. res2 = fun(*args, mode='exact')
  6781. assert_equal(res, res2)
  6782. err = rf"{fun.__name__}() got multiple values for argument"
  6783. with pytest.raises(TypeError, match=re.escape(err)):
  6784. fun(*args, method='exact', mode='exact')
  6785. class TestExpectile:
  6786. def test_same_as_mean(self):
  6787. rng = np.random.default_rng(42)
  6788. x = rng.random(size=20)
  6789. assert_allclose(stats.expectile(x, alpha=0.5), np.mean(x))
  6790. def test_minimum(self):
  6791. rng = np.random.default_rng(42)
  6792. x = rng.random(size=20)
  6793. assert_allclose(stats.expectile(x, alpha=0), np.amin(x))
  6794. def test_maximum(self):
  6795. rng = np.random.default_rng(42)
  6796. x = rng.random(size=20)
  6797. assert_allclose(stats.expectile(x, alpha=1), np.amax(x))
  6798. def test_weights(self):
  6799. # expectile should minimize `fun` defined below; see
  6800. # F. Sobotka and T. Kneib, "Geoadditive expectile regression",
  6801. # Computational Statistics and Data Analysis 56 (2012) 755-767
  6802. # :doi:`10.1016/j.csda.2010.11.015`
  6803. rng = np.random.default_rng(1856392524598679138)
  6804. def fun(u, a, alpha, weights):
  6805. w = np.full_like(a, fill_value=alpha)
  6806. w[a <= u] = 1 - alpha
  6807. return np.sum(w * weights * (a - u)**2)
  6808. def expectile2(a, alpha, weights):
  6809. bracket = np.min(a), np.max(a)
  6810. return optimize.minimize_scalar(fun, bracket=bracket,
  6811. args=(a, alpha, weights)).x
  6812. n = 10
  6813. a = rng.random(n)
  6814. alpha = rng.random()
  6815. weights = rng.random(n)
  6816. res = stats.expectile(a, alpha, weights=weights)
  6817. ref = expectile2(a, alpha, weights)
  6818. assert_allclose(res, ref)
  6819. @pytest.mark.parametrize(
  6820. "alpha", [0.2, 0.5 - 1e-12, 0.5, 0.5 + 1e-12, 0.8]
  6821. )
  6822. @pytest.mark.parametrize("n", [20, 2000])
  6823. def test_expectile_properties(self, alpha, n):
  6824. """
  6825. See Section 6 of
  6826. I. Steinwart, C. Pasin, R.C. Williamson & S. Zhang (2014).
  6827. "Elicitation and Identification of Properties". COLT.
  6828. http://proceedings.mlr.press/v35/steinwart14.html
  6829. and
  6830. Propositions 5, 6, 7 of
  6831. F. Bellini, B. Klar, and A. Müller and E. Rosazza Gianin (2013).
  6832. "Generalized Quantiles as Risk Measures"
  6833. http://doi.org/10.2139/ssrn.2225751
  6834. """
  6835. rng = np.random.default_rng(42)
  6836. x = rng.normal(size=n)
  6837. # 0. definite / constancy
  6838. # Let T(X) denote the expectile of rv X ~ F.
  6839. # T(c) = c for constant c
  6840. for c in [-5, 0, 0.5]:
  6841. assert_allclose(
  6842. stats.expectile(np.full(shape=n, fill_value=c), alpha=alpha),
  6843. c
  6844. )
  6845. # 1. translation equivariance
  6846. # T(X + c) = T(X) + c
  6847. c = rng.exponential()
  6848. assert_allclose(
  6849. stats.expectile(x + c, alpha=alpha),
  6850. stats.expectile(x, alpha=alpha) + c,
  6851. )
  6852. assert_allclose(
  6853. stats.expectile(x - c, alpha=alpha),
  6854. stats.expectile(x, alpha=alpha) - c,
  6855. )
  6856. # 2. positively homogeneity
  6857. # T(cX) = c * T(X) for c > 0
  6858. assert_allclose(
  6859. stats.expectile(c * x, alpha=alpha),
  6860. c * stats.expectile(x, alpha=alpha),
  6861. )
  6862. # 3. subadditivity
  6863. # Note that subadditivity holds for alpha >= 0.5.
  6864. # T(X + Y) <= T(X) + T(Y)
  6865. # For alpha = 0.5, i.e. the mean, strict equality holds.
  6866. # For alpha < 0.5, one can use property 6. to show
  6867. # T(X + Y) >= T(X) + T(Y)
  6868. y = rng.logistic(size=n, loc=10) # different distibution than x
  6869. if alpha == 0.5:
  6870. def assert_op(a, b):
  6871. assert_allclose(a, b)
  6872. elif alpha > 0.5:
  6873. def assert_op(a, b):
  6874. assert a < b
  6875. else:
  6876. def assert_op(a, b):
  6877. assert a > b
  6878. assert_op(
  6879. stats.expectile(np.r_[x + y], alpha=alpha),
  6880. stats.expectile(x, alpha=alpha)
  6881. + stats.expectile(y, alpha=alpha)
  6882. )
  6883. # 4. monotonicity
  6884. # This holds for first order stochastic dominance X:
  6885. # X >= Y whenever P(X <= x) < P(Y <= x)
  6886. # T(X) <= T(Y) whenever X <= Y
  6887. y = rng.normal(size=n, loc=5)
  6888. assert (
  6889. stats.expectile(x, alpha=alpha) <= stats.expectile(y, alpha=alpha)
  6890. )
  6891. # 5. convexity for alpha > 0.5, concavity for alpha < 0.5
  6892. # convexity is
  6893. # T((1 - c) X + c Y) <= (1 - c) T(X) + c T(Y) for 0 <= c <= 1
  6894. y = rng.logistic(size=n, loc=10)
  6895. for c in [0.1, 0.5, 0.8]:
  6896. assert_op(
  6897. stats.expectile((1-c)*x + c*y, alpha=alpha),
  6898. (1-c) * stats.expectile(x, alpha=alpha) +
  6899. c * stats.expectile(y, alpha=alpha)
  6900. )
  6901. # 6. negative argument
  6902. # T_{alpha}(-X) = -T_{1-alpha}(X)
  6903. assert_allclose(
  6904. stats.expectile(-x, alpha=alpha),
  6905. -stats.expectile(x, alpha=1-alpha),
  6906. )
  6907. @pytest.mark.parametrize("n", [20, 2000])
  6908. def test_monotonicity_in_alpha(self, n):
  6909. rng = np.random.default_rng(42)
  6910. x = rng.pareto(a=2, size=n)
  6911. e_list = []
  6912. alpha_seq = np.logspace(-15, np.log10(0.5), 100)
  6913. # sorted list of unique alpha values in interval (0, 1)
  6914. for alpha in np.r_[0, alpha_seq, 1 - alpha_seq[:-1:-1], 1]:
  6915. e_list.append(stats.expectile(x, alpha=alpha))
  6916. assert np.all(np.diff(e_list) > 0)