
    !g:              
          d Z ddlZddlmZ ddlmZ  eg d      dddddfd	Zd(d
Zd)dZ	e	Z
	 	 d*dZd+dZedk(  r ej                  d      Z e e	e              ej                  d      j#                  dd      Z e e	e              e eed              e eed              e eed              ej$                  e      Zej)                  e      Zej.                  ed<    e e	ed              ed      dfD ]  Z e e	eedf          e	eedf         k(  j5                                 e eeedf          eeedf         k(  j5                                 eej6                  j9                   ej:                  eedf                eeedf   d      k(  j5                                 dD ]  Z e e	ee       e	ee      k(  j5                                 e eee       eee      k(  j5                                 eej6                  j9                   ej:                  e      e       eeed      k(  j5                                  eej6                  j                   ej:                  e             e	edd      k(  j5                                 ej>                  eef      j@                  Z!dD ];  Z e e	e!e      d    e	ej@                  edz
        k(  j5                                = ejD                  jG                   e	 ej                  d      dd      d ej                  d      z   dz
  dz         ejD                  jG                   e	 ej                  d      dd      d ej                  d      z   dz
  dz         ejD                  jG                   e	 ej                  d            d ej                  d      z   dz
  dz          ed        e eeddg              e eedddf                 e eedddf          e	edddf         k(  j5                                g d Z$dZ%e%rddl&m'Z(  e(jR                           e(jT                  d!        e(jV                   eedddf   e$d"#      edddf   d$%        e(jV                  ej6                  j                   ejX                  edddf   e$d             ejX                  edddf   e$d      d$%        e(jZ                   eedddf   e$d"#      edddf   d&        e(jZ                  ej6                  j                   ejX                  edddf   e$d             ejX                  edddf   e$d      d&        e(jR                           e(jT                  d'        e(jV                  edddf    eedddf   e$d"#      d$%        e(jV                   ejX                  edddf   e$d      ej6                  j                   ejX                  edddf   e$d            d$%        e(jZ                  edddf    eedddf   e$d"#      d&        e(jZ                   ejX                  edddf   e$d      ej6                  j                   ejX                  edddf   e$d            d&        e(j\                          yy),aQ  get versions of mstats percentile functions that also work with non-masked arrays

uses dispatch to mstats version for difficult cases:
  - data is masked array
  - data requires nan handling (masknan=True)
  - data should be trimmed (limit is non-empty)
handle simple cases directly, which does not require apply_along_axis
changes compared to mstats: plotting_positions for n-dim with axis argument
addition: plotting_positions_w1d: with weights, 1d ndarray only

TODO:
consistency with scipy.stats versions not checked
docstrings from mstats not updated yet
code duplication, better solutions (?)
convert examples to tests
rename alphap, betap for consistency
timing question: one additional argsort versus apply_along_axis
weighted plotting_positions
- I have not figured out nd version of weighted plotting_positions
- add weighted quantiles


    N)mastats)g      ?g      ?g      ?皙? Fc                    t        | t        j                  j                        r%t        j
                  j                  | |||||      S |rJt        j
                  j                  | |||||      }t        j                  |t        j                        S |rt        j                  |       }|j                         rat        j                  | |      }t        j
                  j                  ||||||      }t        j                  |t        j                        S t        j                  |       }	t        j                  |dd      }
||
d|z
  |z
  z  z   }d}||	j                         }	d	}n:t        j                  |	j                        |   }t        j                   |	|      }	d
}t        j"                  |	d	      }|j$                  d	   }t'        |	j$                        }|
||<   |d	k(  r$t        j(                  t+        |
      t,              S |dk(  r t        j.                  ||
j$                        S ||
z  |z   }t        j0                  |j3                  d|dz
              j5                  t6              }dg|j                  z  }t9        d      |d	<   ||z
  j3                  d	d      |   }d|z
  ||dz
     z  |||   z  z   }|rt        j                   |d	|dz         S |S )aL  
    Computes empirical quantiles for a data array.

    Samples quantile are defined by :math:`Q(p) = (1-g).x[i] +g.x[i+1]`,
    where :math:`x[j]` is the *j*th order statistic, and
    `i = (floor(n*p+m))`, `m=alpha+p*(1-alpha-beta)` and `g = n*p + m - i`.

    Typical values of (alpha,beta) are:
        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
        - (.5,.5)  : *p(k) = (k+1/2.)/n* : piecewise linear
          function (R, type 5)
        - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
          That's R default (R type 7)
        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x. (R type 8)
        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed (R type 9)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM ?? JP
        - (0.35, 0.65): PWM   ?? JP  p(k) = (k-0.35)/n

    Parameters
    ----------
    a : array_like
        Input data, as a sequence or array of dimension at most 2.
    prob : array_like, optional
        List of quantiles to compute.
    alpha : float, optional
        Plotting positions parameter, default is 0.4.
    beta : float, optional
        Plotting positions parameter, default is 0.4.
    axis : int, optional
        Axis along which to perform the trimming.
        If None (default), the input array is first flattened.
    limit : tuple
        Tuple of (lower, upper) values.
        Values of `a` outside this closed interval are ignored.

    Returns
    -------
    quants : MaskedArray
        An array containing the calculated quantiles.

    Examples
    --------
    >>> from scipy.stats.mstats import mquantiles
    >>> a = np.array([6., 47., 49., 15., 42., 41., 7., 39., 43., 40., 36.])
    >>> mquantiles(a)
    array([ 19.2,  40. ,  42.8])

    Using a 2D array, specifying axis and limit.

    >>> data = np.array([[   6.,    7.,    1.],
                         [  47.,   15.,    2.],
                         [  49.,   36.,    3.],
                         [  15.,   39.,    4.],
                         [  42.,   40., -999.],
                         [  41.,   41., -999.],
                         [   7., -999., -999.],
                         [  39., -999., -999.],
                         [  43., -999., -999.],
                         [  40., -999., -999.],
                         [  36., -999., -999.]])
    >>> mquantiles(data, axis=0, limit=(0, 50))
    array([[ 19.2 ,  14.6 ,   1.45],
           [ 40.  ,  37.5 ,   2.5 ],
           [ 42.8 ,  40.05,   3.55]])

    >>> data[:, 2] = -999.
    >>> mquantiles(data, axis=0, limit=(0, 50))
    masked_array(data =
     [[19.2 14.6 --]
     [40.0 37.5 --]
     [42.8 40.05 --]],
                 mask =
     [[False False  True]
      [False False  True]
      [False False  True]],
           fill_value = 1e+20)
    )probalphapbetapaxislimit
fill_valuemaskF   copyndmin      ?Nr   Tr   dtype)
isinstancenpr   MaskedArrayr   mstats
mquantilesfillednanisnananyarrayasarrayravelarangendimrollaxissortshapelistemptylenfloatresizefloorclipastypeintslice)ar	   r
   r   r   r   masknanmarrnanmaskdatapmisrolledxnreturnshapealephkindgammaqs                        i/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/sandbox/stats/stats_mstats_short.py	quantilesrF   &   s   l !RUU&&'||&&qtF&W[ '  	||&&qtF&W[ ' yy"&&11((1+;;=88AG,D<<**4d6QW#'u + 6D99Tbff55 ::a=D
E+ABvIeO$$AHzz|yy#D){{4&
1A	
Atzz"KK 	AvxxAe,,	
ayyAGG$$qS1WE
Aqs#$++C0A&-C4[CF1WNN1Q$E	E1QqS6E!A$J&A{{1aa((    c           	          t        j                  |t              }|dk  j                         s|dkD  j                         rt	        d|z        t        | |dz  g|||||      j                         S )zCalculate the score at the given 'per' percentile of the
    sequence a.  For example, the score at per=50 is the median.

    This function is a shortcut to mquantile
    r   g      Y@z7The percentile should be between 0. and 100. ! (got %s))r	   r
   r   r   r   r6   )r   r$   r.   r"   
ValueErrorrF   squeeze)r9   perr   r
   r   r   r6   s          rE   scoreatpercentilerL      st     **S%
 Ca}}3:**, %'*+ , 	,TT
6!g??FwyIrG   c                    t        | t        j                  j                        re|| j                  dk(  r"t
        j                  j                  | ||      S t        j                  t
        j                  j                  || ||      S |rt        j                  |       }|j                         rt        j                  | |      }|| j                  dk(  r#t
        j                  j                  |||      }n2t        j                  t
        j                  j                  ||||      }t        j                  |t        j                        S t        j                  |       } | j                  dk(  rt        j                   |       } d}|| j#                         } d}| j$                  |   }| j                  dk(  r`t        j&                  | j$                  t(              }t        j*                  d|dz         |z
  |dz   |z
  |z
  z  || j-                         <   |S | j-                  |      j-                  |      dz   |z
  |dz   |z
  |z
  z  }|S )aZ  Returns the plotting positions (or empirical percentile points) for the
    data.
    Plotting positions are defined as (i-alpha)/(n+1-alpha-beta), where:
        - i is the rank order statistics (starting at 1)
        - n is the number of unmasked values along the given axis
        - alpha and beta are two parameters.

    Typical values for alpha and beta are:
        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
          (Bliss 1967: "Rankit")
        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6), (Van der Waerden 1952)
        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
          That's R default (R type 7)
        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x. (R type 8), (Tukey 1962)
        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed (R type 9) (Blom 1958)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM

    Parameters
    ----------
    x : sequence
        Input data, as a sequence or array of dimension at most 2.
    prob : sequence
        List of quantiles to compute.
    alpha : {0.4, float} optional
        Plotting positions parameter.
    beta : {0.4, float} optional
        Plotting positions parameter.

    Notes
    -----
    I think the adjustments assume that there are no ties in order to be a reasonable
    approximation to a continuous density function. TODO: check this

    References
    ----------
    unknown,
    dates to original papers from Beasley, Erickson, Allison 2009 Behav Genet
    r   alphabetar   r   r   r   r   )r   r   r   r   r'   r   r   plotting_positionsapply_along_axisr!   r"   r#   r   r    r$   size
atleast_1dr%   r*   r,   r.   r&   argsort)	r9   rO   rP   r   r6   r8   r7   r>   plposs	            rE   rQ   rQ      s   Z $))*<499><<224u42PP&&u||'F'FdZ_fjkk((4.;;=88Dw/D|tyyA~||66t5t6T**5<<+J+JDRV^cjno99Tbff55::dDyyA~}}T"|zz|

4AyyA~51!#1QqS!1E!9AbDJtO Ldlln L d#++D1B6>2eDQLrG   c                 F   t        j                  |       }|j                  dkD  rt        d      | t        j                  |j
                        }nAt        j                  |t        dd      }|j
                  |j
                  k7  rt        d      t        |      }|j                         }||   j                         }t        j                  |j
                        }	|dk(  r"d|z  |d   z  |z  |z
  |dz   |z
  |z
  z  |	|<   |	S d|z  |z
  |d   dz   |z
  |z
  z  |	|<   |	S )	a  Weighted plotting positions (or empirical percentile points) for the data.

    observations are weighted and the plotting positions are defined as
    (ws-alpha)/(n-alpha-beta), where:
        - ws is the weighted rank order statistics or cumulative weighted sum,
          normalized to n if method is "normed"
        - n is the number of values along the given axis if method is "normed"
          and total weight otherwise
        - alpha and beta are two parameters.

    wtd.quantile in R package Hmisc seems to use the "notnormed" version.
    notnormed coincides with unweighted segment in example, drop "normed" version ?


    See Also
    --------
    plotting_positions : unweighted version that works also with more than one
        dimension and has other options
    r   z!currently implemented only for 1dFr   z9if weights is given, it needs to be the sameshape as datanormedr   )r   rT   r'   rI   onesr*   r#   r.   r-   rU   cumsumr,   )
r9   weightsrO   rP   methodr=   r>   xargsortwsress
             rE   plotting_positions_w1dra     s   , 	dAvvz<==''!''"((7EQ?==AGG# - . .AAyy{H			!	!	#B
((177
CBr"va-"U
4@H J Bur"vbyt';<HJrG   c                 h    ddl m} t        | ||dd      }|j                  j	                  |      }|S )z.rank based normal inverse transformed cdf
    r   r   F)rO   rP   r   r6   )scipyr   rQ   normppf)r=   rO   rP   r   r   ranksranks_transfs          rE   edf_normal_inverse_transformedrh   7  s1     qE5QE::>>%(LrG   __main__   
   rY      r   r   )r   r   )r6   )r   r   NrY   )r   r6   )r   rl   gffffff?g?rN   gffffff$@ Z   )r   r   rl   r   r   z"ppf, cdf values on horizontal axis0)r\   r]   post)wherez-oz cdf, cdf values on vertical axis)r   r   r   r   N)r   r   r   F)Nr   r   	notnormed)      ?rs   r   )/__doc__numpyr   r   rc   r   r+   rF   rL   rQ   meppfra   rh   __name__r&   r=   printreshaper#   xmr2   r.   x2r    r4   sl1allr   r   fix_invalidaxdstackTx3testingassert_equalw1plotexamplematplotlib.pyplotpyplotpltfiguretitlesteprepeatplotshowr   rG   rE   <module>r      s  .    <(2DHTIKZ 	?B"-(T z		!A	
Q
 		"b#A	
Q
 	)AA
	)AD
!"	)AA
	!B	
%BffBsG	
Ra
() dQ m!"SU),0B1SU80LLQQSTyCE#y3q5'::??ABu||&&~r~~bQi'@AYrRUVWRWybcEddiiklm  q!"2.2DQR2PPUUWXy"%12)>>CCEFu||&&~r~~b'9&CyQSZ\fgGhhmmopq 
5<<**>2>>"+=>BTUW^blmBnn
s
s
uv 
Aa5			B `!"2.q15GRTUVRV5WW\\^_` JJ.yryy}DvVYZ[d[][d[deg[hYhimYmoqXqrJJ.yryy}CcRUVW`WYW`W`acWdUdehUhkqTrsJJ.yryy}=)"))B-PS@SV\?]^	"I	
A2w
'(	
 1Q3
()	!!AaC&)-?!A#-GG
L
L
NO 
BK'

		67'!A#3G1Q3W]^001QqS6"!1LMibiiXYZ[\]Z]X^_aghNipvw'!A#3G1Q3QUV001QqS6"!1LMibiiXYZ[\]Z]X^_aghNikop

		451Q3/!A#3OV\]1QqS6"!,ell.M.MibiiXYZ[\]Z]X^_aghNi.jqwx1Q3/!A#3OQUV1QqS6"!,ell.M.MibiiXYZ[\]Z]X^_aghNi.jlpqCHHJ{ rG   