U
    cv                     @   s&  d dddddddddddd	d	d	d	d
d
ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddgZ ddlmZ ddlmZ ee ZedZdd eD Z	e
ee	 eddddg ed ed ed  ee Zed!ks$tee Zee Zed"ksBtedksPtee Zed Zed Zed# Zedks~tedkstedkstdd$lmZ ee ed%d&d'Zee  d(ee   k rd)k sn tee ed%d*d+Zee ed%d,d-Zee ed.d/d0Z e dddddgdks<te ddddgd1ksVte e dkshtee eed2d3d4Z!e!e d5dkste!e d6dkste!e d7dkste!e d8dkstee ee d9d:d;Z"e#e"e ddhks tee ed%d<d=Z$e$e d>ks&tdd?l%m&Z& ee ee d%d@dAZ'ee ed%dBdCZ(dDe(e   k rzdEk sn tddl)Z)ee ed%dFdGZ*dHe*e   k rdIk sn tee ed%dJdKZ+e+e dkstddLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudfdvdwdxdydzd{d|d}d~ddddddddddddddddddddddddddddddddddddddddddUdddddddddddddddddddddddddddddddddddddddwddddddddddddddddddddddddddddddddddddddddddddddTdddddddddddddd ddddddddd	d
gZ,dd e,D Z-ddl%m.Z. ee ee edddZ/de/e e,  k rdk sn tde/e e-  k rdk s$n tee ee edddZ0de0e e,  k rdd6k sjn tde0e e-  k rd6k sn te 1d"Z2dd e3e D Z4dd e3e,D Z5dd e5D Z6de0e4e5  k rdk sn tde0e4e6  k rdk s"n tdS (  g      Y@1   )   (                                 
   	                               )CounterNe   c                 C   s   g | ]}t | qS  )friend_counts.0xr   r   J/home/cheung/ML/DS-from-scratch/10-working-with-data/scratch/statistics.py
<listcomp>	   s     r!   zHistogram of Friend Countsz# of friendsz# of people   d   )List)xsreturnc                 C   s   t | t|  S N)sumlenr&   r   r   r    mean*   s    r,   g_LU@gAfU@c                 C   s   t | t| d  S )z3If len(xs) is odd, the median is the middle elementr   sortedr*   r+   r   r   r    _median_odd5   s    r/   c                 C   s,   t | }t| d }||d  ||  d S )z?If len(xs) is even, it's the average of the middle two elementsr   r   r-   )r&   Z	sorted_xsZhi_midpointr   r   r    _median_even9   s    r0   )vr'   c                 C   s    t | d dkrt| S t| S )z"Finds the 'middle-most' value of vr   r   )r*   r0   r/   )r1   r   r   r    median?   s    r2   g      @)r&   pr'   c                 C   s   t |t|  }t| | S )z%Returns the pth-percentile value in x)intr*   r.   )r&   r3   Zp_indexr   r   r    quantileI   s    r5   g?      ?      ?g?)r   r'   c                    s*   t | }t|   fdd| D S )z7Returns a list, since there might be more than one modec                    s   g | ]\}}| kr|qS r   r   )r   Zx_icountZ	max_countr   r    r!   W   s    zmode.<locals>.<listcomp>)r   maxvaluesitems)r   countsr   r9   r    modeS   s    r>   c                 C   s   t | t|  S r(   )r:   minr+   r   r   r    
data_range]   s    r@   c   )sum_of_squaresc                    s   t |   fdd| D S )z?Translate xs by subtracting its mean (so the result has mean 0)c                    s   g | ]}|  qS r   r   r   Zx_barr   r    r!   g   s     zde_mean.<locals>.<listcomp>)r,   r+   r   rC   r    de_meand   s    rD   c                 C   s4   t | dkstdt | }t| }t||d  S )z2Almost the average squared deviation from the meanr   z'variance requires at least two elementsr   )r*   AssertionErrorrD   rB   )r&   nZ
deviationsr   r   r    variancei   s    rG   g(\bT@g33333cT@c                 C   s   t t| S )z9The standard deviation is the square root of the variance)mathsqrtrG   r+   r   r   r    standard_deviationu   s    rJ   g
ףp=
"@gGz"@c                 C   s   t | dt | d S )z:Returns the difference between the 75%-ile and the 25%-iler7   r6   )r5   r+   r   r   r    interquartile_range{   s    rK   gzG1Q@g     I@g
ףp=
J@gGz.C@gQEF@gq=
ףL@g33333I@g(\µD@gQ8?@gzGaA@gzGK@gQeC@gQG@ǧH@g)\;@gp=
׃D@g=
ףp]B@g33333SH@gQ<@g(\OG@g)\A@g=
ףp}@@#   gR:@gQ7@g=
ףpC@g)\HD@gfffff?@g(\5?@g)\(B@g33333s4@gGz5@gQ:@gףp=
W;@g=
ףp}7@gQxG@g     >@gfffff@@g{G:8@gffffff5@gq=
ף;@gQ@@gR9@gQk3@gp=
c6@gQk2@g{GzG@gQ7@g)\h:@gQ:@gzGaB@g)\(D@g(\A@gQx=@g333333>@   gGzC@gףp=
C@gHz'B@gHz5@g\(>@g)\B@g)\<@gGz=@gp=
ףB@g(\.@gQ+8@g(\O6@gQ+>@gHz9@g3@g(\A@gLF@g{G:1@gq=
ף*@gGzT:@gQ@@g(\8@gGzT3@gQ<@g(\B8@g{G?@g{G9@g\(8@gHzG0@gzGAA@g(\u.@g\(C@gffffffD@g(\:@gzGA@gzG!0@gQF@gHz2@gfffff3@g(\O@@gQA@gףp=
C@g\(\,@gQA@gq=
ףD@g)\D@gA@g(\E@g\(8@gfffff4@gfffff5@g
ףp=2@g\(;@g(\5;@g\(:@gQ=@gףp=
4@gHz;@gp=
ף+@g@@ǧ@@g33333SB@gzG2@g=
ףp-@g3333336@gHzgB@gQ8@g     @:@g(\52@gGz<@g
ףp==@gffffff@@gQA@gRQ<@g
ףp=;@gq=
ףA@g(\=@gp=
#B@g333333,@gq=
ףPB@gQB@gfffff:@gGz2@g=
ףp=C@g{Gz8@g333332@gfffff@@g{Gz,@g
ףp=
=@gzGA@@gzG9@gQ86@g=
ףp]@@gRQ.@g+@g333333;@gzG@@gQE=@!   g{Gz+@gQk4@gRQ;@g{G:2@g̬A@g{Gz<@g)\("@gQ4@gzGA@gQ3@gQ?@g=
ףp}0@gRQ(@g33333>@g33333SA@g(\B*@g(\;@gR?@g(\µ@@gq=
ףp1@g=
ףp=$@gQk8@gp=
ף#@gp=
c7@gGz>@g(\.@gQ5@gףp=
?@gQ@@g\(6@gp=
:@g{Gz7@g(\ @g(\;@g,@@gףp=
7@c                 C   s   g | ]}|d  qS <   r   r   dmr   r   r    r!      s     )dot)r&   ysr'   c                 C   s6   t | t |kstdtt| t|t | d  S )Nz+xs and ys must have same number of elementsr   )r*   rE   rS   rD   )r&   rT   r   r   r    
covariance   s    rU   gQk6@gGzn6@g<'?g4?c                 C   s:   t | }t |}|dkr2|dkr2t| || | S dS dS )z<Measures how much xs and ys vary in tandem about their meansr   N)rJ   rU   )r&   rT   Zstdev_xZstdev_yr   r   r    correlation   s
    rV   gQ?c                 C   s   g | ]\}}|t kr|qS r   outlierr   ir   r   r   r    r!      s   c                 C   s   g | ]\}}|t kr|qS r   rW   rY   r   r   r    r!      s   c                 C   s   g | ]}|d  qS rO   r   rQ   r   r   r    r!      s     g=
ףp=?g(\?)7Znum_friendscollectionsr   matplotlib.pyplotpyplotpltr   ranger&   rT   baraxistitlexlabelylabelr*   
num_pointsrE   r:   largest_valuer?   smallest_valuer.   Zsorted_valuesZsecond_smallest_valueZsecond_largest_valuetypingr%   floatr,   r/   r0   r2   r5   r>   setr@   Zscratch.linear_algebrarB   rD   rG   rH   rJ   rK   Zdaily_minutesZdaily_hoursrS   rU   rV   indexrX   	enumerateZnum_friends_goodZdaily_minutes_goodZdaily_hours_goodr   r   r   r    <module>   s    


"""  (( 	&&
(