o
    ҷh                     @   s  d Z ddlmZ ddlZddlZddlm  mZ	 ddl
mZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd Zdd	 Zg Zg Zd
D ]LZeddD ]D\ZZeeeeZ de!de"de d " d dfZ#ddddgfZ$ee$e#D ]\Z%Z&e'e e%e&eef e'e% de de  qxqPqIej(j)ej(j*deedej(*dddgej(*dddgej(*dddgej(*dddgej(*dddgdd  Z+ej(*d!ddgd"d# Z,ej(*d$d%d&gg d'gd(d) Z-ej(*d$d%d&gg d'gd*d+ Z.d,d- Z/d.d/ Z0ej1d0d1 Z2d2d3 Z3d4d5 Z4d6d7 Z5d8d9 Z6ej(*d:g d;ej(*dddgej(*d<g d=ej(*d>ddgej(*d?ddgd@dA Z7ej(*dBe8ej9dCe	:dDdEej9dFe	:dDdEgej(*dGddgej(*dHddg dIg dJg dKfddg dLg dMg dNfddg dOg dPg dNfgdQdR Z;ej1dSdT Z<ej(*dUddddVg dWg dXfddddVg dYg dZfddddVg d[g d\fdddd]g d^g d\fgd_d` Z=ej1dadb Z>ej(*dcddg ddg defddg dfg dgfddg dhg difddg djg dkfgdldm Z?ej1dndo Z@ej(*dpdddgejAg dqg drdsfdg dteedgeg duedvdwejBggg dxg dyg dzgg drd{fgej(*dddgd|d} ZCej(*d>ddgej(*d~ddgej(*dddVejDg dejEdfdd]eDg dfgdd ZFdd ZGej(*d>ddgej(*dddVejDg dejEdfdd]eDg dfgdd ZHej(*d>ddgej(*dddVejDg dejEdfdd]eDg dfgdd ZIej(*d>ddgej(*ddg dfdg dfgej(*dddVejDg dejEdfdd]eDg dfgdd ZJej(*d>ddgej(*d~ddgej(*dddVejDg dejEdfdd]eDg dfgdd ZKej(*dddVg dfdd]g dkfgdd ZLej(*ddeMdg dfdeMddg g dfgej(*d>ddgdd ZNej(*dddgdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTej(*d!ddgdd ZUdd ZVdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    )productN)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer	   tmassert_series_equal)dfresultdf_mi_expectedmi_expectedexpected r$   Y/var/www/html/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_value_counts.py.tests_value_counts_index_names_category_column   s   
r&   c                 C   s   t ddd}ttjdtd|tjd||tjdd|d |d}| rm|d d	|d< tj	|j
dd d
df< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< |S )Nz
2015-08-24
   )periods   abcdr   )1st2nd3rdr-   float   r+         r,            	   )r
   r   nprandomdefault_rngchoicelistintegersr   nanloc)	seed_nansnmdaysframer$   r$   r%   seed_df2   s   rC   TF)d   i  )      rF   r-   r   r)   r+   r,   -zdf, keys, bins, n, m)idsisortTFznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   dd }|||	|
|d}| j ||d}|d jdi |}| j ||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	|
 |
  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r:   mapr   get_level_valuesrangenlevelsr   from_arraysrP   )r   arrr$   r$   r%   rebuild_index`   s   z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerL   rM   rN   binsrL   r-   r$   )r   r   applyr	   r   rP   renamerQ   r   r   
sort_index)r   keysrY   r?   r@   rJ   rX   r   rL   rM   rN   rW   kwargsgrleftrightr$   r$   r%    test_series_groupby_value_countsV   s   

rd   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]applerh   bananari   orangerj   pear	TimestampFoodr0   rm   sre   unitDatetime1Dfreqkeyrn   r   )r   dropr   r   r   r   r^   r\   r	   r   rP   r]   r   r   )re   r   dfgr    r#   r$   r$   r%   -test_series_groupby_value_counts_with_grouper{   s   	
ry   r   AB)rz   r{   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   r[   r   )dtyper   rO   )r   r   r   r	   r}   r   rU   lenr   r   r   r   r   rx   r    r#   r$   r$   r%   &test_series_groupby_value_counts_empty   s   
r   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   r[   )r   rS   r~   r   r   r   r   r   r$   r$   r%   (test_series_groupby_value_counts_one_row   s
   r   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r   orderedr}   r   r   r   r   )r	   r   r   r   r   rU   r6   arrayr   r   r   )ro   r    r#   r$   r$   r%   /test_series_groupby_value_counts_on_categorical   s   r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrZ   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r)   r   r)   r   r   r   levelscodesrP   r   r   r   r)   r   r   r   )r   r   r   r   r	   r   r   )r   gbr    r   r#   r$   r$   r%   (test_series_groupby_value_counts_no_sort   s   r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r$   r$   r$   r%   education_df   s   r   c                 C   s|   d}t jt|d | jddd}W d    n1 sw   Y  tjtdd |  W d    d S 1 s7w   Y  d S )Nz+DataFrame.groupby with axis=1 is deprecatedmatchr   r   axisr   )r   assert_produces_warningFutureWarningr   pytestraisesNotImplementedErrorr   )r   msggpr$   r$   r%   	test_axis   s   
"r   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   )r   r   r$   r$   r%   test_bad_subset   s   
"r   c                 C   sx   t tjt dkr|jtjjddd | dddg j	dd	}t
g d
tjg dg dddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrX   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rO   rK   r   )r   r6   __version__node
add_markerr   markxfailr   r   r	   r   from_tuplesr   r   )r   requestr    r#   r$   r$   r%   
test_basic   s&   	
r   c                 C   s   | | j |||dS )NrX   rL   rM   )r   )r   r_   rX   rL   rM   r$   r$   r%   _frame_value_counts  s   r   r   columnr   functionzsort, ascending))FN)TTrD   as_indexrB   c	                    sF  t tjt dkr|r|r|r|jtjjddd d d j fddd| }	 j	|	|d	}
|
d
dg j
|||d}|r|
td
dg|||}|rTt|| d S |rXdnd}| jd|idd}|dkr~|jddidd}t|d dd|d< n|dkr|d dk|d< nt|d dd|d< t|| d S  d
 d  d   d< |
d j
|||d}||_|r|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_t|| d S |dd
|d jdjd |dd|d jdjd |d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r$   )xr   r$   r%   <lambda>C      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   rK   r   r   r   r   r   level_0r   r   r   rH   bothr   r)   )r   r6   r   r   r   r   r   r   valuesr   r   r\   r   r   r   reset_indexr]   whereassert_frame_equalr   r   to_framestrsplitgetr   r   insert)r   r   rX   r   rL   rM   r   rB   r   r   r   r    r#   index_framer$   r   r%   $test_against_frame_and_seriesgroupby  sb   

""r   r}   zstring[pyarrow_numpy]pyarrow)marksstring[pyarrow]rX   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   r)   r0      r   )r   r0   r   r0   r   )r   r0   r   r)   r   )r   r)   r   r   r   )r   r0   r0   r   r   )r   r   r0   r)   r   )r   r   r)   r   r   c                    s    |j |_jddgddd}|d j|||d}	t }
dD ]  fdd	|D |
 < |
 |}
|
j ||
_q%|r[||
d
< |
d
  |  < |dkrZ|
d
  |
d
< n||
d< |dkrk|
d  |
d< t|	|
 d S )Nr   r   Fr   rL   r   r   r   c                       g | ]}  | qS r$   r$   .0rowr   r   r$   r%   
<listcomp>      z!test_compound.<locals>.<listcomp>rK   r   r   )r   r   r   r   r   convert_dtypesr   r   )r   rX   rL   rM   expected_rowsexpected_countexpected_group_sizer}   r   r    r#   r$   r   r%   test_compounds  s*   

r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )r)   r   r      )r)   r   r   r   rv   num_legs	num_wings)falcondogcatantr   r   r$   r$   r$   r%   
animals_df  s   r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   r)   r   )r   r   r   )r)   r   r   r)   r   r   r   r   r)   )r   )r)   r   r   r   )r)   r   r   )r   )r   r)   r   )r   r)   r   rK   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rL   rM   rX   r   rO   r   rv   )r   r	   r   rU   r   r   r   )
r   rL   rM   rX   r   expected_dataexpected_indexresult_framer#   result_frame_groupbyr$   r$   r%   test_data_frame_value_counts  s   
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r0   r)   rF   r4   r2   )rz   r{   r|   D)r6   r<   r   )r?   r$   r$   r%   nulls_df  s   r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r0   rF   r2   r   r4   r)   r   )	r   r         ?r   r   r   r   r  r  )r   r   r0   rF   r)   r   )r   r   r  r  r  r  )r   r   rF   r2   r   r4   )r   r   r   r   r   r   )r   r   rF   )r   r   r  c                    s   t tjt dkr|s|jtjjddd jddg|d}|j	dd|d	}t
 }jD ]  fd
d|D | < q.t|}	t||	dd}
t||
 d S )Nr   r   Fr   rz   r{   )rN   T)rX   rL   rN   c                    r   r$   r$   r   r   r   r$   r%   r     r   z,test_dropna_combinations.<locals>.<listcomp>rK   r   )r   r6   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   )r   group_dropnacount_dropnar   expected_valuesr   r   r    r   r   r#   r$   r  r%   test_dropna_combinations  s   	

r  c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner  BethSmithLouiserv   
first_namemiddle_namer   )nulls_fixturer$   r$   r%   names_with_nulls_df  s   
r  z%dropna, expected_data, expected_index)r   r   )r	  r  )r  r
  r  rO   r   )r  r	  r  r  r
  )r   r   r   r   )r   r   r)   r)   )r)   r   r   r)   r   c           	      C   s`   | j ||d}t|||d}|r|tt| }t|| | dj ||d}t|| d S )N)rN   rX   r   rv   )r   r	   r.   r~   r   r   r   )	r  rN   rX   r   r   r   r   r#   r   r$   r$   r%   #test_data_frame_value_counts_dropna  s   !
r  observedznormalize, name, expected_data)r)   r   r   r   r   r   r   r   r   r   r   r   r}   )r   r   r           r  r  r   r   r  r  r  r  c                 C   s   t tjt dkr|jtjjddd | dj	d||d}|j
|d}tjg d	g d
d}	t||	|d}
tdD ]}|
jjt|
jj| |d|
_q<|rXt||
 d S |
j|r^dndd}t|| d S )Nr   r   Fr   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rO   r   r0   levelrK   r   r   )r   r6   r   r   r   r   r   r   r   r   r   r   r   r	   rS   r   
set_levelsr   r   r   r   r   r   r   r   r  rX   r   r   r   r   r    r   expected_seriesir#   r$   r$   r%   =test_categorical_single_grouper_with_only_observed_categoriesF  s<   



r&  c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ] }
t
|	jj|
 }|
d
krI|| d jj}|	jj||
d|	_q2|r]t||	 d S |	j|d}t|| d S )Nr   r   ASIAr  r   r   rO   r   r0   r   r  r!  )copyr   r   add_categoriesr   r   r	   r   r   rS   r   r   r   set_categoriesr   r"  r   r   r   r   )r   r   r  r   rX   r   r   r   r    r$  r%  index_levelr#   r$   r$   r%   !assert_categorical_single_grouper  s.   
r,  c              	   C   sL   t tjt dkr|jtjjddd g d}t| |d||||d d S )Nr   r   Fr   r  Tr   r   r  r   rX   r   r   	r   r6   r   r   r   r   r   r   r,  r   r   rX   r   r   r   r   r$   r$   r%   -test_categorical_single_grouper_observed_true  s"   

r0  )r)   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r  r  r  r   r   r  r  r  r  r  r  r  r  r  r  c              	   C   sL   t tjt dkr|jtjjddd g d}t| |d||||d d S )Nr   r   Fr   )r   r   r   r  r  r  r   r   r  r  r  r  )r'  r   r   )r'  r   r   )r'  r   r   )r'  r   r   )r'  r   r   )r'  r   r   r-  r.  r/  r$   r$   r%   .test_categorical_single_grouper_observed_false  s"   ,

r1  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r2  r3  r4  r5  r6  )r   r   r)   r   r   r   r   r   r   r   r   r   )r  r  r  r  r  r  r  r  r  r  r  r  c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g dd|d	}	td
D ]}
|	jj	t
|	jj|
 |
d|	_q@|r\t||	 d S |	j|rbdndd}t|| d S )Nr   r   r   r  r   r  )r   r   r   rO   r   r)   r  rK   r   r!  )r(  r   r   r   r	   r   r   rS   r   r"  r   r   r   r   r   r   )r   r   r  r   rX   r   r   r   r    r$  r%  r#   r$   r$   r%   "test_categorical_multiple_groupersE  s2   7


r7  c                 C   s   t tjt dkr|jtjjddd |  } | d 	d| d< | d 	d| d< | j
d||d	}|j|d
}g d}	t|tj|	g dd|d}
tddD ]}|
jjt|
jj| |d|
_qP|rlt||
 d S |
j|rrdndd}t|| d S )Nr   r   Fr   r   r   r   r   r  r   r  r   rO   r   r   r0   r  rK   r   r!  )r   r6   r   r   r   r   r   r   r(  r   r   r   r	   r   r   rS   r   r"  r   r   r   r   r   r   r#  r$   r$   r%   test_categorical_non_groupers  s>   


r8  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtddg ddg ddg d||i}t|| d S )Nr   r   r)   r0   )rz   r{   )r   rF   r   rz   c                 S   s   | dkrdS dS )Nr   r2   r4   r$   )r%  r$   r$   r%   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   T)rL   rX   r   )r   r   rF   r  r   level_2)r4   r4   r2   r{   )r   r0   r)   )r   r   r   r6   r   intr   r   )rX   expected_labelr  r   r   r    r#   r$   r$   r%   test_mixed_groupings  s   		r>  ztest, columns, expected_namesrepeatabbde)r   Ndr   r   er   r*   level_1)r   NrA  r   crC  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|r<tdtj||ddd}t	
|| d S dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r0   rF   r2   r5   )r)   r   r   r4   r'   r   )r   r   r2   r0   rF   r5   )r)   r   r4   r   r   r'   r   r   r   r  rA  r:  r  rO   r   r   c                 S   s   g | ]	}t |d g qS )r   )r:   r   r$   r$   r%   r     s    z0test_column_label_duplicates.<locals>.<listcomp>rC  )r   r6   r   int64r   r   r	   r   r   r   r   r:   appendr   )
testr   expected_namesr   r   r   r_   r    r#   expected_columnsr$   r$   r%   test_column_label_duplicates  s(   
rJ  znormalize, expected_labelc                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr9  r   r   r   Fr:  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rX   r=  r   r   r$   r$   r%   test_result_label_duplicates#  s   	"rK  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr   r   r  r)   rO   r   r   )r   r   r6   r   rE  r   r	   r   r   r   r   )r   r   r    r#   r$   r$   r%   test_ambiguous_grouping4  s   rL  c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nr   r   rD  r   yrO  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   rQ  r   r   r   r   r   r   r   r   r   r$   r$   r%   "test_subset_overlaps_gb_key_raises?  
   "rV  c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )NrM  rN  rP  rS  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   rQ  c3r   rT  rU  r$   r$   r%   !test_subset_doesnt_exist_in_frameG  rW  rY  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )NrM  rN  rP  rS  r   r   r  rR  r   r   r)   r   rO  rO   r   r   r   r   r   r	   r   rU   r   r   r   r    r#   r$   r$   r%   test_subsetO  s   r\  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)r   r   r   )r   rO  rO  rS  )rQ  rR  rR  )r   r   r   r  rR  r   r   r)   r   rO  )NrR  rR  rO   r   r   rZ  r[  r$   r$   r%   test_subset_duplicate_columns[  s   r]  c                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}| }tg d| d}|d  }t||g dgg dtdg dgg dd}t	d|dd}t
|| d S )Nrf   rg   rl   r0   rm   ro   rp   rr   rs   rt   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)re   )rh   ri   rj   rk   )r   r   r   r)   r)   r0   r   )r   r   r   r)   r)   r0   )rr   rm   rn   r   r   r   r   )r   rw   r   r   r   r   uniquer   rS   r	   r   r   )re   r   r   r    dates
timestampsr   r#   r$   r$   r%   test_value_counts_time_grouperm  s*   	ra  c                  C   sj   t g dg dg dd} | jddgddd}|d	  }t g dg dg ddd
}t|| d S )N)r   r   r   )r   r   rA  rM  r9  r   r)   Fr   r0   )r   r)   r0   r   )r   r   r   r   r   )r   r   r    r#   r$   r$   r%   !test_value_counts_integer_columns  s   rb  )W__doc__	itertoolsr   numpyr6   r   pandas.util._test_decoratorsutil_test_decoratorstdpandasr   r   r   r   r   r   r	   r
   r   pandas._testing_testingr   pandas.util.versionr   r&   rC   binnedrI   r>   r?   r@   r   arangemaxrY   r_   kr   rF  r   slowparametrizerd   ry   r   r   r   r   fixturer   r   r   r   r   r   objectparam
skip_if_nor   r   r   r   r  r  rU   r<   r  r   rE  r&  r,  r0  r1  r7  r8  r>  r:   rJ  rK  rL  rV  rY  r\  r]  ra  rb  r$   r$   r$   r%   <module>   s   ,$	




 H"





	
:#*
&0 %<



!