o
    ҷh4e                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlm	Z	m
Z
mZmZmZmZmZ d dlmZ d dlmZ G dd dZdS )    N)is_integer_dtype)CategoricalCategoricalIndex	DataFrame
RangeIndexSeriesSparseDtypeget_dummies)SparseArrayc                
   @   s(  e Zd Zejdd Zejddejedgddd Z	ejd	d
gddd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zejd3d4e d5d6giie d7d8gifd4e d9d5giie d:d8gife d9d6gid5d;e d7d8gife d9d6gid5d<e d=d8gifgd>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)ejdPd8dQgdRdS Z*ejd
d8dQgdTdU Z+dVdW Z,dXdY Z-ejdZd[gd\d] Z.d^d_ Z/d`da Z0dS )bTestGetDummiesc                 C   s   t g dg dg ddS )Nabr   r   r   c         )ABC)r   )self r   X/var/www/html/venv/lib/python3.10/site-packages/pandas/tests/reshape/test_get_dummies.pydf   s   zTestGetDummies.dfuint8i8N)paramsc                 C   s   t |jS N)npdtypeparamr   requestr   r   r   r!      s   zTestGetDummies.dtypedensesparsec                 C   s
   |j dkS )Nr&   )r"   r#   r   r   r   r&       s   
zTestGetDummies.sparsec                 C   s   |d u rt jS |S r   )r    r   )r   r!   r   r   r   effective_dtype&   s   zTestGetDummies.effective_dtypec                 C   sD   d}t jt|d t|dd W d    d S 1 sw   Y  d S )Nz1dtype=object is not a valid dtype for get_dummiesmatchobjectr!   )pytestraises
ValueErrorr	   )r   r   msgr   r   r   'test_get_dummies_raises_on_dtype_object+   s   "z6TestGetDummies.test_get_dummies_raises_on_dtype_objectc                 C   s   t d}t|}t|t d}tg dg dg dd| |d}|r8|jdkr1|jtd	d
}n|jtdd
}t|||d}t	|| t|||d}t	|| t d|_
t|||d}t	|| d S )NabcABCr   r   r   r   r   r   r   r   r   r   r   r   r+   r   F
fill_value        r&   r!   )listr   r   r'   kindapplyr
   r	   tmassert_frame_equalindex)r   r&   r!   s_lists_seriess_series_indexexpectedresultr   r   r   test_get_dummies_basic0   s$   

z%TestGetDummies.test_get_dummies_basicc                 C   s  t d}t|}tg dg dg dd}tg dg dg dd| |t dd	}|rEt|r5d
}n	|tkr<d}nd}|jt|d}t|||d}t	
|| t|||d}t	
|| t||j||d}|rxd| |j d| d}	n| |j}	t|	didd}|j }dd |jD |_t	|| t|dg||d}ddd}
d|
|	d
 |
|	< t|
dd }|j }dd |jD |_| }t	|| d S )Nr1   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   r6   r3   r4   r5   )r!   columnsr   Fr9   r7   r:   )rG   r&   r!   zSparse[z, ]   countnamec                 S      g | ]}t |qS r   str.0ir   r   r   
<listcomp>l       z?TestGetDummies.test_get_dummies_basic_types.<locals>.<listcomp>r   r   )int64r*   r   c                 S   rM   r   rN   rP   r   r   r   rS   v   rT   )r;   r   r   r'   r   boolr=   r
   r	   r>   r?   rG   rL   dtypesvalue_countsr@   assert_series_equalget
sort_index)r   r&   r!   rA   rB   s_dfrD   r8   rE   
dtype_nameexpected_countsr   r   r   test_get_dummies_basic_typesH   sJ   


z+TestGetDummies.test_get_dummies_basic_typesc                 C   s   t jg}t|}t|dgd}t||d}t||d}t||d}|js&J |js+J |js0J |j dgks:J |j dgksDJ |j dgksNJ d S )Nr   r@   r&   r   )r    nanr   r	   emptyr@   tolist)r   r&   just_na_listjust_na_seriesjust_na_series_indexres_list
res_seriesres_series_indexr   r   r   test_get_dummies_just_naz   s   


z'TestGetDummies.test_get_dummies_just_nac           
      C   sN  ddt jg}t|||d}tg dg dd| |d}|r3|jdkr,|jtdd	}n|jtd
d	}t	|| t|d||d}tt jg ddg ddg di| |d}|j
ddt jgdd}|j|_|r||jdkru|jtdd	}n|jtd
d	}t	|| tt jgd||d}ttddgdt jg| |d}	t|j|	j d S )Nr   r   r:   r3   r4   )r   r   r+   Fr7   r9   Tdummy_nar&   r!   r5   r   axisr   r`   rG   r!   )r    rb   r	   r   r'   r<   r=   r
   r>   r?   reindexrG   r   assert_numpy_array_equalvalues)
r   r&   r!   sresexpres_naexp_nares_just_naexp_just_nar   r   r   test_get_dummies_include_na   s6   

z*TestGetDummies.test_get_dummies_include_nac                 C   sf   d}t d}|||g}t|d|d}tdg dd| g di}|r+|jtd	d
}t|| d S )NezLATIN SMALL LETTER E WITH ACUTEletterprefixr&   letter_e)TFFletter_)FTTFr7   )unicodedatalookupr	   r   r=   r
   r>   r?   )r   r&   r|   eacutert   ru   rv   r   r   r   test_get_dummies_unicode   s   

z'TestGetDummies.test_get_dummies_unicodec                 C   s   |ddg }t ||d}tg dg dg dg ddtd	}|rBttg dd
d	tg dd
d	tg dd
d	tg dd
d	d}t|| d S )Nr   r   ra   r   r   r   r4   r   r   r   r5   A_aA_bB_bB_cr+   rV   )r	   r   rV   r
   r>   r?   r   r   r&   rE   rD   r   r   r   test_dataframe_dummies_all_obj   s   	z-TestGetDummies.test_dataframe_dummies_all_objc                 C   sZ   |ddg }| ddd}t|}tg dg dg dg d	d
td}t|| d S )Nr   r   r*   stringr   r   r   r4   r   r5   r   r+   )astyper	   r   rV   r>   r?   r   r   rE   rD   r   r   r   #test_dataframe_dummies_string_dtype   s   	z2TestGetDummies.test_dataframe_dummies_string_dtypec              	   C   s   t |||d}|rt}|jdkrt|d}nt|d}ntj}|}tg d|g d|d|g d|d|g d	|d|g d
|dd}|g d }t|| d S )Nr:   r   Fr   r   r   r+   r4   r   r5   r   r   r   r   r   )	r	   r
   r<   r   r    arrayr   r>   r?   r   r   r&   r!   rE   arrtyprD   r   r   r   "test_dataframe_dummies_mix_default   s$   
	z1TestGetDummies.test_dataframe_dummies_mix_defaultc                    s   ddg}t |||d}tg dg dg dg dg dd	}|d
g |d
g< g d}|d
g|  }|r6tnt ||  fdd||< t|| d S )Nfrom_Afrom_Br~   r   TFTFTFTTFFFTr   from_A_afrom_A_bfrom_B_bfrom_B_cr   r   r   r   r   c                    s    | S r   r   )xr   r   r   <lambda>  s    zCTestGetDummies.test_dataframe_dummies_prefix_list.<locals>.<lambda>)r	   r   r
   r   r=   r>   r?   )r   r   r&   prefixesrE   rD   colsr   r   r   "test_dataframe_dummies_prefix_list   s    	z1TestGetDummies.test_dataframe_dummies_prefix_listc              
   C   s   t |d|d}g d}tg dg dg dgdg| d}|dtji}|rUtjtg d	dd
tg ddddtg ddddtg ddddtg ddddgdd}t	|| d S )Nbadr~   )bad_abad_br   bad_cr   TFTFr   FTTFr   TFFTr   rG   r   rK   r   r   zSparse[bool])rL   r!   r   r   r   r   r   r   rn   )
r	   r   r   r    rU   pdconcatr   r>   r?   )r   r   r&   rE   bad_columnsrD   r   r   r   !test_dataframe_dummies_prefix_str	  s*   z0TestGetDummies.test_dataframe_dummies_prefix_strc                 C   s   t |dgdg|d}tg dg dg dg dd}|j}||d	d   t||d	d  < |d
g |d
g< |rIddg}|| tdd||< t|| d S )Nr   r   )r   rG   r&   r   r   r   r4   )r   r   r   r   r   r   r   r   rV   F)r	   r   rG   r   rV   r   r>   r?   r   r   r&   rE   rD   r   r   r   r   test_dataframe_dummies_subset&  s   "z,TestGetDummies.test_dataframe_dummies_subsetc                 C   s   t |d|d}tg dg dg dg dg dd}|d	g |d	g< |g d }|r;g d
}|| tdd||< t|| t |ddg|d}|jdddd}t|| t |ddd|d}t|| d S )Nz..
prefix_sepr&   r   r   r   r   r   )r   A..aA..bB..bB..cr   )r   r   r   r   rV   F__B__bB__c)r   r   r   r   )r	   r   r   r   r>   r?   renamer   r   r   r   !test_dataframe_dummies_prefix_sep8  s(   	z0TestGetDummies.test_dataframe_dummies_prefix_sepc                 C   N   t d}tjt|d t|dg|d W d    d S 1 s w   Y  d S )NzPLength of 'prefix' (1) did not match the length of the columns being encoded (2)r(   ztoo fewr~   reescaper,   r-   r.   r	   r   r   r&   r/   r   r   r   (test_dataframe_dummies_prefix_bad_lengthR     "z7TestGetDummies.test_dataframe_dummies_prefix_bad_lengthc                 C   r   )NzTLength of 'prefix_sep' (1) did not match the length of the columns being encoded (2)r(   r   r   r   r   r   r   r   ,test_dataframe_dummies_prefix_sep_bad_lengthZ  r   z;TestGetDummies.test_dataframe_dummies_prefix_sep_bad_lengthc                 C   s   ddd}t g dg dg dd}t|||d}t g dg d	g d
g dg dd}g d}|| t||< |rI|| tdd||< t|| d S )Nr   r   r   r   r   r   )r   r   r   r~   r   r4   r   r5   r   r   rV   F)r   r	   r   rV   r   r>   r?   )r   r&   r   r   rE   rD   rG   r   r   r   "test_dataframe_dummies_prefix_dictb  s    

z1TestGetDummies.test_dataframe_dummies_prefix_dictc                 C   s  t jt jt jg|jdd d f< t|d||djdd}|r0t}|jdkr*t|d}nt|d}nt j}|}t	dd	dt jg|g d
|d|g d|d|g d|d|g d|d|g d|d|g d|ddjdd}t
|| t|d||d}|g d }t
|| d S )Nr   Trl   r   rn   r   Fr   r   )r   r   r   r   r+   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   A_nanr   r   B_nanr   )r    rb   locr	   r[   r
   r<   r   r   r   r>   r?   r   r   r   r   test_dataframe_dummies_with_nax  s6   

z-TestGetDummies.test_dataframe_dummies_with_nac                 C   s   t g d|d< t|||djdd}|r(t}|jdkr"t|d}nt|d}ntj}|}tg d	|g d
|d|g d|d|g d|d|g d|d|g d|d|g d|ddjdd}t	
|| d S )Nr   yr   catr:   r   rn   r   Fr   r   r   r+   r4   r   r5   r3   r   r   r   )r   r   r   r   r   cat_xcat_y)r   r	   r[   r
   r<   r   r    r   r   r>   r?   r   r   r   r   'test_dataframe_dummies_with_categorical  s,   

z6TestGetDummies.test_dataframe_dummies_with_categoricalzget_dummies_kwargs,expecteddata   är   u   ä_aTr   u   x_ä)r   r   )r   r   u   xäac                 C   s   t di |}t|| d S )Nr   )r	   r>   r?   )r   get_dummies_kwargsrD   rE   r   r   r   test_dataframe_dummies_unicode  s   z-TestGetDummies.test_dataframe_dummies_unicodec                 C   s   t d}t|}t|t d}tg dg ddtd}t|d|d}|r,|jtd	d
}t|| t|d|d}t|| t d|_	t|d|d}t|| d S )Nr1   r2   r4   r5   )r   r   r+   T
drop_firstr&   Fr7   )
r;   r   r   rV   r	   r=   r
   r>   r?   r@   r   r&   rA   rB   rC   rD   rE   r   r   r   !test_get_dummies_basic_drop_first  s   
z0TestGetDummies.test_get_dummies_basic_drop_firstc                 C   s   t d}t|}t|t d}ttdd}t|d|d}t|| t|d|d}t|| tt dd}t|d|d}t|| d S )Naaar2   r   r`   Tr   )r;   r   r   r   r	   r>   r?   r   r   r   r   +test_get_dummies_basic_drop_first_one_level  s   z:TestGetDummies.test_get_dummies_basic_drop_first_one_levelc           	      C   s   ddt jg}t|d|d}tdg ditd}|r |jtdd}t|| t|dd|d	}tdg dt jg d
itdj	dt jgdd}|rN|jtdd}t|| tt jgdd|d	}tt
dd}t|| d S )Nr   r   Tr   r4   r+   Fr7   rm   r   r&   r5   r   rn   r`   )r    rb   r	   r   rV   r=   r
   r>   r?   rq   r   )	r   r&   s_NAru   rv   rw   rx   ry   rz   r   r   r   $test_get_dummies_basic_drop_first_NA  s$   
z3TestGetDummies.test_get_dummies_basic_drop_first_NAc                 C   sV   |ddg }t |d|d}tg dg ddtd}|r#|jtd	d
}t|| d S )Nr   r   Tr   r4   r5   )r   r   r+   Fr7   )r	   r   rV   r=   r
   r>   r?   r   r   r   r   !test_dataframe_dummies_drop_first  s   z0TestGetDummies.test_dataframe_dummies_drop_firstc                 C   s   t g d|d< t|d|d}tg dg dg dg dd	}g d
}|| t||< |g d	 }|rB|D ]
}t|| ||< q7t|| d S )Nr   r   Tr   r   r4   r5   r   )r   r   r   r   )r   r   r   )r   r	   r   r   rV   r
   r>   r?   )r   r   r&   r!   rE   rD   r   colr   r   r   2test_dataframe_dummies_drop_first_with_categorical  s   zATestGetDummies.test_dataframe_dummies_drop_first_with_categoricalc                 C   s   t jt jt jg|jdd d f< t|dd|djdd}tdddt jgg dg dg d	g dd
}g d}|| t||< |jdd}|rT|D ]
}t|| ||< qIt	
|| t|dd|d}|g d }t	
|| d S )Nr   Tr   r   rn   r   r   r   r   )r   r   r   r   r   )r   r   r   r   F)r   r   r   )r    rb   r   r	   r[   r   r   rV   r
   r>   r?   )r   r   r&   rE   rD   r   r   r   r   r   )test_dataframe_dummies_drop_first_with_na%  s0   	z8TestGetDummies.test_dataframe_dummies_drop_first_with_nac                 C   s   t g d}t|}tddgddgddggddgtd}t|| t tg d}t|}tddgddgddggtddgtd}t|| d S )	Nr   r   r   r   r   r   rp   r   r   r   )r   r	   r   rV   r>   r?   r   )r   r   rE   rD   r   r   r   test_get_dummies_int_int@  s   $ z'TestGetDummies.test_get_dummies_int_intc                 C   s   t g dtg dg dg dd}g d}t g dg dg dg|d}||d	d   |||d	d  < t|d
dg|d}t|| d S )Nr   r   )      ?       @r   )r   r   r   D)r   r   A_1A_2B_ar   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   rp   )r   r   r   r	   r>   r?   )r   r!   r   rG   rD   rE   r   r   r   test_get_dummies_int_dfM  s   
"z&TestGetDummies.test_get_dummies_int_dforderedFc                 C   sx   t tdtd|d}t||d}tjg dg dg| |d}t|j|j|d}t||| |d}t	
|| d S )Nxyxyz)
categoriesr   r+   r3   r4   rp   )r   r;   r	   r    r   r'   r   r   r   r>   r?   )r   r!   r   r   rE   r   r   rD   r   r   r   1test_dataframe_dummies_preserve_categorical_dtype_  s    
z@TestGetDummies.test_dataframe_dummies_preserve_categorical_dtypec                 C   sL   t ddgddgd}t|dg|d}|jdgd	}t|dg | d S )
Nr   r   ABCD)GDPNationr   rG   r&   r   r   )r   	from_dictr	   rq   r>   r?   )r   r&   r   df2r   r   r   *test_get_dummies_dont_sparsify_all_columnsm  s   z9TestGetDummies.test_get_dummies_dont_sparsify_all_columnsc                 C   sd   g d|_ t|jdd}tg dg dg dgg ddjdd}|d	tji}t|| d S )
N)r   r   r   r   rn   r   r   r   )r   r   r   r   A_cr   r   )	rG   r	   r[   r   r   r    rU   r>   r?   r   r   r   r   "test_get_dummies_duplicate_columnsv  s   
	z1TestGetDummies.test_get_dummies_duplicate_columnsc                 C   s`   t dddgi}t|dgdd}tdd}t tddg|d	tddg|d	d
}t|| d S )Nr   r   r   Tr   rV   Fr   r+   )r   r   )r   r	   r   r
   r>   r?   )r   r   rE   r!   rD   r   r   r   test_get_dummies_all_sparse  s   
z*TestGetDummies.test_get_dummies_all_sparsers   bazc                 C   sf   t g dg dg dg dd}d}tjt|d t||d W d    d S 1 s,w   Y  d S )	N)r   r   r            )oner
  r
  twor  r  )r   r   r   r   r   r   )r   r   zqwt)barfoor  zooz1Input must be a list-like for parameter `columns`r(   r   )r   r,   r-   	TypeErrorr	   )r   rs   r   r/   r   r   r   #test_get_dummies_with_string_values  s   	"z2TestGetDummies.test_get_dummies_with_string_valuesc                 C   sH   t td}t||d}tg dg dg dd|d}t|| d S )Nabcar+   r   r   r   r   r   r   r6   )r   r;   r	   r   r>   r?   )r   any_numeric_ea_and_arrow_dtypeserrE   rD   r   r   r    test_get_dummies_ea_dtype_series  s   z/TestGetDummies.test_get_dummies_ea_dtype_seriesc                 C   sL   t dtdi}t||d}t g dg dg dd|d}t|| d S )Nr   r  r+   r  r   r   )x_ax_bx_c)r   r;   r	   r>   r?   )r   r  r   rE   rD   r   r   r   #test_get_dummies_ea_dtype_dataframe  s   z2TestGetDummies.test_get_dummies_ea_dtype_dataframe)1__name__
__module____qualname__r,   fixturer   r    float64rV   r!   r&   r'   r0   rF   r_   rk   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r   r   r   r   r      sz    


2#"




r   )r   r   numpyr    r,   pandas.core.dtypes.commonr   pandasr   r   r   r   r   r   r   r	   pandas._testing_testingr>   pandas.core.arrays.sparser
   r   r   r   r   r   <module>   s    $	