o
    ҷh8D                     @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ejdZejdZdd	 Zd
d Zeejdddgddgddgdgdgdgdggejdddgdd Zdd Zeejdddggdd Zdd Zedd Zejddge	ejgd gd!gd"e
dgd#d$d%fdd&ge	ejgd!gd'ejd(gd#d)gd*d%fd#d)ge	ejgd!gd'ejd(gd#d)gd*d%fgd+d, Zeejd-i e	d#d.ejd/d0ejd1gg d2d3d4d5ejd6ejd7gd8fg g d9d:d;e	g d<g d2g d=d8fd#gd:d;e	ejd.d>d/d0d?d1gg d2g d=d8fdg g d9ie	d#d.ejd/d0ejd1gg d2d3d4d5ejd6ejd7gd8fgd@dA ZdBdC ZedDdE Z edFdG Z!eejdHdIdJgdKdL Z"eejdMdNd dOgejejgdPdQggfd:dRdOgd?dSgdTdQggfgdUdV Z#edWdX Z$eejdYd ejdZgdZejggfd&d d[d\dZgejejggfgd]d^ Z%ed_d` Z&edadb Z'eejdce(ddde e(df dddgie	e(dde(dfgfe(dddg dh i e	e(ddd gd>d&ggfe(dddi dddgie	ejd gfgdjdk Z)dldm Z*eejdnd:d>dogfdNejdpgfgdqdr Z+dsdt Z,eejdudNd:gdvdw Z-eejdxdydzd{d|d}d~gfd}d#d~ifgdd Z.edd Z/edd Z0edd Z1dd Z2dd Z3dS )zg
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
    )StringION)STR_NA_VALUES)	DataFrameIndex
MultiIndexpyarrow_skippyarrow_xfailc                 C   sn   | }d}| t|}tg ddtjdgtjddggg dd}|jd	kr/d |jd
< d |jd< t|| d S )NzA,B,C
a,b,c
d,,f
,g,h
abcdfghABCcolumnspyarrow)   r   )   r   )	read_csvr   r   npnanengineloctmassert_frame_equalall_parsersparserdataresultexpected r'   X/var/www/html/venv/lib/python3.10/site-packages/pandas/tests/io/parser/test_na_values.pytest_string_nas   s   


r)   c                 C   st   | }d}t ddgtjdgtjtjggddgd}|jdkr+d |jd	d
gdf< d |jd< |t|}t|| d S )NzA,B
foo,bar
NA,baz
NaN,nan
foobarbazr   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r    )r"   r#   r$   r&   r%   r'   r'   r(   test_detect_string_na)   s    

r.   	na_valuesz-999.0z-999ig     8r$   zA,B
-999,1.2
2,-999
3,4.5
z"A,B
-999,1.200
2,-999.000
3,4.500
c                 C   sL   | }t tjdgdtjgddggddgd}|jt||d}t|| d S )	Ng333333?       @g      @g      @r   r   r   r/   r   r   r   r   r   r   r    )r"   r$   r/   r#   r&   r%   r'   r'   r(   test_non_string_na_values:   s   &r3   c                    s   h d}|t ks
J | }t|fdd td fddt|D }ttjttd}|j	|d d}t
|| d S )	N>   #NAN/An/a#N/A-NaN-nan<NA>1.#IND-1.#IND1.#QNAN#N/A N/A-1.#QNAN NANaNr   NULLNonenullc                    sf   | dkrd}n| dkrd dg|  }| | }|  d k r1d dg |  d  }| | }|S )Nr   r@   ,r   )join)ivbufjoined)nvr'   r(   r   {   s   z!test_default_na_values.<locals>.f
c                    s   g | ]	\}} ||qS r'   r'   ).0rH   rI   )r   r'   r(   
<listcomp>   s    z*test_default_na_values.<locals>.<listcomp>)r   index)header)r   lenr   rG   	enumerater   r   r   ranger   r   r    )r"   
_NA_VALUESr#   r$   r&   r%   r'   )r   rL   r(   test_default_na_values`   s    rV   r,   c                 C   s^   | }d}t dtjdgtjdtjgddtjggg dd}|jt||d	gd
}t|| d S )Nz3A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
      ?            r   r   r   )r/   skiprowsr2   )r"   r/   r#   r$   r&   r%   r'   r'   r(   test_custom_na_values   s   (r]   c                 C   s|   d}| }| t|}ttjdtjdgtdtjddtjgtdg dd}|jdkr6d |jd< d |jd	< t	
|| d S )
Nz1A,B,C
True,False,True
NA,True,False
False,NA,TrueTFdtype)TFTr   r   )r   r   r-   )r   r   r   r   arrayr   objectr   r   r   r    r"   r$   r#   r%   r&   r'   r'   r(   test_bool_na_values   s   


rc   c                 C   sh   d}| }|j t|dgdgdd}ttjdtjdgtjdtjdgtjdtjdgd}t|| d S )Nz3A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foor*   r+   )r   r   r1   r   r   r   r   r   r   r   r    r"   r$   r#   dfr&   r'   r'   r(   test_na_value_dict   s   rg   zindex_col,expectedr   rY   )r   r   r   r
   namerP   r   )r   r   )r   r   r   )namesc                 C   s.   d}| }|j t|t |d}t|| d S )Nza,b,c,d
0,NA,1,5
)r/   	index_col)r   r   setr   r    )r"   rl   r&   r$   r#   r%   r'   r'   r(   test_na_value_dict_multi_index   s   rn   zkwargs,expectedr   r   er   r   r   rX      rY      rZ   onetwothreefivesevenr   )r   r   Fr/   keep_default_nar
   r   r@   r   ro   r   r   )rs   rt   ru   r   rv   r@   rw   r@   r   c                 C   s.   d}| }|j t|fi |}t|| d S )NzAA,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
r   r   r   r    )r"   kwargsr&   r$   r#   r%   r'   r'   r(   test_na_values_keep_default   s   /
r}   c                 C   sF   d}| }|j t|dd}tg dg dg dd}t|| d S )NzAA,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
F)ry   rz   rp   )rD   rt   rD   r   rv   r@   rw   r   r   r   r   r   r    rb   r'   r'   r(   !test_no_na_values_no_keep_default*  s   
r   c                 C   sF   d}| }|j t|ddgidd}tdgtjgd}t|| d S )Nza,b
,2r   2Frx   r@   r
   r   rd   rb   r'   r'   r(   &test_no_keep_default_na_dict_na_valuesE  s   r   c                 C   sD   d}| }|j t|ddidd}tdgtjgd}t|| d S )Nza,b
1,2r   r   Frx   r   r   rd   re   r'   r'   r(   -test_no_keep_default_na_dict_na_scalar_valuesR  s
   r   col_zero_na_valuesi 113125c              	   C   st   d}| }t tjdgtjdgdtjgddgddgd	d
gtjdgd}|jt|d dd
dd|dd}t|| d S )Nz_113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
g    ND&Aqwerz/blahakjsdkjasdfkjg-y@g7A`*}@z225.874r@   g-o@)r   r   r   rX   rq   rY   rr   Fz214.008blah)r   rr   r   r   )rQ   ry   r/   r2   )r"   r   r$   r#   r&   r%   r'   r'   r(   1test_no_keep_default_na_dict_na_values_diff_reprs_  s&   r   zna_filter,row_dataTr   rX   r   1r   3c                 C   s>   d}| }|j t|dg|d}t|ddgd}t|| d S )NzA,B
1,A
nan,B
3,C
r   )r/   	na_filterr   r   r~   )r"   r   row_datar$   r#   r%   r&   r'   r'   r(   !test_na_values_na_filter_override~  s
   	r   c              
   C   sf   | }d}| t|}tdddddtjtjtjgdddd	d
tjtjtjggg dd}t|| d S )NzlDate,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500z
2012-03-14USDAAPLBUYi  z
2012-05-12SBUXSELLi  )DateCurrencySymbolTypeUnits	UnitPriceCostTaxr   rd   r!   r'   r'   r(   test_na_trailing_columns  s   r   zna_values,row_datar0   r   rW   c                 C   s@   | }ddg}d}|j t|||d}t||d}t|| d S )Nr
   r   1,2
2,1rk   r/   r   r~   )r"   r/   r   r#   rk   r$   r%   r&   r'   r'   r(   test_na_values_scalar  s   
r   c                 C   sn   | }ddd}|  }ddg}d}tddgtjtjgg|d	}|jt|||d
}t|| t|| d S )Nr   r   r   r
   r   r   rW   r0   r   r   )	copyr   r   r   r   r   r   r    assert_dict_equal)r"   r#   r/   na_values_copyrk   r$   r&   r%   r'   r'   r(   test_na_values_dict_aliasing  s   
r   c                 C   sD   d}| }ddi}|j t||d}tdtjdgi}t|| d S )Nza
foo
1r   r*   r1   r
   r   rd   )r"   r$   r#   r/   r%   r&   r'   r'   r(   test_na_values_dict_col_index  s   r   zdata,kwargs,expectedl            rM   l           z,1z
,2z
1c                 C   s.   | }|j t|fdd i|}t|| d S )NrQ   r{   )r"   r$   r|   r&   r#   r%   r'   r'   r(   test_na_values_uint64  s   r   c                 C   sH   d}| }t ddgitdgddd}|jt|dd	d
}t|| d S )Nza,1
b,2r   r   r   r
   rh   rj   r   F)rl   ry   r   r   r   r   r   r    )r"   r$   r#   r&   r%   r'   r'   r(   *test_empty_na_values_no_default_with_index  s
   r   zna_filter,index_data5g      @c                 C   sP   | }d}t ddgddgdt|ddd	}|jt|dg|d
}t|| d S )Na,b,c
1,,3
4,5,6r   rq   rX   rr   )r
   r   r   rh   rj   )rl   r   r   )r"   r   
index_datar#   r$   r&   r%   r'   r'   r(   test_no_na_filter_on_index   s
   "r   c                 C   s\   | }d}|j t|dgddgd}tdtjgdtjgdtd	d
gddd}t|| d S )Nzidx,col1,col2
1,3,4
2,inf,-infr   infz-inf)rl   r/   rX   rq   )col1col2r   r   idxrh   rj   )r   r   r   r   r   r   r   r    )r"   r#   r$   outr&   r'   r'   r(   !test_inf_na_values_with_int_index  s   "r   r   c                 C   sV   | }d}|r	t jnd}tddg|dgddgd}|jt||td	}t|| d S )
Nr   r@   r   4r   r   6r	   )r   r_   )r   r   r   r   r   strr   r    )r"   r   r#   r$   emptyr&   r%   r'   r'   r(   +test_na_values_with_dtype_str_and_na_filter  s   r   zdata, na_values)zfalse,1
,1
trueN)zfalse,1
null,1
trueN)zfalse,1
nan,1
trueN)false,1
foo,1
truer*   r   r*   c                 C   s\   | }d}t jt|d |jt|d ddgddi|d W d    d S 1 s'w   Y  d S )Nz(Bool column has NA values in column [0a])|(cannot safely convert passed user dtype of bool for object dtyped data in column 0)matchr
   r   bool)rQ   rk   r_   r/   pytestraises
ValueErrorr   r   )r"   r$   r/   r#   msgr'   r'   r(   !test_cast_NA_to_bool_raises_error-  s   "r   c                 C   sb   | }d}|j t|d g dtttdd }tddgddgdd	gdd
dgd}t|| d S )NzDFile: small.csv,,
10010010233,0123,654
foo,,bar
01001000155,4530,898)r   r   col3)rQ   rk   r_   100100102330100100015501234530654898r   rX   rj   )r   r   r   dropnar   r   r    r!   r'   r'   r(   test_str_nan_droppedJ  s$   
	r   c                 C   sP   | }d}|j t|ttdddid}tdgdgtjgd}t|| d S )NzA,B,B
X,Y,Z
1,2,infr   r   Zr   )rQ   r/   r   ))r   X)r   Yr   )	r   r   listrT   r   r   r   r   r    r!   r'   r'   r(   test_nan_multi_indexg  s   r   c                 C   N   | }d}t jtdd |jt|dd W d    d S 1 s w   Y  d S )N0
NaN
True
False
z	NA valuesr   r   r^   r   r"   r#   r$   r'   r'   r(   test_bool_and_nan_to_bool|  s
   "r   c                 C   r   )Nr   zconvert|NoneTyper   intr^   r   r   r'   r'   r(   test_bool_and_nan_to_int  s
   "r   c                 C   s@   | }d}|j t|dd}tdtjddgi}t|| d S )Nr   floatr^   0rW   g        )r   r   r   	from_dictr   r   r   r    r!   r'   r'   r(   test_bool_and_nan_to_float  s
   r   )4__doc__ior   numpyr   r   pandas._libs.parsersr   pandasr   r   r   pandas._testing_testingr   markusefixturesskip_pyarrowxfail_pyarrowr)   r.   parametrizer3   rV   r]   rc   rg   r   from_tuplesrn   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r'   r(   <module>   sN   
1
$

-




( 



