o
    ҷh9W                  	   @   s  d Z ddlZddlZddlmZ ddlmZ ddlZ	ddl
mZ ddlmZmZ dd Zejdd	 Zejd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejjd d!d"d# Z ejjd d!d$d% Z!d&d' Z"d(d) Z#d*d+ Z$d,d- Z%d.d/ Z&ej'd0ej(e)d1e*d1de	j+gd2d3 Z,ej'd4d5d6gd7d8 Z-d9d: Z.d;d< Z/d=d> Z0ej'd?d5d6gejjd@d!dAdB Z1ej'd?d5d6gejjd@d!dCdD Z2ej'dEdFdGgej'd?d5d6gdHdI Z3ej'dEdFdGgej'dJe	j4e	j5gdKdL Z6dMdN Z7dOdP Z8dQdR Z9dSdT Z:dUdV Z;dWdX Z<ej'dYg dZe5g d[fg d\e5g d]fgd^d_ Z=d`da Z>ej'dbej?ej@ej)gdcdd ZAdedf ZBdgdh ZCdidj ZDdkdl ZEdmdn ZFdodp ZGdS )qz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)pa_version_under12p0)is_dtype_equal)ArrowStringArrayArrowStringArrayNumpySemanticsc                 C   s   | j dkrtjS tjS )Npyarrow_numpy)storagenpnanpdNAdtype r   Z/var/www/html/venv/lib/python3.10/site-packages/pandas/tests/arrays/string_/test_string.pyna_val   s   
r   c                 C   s   t j| dS )z=Fixture giving StringDtype from parametrized 'string_storage')r   )r
   StringDtype)string_storager   r   r   r      s   r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')construct_array_typer   r   r   r   cls!   s   r   c                 C   s   t dt jdt jdg| di}| jdkrd}nd}t||ks"J | jdkr*d}nd	}t|j|ks5J | jd
krCd}d| d}n| jdkrQd}d| d}nd}d| d}t|jj|kscJ d S )NAabr   r   z     A
0    a
1  NaN
2    bz      A
0     a
1  <NA>
2     bz10      a
1    NaN
2      b
Name: A, dtype: stringz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   <z+>
['a', <NA>, 'b']
Length: 3, dtype: stringr   z*>
['a', nan, 'b']
Length: 3, dtype: stringStringArray)r
   	DataFramearrayr   r   reprr   )r   dfexpectedarr_namer   r   r   	test_repr'   s$    



r!   c                 C   s8   |  g d}|d d usJ |d t|ju sJ d S )N)r   Nr      )_from_sequencer   r   )r   r   r   r   r   test_none_to_nanA   s   r$   c                 C   s   |  ddg}| tjju rd}nd}tjt|d d|d< W d    n1 s(w   Y  | tjju r6d}nd}tjt|d td	d
g|d d < W d    d S 1 sVw   Y  d S )Nr   r   z4Cannot set non-string value '10' into a StringArray.Scalar must be NA or strmatch
   r   zMust provide strings.r"      )	r#   r
   arraysr   pytestraises	TypeErrorr   r   )r   arrmsgr   r   r   test_setitem_validatesG   s   
"r0   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   r
   r   tmassert_extension_array_equal)r   r.   r   r   r   r   test_setitem_with_scalar_stringY   s   r6   c                 C   sf   t jg d| d}tdd g}| }||ddg< t jdt jdg| d}t|| t|| d S )Nr   r   r1   r   r   r   r"   r1   )r
   r   r   copyr   r4   r5   assert_numpy_array_equal)r   r.   value
value_origr   r   r   r   $test_setitem_with_array_with_missingb   s   r<   c                 C   sP   t t jddd}d |d< || }t|j| sJ |d}t|| d S )N2000   )periodsr   zdatetime64[ns])r
   Series
date_rangeastyper   r   r4   assert_series_equal)r   sercastedresultr   r   r   test_astype_roundtripo   s   

rG   c                 C   s   t jg d| d}t jg d| d}|| }t jg d| d}t|| ||}t|| ||}t jg d| d}t|| |j|dd}t jg d| d}t|| d S )	N)r   r   r1   NNr   )xyNzN)axbyNNN)xaybNNN-)
fill_value)rK   rL   zc-z-zN)r
   r@   r4   rC   addradd)r   r   r   rF   r   r   r   r   test_addz   s   

rS   c                 C   s   | j |v rd}tjjd |d}|j| tjg d| d}tjg dgt	d}tj
tdd ||  W d    n1 s>w   Y  t|}tj
tdd ||  W d    d S 1 s_w   Y  d S )Nz*Failed: DID NOT RAISE <class 'ValueError'>r,   reasonr7   r   z3 != 1r&   )r   r+   markxfailnode
add_markerr
   r   r   objectr,   
ValueErrorr@   )r   requestarrow_string_storagerU   rV   r   r   sr   r   r   test_add_2d   s   



"r_   c                 C   sj   t jg d| d}g d}|| }t jg d| d}t|| || }t jg d| d}t|| d S )N)r   r   NNr   )rH   NrI   N)rK   NNN)rM   NNNr3   )r   r   otherrF   r   r   r   r   test_add_sequence   s   ra   c                 C   sz   | j |v rd}tjjt|d}|j| tjg d| d}|d }tjg d| d}t	
|| d| }t	
|| d S )Nz?unsupported operand type(s) for *: 'ArrowStringArray' and 'int'rT   r   r   Nr   r)   )aabbN)r   r+   rV   rW   NotImplementedErrorrX   rY   r
   r   r4   r5   )r   r\   r]   rU   rV   r   rF   r   r   r   r   test_mul   s   
rf   zGH-28527)rU   c                 C   s   t jg d| d}t g dg}||tu sJ || }t g dg| }t|| || }t g dg| }t|| d S )N)r   r   r1   r2   r   )trI   vw)atrL   cvdw)tarN   vcwd)r
   r   r   __add__NotImplementedrB   r4   assert_frame_equalr   r.   r   rF   r   r   r   r   test_add_strings   s   rt   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tu s"J || }t dtjtjtjgg| }t	|| || }t dtjtjtjgg| }t	|| d S )Nr   r   r   rH   rI   rK   rM   )
r
   r   r   r	   r   rp   rq   rB   r4   rr   rs   r   r   r   test_add_frame   s     ru   c                    s   d| j  d tjg d|d}dt| }|jdkr;t fdd|D }d|d	< t||tj	 d S |jd
krBdnd}tj fdd|D t
d}tj||d}t|| d S )N__r   Nr1   r   r   r   c                       g | ]	}t | qS r   getattr.0itemop_namer`   r   r   
<listcomp>       z2test_comparison_methods_scalar.<locals>.<listcomp>Fr"   r   boolean[pyarrow]booleanc                    rx   r   ry   r{   r~   r   r   r      r   )__name__r
   r   rz   r   r   r4   r9   rB   bool_rZ   r5   )comparison_opr   r   rF   r   expected_dtyper   r~   r   test_comparison_methods_scalar   s   
r   c                 C   s   d| j  d}tjg d|d}t||tj}|jdkr,tg d}t|| d S |jdkr3dnd}tjg d	|d}t	|| t	|| d S )
Nrv   rw   r   r   FFFr   r   r   NNN)
r   r
   r   rz   r   r   r   r4   r9   r5   )r   r   r   r   rF   r   r   r   r   r   $test_comparison_methods_scalar_pd_na   s   
r   c           	      C   s   d| j  d}tjg d|d}d}|dvr7tjtdd t||| W d    d S 1 s0w   Y  d S t|||}|jdkr[g d	g d
d| }t|}t	
|| d S g dg dd| }|jdkrmdnd}tj||d}t	|| d S )Nrv   rw   r   *   )__eq____ne__znot supported betweenr&   r   r   TFT)FNF)TNTr   r   r   )r   r
   r   r+   r,   r-   rz   r   r   r4   r9   r5   )	r   r   r   r   r`   rF   expected_datar   r   r   r   r   )test_comparison_methods_scalar_not_string   s2   


r   c                 C   s.  d| j  d}tjg d|d}g d}t|||}|jdkrQtg d}t|d ||d |d< t|| t||tj}tg d}t|| d S |jdkrXd	nd
}tj	t
|d dd}t|d ||d |d< tj||d}t|| t||tj}tjg d|d}t|| d S )Nrv   rw   r   )NNr1   r   r   r   r   r   rZ   )rP   r   r   )r   r
   r   rz   r   r   r4   r9   r   fulllenr5   )r   r   r   r   r`   rF   r   r   r   r   r   test_comparison_methods_array  s&   
r   c                 C   sB  | t jju r	d}nd}tjt|d | tjddgdd W d    n1 s(w   Y  tjt|d | tg  W d    n1 sFw   Y  | t jju ri| tjdtjgt	d | tjdd gt	d nEtjt|d | tjdtjgt	d W d    n1 sw   Y  tjt|d | tjdd gt	d W d    n1 sw   Y  tjt|d | tjdt j
gt	d W d    n1 sw   Y  tjt|d | tjdtdd	gt	d W d    n1 sw   Y  tjt|d | tjdtdd	gt	d W d    d S 1 sw   Y  d S )
Nz7StringArray requires a sequence of strings or pandas.NAzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr&   r   r   S1r   NaTns)r
   r*   r   r+   r,   r[   r   r   r	   rZ   r   
datetime64timedelta64)r   r/   r   r   r   test_constructor_raises1  s6     $r   nar	   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr   rZ   r   )r
   r*   r   r   r   r   r4   r5   )r   r   r   r   r   test_constructor_nan_likeR  s   r   r8   TFc           	      C   s   t jdt jgtd}| }t jdtjgtd}|j|| d}|tt	fv r6dd l
}||j|| dd}n||}t|| t|| d S )Nr   r   )r8   r   Ttypefrom_pandas)r   r   r	   rZ   r8   r
   r   r#   r   r   r   stringr4   r5   r9   )	r8   r   r\   nan_arrexpected_inputna_arrrF   par   r   r   r   test_from_sequence_no_mutateZ  s   r   c                 C   s   t jg d| d}|d}tjg ddd}t|| t jdt jdg| d}d}tjt	|d |d W d    d S 1 sBw   Y  d S )	N)123r   int64)r"   r)      r   r   zJint\(\) argument must be a string, a bytes-like object or a( real)? numberr&   )
r
   r   rB   r   r4   r9   r   r+   r,   r-   )r   r.   rF   r   r/   r   r   r   test_astype_intm  s   
"r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r"   r   )r
   r   r   rB   r4   r5   r   r.   rF   r   r   r   r   test_astype_nullable_inty  s   
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r
   r@   r   rB   r   r	   r4   rC   )r   any_float_dtyperD   rF   r   r   r   r   test_astype_float  s   
r   skipnazNot implemented StringArray.sumc                 C   s.   t jg d|d}|j| d}|dksJ d S )Nr7   r   r   abc)r
   r@   sumr   r   r.   rF   r   r   r   test_reduce  s   r   c                 C   sD   t jg d|d}|j| d}| r|dksJ d S t |s J d S )N)Nr   Nr   r1   Nr   r   r   )r
   r@   r   isnar   r   r   r   test_reduce_missing  s
   r   methodminmaxc                 C   s\   t jg d|d}t|| |d}|r#| dkrdnd}||ks!J d S |t|ju s,J d S )Nr   r   r1   Nr   r   r   r   r1   )r
   r@   rz   r   r   )r   r   r   r\   r.   rF   r   r   r   r   test_min_max  s   r   boxc           
      C   s   |j |v r"|tju r"|tju rd}nd}tjjt|d}|j| |g d|d}t	t
| |}| dkr7dnd}	||	ks?J d S )	Nz<'<=' not supported between instances of 'str' and 'NoneType'z0'ArrowStringArray' object has no attribute 'max'rT   r   r   r   r   r1   )r   r
   r   r+   rV   rW   r-   rX   rY   rz   r   )
r   r   r   r\   r]   rU   rV   r.   rF   r   r   r   r   test_min_max_numpy  s   
r   c                 C   s   t jdt jg| d}|jdd}t jddg| d}t|| |jtdd}t jddg| d}t|| | j|v r?d}nd}t	j
t|d |jdd W d    d S 1 sZw   Y  d S )	Nr   r   r   )r:   z"Invalid value '1' for dtype stringz3Cannot set non-string value '1' into a StringArray.r&   r"   )r
   r   r   fillnar4   r5   r   str_r   r+   r,   r-   )r   r\   r]   r.   resr   r/   r   r   r   test_fillna_args  s   
"r   c                 C   sh   t d}tjg d| d}||}|jt|| dd}| jdv r+tr+||}|	|s2J d S )Nr   r7   r   Tr   )r   r   )
r+   importorskipr
   r   listr   r   r   chunked_arrayequals)r   r   datar.   r   r   r   r   test_arrow_array  s   


r   c                 C   s   t d}tjg d| d}td|i}||}|djdks$J td| |	 }W d    n1 s9w   Y  t
|d jtjsIJ |d| d}t|| |jd	 t|d ju sfJ d S )
Nr   rb   r   r   r   r   string[])r)   r   )r+   r   r
   r   r   tablefieldr   option_context	to_pandas
isinstancer   r   rB   r4   rr   locr   r   string_storage2r   r   r   r   rF   r   r   r   r   test_arrow_roundtrip  s   


 r   c                 C   s   t d}tjg | d}td|i}||}|djdks"J |j|jg |	 dg|j
d}td| | }W d    n1 sGw   Y  t|d jtjsWJ |d| d	}t|| d S )
Nr   r   r   r   )r   )schemar   r   r   )r+   r   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   rB   r4   rr   r   r   r   r    test_arrow_load_from_zero_chunks  s   

 
r   c                 C   s   t | dddkrd}nt | dddkrd}nd}tjdd	dtjg| d
}|jdd}tjg d|g d |dd}t|| |jdd}tjddg|d d |dd}t|| d S )Nr    r   zint64[pyarrow]r   r   r   r   r   r   F)dropna)r)   r"   r"   )r   r"   r   countindexr   nameTr)   r"   )rz   r
   r   r   value_countsr@   r4   rC   )r   	exp_dtyper.   rF   r   r   r   r   test_value_counts_na  s   r   c                 C   s   t | dddkrd}nt | dddkrtj}nd}tjdddtjg| d	}|jd
d}tjddg|d d |ddd }t|| d S )Nr   r   r   zdouble[pyarrow]r   Float64r   r   r   T)	normalizer)   r"   
proportionr   r   )	rz   r   float64r
   r@   r   r   r4   rC   )r   r   rD   rF   r   r   r   r    test_value_counts_with_normalize  s   "r   zvalues, expectedr7   r   rb   )FFTc              	   C   s   t j| |d} d}tjt|dU t dd6 |  }t|| t |  }t |}t	|| t 
|  }t 
|}t|| W d    n1 sPw   Y  W d    d S W d    d S 1 shw   Y  d S )Nr   z"use_inf_as_na option is deprecatedr&   zmode.use_inf_as_naT)r
   r   r4   assert_produces_warningFutureWarningr   r   r9   r@   rC   r   rr   )valuesr   r   r/   rF   r   r   r   test_use_inf_as_na  s    	

"r   c                 C   sf   | j |v rtd| j   tjg d| d}d|j  k r.|   kr.|jddk s1J  J d S )Nznot applicable for r7   r   r   T)deep)r   r+   skipr
   r@   nbytesmemory_usage)r   r]   seriesr   r   r   test_memory_usage4  s   
8r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r
   r@   rB   r4   rC   )r   r   rD   rF   r   r   r   r   test_astype_from_float_dtype?  s   
r   c                 C   sH   t jdt jdg| d}t|}tjdt| dgtd}t|| d S )Nr   r   r   )r
   r   r   r   r   rZ   r4   r9   r   r   r   r   "test_to_numpy_returns_pdna_defaultH  s   
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r
   r   r   to_numpyr   rZ   r4   r9   )r   nulls_fixturer   r.   rF   r   r   r   r   test_to_numpy_na_valueO  s
   r   c                 C   s   t jg d| d}|ddg}t g d}t|| |dt jg}t g d}t|| |g }t g d}t|| |d|g}t g d}t|| d S )Nrb   r   r   r1   )TFFr   r   )r
   r@   isinr4   rC   r   )r   fixed_now_tsr^   rF   r   r   r   r   	test_isinW  s   
r   c                 C   s   t jg d| d}tg d}d ||< |jd t|ju s J t jg d| d}t|jt jju r5d}nd}t	j
t|d d||< W d    d S 1 sNw   Y  d S )Nr7   r   )FTFr"   zCannot set non-string valuer%   r&   )r
   r@   r   r   r   r   r   r*   r   r+   r,   r-   )r   rD   maskr/   r   r   r   (test_setitem_scalar_with_mask_validationk  s   
"r   c                 C   sD   g d}t j|t jd}tj|| d}tj|| d}t|| d S Nr7   r   )r   r   r   r
   r4   r5   r   valsr.   rF   r   r   r   r   test_from_numpy_str  s
   r   c                 C   s2   g d}t j|| d}| }|}t|| d S r   )r
   r   tolistr4   assert_equalr   r   r   r   test_tolist  s
   r  )H__doc__numpyr   r+   pandas.compat.pyarrowr   pandas.core.dtypes.commonr   pandasr
   pandas._testing_testingr4   pandas.core.arrays.string_arrowr   r   r   fixturer   r   r!   r$   r0   r6   r<   rG   rS   r_   ra   rf   rV   rW   rt   ru   r   r   r   r   r   parametrizer	   r   floatr   r   r   r   r   r   r   r   r   r@   r   r   r   r   r   r   r   r   r   r   float16float32r   r   r   r   r   r   r  r   r   r   r   <module>   s    

	

$!

	


