o
    ҷh                      @  s   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d d	lmZmZ d d
lmZ erDd dlmZ G dd deZdS )    )annotations)TYPE_CHECKING)using_pyarrow_string_dtype)lib)import_optional_dependency)
is_integerN)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                      sH   e Zd ZdZd fddZdd	 Zdd
dZdddZdddZ  Z	S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                   s$   t  | || _|| _|   d S )N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__ Y/var/www/html/venv/lib/python3.10/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr      s   zArrowParserWrapper.__init__c                 C  sN   | j d}|du rdn|| _| j d }t|trtdt| j d | _dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr   
isinstancedict
ValueErrorlistr   )r   r   r   r   r   r   r   %   s   

zArrowParserWrapper._parse_kwdsc                 C  s   dddddd}|  D ]\}}|| jv r&| j|dur&| j|| j|< q| j}t|tr3|g}nd}|| jd< d	d
 | j  D | _dd
 | j  D | _d| jd v | jd< | j	du | j	durf| j	n| jd | j
d| _dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point)usecolsr   
escapecharskip_blank_linesdecimalNtimestamp_parsersc                 S  &   i | ]\}}|d ur|dv r||qS )N)	delimiter
quote_charr$   r%   r   .0option_nameoption_valuer   r   r   
<dictcomp>Q       z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>c                 S  r,   )N)r"   r#   true_valuesfalse_valuesr&   r+   r   r/   r   r   r   r3   X   r4    strings_can_be_nullskiprows)autogenerate_column_names	skip_rowsr   )itemsr   r   popdate_formatr   strparse_optionsconvert_optionsheaderr   read_options)r   mappingpandas_namepyarrow_namer>   r   r   r   _get_pyarrow_options3   s8   


z'ArrowParserWrapper._get_pyarrow_optionsframer   c              
     s  t  j}d}| jdu r7| jdu r| jdu rt|| _t | j|kr3tt|t | j | j | _d}| j _|  j \} | jdur| j }t	| jD ]S\}}t
|r_ j| ||< n| jvrltd| d| jdur| j|dur|| j|fn j| | j j| f\}}	|	dur | |	 |< | j|= qO j|ddd | jdu r|sdgt  jj  j_| jdurt| jtrՇ fdd| j D | _z	 | j W  S  ty }
 zt|
d}
~
ww  S )	z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)dropinplacec                   s    i | ]\}}| j v r||qS r   )columns)r0   kvrH   r   r   r3      s     z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>)lenrK   rB   namesranger!   _do_date_conversions	index_colcopy	enumerater   r    dtyper   astype	set_indexindexr   r   r<   	TypeError)r   rH   num_colsmulti_index_named_index_to_setiitemkey	new_dtypeer   rN   r   _finalize_pandas_outputo   sR   









z*ArrowParserWrapper._finalize_pandas_outputc                 C  s*  t d}t d}|   |j| j|jdi | j|jdi | j|jdi | j	d}| j
d }|tju r^|j}| }t|jjD ]\}}|j|rX|||||}qB||}|dkrj|jtjd}	n&|dkrt }
t |
| < |j|
jd}	nt r|jt d}	n| }	| |	S )	z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csv)rC   r@   rA   dtype_backend)types_mappernumpy_nullableNr   ) r   rG   read_csvr   ReadOptionsrC   ParseOptionsr@   ConvertOptionsrA   r   r   
no_defaultschemafloat64rU   typesis_nullsetfield	with_typecast	to_pandaspd
ArrowDtyper	   
Int64Dtypenullr   r   r
   rd   )r   papyarrow_csvtablerf   
new_schemanew_typer_   
arrow_typerH   dtype_mappingr   r   r   read   s<   



zArrowParserWrapper.read)r   r   r   r   )r   r   )rH   r   r   r   )r   r   )
__name__
__module____qualname____doc__r   r   rG   rd   r   __classcell__r   r   r   r   r      s    

<Cr   )
__future__r   typingr   pandas._configr   pandas._libsr   pandas.compat._optionalr   pandas.core.dtypes.inferencer   pandasrw   r   pandas.io._utilr	   r
   pandas.io.parsers.base_parserr   pandas._typingr   r   r   r   r   r   <module>   s    