o
    ҷh7                     @  sP  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlZd d	lmZ d d
lmZ d dlmZmZmZmZ d dlmZmZmZ ej ej!ej"ej#ej$ej%ej%dZ&ej"ej'dfej%ej(e	fej ej)dfej!ej)dfej#ej)dfej*ej(dfej$ej+d fiZ,ej)dej'dej(diZ-G dd deZ.dS )    )annotations)AnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBuffer)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd/d0d	d
Zd1ddZed1ddZed2ddZ	d2ddZ
edd Zedd Zed1ddZed3ddZd1ddZd4d5d#d$Zd6d&d'Zd7d)d*Zd7d+d,Zd7d-d.Zd S )8PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                 C  s0   t |tjstdt| d|| _|| _dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zColumns of type  not handled yetN)
isinstancepdSeriesNotImplementedErrortype_col_allow_copy)selfr   r    r+   Q/var/www/html/venv/lib/python3.10/site-packages/pandas/core/interchange/column.py__init__J   s   
zPandasColumn.__init__intc                 C  s   | j jS )z2
        Size of the column, in elements.
        )r(   sizer*   r+   r+   r,   r/   V   s   zPandasColumn.sizec                 C     dS )z7
        Offset of first element. Always zero.
        r   r+   r0   r+   r+   r,   offset\   s   zPandasColumn.offsettuple[DtypeKind, int, str, str]c                 C  s~   | j j}t|tjr!| j jj}| |j\}}}}tj	||t
jfS t|r:t| j dkr6tjdt|t
jfS td| |S )Nstring   z.Non-string object dtypes are not supported yet)r(   dtyper#   r$   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr
   r   STRINGr   r&   )r*   r6   r9   _bitwidthc_arrow_dtype_f_strr+   r+   r,   r6   d   s.   


zPandasColumn.dtypec                 C  sj   t |jd}|du rtd| dt|tr|jj}nt|tr'|j	j}n|j}||j
d t||fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolr5   )	_NP_KINDSgetkind
ValueErrorr#   r   numpy_dtype	byteorderr	   baseitemsizer   )r*   r6   rD   rG   r+   r+   r,   r:      s   



z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)r6   r   r;   	TypeErrorr(   catorderedr   r$   r%   rL   r0   r+   r+   r,   describe_categorical   s   z!PandasColumn.describe_categoricalc                 C  s@   | j d }zt| \}}W ||fS  ty   td| dw )Nr   rA   z not yet supported)r6   _NULL_DESCRIPTIONKeyErrorr&   )r*   rD   nullvaluer+   r+   r,   describe_null   s   
zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r(   isnasumitemr0   r+   r+   r,   
null_count   s   zPandasColumn.null_countdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r(   indexr0   r+   r+   r,   metadata   s   zPandasColumn.metadatac                 C  r1   )zE
        Return the number of chunks the column consists of.
           r+   r0   r+   r+   r,   
num_chunks   s   zPandasColumn.num_chunksNn_chunks
int | Nonec                 c  sv    |r6|dkr6t | j}|| }|| dkr|d7 }td|| |D ]}t| jj|||  | jV  q"dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        r]   r   N)lenr(   ranger   ilocr)   )r*   r_   r/   stepstartr+   r+   r,   
get_chunks   s   

zPandasColumn.get_chunksr   c                 C  s\   |   ddd}z|  |d< W n	 ty   Y nw z	|  |d< W |S  ty-   Y |S w )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrh   ri   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r*   buffersr+   r+   r,   get_buffers   s    zPandasColumn.get_bufferstuple[PandasBuffer, Any]c                 C  s@  | j d tjtjtjtjtjfv r?| j d tjkr,t| j d dkr,| jj	
d }n| j }t|| jd}| j }||fS | j d tjkr]| jjj}t|| jd}| |j }||fS | j d tjkr| j }t }|D ]}t|tr||jdd qottj|dd	}tjd
tjtjf}||fS td| jj  d)zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         N)r   utf-8encodinguint8)r6   r5   rA   r"   )r6   r   INTUINTFLOATBOOLDATETIMEra   r(   dt
tz_convertto_numpyr   r)   r;   r8   _codesr:   r=   	bytearrayr#   strextendencodenp
frombufferr   r   r<   r&   )r*   np_arrbufferr6   r9   bufr   objr+   r+   r,   rj     sB   "	



	zPandasColumn._get_data_bufferc                 C  s   | j \}}| jd tjkrI| j }|dk}| }tjt|ftj	d}t
|D ]\}}t|tr3|n|||< q(t|}tjdtjtjf}	||	fS zt|  d}
W t|
 ty`   tdw )z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   shaper6   r5   z! so does not have a separate maskzSee self.describe_null)rU   r6   r   r=   r(   r}   r   zerosra   bool_	enumerater#   r   r   ry   r   r   r<   _NO_VALIDITY_BUFFERrR   r&   r   )r*   rS   invalidr   validmaskr   r   r   r6   msgr+   r+   r,   rk   ;  s"   

z!PandasColumn._get_validity_bufferc           	      C  s   | j d tjkrM| j }d}tjt|d ftjd}t	|D ]\}}t
|tr5|jdd}|t|7 }|||d < q t|}tjdtjtjf}||fS td)a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   r]   r   rr   rs   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r6   r   r=   r(   r}   r   r   ra   int64r   r#   r   r   r   rv   r   INT64r   r<   r   )	r*   r8   ptrri   r   vr   r   r6   r+   r+   r,   rl   a  s&   

z PandasColumn._get_offsets_buffer)T)r   r   r   r   r    r!   )r    r.   )r    r3   )r    rZ   )N)r_   r`   )r    r   )r    ro   )__name__
__module____qualname____doc__r-   r/   propertyr2   r   r6   r:   rP   rU   rY   r\   r^   rf   rn   rj   rk   rl   r+   r+   r+   r,   r   >   s.    



	


%
3&r   )/
__future__r   typingr   numpyr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr   pandas.util._decoratorsr   pandas.core.dtypes.dtypesr   r	   pandasr$   pandas.api.typesr
   pandas.core.interchange.bufferr   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   rv   rw   rx   ry   r=   rz   rB   USE_NANUSE_SENTINELNON_NULLABLEr;   USE_BYTEMASKrQ   r   r   r+   r+   r+   r,   <module>   sD    