o
    &_h=                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZ d dlmZmZ ejejdd ee Z!eG dd dZ"eG dd dZ#G dd dZ$dS )    N)	webdriver)Options)By)NoSuchElementExceptionTimeoutException)WebDriverWait)fuzz)datetime)parser)contextmanager)DictListOptionalTuple)	dataclassasdictz)%(asctime)s - %(levelname)s - %(message)s)levelformatc                   @   sP   e Zd ZU dZeed< eed< eed< eed< edeeef dd fdd	Zd
S )LicenseRecordzData class for license recordspayroll_numberemployee_namelicense_number
csv_expiryrowreturnc                 C   s@   | | dd | dd | dd | dd dS )NPayroll Number Employee NameLicense NumberExpiry/Update  Dater   r   r   r   )getstrip)clsr    r$   /var/www/html/scrapers/act.pyfrom_csv_row   s   zLicenseRecord.from_csv_rowN)	__name__
__module____qualname____doc__str__annotations__classmethodr   r&   r$   r$   r$   r%   r      s   
  r   c                   @   sZ   e Zd ZU dZeed< eed< eed< eed< eed< eed< eed< eed	< eed
< dS )ACTVerificationResultz'Data class for ACT verification resultsr   r   rolecall_namedatabase_name
name_matchrolecall_expirydatabase_expiryexpiry_statuslicense_typeN)r'   r(   r)   r*   r+   r,   r$   r$   r$   r%   r.   '   s   
 r.   c                   @   s  e Zd ZdZdd ZdedefddZdedee fd	d
Z	e
dd Zdejdedee fddZdedefddZdededefddZdedee defddZdejdededeeeef fddZdejded edefd!d"Zdedee fd#d$Zd%ee dee fd&d'Zd(S ))ACTLicenseCheckerAPIz ACT License checking API servicec                 C   s   d| _ d| _d S )Nu   ﻿z}https://services.accesscanberra.act.gov.au/s/public-registers/occupational-register?registerid=security-employee&licenceID={})BOM_CHARSEARCH_URL_TEMPLATEselfr$   r$   r%   __init__8   s   
zACTLicenseCheckerAPI.__init__namer   c                 C   sP   |sdS |    }t|dkr"|d  dd|dd  S |   S )z$Normalize name format for comparisonr      z,  N)r"   uppersplitlenjoin)r:   r<   partsr$   r$   r%   normalize_name<   s   8z#ACTLicenseCheckerAPI.normalize_name	file_pathc           	   
      s   g }zVt |ddd7}t|}|jstd fdd|jD }||_|D ]}dd | D }|t| q&W d	   n1 sDw   Y  t	
d
t| d|  |W S  tym } z	t	d|   d	}~ww )z Load CSV file and return recordsr   zutf-8)newlineencodingzCSV file has no headersc                    s0   g | ]}|  jr| j n| qS r$   )
startswithr7   lstripr"   ).0hr9   r$   r%   
<listcomp>M   s    "z6ACTLicenseCheckerAPI.load_csv_file.<locals>.<listcomp>c                 S   s,   i | ]\}}|  t|tr|  n|qS r$   )r"   
isinstancer+   )rK   kvr$   r$   r%   
<dictcomp>U   s     z6ACTLicenseCheckerAPI.load_csv_file.<locals>.<dictcomp>NzLoaded z records from zError loading CSV: )opencsv
DictReader
fieldnames
ValueErroritemsappendr   r&   loggerinforB   	Exceptionerror)	r:   rF   recordscsvfile
csv_readercleaned_headersr   cleaned_rower$   r9   r%   load_csv_fileC   s0   

z"ACTLicenseCheckerAPI.load_csv_filec                 c   s    t  }|d |d |d |d |d |d |d d}ztj|d	}t|d
}||fV  W |rC|  dS dS |rL|  w w )z/Context manager for WebDriver setup and cleanupz--disable-gpuz--no-sandboxz--window-size=1920,1080z--disable-dev-shm-usagez--disable-extensionsz---disable-blink-features=AutomationControlledz
--headlessN)options   )r   add_argumentr   Chromer   quit)r:   chrome_optionsdriverwaitr$   r$   r%   setup_driver`   s&   








z!ACTLicenseCheckerAPI.setup_driverrj   xpathc                 C   s<   z| tj|}|jr|j W S dW S  ty   Y dS w )z-Helper method to safely get text from elementN)find_elementr   XPATHtextr"   r   )r:   rj   rm   elementr$   r$   r%   get_text_or_noneu   s   z%ACTLicenseCheckerAPI.get_text_or_nonetype_strc              	   C   s@   z|  }t|dkr|d W S |W S  ttfy   | Y S w )z Extract license code for sortingr=   )rA   rB   
IndexErrorAttributeError)r:   rs   rD   r$   r$   r%   extract_license_code}   s   z)ACTLicenseCheckerAPI.extract_license_codecsv_nameweb_namec                 C   sL   |r|r|dkr
dS |  |}| }t||}|dkrdS d|ddS )zCalculate name match percentage	Not Foundz
No LicenseU   YeszNo (.1fz%))rE   r@   r   token_set_ratio)r:   rw   rx   norm_csvnorm_web
similarityr$   r$   r%   calculate_name_match   s   
z)ACTLicenseCheckerAPI.calculate_name_matchr   web_expiry_partsc           
   
   C   s  |r|sdS zct j|dd}g }|D ]L}z<t j| dd}|t k }| | k}|r6|s6|d n|r@|r@|d n|sJ|sJ|d n|d W q ty^   |d Y qw |rgd	|W S dW S  ty }	 zt	
d
|	  dt|	dd  W  Y d}	~	S d}	~	ww )z8Calculate expiry status comparison for multiple licenseszMissing DateT)dayfirstActiveExpiredzActive - Date WrongzExpired - Date WrongzParse Error | z!Error calculating expiry status: zError: N   )r
   parser"   r	   todaydaterX   r[   rC   rY   warningr+   )
r:   r   r   csv_datematchesdate_strweb_date
is_expireddates_matchrb   r$   r$   r%   calculate_expiry_status   s6   
"z,ACTLicenseCheckerAPI.calculate_expiry_statusrk   r   c              
      s  |sdS z j |}|| |dd  z|tjd}|j }W n t	y0   d}Y nw g }g }d}		 d|	 d	}
d|	 d
} 
||
} 
||}|rk|rk|| |dd }|| |	d7 }	nnq8|r|rtt||}|j fddd dd |D }dd |D }d|}d|}nd}d}|||fW S  tt	fy } ztd| d|  W Y d}~dS d}~w ty } ztd| d|  W Y d}~dS d}~ww )z1Verify a single license and return extracted data)ry   ry   ry   c                 S   s"   |  tjdpd| jv pd| jv S )Nz///c-cxs-spf-public-register-occupational//tablez	No recordzdoes not hold a licence)find_elementsr   ro   page_source)dr$   r$   r%   <lambda>   s   
z5ACTLicenseCheckerAPI.verify_license.<locals>.<lambda>z[/html/body/div[3]/div[4]/div/div/div[1]/div/div/c-cxs-spf-banner/div/div/div/div/div/div/h1ry      Tzn/html/body/div[3]/div[4]/div/div/div[2]/div/div/c-cxs-spf-public-register-occupational/div/div/div/div[2]/div[z]/table/tbody/tr[2]/tdz]/table/tbody/tr[4]/tdzDate expiry:r   c                    s     | d S )Nr   )rv   )pairr9   r$   r%   r      s    )keyc                 S      g | ]}|d  qS )r   r$   rK   pr$   r$   r%   rM          z7ACTLicenseCheckerAPI.verify_license.<locals>.<listcomp>c                 S   r   )r   r$   r   r$   r$   r%   rM      r   r   z License verification failed for : Nz#Unexpected error verifying license )Errorr   r   )r8   r   r!   untilrn   r   ro   rp   r"   r   rr   rX   replacelistzipsortrC   r   rY   r   r[   r\   )r:   rj   rk   r   
search_urlname_elementrx   web_type_partsr   i
type_xpathexpiry_xpathtype_val
expiry_valclean_expirytype_expiry_pairsweb_type_parts_sortedweb_expiry_parts_sortedweb_type
web_expiryrb   r$   r9   r%   verify_license   sd   
	



z#ACTLicenseCheckerAPI.verify_licenserecordc           
      C   s|   |  |||j\}}}| |j|}g }|dkr&|dkr&dd |dD }| |j|}	t|j|j|j|||j||	|d	S )z6Process a single record and return verification resultry   r   c                 S   s   g | ]}|  qS r$   )r"   )rK   partr$   r$   r%   rM      r   z7ACTLicenseCheckerAPI.process_record.<locals>.<listcomp>r   	r   r   r/   r0   r1   r2   r3   r4   r5   )	r   r   r   r   rA   r   r   r.   r   )
r:   rj   rk   r   rx   r   r   r1   r   r4   r$   r$   r%   process_record   s"   z#ACTLicenseCheckerAPI.process_recordc                 C   s  z|  |}|sg W S g }t }|  x\}}t|D ]i\}}z0| |||}	||	 |d d dkrOt | }
td|d  dt| d|
dd W q t	y } z)t
d	|d  d
|  t|j|j|jdd|jdddd	}|| W Y d}~qd}~ww W d   n1 sw   Y  g }|D ]	}	|t|	 qt | }tdt| d|dd |W S  t	y } z	t
d|   d}~ww )z;Process CSV file and return results as list of dictionariesr   
   r   
Processed / records in r|   sError processing record r   r   r   NCompleted processing zError processing CSV file: )rc   timerl   	enumerater   rX   rY   rZ   rB   r[   r\   r.   r   r   r   r   r   )r:   rF   r]   output_rows
start_timerj   rk   r   r   resultelapsedrb   error_resultresult_dicts
total_timer$   r$   r%   process_csv_file  sX   

(z%ACTLicenseCheckerAPI.process_csv_filerecords_datac                 C   s  zg }|D ]'}t |dd |dd |dd |dd d}|| q|s2g W S g }t }|  x\}}t|D ]i\}	}z0| |||}
||
 |	d d d	krvt | }t	d
|	d  dt
| d|dd W qC ty } z)td|	d  d|  t|j|j|jdd|jdddd	}|| W Y d}~qCd}~ww W d   n1 sw   Y  g }|D ]	}
|t|
 qt | }t	dt
| d|dd |W S  ty } z	td|   d}~ww )zBProcess list of records and return results as list of dictionariesr   r   r   r   r   r    r   r   r   r   r   r   r|   r   r   r   r   r   Nr   zError processing records: )r   r!   r"   rX   r   rl   r   r   rY   rZ   rB   r[   r\   r.   r   r   r   r   r   )r:   r   r]   record_datar   r   r   rj   rk   r   r   r   rb   r   r   r   r$   r$   r%   process_recordsC  sh   
(z$ACTLicenseCheckerAPI.process_recordsN)r'   r(   r)   r*   r;   r+   rE   r   r   rc   r   rl   r   rg   r   rr   rv   r   r   r   r   r   r.   r   r   r   r   r$   r$   r$   r%   r6   5   s    
& E5r6   )%rS   r   loggingseleniumr   !selenium.webdriver.chrome.optionsr   selenium.webdriver.common.byr   selenium.common.exceptionsr   r   selenium.webdriver.support.uir   	rapidfuzzr   r	   dateutilr
   
contextlibr   typingr   r   r   r   dataclassesr   r   basicConfigINFO	getLoggerr'   rY   r   r.   r6   r$   r$   r$   r%   <module>   s*    
