o
    &_h8                    @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlZddlmZmZ ddlZddlZddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$ ddl%Z&eG dd dZ'eG dd dZ(eG dd dZ)G dd dZ*G dd dZ+G dd dZ,G dd dZ-G dd dZ.G d d! d!Z/d"d# Z0G d$d% d%Z1d&d' Z2e3d(kre2  dS dS ))zv
QLD License Checker
================================================================================================
    N)Path)	dataclass)ListDictOptional)datetime)ThreadPoolExecutoras_completed	webdriverOptionsBy)KeysWebDriverWaitexpected_conditions)TimeoutExceptionWebDriverExceptionNoSuchElementException)fuzzc                   @   sr   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< dZeed< dZeed< dZeed< dS )SimpleConfigzSimple configurationhttps://www.qld.gov.au/law/laws-regulated-industries-and-accountability/queensland-laws-and-regulations/check-a-licence-association-charity-or-register/check-a-licencewebsite_urlU   name_similarity_required   max_retry_attempts   request_timeout_seconds      ?delay_between_requests   parallel_browsersT	test_mode   test_record_limitN)__name__
__module____qualname____doc__r   str__annotations__r   intr   r!   r#   floatr%   r&   boolr(    r2   r2   /var/www/html/scrapers/qld.pyr   #   s   
 r   c                   @   sT   e Zd ZU dZeed< eed< eed< eed< dZeed< dd	 Zd
e	fddZ
dS )EmployeeRecordz)Holds one employee's information from CSVpayroll_numberemployee_namelicense_number
csv_expiryr   csv_row_numberc                 C   sD   t | j | _t | j | _t | j | _t | j | _dS )z)Remove extra spaces and clean up the dataN)r-   r5   stripr6   r7   r8   selfr2   r2   r3   
clean_dataK   s   zEmployeeRecord.clean_datareturnc                 C   s   t | jo| jS )z9Check if this record has the minimum required information)r1   r7   r6   r;   r2   r2   r3   has_required_dataR      z EmployeeRecord.has_required_dataN)r)   r*   r+   r,   r-   r.   r9   r/   r=   r1   r?   r2   r2   r2   r3   r4   B   s   
 r4   c                   @   s   e Zd ZU dZdZeed< dZee ed< dZ	eed< dZ
eed< dZeed	< d
Zeed< dZeed< dZeed< dZeed< dd ZdefddZdefddZdefddZdS )SearchResultz)Holds the result of searching QLD website	Not Found
found_nameNlicense_activitieslicense_expiry
No Licensename_matchesexpiry_status error_messager   how_many_retries        search_time_secondsF
was_cachedc                 C   s   | j d u r
g | _ d S d S N)rD   r;   r2   r2   r3   __post_init__d   s   

zSearchResult.__post_init__r>   c                 C   s   | j dko| j S )z-Did we successfully find license information?rB   )rC   rJ   r;   r2   r2   r3   is_successfulh   s   zSearchResult.is_successfulc                 C   s&   | j sdS t| j dd d}d|S )zEReturn formatted activities string with security officer prioritized.rB   c                 S   $   d|   v rd|   fS d|   fS NzSECURITY OFFICER (UNARMEDr   r'   upperxr2   r2   r3   <lambda>t      $ z7SearchResult.get_formatted_activities.<locals>.<lambda>key, )rD   sortedjoinr<   sorted_activitiesr2   r2   r3   get_formatted_activitiesl   s   
z%SearchResult.get_formatted_activitiesc                 C   s$   | j sdS t| j dd d}|d S )z Return the primary license type.rB   c                 S   rR   rS   rT   rV   r2   r2   r3   rX      rY   z2SearchResult.get_primary_license.<locals>.<lambda>rZ   r   )rD   r]   r_   r2   r2   r3   get_primary_licensex   s   z SearchResult.get_primary_license)r)   r*   r+   r,   rC   r-   r.   rD   r   rE   rG   rH   rJ   rK   r/   rM   r0   rN   r1   rP   rQ   ra   rb   r2   r2   r2   r3   rA   W   s   
 rA   c                   @   sN   e Zd ZdZdefddZdd Zdejfdd	Z	d
d Z
dd Zdd ZdS )OptimizedBrowserManagerz6Manages multiple Chrome browsers for reliable scrapingconfigc                 C   s*   || _ t | _g | _d| _t | _d S NF)	rd   queueQueuebrowser_poolall_browserssetup_complete	threadingLocklock)r<   rd   r2   r2   r3   __init__   s
   
z OptimizedBrowserManager.__init__c                 C   s   | j rdS | jI | j r	 W d   dS td| jj d t| jjD ]}|  }| j| | j	
| td|d  d q%d| _ td W d   dS 1 sTw   Y  dS )	z.Create Chrome browsers for parallel processingNzSetting up z Chrome browsers...z   Browser r'   z readyTz+All browsers ready for parallel processing!)rj   rm   printrd   r%   range_create_chrome_browserri   appendrh   put)r<   ibrowserr2   r2   r3   setup_browsers   s   
"z&OptimizedBrowserManager.setup_browsersr>   c                 C   s   t  }|d |d |d |d |d |d |d |d |d	 |d
 |d |d |d |d |d |d |d |d |ddg |dd tj|d}|d |S )z/Create one Chrome browser with optimal settings
--headless--no-sandbox--disable-dev-shm-usagez--disable-gpuz--disable-extensionsz--disable-loggingz--disable-web-securityz--window-size=1280,720z%--disable-background-timer-throttlingz --disable-renderer-backgroundingz(--disable-backgrounding-occluded-windowsz!--disable-ipc-flooding-protectionz--disable-features=TranslateUIz--disable-default-appsz--no-first-runz--memory-pressure-offz --allow-running-insecure-contentz---disable-blink-features=AutomationControlledexcludeSwitcheszenable-automationuseAutomationExtensionFoptionszEObject.defineProperty(navigator, 'webdriver', {get: () => undefined}))r   add_argumentadd_experimental_optionr   Chromeexecute_script)r<   chrome_optionsdriverr2   r2   r3   rq      s0   


















z.OptimizedBrowserManager._create_chrome_browserc                 C   s.   | j s|   | j }t|| jj}||fS )z)Get a browser from the pool (thread-safe))rj   rv   rh   getr   rd   r!   )r<   ru   wait_helperr2   r2   r3   get_browser   s
   
z#OptimizedBrowserManager.get_browserc                 C   s   | j | dS )z Return browser to pool when doneN)rh   rs   )r<   ru   r2   r2   r3   return_browser   r@   z&OptimizedBrowserManager.return_browserc                 C   sN   d}| j D ]}z|  W q   |d7 }Y q|dkr%td| d dS dS )z$Close all browsers when program endsr   r'   z	WARNING: z" browsers failed to close properlyN)ri   quitro   )r<   failed_cleanupsru   r2   r2   r3   cleanup_all_browsers   s   
z,OptimizedBrowserManager.cleanup_all_browsersN)r)   r*   r+   r,   r   rn   rv   r   r   rq   r   r   r   r2   r2   r2   r3   rc      s     
rc   c                   @   s   e Zd ZdZdedefddZdedefdd	Zdedefd
dZ	dededefddZ
dededefddZdedefddZdee dedefddZdedefddZdedefddZdejd ededefd!d"Zd#S )$SeleniumLicenseSearcherz.Searches QLD government website using Seleniumrd   browser_managerc                 C   s&   || _ || _i | _t | _i | _d S rO   )rd   r   search_cacherk   rl   
cache_lock
name_cache)r<   rd   r   r2   r2   r3   rn      s
   

z SeleniumLicenseSearcher.__init__namer>   c                 C   s~   |sdS || j v r| j | S | |}|   }t|dkr2|d  dd|dd  }n|  }|| j |< |S )zEConvert name to standard format for comparison with enhanced cleaningrI   r   r\    N)r   _clean_name_for_matchingr:   rU   splitlenr^   )r<   r   cleaned_nameparts
normalizedr2   r2   r3   normalize_employee_name   s   


"
z/SeleniumLicenseSearcher.normalize_employee_namec                 C   s   |sdS t | }d| }|  }g }|D ] }t|dkr*t|dkr*q|dr6t|dkr6q|| qg }|D ]}d|v rQ|d}|| q@|| q@dd |D }|red|S |S )	z>Clean name by removing common variations and formatting issuesrI   r   r'   .r$   -c                 S   s   g | ]}|  r|qS r2   )r:   ).0wordr2   r2   r3   
<listcomp>+  s    zDSeleniumLicenseSearcher._clean_name_for_matching.<locals>.<listcomp>)	r-   r:   r^   r   rU   r   endswithrr   extend)r<   r   cleanedwordsfiltered_wordsr   processed_wordshyphen_partsr2   r2   r3   r     s(   
z0SeleniumLicenseSearcher._clean_name_for_matchingcsv_namewebsite_namec           	      C   s   |dkrdS |r
|sdS |  |}|  |}| |}| |}||kr&dS | | kr0dS t||t| | t| | t| | g}t|}|| j	j
kr^dS d|ddS )z<Compare names with enhanced cleaning and return match statusrB   zNo MatchYesNo (.1fz%))r   r   rU   r   token_set_ratiotoken_sort_ratiopartial_ratioratiomaxrd   r   )	r<   r   r   cleaned_csvcleaned_websitenorm_csvnorm_websitesimilarity_methods
similarityr2   r2   r3   check_name_similarity0  s*   




z-SeleniumLicenseSearcher.check_name_similarityr8   
web_expiryc           	   
   C   s  t d| dt| dtt| d| d	 |r|dkr#t d dS |r+| d	krit d
 z| |}|s:W dS t|d}|t krIW dS W dS  t	yh } zdt|dd  W  Y d}~S d}~ww zCt|d}| |}|szW dS t|d}|t k }|
 |
 k}|r|sW dS |r|rW dS |s|sW dS |s|rW dS W dS  t	y } zdt|dd  W  Y d}~S d}~ww )z<Calculate expiry status comparison between CSV and web datesz7DEBUG: calculate_expiry_status called with csv_expiry='z	' (type: z, len: z) and web_expiry=''rB   z3DEBUG: Web expiry not found, returning 'No License'rF   rI   z<DEBUG: CSV expiry is empty, checking web license status onlyzInvalid Date%d-%b-%YActiveExpiredzDate Error: N   z%d/%m/%YzActive - Date WrongzExpired - Date WrongUnknown)ro   typer   r-   r:   _clean_web_dater   strptimetoday	Exceptiondate)	r<   r8   r   cleaned_web_expiryweb_dateecsv_date
is_expireddates_matchr2   r2   r3   calculate_expiry_statusX  sN   .
"
"z/SeleniumLicenseSearcher.calculate_expiry_statusr   c                 C   s>  |r|dkrdS |  }t|dkrU|ddkrU|d}t|dkrSt|d dkrSt j}t|dd }|d |dd rS||d d  |d< d	|}|S t|dkr|ddkr|d}t|dkrt|d dkr|d 
 rt|d }|d	krd
|d  |d< n	d|d  |d< d	|}|S )z2Clean and fix malformed web dates from QLD websiterB   rI   	   r   r   r$   Nr      2019)r:   r   countr   r   nowyearr-   
startswithr^   isdigitr/   )r<   r   r   r   current_yeardecadeyear_suffixr2   r2   r3   r     s*   



(
z'SeleniumLicenseSearcher._clean_web_datematching_recordsemployeec           	         s   |si S t |dkr|d S tdt | d  fdd}g }|D ](}||}|||f td|d  d	|d
  d|d  d|d  d| 
 q"|jdd dd |d d }|d d }td| d|d
  d|d  d|d   |S )zPSelect the best record from multiple matching records based on priority criteriar'   r   zDEBUG: Evaluating z) matching records to find the best one...c                    s   d}| d dkr|d7 }n|d7 }z8| d rH| d dkrH  | d }|rHt|d}|t krD|d	7 }|t  j}|t|d
7 }n|d7 }W n   |d7 }Y | dd }d|v rc|d7 }nd|v rk|d7 }|d| d  7 }|S )Nr   
match_typeexacti  i  expiryrB   r      im  2   
   license_typerI   zSECURITY OFFICERd   SECURITY	row_index)r   r   r   r   daysminr   rU   )recordscorecleaned_expiryexpiry_datedays_until_expiryr   r;   r2   r3   calculate_priority_score  s0   


zMSeleniumLicenseSearcher._select_best_record.<locals>.calculate_priority_scorezDEBUG: Record r   z	 - Name: r   , Type: r   
, Expiry: r   z	, Score: c                 S   s   | d S Nr   r2   rV   r2   r2   r3   rX     s    z=SeleniumLicenseSearcher._select_best_record.<locals>.<lambda>T)r[   reversez"DEBUG: Selected record with score : z - )r   ro   rr   sort)	r<   r   r   r   scored_recordsr   r   best_record
best_scorer2   r;   r3   _select_best_record  s    	&8,z+SeleniumLicenseSearcher._select_best_recordc                 C   s~   |j | jv r| j|j  }| |j|j|_d|_|S | |}| r=| j	 || j|j < W d   |S 1 s8w   Y  |S )z0Search for one employee's license using SeleniumTN)
r7   r   r   r6   rC   rG   rN   _search_with_retriesrQ   r   )r<   r   cached_resultresultr2   r2   r3   search_single_license  s   

z-SeleniumLicenseSearcher.search_single_licensec           
      C   s8  |j  st S d}t| jjD ]{}zR| j \}}zAt }| 	|||}t | }|
 rT| |j|j|_| |j|j|_||_||_|W | j| W   S W | j| n| j| w W q ty }	 zt|	}|| jjd k rtd|d   W Y d}	~	qd}	~	ww td| jj d| | jjdS )z(Try searching multiple times if it failsrI   r'   r"   NzFailed after z attempts: )rJ   rK   )r7   r:   rA   rp   rd   r   r   r   time_do_selenium_searchrQ   r   r6   rC   rG   r   r8   rE   rH   rK   rM   r   r   r-   sleep)
r<   r   
last_errorattemptru   r   
start_timer   search_timer   r2   r2   r3   r     sB   
 z,SeleniumLicenseSearcher._search_with_retriesru   r   c                 C   s  zt d|j  || jj t d|j  z|tt	j
df}|j| t d W n ty=   t d Y nw z|tt	jdf}t d W n< ty   t d g d	}d
}|D ] }z|tt	j|f}t d|  W  n	 ty   Y qbw |stdY nw |  ||j  t d|j   z|tj t d W n$   z|t	jd}|  t d W n   |d| t d Y Y t d d}	d
}
g d}|D ]#}z|tt	j|f}
t d|  d}	W  n
 ty   Y qw |	s<z|t	jd}t d|j  tdg ddW W S    Y |d|j d t d|j d tdz|d  td! t d" W n tyf } zt d#|  W Y d
}~nd
}~ww |
t	j
d$d%d
 }t d&t | d' g }t!|D ]\}}|t	j
d(}|rt |d)k rt d*| d+|rt |nd,  q|d, j }t d*| d-| d.|j  d/ ||j kr||d% j |d! j |d0 j d1|d2}|"| t d3|d4  d5|d6  d7|d8   q|j |v r4||d% j |d! j |d0 j d9|d2}|"| t d:|d4  d5|d6  d7|d8   qt d&t | d; |rrt d< t!|d%D ]%\}}t d=| d>|d?  d@|d4  d5|d6  d7|d8  dA|dB   qLd}g }d}d}|r| #||}|d4 }|d8 }|d6 r|"|d6  d}t dC| d5|d6  d7|  n3t dD|j  t dE t!|d
dF D ]\}}|t	j
d(}|rt dG| dH|d, j   qt|||dW S  ty } zt dI|  tdJ|j d
}~w t$y } zt dK|  tdLt%| d
}~w t&y7 } zt dM|  tdNt%| d
}~w tyS } zt dO|  tdPt%| d
}~ww )QzQActually search the QLD government website using Selenium with enhanced debuggingzDEBUG: Searching for license: zDEBUG: Loaded page: iframezDEBUG: Switched to iframez1DEBUG: No iframe found, continuing with main pageLicenceNumberz DEBUG: Found license input fieldz8DEBUG: Trying alternative selectors for license input...)input[name='LicenceNumber']input[placeholder*='licence']input[placeholder*='license']input[type='text']Nz"DEBUG: Found input with selector: z4Could not find license input field with any selectorzDEBUG: Entered license number: z$DEBUG: Submitted form with Enter keyzFbutton[type='submit'], input[type='submit'], button:contains('Search')z(DEBUG: Submitted form with submit buttonzarguments[0].form.submit();z%DEBUG: Submitted form with JavaScriptzDEBUG: Waiting for results...F)ztable#licenceListztable[id*='licence']ztable[id*='license']ztable[class*='data']tablez*DEBUG: Found results table with selector: TzH*:contains('No results'), *:contains('not found'), *:contains('no data')z#DEBUG: Found 'no results' message: rB   )rC   rD   rE   debug_qld_search_z.pngz,DEBUG: Screenshot saved as debug_qld_search_zCould not find results tablea  
                    let dropdown = document.querySelector('select[name="licenceList_length"], select[class*="length"]');
                    if (dropdown) {
                        dropdown.value = '50';
                        dropdown.dispatchEvent(new Event('change', { bubbles: true }));
                        console.log('Set dropdown to 50 entries');
                    }
                r   z&DEBUG: Set dropdown to show 50 entrieszDEBUG: Could not set dropdown: trr'   zDEBUG: Found z result rowstd   zDEBUG: Row z has insufficient cells: r   z license: 'z' vs search: 'r   r$   r   )r7   r   r   r   r   r   z!DEBUG: Exact match found - Name: r   r   r   r   r   partialz#DEBUG: Partial match found - Name: z matching recordsz"DEBUG: All matching records found:z  z. License: r7   z, Name: z	, Match: r   z$DEBUG: Selected best record - Name: z%DEBUG: No matching license found for z,DEBUG: Available license numbers in results:   z  Row r   zDEBUG: Timeout error: zWebsite timeout for license: zDEBUG: Element not found: zPage element not found: zDEBUG: WebDriver error: zWebDriver error: zDEBUG: General error: zSelenium search error: )'ro   r7   r   rd   r   titleuntilECpresence_of_element_locatedr   TAG_NAME	switch_toframer   element_to_be_clickableIDCSS_SELECTORr   clear	send_keysr:   r   RETURNfind_elementclickr   textrA   save_screenshotr   r   find_elementsr   	enumeraterr   r   r   r-   r   )r<   ru   r   r   r   search_inputalternative_selectorsselectorsubmit_buttonresults_foundr  table_selectors
no_resultsr   rowsr   rt   rowcellsrow_licenser   web_nameweb_activitiesr   license_foundr   r2   r2   r3   r   =  s<  



""
*
&D
 z+SeleniumLicenseSearcher._do_selenium_searchN)r)   r*   r+   r,   r   rc   rn   r-   r   r   r   r   r   r   r   r4   r   rA   r   r   r   r   r   r   r2   r2   r2   r3   r      s     )(;#G)r   c                   @   s.   e Zd ZdZedededee fddZ	dS )SimpleCSVHandlerz"Reads employee data from CSV files	file_pathrd   r>   c              
      s  zt j| tdd  jj jdd _g d} fdd|D }|r5tdd	| d
t j dg } 	 D ]9\}}t
|d |d |d |d |d d}|  | rt|| |jrtt||jkrttd|j d  nq;|s{tdg }t }	d}
|D ]}|j|	vr|	|j || q|
d7 }
q|
dkrtd|
 d |W S  ty } z
tdt|   d}~ww )z Load employee data from CSV fileF)dtype	na_filteru   ﻿rI   )Payroll NumberEmployee NameLicense NumberExpiry/Update  Datec                    s   g | ]	}| j vr|qS r2   )columns)r   coldfr2   r3   r   (  s    z7SimpleCSVHandler.load_employee_data.<locals>.<listcomp>z"
ERROR: Missing required columns: r\   z

Your CSV file MUST have these exact column names:
- Payroll Number
- Employee Name
- License Number
- Expiry/Update  Date

Current columns in your file: z
                r2  r3  r4  r5  r   r5   r6   r7   r8   r9   z!TEST MODE: Processing only first  recordsz2ERROR: No valid employee records found in CSV filer   r'   zNOTE: Removed z duplicate license numberszERROR reading CSV file: N)pdread_csvr-   r6  r:   replace
ValueErrorr^   listiterrowsr4   r=   r?   rr   r&   r   r(   ro   setr7   addr   )r/  rd   required_columnsmissing_columns	employeesindexr(  r   unique_employeesseen_licensesduplicates_removedr   r2   r8  r3   load_employee_data  sZ   	



z#SimpleCSVHandler.load_employee_dataN)
r)   r*   r+   r,   staticmethodr-   r   r   r4   rK  r2   r2   r2   r3   r.         r.  c                   @   st   e Zd ZdZdefddZdd Zdedefd	d
Z	dde
dedefddZde
defddZde
defddZdS )SimpleProgressTrackerz1Shows nice progress bar and processing statisticstotal_employeesc                 C   s<   || _ d| _d| _d| _d| _t | _d| _t	 | _
d S r   )rO  	completed
successfulfailedcachedr   r   last_updaterk   rl   rm   )r<   rO  r2   r2   r3   rn   l  s   
zSimpleProgressTracker.__init__c              
   C   sd   t dd  t d| j d t d  t ddddd	dd
dddddd 	 t d  dS )zShow the processing header
U=====================================================================================z!QLD LICENSE CHECKER - PROCESSING z
 EMPLOYEESProgress<15r   Employee<25LicenseStatus<12SpeedzU-------------------------------------------------------------------------------------N)ro   rO  r;   r2   r2   r3   show_headerv  s
   
,z!SimpleProgressTracker.show_headerr   r   c                 C   sl  | j - |  jd7  _|jr|  jd7  _n| r"|  jd7  _n|  jd7  _W d   n1 s3w   Y  t }|| j dk rEdS || _| j| j	 d }| 
|}t|| j d}| j| d }t|jdkrt|jdd d	 n|j}t|jd
kr|jdd d	 n|j}	|jrd}
n	| rd}
nd}
td| d|dd|	dd|
dd|ddddd dS )zUpdate progress displayr'   Nr"   r   g{Gz?<         z..r      CachedSuccessFailedr   rZ  rX  r]  .0fz/minrI   T)endflush)rm   rP  rN   rS  rQ   rQ  rR  r   rT  rO  _make_progress_barr   r   r   r6   r7   ro   )r<   r   r   current_timepercentprogress_barelapsedspeeddisplay_namedisplay_licensestatusr2   r2   r3   update_progress~  sN   
.
z%SimpleProgressTracker.update_progressr    rm  widthr>   c                 C   s8   t || d }d| d||   }d| d|ddS )zCreate ASCII progress barr   Xr   [z] z5.1f%r/   )r<   rm  ru  filledbarr2   r2   r3   rk    s   z(SimpleProgressTracker._make_progress_barelapsed_timeexcel_file_pathc                 C   s   t dd  t d t d  t d| j  t d| j  t d| j  t d| j  t d| jt| jd	 d
 dd t d| |  t d| j| d dd t d|  t d  dS )zShow final processing summaryz

rV  zPROCESSING COMPLETED!zTotal employees processed: zSuccessful searches: zFailed searches: zCached results: zSuccess rate: r'   r   r   rx  zTotal time: zProcessing speed: r`  z employees/minutezExcel report saved: N)ro   rP  rQ  rR  rS  r   _format_time)r<   r|  r}  r2   r2   r3   show_final_summary  s   
$z(SimpleProgressTracker.show_final_summarysecondsc                 C   sp   |dk r
|ddS |dk r"t |d }t |d }| d| dS t |d }t |d d }| d| dS )	z"Convert seconds to readable formatr`  rh  z secondsi  zm szh mry  )r<   r  minutessecshoursr2   r2   r3   r~    s   z"SimpleProgressTracker._format_timeN)r    )r)   r*   r+   r,   r/   rn   r_  r4   rA   rt  r0   r-   rk  r  r~  r2   r2   r2   r3   rN  i  s    
,rN  c                   @   s.   e Zd ZdZedee dedefddZdS )SimpleExcelGeneratorz'Creates Excel reports with color codingresultsoriginal_file_pathr>   c                 C   s  z@t |}t d}|j|j d| d }g }| D ]W}|dd}|dd}|dd}	|dd}
|d	kpl|d
kpl|dkpl|dpl|d	kpl|dkpl|	d	kpl|	dkpl|
d
kpl|
dkpl|
dpld|
v pld|
v }|rt|| q|sdddt	|  ddddddd	}|g}|j|j d| d }t
|}t
j|dd}t	|dksdt|d d dvrd!nd"}|j|d#|d$ |j}|j| }|d%d&d'dd(d)d%d*}t|jD ]5\}}|d||| tt	t||js|| tj	  nd}tt|d+ d,d-}|||| q|dd. |ddt	|t	|jd  |dd W d/   n	1 s8w   Y  t|W S  tyX } z
td0t|   d/}~ww )1zDCreate Excel report - EXCEPTIONS ONLY (problems that need attention)z%Y%m%d_%H%M%S_Exceptions_z.xlsx
Name MatchrI   Licence NameLicence Type(s)Expiry StatusrB   rF   Errorr   r   zError:z
Date Wrongr   zN/Az"ALL RECORDS PROCESSED SUCCESSFULLYr;  zNo exceptionsz	All foundz	All validzAll dates matchz
All active	r2  zLicence NumberRolecall Namer  r  r  zLicence ExpiryzRolecall Expiryr  _QLD_ALL_SUCCESS_
xlsxwriter)enginer'   zALL RECORDSr   r  
ExceptionszProcessing SummaryF)rG  
sheet_nameTz#8C1E31whitecentervcenter)boldbg_color
font_colorborderalignvalign	text_wrapr$      r   r    NzERROR creating Excel report: )r   r   r   strftimeparentstemr   r   rr   r   r<  	DataFrameExcelWriterr-   to_excelbooksheets
add_formatr  r6  writer   emptyastyper   
set_columnset_row
autofilterfreeze_panesr   ro   )r  r  original_path	timestamp
excel_pathexception_resultsr   
name_matchlicence_namelicence_typesrH   is_exceptionsummary_resultr9  writerr  workbook	worksheetheader_colorcol_numheader
max_length	col_widthr   r2   r2   r3   create_excel_report  s   	





 
)z(SimpleExcelGenerator.create_excel_reportN)	r)   r*   r+   r,   rL  r   r   r-   r  r2   r2   r2   r3   r    rM  r  c                   @   st   e Zd ZdZdd Zdd Zdd Zdefd	d
Zde	de
fddZdee dee fddZdedefddZdS )SeleniumQLDLicenseCheckerz0Main application class - using reliable Seleniumc                 C   s   t  | _t| j| _d | _d S rO   )r   rd   rc   r   progress_trackerr;   r2   r2   r3   rn   C  s   
z"SeleniumQLDLicenseChecker.__init__c              
   C   s4  zz?|    |  }t|| j}| t|s%td W W | j	  dS | 
|}t||}t | jj }| j|| W n; tyM   td Y n7 ty{ } z#tdt|  td td td td td	 W Y d}~nd}~ww W | j	  dS W | j	  dS W | j	  dS | j	  w )
z0Main function - this is where everything happenszProcess cancelled by userNz*

Process stopped by user (Ctrl+C pressed)z
ERROR: z
TROUBLESHOOTING TIPS:z1. Make sure your CSV file has the exact column names: 'Payroll Number', 'Employee Name', 'License Number', 'Expiry/Update  Date'z)2. Check that Chrome browser is installedz)3. Make sure you have internet connectionz<4. Try running as administrator if you get permission errors)_show_welcome_get_csv_file_from_userr.  rK  rd   _ask_user_to_continuer   ro   r   r   _process_all_employeesr  r  r   r  r   r  KeyboardInterruptr   r-   )r<   csv_file_pathrF  r  r  r|  r   r2   r2   r3   runH  s>   

zSeleniumQLDLicenseChecker.runc                 C   s   t d dS )zShow welcome messagez!QLD License Checker - Starting...N)ro   r;   r2   r2   r3   r  p  s   z'SeleniumQLDLicenseChecker._show_welcomer>   c                 C   s   t tjdkrtjd  d}td|  ntd td td td td td	 d}|s<td
t| sItd| tdt|j	  |S )zGet CSV file path from userr'   z"'zUsing file from command line: z
Please provide your CSV file:z   You can either:z   1. Type the full file pathz-   2. Drag and drop the file into this windowz"   3. Copy and paste the file pathz
Enter CSV file path: zNo file path providedzFile not found: zFile found: )
r   sysargvr:   ro   inputr   r   existsr   )r<   r/  r2   r2   r3   r  t  s   z1SeleniumQLDLicenseChecker._get_csv_file_from_useremployee_countc                 C   s~   | j jrtdt|| j j d dS td| d td| j j d 	 td  }|dv r4dS |d	v r:d
S td q&)z.Ask user if they want to process the employeesz
TEST MODE: Will process z
 employeesTz
Ready to process z	Will use z parallel Chrome browsersz"
Continue with processing? (Y/N): )yyes)nnoFz&Please enter 'y' for yes or 'n' for no)	rd   r&   ro   r   r(   r%   r  r:   lower)r<   r  responser2   r2   r3   r    s   z/SeleniumQLDLicenseChecker._ask_user_to_continuerF  c                    s`  t t|| _t| j| jg }t| jjd  fdd|D }t|D ]w}|| }z/|	 }|j
|j|j|j|j| |j|j|jd	}|| | j|| t| jj W q& ty } z6td|j dt|  |j
|j|jdddd|jdd	}	||	 tt|d}
| j||
 W Y d	}~q&d	}~ww W d	   |S 1 sw   Y  |S )
zAProcess all employees with minimal output unless there are issuesmax_workersc                    s   i | ]
}  j||qS r2   )submitr   r   r   executorsearcherr2   r3   
<dictcomp>  s    zDSeleniumQLDLicenseChecker._process_all_employees.<locals>.<dictcomp>r  zERROR processing r   r  rJ   N)rN  r   r  r   rd   r   r   r%   r	   r   r5   r7   r6   rC   rG   ra   rE   r8   rH   rr   rt  r   r   r#   r   ro   r-   rA   )r<   rF  all_resultsfuture_to_employeefuturer   search_resultresult_dictr   error_resulterror_search_resultr2   r  r3   r    s\   


<<z0SeleniumQLDLicenseChecker._process_all_employeesr   c                 C   s   g }|j r
|d |jdkr|d|jdd |jdkr(|d|j  |jr5|d|j  n|d |rAd	|S d
S )z-Create informative notes for the Excel reportz!Cached result (duplicate license)r   zSearch time: z.2fr  z	Retries: zError: zSelenium scrapingz | zProcessed successfully)rN   rr   rM   rK   rJ   r^   )r<   r   notesr2   r2   r3   _create_notes_for_result  s   



z2SeleniumQLDLicenseChecker._create_notes_for_resultN)r)   r*   r+   r,   rn   r  r  r-   r  r/   r1   r  r   r4   r   r  rA   r  r2   r2   r2   r3   r  @  s    (Ir  c               
   C   s  zddl m}  ddlm} ddlm} ddlm} ddlm	} | }|
d |
d |
d	 | j|d
}||d}ztd |d td|j  td|j  z|||jdf}|j| td W n   td Y d }	|jdf|jdf|jdf|jdf|jdfg}
|
D ]!\}}z||||f}	td|  W  n	 ty   Y qw |	rtd W |  W dS td |d td W |  W dS |  w  ty
 } z$tdt|  td  td! td" td# td$ W Y d }~dS d }~ww )%Nr   r
   r   r   r   r   rw   rx   ry   r|   r   z$Testing connection to QLD website...r   zPage title: zCurrent URL: r   zFound iframe and switched to itz*No iframe found, continuing with main pager  r  r  r  r  z#Found license input with selector: z Selenium connection test PASSED!Tz8License input field not found - website may have changedzdebug_connection_test.pngz-Screenshot saved as debug_connection_test.pngFz!Selenium connection test failed: zCommon fixes:z 1. Install Google Chrome browserz"2. Update Chrome to latest version3. Run as administratorz4. Check internet connection)seleniumr   !selenium.webdriver.chrome.optionsr   selenium.webdriver.common.byr   selenium.webdriver.support.uir   selenium.webdriver.supportr   r~   r   ro   r   r  current_urlr  r  r  r  r  r  r  r   r   r  r   r-   )r   r   r   r   r  r   r   waitr   license_inputselectors_to_trybyr"  r   r2   r2   r3   test_selenium_connection  sx   



	
r  c                   @   st   e Zd ZdZdd Zdd Zdedee fdd	Z	defd
dZ
defddZdee dee fddZdd ZdS )QLDLicenseCheckerAPIzIAPI wrapper for Flask routes to process CSV files and return JSON resultsc                 C   s    t  | _d| j_d | _d | _d S re   )r   rd   r&   r   r  r;   r2   r2   r3   rn   X  s   
zQLDLicenseCheckerAPI.__init__c                 C   s@   | j du s	| j jst| j| _ | j   t| j| j | _dS dS )z/Ensure browser manager is initialized and readyN)r   rj   rc   rd   rv   r   r  r;   r2   r2   r3   _ensure_browser_manager_  s
   
z,QLDLicenseCheckerAPI._ensure_browser_managerr  r>   c                    s  z   t|j}|sg W S tt|}g }tjjd  fdd|D }t|D ]~}|| }z<|	 }|j
|j|j|j|j| | |j|j|j|j|j|j|j| d}	||	 ||| tjj W q/ ty }
 z0|j
|j|jddddd|jddddt|
dd}|| t t|
d	}||| W Y d
}
~
q/d
}
~
ww W d
   |W S 1 sw   Y  |W S  ty }
 zdt|
ddgW  Y d
}
~
S d
}
~
ww )zKProcess CSV file and return results as JSON-compatible list of dictionariesr  c                       i | ]}  jj||qS r2   r  r  r   r  r  r<   r2   r3   r  y      z9QLDLicenseCheckerAPI.process_csv_file.<locals>.<dictcomp>r5   r7   r6   rC   r  license_typesprimary_licenserE   r8   rH   rM   retriesrN   rJ   rQ   r  rL   r   Fr  NzCSV processing failederrormessagerQ   )!r  r.  rK  rd   rN  r   r   r%   r	   r   r5   r7   r6   rC   rG   ra   rb   rE   r8   rH   rM   rK   rN   rJ   rQ   rr   rt  r   r   r#   r   r-   rA   )r<   r  rF  r  r  r  r  r   r  r  r   r  r  r2   r  r3   process_csv_filef  s   


DDz%QLDLicenseCheckerAPI.process_csv_filec              
   C   sX   zt  r
dddW S dddW S  ty+ } zddt| dW  Y d}~S d}~ww )	z0Test connection to QLD website and return statussuccessz"QLD website connection test passed)rs  r  warningz=QLD website connection test failed - website may have changedr   zConnection test failed: N)r  r   r-   )r<   r   r2   r2   r3   test_connection  s   z$QLDLicenseCheckerAPI.test_connectionc                 C   s0   | j j| j j| j j| j j| j j| j j| j jdS )z"Get current configuration settings)r   r   r   r!   r#   r%   r&   )rd   r   r   r   r!   r#   r%   r&   r;   r2   r2   r3   
get_config  s   zQLDLicenseCheckerAPI.get_configrecords_datac                    s  zH   |rtdt|d    td|d   g }t|D ]m\}}d}g d}|D ]
}||v r:|} nq0|sNtd| dt|   d}nt||d }td	| d
| d|  tt|dd t|dd t|dd ||d d}	|		  |
|	 q$|sg W S tt|}
g }tjjd  fdd|D }t|D ]}|| }	z<| }|	j|	j|	j|j|j| | |j|	j|j|j|j|j|j|  d}|
| |
!|	| t"#jj$ W q t%y4 } z0|	j|	j|	jddddd|	jddddt|dd}|
| t&t|d}|
!|	| W Y d}~qd}~ww W d   |W S 1 sBw   Y  |W S  t%ye } zdt|ddgW  Y d}~S d}~ww )zRProcess list of records and return results as JSON-compatible list of dictionariesz'DEBUG: Available keys in first record: r   zDEBUG: First record data: N)r5  zExpiry/Update DatezExpiry DateExpiryr8   r   z%DEBUG: No expiry key found in record z, available keys: rI   zDEBUG: Using key 'z' with value 'z' for record r2  r3  r4  r'   r:  r  c                    r  r2   r  r  r  r2   r3   r    r  z8QLDLicenseCheckerAPI.process_records.<locals>.<dictcomp>r  r  rL   Fr  zRecords processing failedr  )'r  ro   r@  keysr  r-   r   r:   r4   r=   rr   rN  r   r   rd   r%   r	   r   r5   r7   r6   rC   rG   ra   rb   rE   r8   rH   rM   rK   rN   rJ   rQ   rt  r   r   r#   r   rA   )r<   r  rF  rt   record_data
expiry_keypossible_keysr[   expiry_valuer   r  r  r  r  r  r  r   r  r  r2   r  r3   process_records  s   	


DDz$QLDLicenseCheckerAPI.process_recordsc                 C   s$   | j r| j   d| _ d| _dS dS )z<Clean up browsers when completely done with the API instanceN)r   r   r  r;   r2   r2   r3   cleanupi  s
   

zQLDLicenseCheckerAPI.cleanupN)r)   r*   r+   r,   rn   r  r-   r   r   r  r  r  r  r  r2   r2   r2   r3   r  U  s    _ r  c               
   C   s   t tjdkrtjd  } | dv rt  dS ztd td t s+td td t }|  W dS  tyB   td Y dS  t	yy } z,td	t
|  td
 td td td td td td W Y d}~dS d}~ww )zMain entry pointr'   )z--testz-ttestNzStarting QLD License Checker...z.
Testing Selenium connection to QLD website...z4Connection issues detected, but continuing anyway...zKIf all searches fail, the website may have changed or Chrome needs updatingz%
Selenium application stopped by userz
FATAL ERROR: z
SELENIUM TROUBLESHOOTING:z,1. Check your CSV file has the right columnsz'2. Install/update Google Chrome browserr  z!4. Check your internet connectionz85. Try reducing parallel_browsers to 1 if getting errorsz16. Run with --test to check Selenium connectivity)r   r  r  r  r  ro   r  r  r  r   r-   )argappr   r2   r2   r3   mainu  s4   r  __main__)4r,   jsonr   pathlibr   dataclassesr   typingr   r   r   r   r  concurrent.futuresr   r	   rk   rf   r  r   r  r   r  r   selenium.webdriver.common.keysr   r  r   r  r   r  selenium.common.exceptionsr   r   r   	rapidfuzzr   pandasr<  r   r4   rA   rc   r   r.  rN  r  r  r  r  r  r)   r2   r2   r2   r3   <module>   sZ    0^    8Oho EQ  "*
