"""
TAS License Checker
================================================================================================
"""

import json
import time
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Optional
from datetime import datetime
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import queue

# Selenium libraries for web scraping
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Data processing libraries
from rapidfuzz import fuzz
import pandas as pd
from dateutil import parser

# CAPTCHA automation libraries
import requests
import time
import re


# ================================================================================================
# CONFIGURATION - Change these settings if needed
# ================================================================================================

@dataclass
class SimpleConfig:
    """Tunable settings for the TAS license checker.

    Every field has a default, so ``SimpleConfig()`` yields a usable
    configuration; override individual fields as keyword arguments.
    """

    # Website URL - Don't change unless TAS government changes their site
    website_url: str = "https://occupationallicensing.justice.tas.gov.au/Search/onlinesearch.aspx"

    # Minimum name-similarity score required for a match (85 = 85% similar)
    name_similarity_required: int = 85

    # How many times to retry if the website is slow
    max_retry_attempts: int = 2

    # How long to wait for the website to respond (seconds)
    request_timeout_seconds: int = 20

    # Delay between requests to be nice to the website (seconds)
    delay_between_requests: float = 0.5

    # How many parallel browsers to run (1 is required due to CAPTCHA)
    parallel_browsers: int = 1

    # Test mode - only process the first `test_record_limit` records for testing
    test_mode: bool = True
    test_record_limit: int = 20

    # CAPTCHA handling
    show_browser_for_captcha: bool = True  # Show browser window for CAPTCHA solving (headless when False)
    captcha_timeout_seconds: int = 300  # Maximum time to wait for manual CAPTCHA solving (5 minutes)
    captcha_check_interval: float = 2.0  # How often to check if CAPTCHA is solved (seconds)

    # CAPTCHA automation settings
    enable_captcha_automation: bool = True  # Enable automatic CAPTCHA solving
    captcha_service_api_key: str = ""  # 2captcha API key - ADD YOUR KEY HERE (empty disables the service calls)
    # NOTE(review): both service URLs use plain http, so the API key travels
    # unencrypted - confirm whether the https endpoints can be used instead.
    captcha_service_url: str = "http://2captcha.com/in.php"  # 2captcha submission URL
    captcha_result_url: str = "http://2captcha.com/res.php"  # 2captcha result-polling URL
    captcha_solve_timeout: int = 120  # Maximum time to wait for the service's solution (seconds)


# ================================================================================================
# DATA CLASSES
# ================================================================================================

@dataclass
class EmployeeRecord:
    """One employee row as read from the CSV input."""
    payroll_number: str
    employee_name: str
    license_number: str
    csv_expiry: str
    csv_row_number: int = 0

    def clean_data(self):
        """Coerce every text field to ``str`` and trim surrounding whitespace.

        ``csv_row_number`` is left untouched.
        """
        for attr in ("payroll_number", "employee_name", "license_number", "csv_expiry"):
            setattr(self, attr, str(getattr(self, attr)).strip())

    def has_required_data(self) -> bool:
        """Return True when both a license number and an employee name are present."""
        if not self.license_number:
            return False
        return bool(self.employee_name)


@dataclass
class SearchResult:
    """Outcome of one license lookup on the TAS website.

    The defaults describe a lookup that found nothing.
    """
    found_name: str = "Not Found"
    license_type: str = "Not Found"
    license_expiry: str = "Not Found"
    name_matches: str = "No License"
    expiry_status: str = "No License"
    error_message: str = ""
    how_many_retries: int = 0
    search_time_seconds: float = 0.0
    was_cached: bool = False

    def is_successful(self) -> bool:
        """Return True when a name was found and no error message was recorded."""
        if self.found_name == "Not Found":
            return False
        return not self.error_message


# ================================================================================================
# CAPTCHA AUTOMATION
# ================================================================================================

class CaptchaAutomation:
    """Handles automatic CAPTCHA solving using 2captcha service"""

    def __init__(self, config: SimpleConfig):
        """Keep the configuration and open an HTTP session for the solving service.

        Args:
            config: Settings object, stored on ``self.config``.
        """
        http_session = requests.Session()
        # Every request made through this session carries a browser-like User-Agent.
        http_session.headers.update(
            {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        )
        self.config = config
        self.session = http_session

    def solve_recaptcha(self, site_key: str, page_url: str) -> str:
        """Solve reCAPTCHA using 2captcha service"""
        if not self.config.enable_captcha_automation or not self.config.captcha_service_api_key:
            return ""

        try:
            print("🤖 Attempting automatic CAPTCHA solving...")
            
            # Submit CAPTCHA to solving service
            submit_data = {
                'key': self.config.captcha_service_api_key,
                'method': 'userrecaptcha',
                'googlekey': site_key,
                'pageurl': page_url,
                'json': 1
            }
            
            response = self.session.post(self.config.captcha_service_url, data=submit_data)
            result = response.json()
            
            if result.get('status') != 1:
                print(f"❌ CAPTCHA submission failed: {result.get('error_text', 'Unknown error')}")
                return ""
            
            captcha_id = result.get('request')
            print(f"✅ CAPTCHA submitted for solving (ID: {captcha_id})")
            
            # Wait for solution
            solution = self._wait_for_solution(captcha_id)
            
            if solution:
                print("✅ CAPTCHA solved automatically!")
                return solution
            else:
                print("❌ CAPTCHA solving failed or timed out")
                return ""
                
        except Exception as e:
            print(f"❌ CAPTCHA automation error: {str(e)}")
            return ""

    def _wait_for_solution(self, captcha_id: str) -> str:
        """Wait for CAPTCHA solution from service"""
        start_time = time.time()
        
        while time.time() - start_time < self.config.captcha_solve_timeout:
            try:
                # Check for solution
                check_data = {
                    'key': self.config.captcha_service_api_key,
                    'action': 'get',
                    'id': captcha_id,
                    'json': 1
                }
                
                response = self.session.get(self.config.captcha_result_url, params=check_data)
                result = response.json()
                
                if result.get('status') == 1:
                    return result.get('request', '')
                elif result.get('request') == 'CAPCHA_NOT_READY':
                    time.sleep(5)  # Wait 5 seconds before checking again
                    continue
                else:
                    print(f"❌ CAPTCHA solving failed: {result.get('error_text', 'Unknown error')}")
                    return ""
                    
            except Exception as e:
                print(f"❌ Error checking CAPTCHA solution: {str(e)}")
                time.sleep(5)
        
        print("❌ CAPTCHA solving timeout")
        return ""

    def inject_recaptcha_solution(self, browser: webdriver.Chrome, solution: str):
        """Inject the CAPTCHA solution into the page and handle form submission"""
        try:
            # First, try to find and click the reCAPTCHA checkbox
            self._click_recaptcha_checkbox(browser)
            
            # Inject the solution into the reCAPTCHA response textarea
            script = f"""
            var textarea = document.querySelector('textarea[name="g-recaptcha-response"]');
            if (textarea) {{
                textarea.value = "{solution}";
                textarea.style.display = 'block';
                
                // Trigger reCAPTCHA callback
                if (typeof ___grecaptcha_cfg !== 'undefined') {{
                    var widgets = Object.keys(___grecaptcha_cfg.clients);
                    if (widgets.length > 0) {{
                        var widgetId = widgets[0];
                        grecaptcha.getResponse(widgetId);
                    }}
                }}
                
                // Trigger any form submission events
                var form = textarea.closest('form');
                if (form) {{
                    var submitEvent = new Event('submit', {{ bubbles: true }});
                    form.dispatchEvent(submitEvent);
                }}
            }}
            """
            browser.execute_script(script)
            
            # Wait a moment for the solution to be processed
            time.sleep(2)
            
            # Try TAS-specific form submission first
            if self._handle_tas_specific_captcha(browser):
                return True
            
            # Fall back to generic form submission
            if self._submit_form_if_needed(browser):
                return True
            
            return True
            
        except Exception as e:
            print(f"❌ Error injecting CAPTCHA solution: {str(e)}")
            return False

    def _click_recaptcha_checkbox(self, browser: webdriver.Chrome):
        """Try to click the reCAPTCHA checkbox"""
        try:
            # Method 1: Click the iframe directly
            iframes = browser.find_elements("xpath", '//iframe[contains(@src, "recaptcha")]')
            for iframe in iframes:
                if iframe.is_displayed():
                    try:
                        # Switch to iframe
                        browser.switch_to.frame(iframe)
                        
                        # Look for the checkbox
                        checkbox = browser.find_element("xpath", '//div[@class="recaptcha-checkbox-border"]')
                        if checkbox:
                            checkbox.click()
                            print("✅ Clicked reCAPTCHA checkbox")
                            browser.switch_to.default_content()
                            return True
                    except:
                        pass
                    finally:
                        # Switch back to main content
                        browser.switch_to.default_content()
            
            # Method 2: Use JavaScript to click the checkbox
            script = """
            var iframes = document.querySelectorAll('iframe[src*="recaptcha"]');
            for (var i = 0; i < iframes.length; i++) {
                try {
                    var iframe = iframes[i];
                    var iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
                    var checkbox = iframeDoc.querySelector('.recaptcha-checkbox-border');
                    if (checkbox) {
                        checkbox.click();
                        return true;
                    }
                } catch (e) {
                    // Continue to next iframe
                }
            }
            return false;
            """
            result = browser.execute_script(script)
            if result:
                print("✅ Clicked reCAPTCHA checkbox via JavaScript")
                return True
                
            return False
                
        except Exception as e:
            print(f"⚠️  Could not click CAPTCHA checkbox: {str(e)}")
            return False

    def _submit_form_if_needed(self, browser: webdriver.Chrome):
        """Try to submit the form after CAPTCHA is solved"""
        try:
            # Look for submit buttons - TAS specific
            submit_buttons = browser.find_elements("xpath", 
                '//input[@type="submit"] | //button[@type="submit"] | //button[contains(text(), "Submit")] | //button[contains(text(), "Search")] | //input[@value="Search"]')
            
            for button in submit_buttons:
                if button.is_displayed() and button.is_enabled():
                    try:
                        # Scroll to button
                        browser.execute_script("arguments[0].scrollIntoView(true);", button)
                        time.sleep(0.5)
                        
                        # Click the button
                        button.click()
                        print("✅ Submitted form after CAPTCHA solving")
                        return True
                    except Exception as e:
                        print(f"⚠️  Could not click submit button: {str(e)}")
                        continue
            
            # If no submit button found, try JavaScript form submission
            script = """
            var forms = document.querySelectorAll('form');
            for (var i = 0; i < forms.length; i++) {
                var form = forms[i];
                if (form.querySelector('textarea[name="g-recaptcha-response"]')) {
                    form.submit();
                    return true;
                }
            }
            return false;
            """
            result = browser.execute_script(script)
            if result:
                print("✅ Submitted form via JavaScript")
                return True
                
        except Exception as e:
            print(f"⚠️  Could not submit form: {str(e)}")
        
        return False

    def _handle_tas_specific_captcha(self, browser: webdriver.Chrome):
        """Handle CAPTCHA specifically for TAS website search form"""
        try:
            # Look for the TAS search button specifically
            tas_search_button = browser.find_element("xpath", 
                '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_btnFilterMainGrid"]')
            
            if tas_search_button and tas_search_button.is_displayed():
                try:
                    # Scroll to the button
                    browser.execute_script("arguments[0].scrollIntoView(true);", tas_search_button)
                    time.sleep(1)
                    
                    # Click the search button
                    tas_search_button.click()
                    print("✅ Clicked TAS search button after CAPTCHA solving")
                    return True
                except Exception as e:
                    print(f"⚠️  Could not click TAS search button: {str(e)}")
                    
        except Exception as e:
            print(f"⚠️  TAS-specific CAPTCHA handling failed: {str(e)}")
        
        return False

    def get_recaptcha_site_key(self, browser: webdriver.Chrome) -> str:
        """Extract reCAPTCHA site key from the page"""
        try:
            # Look for reCAPTCHA site key in various locations
            site_key = browser.execute_script("""
                // Check for data-sitekey attribute
                var recaptcha = document.querySelector('.g-recaptcha');
                if (recaptcha) {
                    return recaptcha.getAttribute('data-sitekey');
                }
                
                // Check for reCAPTCHA script src
                var scripts = document.querySelectorAll('script[src*="recaptcha"]');
                for (var i = 0; i < scripts.length; i++) {
                    var src = scripts[i].src;
                    var match = src.match(/k=([^&]+)/);
                    if (match) {
                        return match[1];
                    }
                }
                
                return '';
            """)
            
            return site_key if site_key else ""
            
        except Exception as e:
            print(f"❌ Error extracting CAPTCHA site key: {str(e)}")
            return ""


# ================================================================================================
# BROWSER MANAGER
# ================================================================================================

class OptimizedBrowserManager:
    """Manages Chrome browser for reliable scraping with CAPTCHA support"""

    def __init__(self, config: SimpleConfig):
        self.config = config
        self.browser = None
        self.wait_helper = None
        self.setup_complete = False
        self.lock = threading.Lock()
        self.captcha_handled = False
        self.captcha_automation = CaptchaAutomation(config) if config.enable_captcha_automation else None

    def setup_browsers(self):
        """Create Chrome browser for processing"""
        if self.setup_complete:
            return

        with self.lock:
            if self.setup_complete:
                return

            print(f"Setting up Chrome browser for TAS license checking...")
            self.browser = self._create_chrome_browser()
            self.wait_helper = WebDriverWait(self.browser, self.config.request_timeout_seconds)

            # Initialize the search page once
            self._initialize_search_page()

            self.setup_complete = True
            print(f"Browser ready for TAS license processing!")

    def _create_chrome_browser(self) -> webdriver.Chrome:
        """Build and start a Chrome driver with this tool's launch options.

        Chrome runs headless unless ``config.show_browser_for_captcha`` is
        set. After start-up a script redefines ``navigator.webdriver`` to
        return ``undefined`` in the driver's current page.

        Returns:
            The started Chrome driver.
        """
        opts = Options()

        # A visible window is only needed when someone may have to solve a CAPTCHA
        if not self.config.show_browser_for_captcha:
            opts.add_argument("--headless")

        # Launch switches, applied in this order
        launch_flags = (
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--disable-extensions",
            "--disable-logging",
            "--window-size=1280,720",
            "--disable-blink-features=AutomationControlled",
        )
        for flag in launch_flags:
            opts.add_argument(flag)

        opts.add_experimental_option("excludeSwitches", ["enable-automation"])
        opts.add_experimental_option('useAutomationExtension', False)

        chrome = webdriver.Chrome(options=opts)
        chrome.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        return chrome

    def _initialize_search_page(self):
        """Initialize the TAS search page with required settings"""
        try:
            self.browser.get(self.config.website_url)

            # Set radio button options for Security licenses
            radio_option_3 = self.wait_helper.until(EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_rblLicenceTypeOptions_3"]')
            ))
            radio_option_3.click()

            radio_category_1 = self.wait_helper.until(EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_rblLicenceTypeCategoryOptions_1"]')
            ))
            radio_category_1.click()

            print("✓ TAS search page initialized successfully")

        except Exception as e:
            print(f"ERROR: Failed to initialize TAS search page: {e}")
            raise

    def get_browser(self):
        """Get the browser instance (single browser for TAS due to CAPTCHA)"""
        if not self.setup_complete:
            self.setup_browsers()
        return self.browser, self.wait_helper

    def return_browser(self, browser):
        """Return browser - no-op for single browser system"""
        pass

    def handle_captcha_if_needed(self):
        """Detect a visible reCAPTCHA and get it solved, automatically or by hand.

        Waits up to 3 seconds for a displayed reCAPTCHA iframe. If one shows
        up, the automatic path is tried first; when that does not clear the
        CAPTCHA the user is asked to solve it in the browser window and the
        method blocks until the page looks solved or the manual timeout
        expires.

        A ``TimeoutException`` raised anywhere in this flow (normally by the
        initial wait, meaning no CAPTCHA is present) is swallowed.
        """
        try:
            captcha_present = WebDriverWait(self.browser, 3).until(
                lambda d: any(iframe.is_displayed()
                              for iframe in d.find_elements(By.XPATH, '//iframe[contains(@src, "recaptcha")]'))
            )
            if not captcha_present:
                return

            print("\n" + "=" * 60)
            print("⚠️  CAPTCHA DETECTED!")
            print("=" * 60)

            if self._try_automatic_captcha_solve():
                return

            # Fall back to manual solving only if automatic failed
            print("A CAPTCHA has appeared on the TAS website.")
            print("Please solve the CAPTCHA in the browser window.")
            print("The script will automatically continue when CAPTCHA is solved.")
            print("=" * 60)

            # Only announce success when the wait actually observed a solved
            # state; on timeout the wait prints its own warning.
            if self._wait_for_captcha_solution():
                print("✓ CAPTCHA solved, continuing processing...")

        except TimeoutException:
            # No CAPTCHA found, continue normally
            pass

    def _try_automatic_captcha_solve(self) -> bool:
        """Run the automatic CAPTCHA path; return True only if it cleared the CAPTCHA.

        Each failed step prints why the flow is falling back to manual
        solving and returns False.
        """
        automation = self.captcha_automation
        if not (automation and self.config.captcha_service_api_key):
            print("❌ Automatic CAPTCHA solving not configured, using manual...")
            return False

        print("🤖 Attempting automatic CAPTCHA solving...")

        # Capture the URL before interacting with the page
        page_url = self.browser.current_url

        site_key = automation.get_recaptcha_site_key(self.browser)
        if not site_key:
            print("❌ Could not extract CAPTCHA site key, trying manual...")
            return False
        print(f"🔑 Found reCAPTCHA site key: {site_key[:20]}...")

        # A plain checkbox click is sometimes enough on its own
        print("🔍 Attempting to click CAPTCHA checkbox...")
        if not automation._click_recaptcha_checkbox(self.browser):
            print("❌ Could not click CAPTCHA checkbox, trying manual...")
            return False
        print("✅ CAPTCHA checkbox clicked automatically")
        time.sleep(3)  # Wait longer for CAPTCHA to process

        if not self._is_captcha_still_visible():
            print("✅ CAPTCHA solved by clicking checkbox, continuing processing...")
            return True
        print("⚠️  CAPTCHA still visible after clicking, attempting to solve...")

        solution = automation.solve_recaptcha(site_key, page_url)
        if not solution:
            print("❌ Automatic CAPTCHA solving failed, trying manual...")
            return False
        print("✅ CAPTCHA solution received from service")

        if not automation.inject_recaptcha_solution(self.browser, solution):
            print("❌ Failed to inject CAPTCHA solution, trying manual...")
            return False
        print("✅ CAPTCHA solution injected successfully!")

        # Wait a moment for the solution to be processed
        time.sleep(3)

        if self._is_captcha_still_visible():
            print("⚠️  CAPTCHA still visible after automatic solving, trying manual...")
            return False

        print("✅ CAPTCHA solved automatically, continuing processing...")
        return True

    def _wait_for_captcha_solution(self):
        """Wait for CAPTCHA to be solved by monitoring page state"""
        import time
        start_time = time.time()
        
        while time.time() - start_time < self.config.captcha_timeout_seconds:
            try:
                # Check if CAPTCHA iframe is still present
                captcha_iframes = self.browser.find_elements(By.XPATH, '//iframe[contains(@src, "recaptcha")]')
                visible_captchas = [iframe for iframe in captcha_iframes if iframe.is_displayed()]
                
                if not visible_captchas:
                    # CAPTCHA iframe is no longer visible - likely solved
                    return True
                
                # Check for reCAPTCHA success indicators
                try:
                    # Look for reCAPTCHA success response
                    recaptcha_response = self.browser.execute_script(
                        "return document.querySelector('textarea[name=\"g-recaptcha-response\"]')?.value;"
                    )
                    if recaptcha_response and len(recaptcha_response) > 0:
                        return True
                except:
                    pass
                
                # Check if page has changed (indicating successful submission)
                try:
                    # Look for success indicators like form submission or page redirect
                    current_url = self.browser.current_url
                    if "success" in current_url.lower() or "result" in current_url.lower():
                        return True
                except:
                    pass
                
                # Wait before next check
                time.sleep(self.config.captcha_check_interval)
                
                # Show progress indicator
                elapsed = int(time.time() - start_time)
                print(f"\r⏳ Waiting for CAPTCHA solution... ({elapsed}s elapsed)", end="", flush=True)
                
            except Exception as e:
                # If we can't check CAPTCHA status, assume it might be solved
                print(f"\n⚠️  CAPTCHA check error: {str(e)[:50]}...")
                time.sleep(self.config.captcha_check_interval)
        
        # Timeout reached
        print(f"\n⚠️  CAPTCHA timeout reached ({self.config.captcha_timeout_seconds}s)")
        print("Continuing anyway - CAPTCHA may have been solved...")
        return False

    def check_for_captcha_during_processing(self):
        """Check for CAPTCHA during processing and handle if found"""
        try:
            # Quick check for CAPTCHA without waiting
            captcha_iframes = self.browser.find_elements(By.XPATH, '//iframe[contains(@src, "recaptcha")]')
            visible_captchas = [iframe for iframe in captcha_iframes if iframe.is_displayed()]
            
            if visible_captchas:
                print("\n⚠️  CAPTCHA detected during processing!")
                self._wait_for_captcha_solution()
                return True
                
        except Exception:
            # Ignore errors during quick CAPTCHA check
            pass
        
        return False

    def cleanup_all_browsers(self):
        """Close browser when program ends"""
        if self.browser:
            try:
                self.browser.quit()
            except:
                print(f"WARNING: Browser failed to close properly")

    def _is_captcha_still_visible(self) -> bool:
        """Check if CAPTCHA is still visible on the page"""
        try:
            captcha_iframes = self.browser.find_elements(By.XPATH, '//iframe[contains(@src, "recaptcha")]')
            visible_captchas = [iframe for iframe in captcha_iframes if iframe.is_displayed()]
            return len(visible_captchas) > 0
        except:
            return False


# ================================================================================================
# LICENSE SEARCHER
# ================================================================================================

class SeleniumLicenseSearcher:
    """Searches the TAS government licence register using Selenium.

    Combines the web search itself with name/expiry comparison against the
    CSV record, retries failed searches, and caches successful lookups by
    licence number.
    """

    # Every <tr> (header row included) of the search-results grid
    _RESULT_ROWS_XPATH = (
        '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_Main_Grid"]/tbody/tr'
    )

    def __init__(self, config: SimpleConfig, browser_manager: OptimizedBrowserManager):
        """Store collaborators and create the (initially empty) caches."""
        self.config = config
        self.browser_manager = browser_manager
        self.search_cache = {}  # licence number -> successful SearchResult
        self.cache_lock = threading.Lock()  # guards search_cache
        self.name_cache = {}  # raw name -> normalized name

    def normalize_employee_name(self, name: str) -> str:
        """Convert a name to upper-case "LAST, FIRST ..." form for comparison.

        The name is cleaned with ``_clean_name_for_matching`` first. A single
        remaining word is returned upper-cased as-is. Results are memoized in
        ``self.name_cache`` keyed by the raw input.

        Returns:
            The normalized name, or "" for an empty/falsy input.
        """
        if not name:
            return ""

        # Check if we already normalized this name
        if name in self.name_cache:
            return self.name_cache[name]

        cleaned_name = self._clean_name_for_matching(name)

        # Convert "First Last" to "LAST, FIRST" format (last word becomes the surname)
        parts = cleaned_name.strip().upper().split()
        if len(parts) >= 2:
            normalized = f"{parts[-1]}, {' '.join(parts[:-1])}"
        else:
            normalized = cleaned_name.strip().upper()

        self.name_cache[name] = normalized
        return normalized

    def _clean_name_for_matching(self, name: str) -> str:
        """Strip noise from a name so that two spellings can be compared.

        Steps: collapse whitespace, upper-case, drop single-letter words
        (when the name has more than one word), drop tokens of at most three
        characters that end in "." (e.g. "J.", "JR."), and split hyphenated
        words into their parts.

        Returns:
            The cleaned, upper-cased words joined by single spaces. If every
            word was filtered out, the whitespace-collapsed input is returned
            in its original case. "" for an empty/falsy input.
        """
        if not name:
            return ""

        # Convert to string, strip, and collapse internal runs of whitespace
        cleaned = ' '.join(str(name).split())
        words = cleaned.upper().split()

        processed_words = []
        for word in words:
            # Skip single letters (middle initials) unless it is the only word
            if len(word) == 1 and len(words) > 1:
                continue
            # Skip short dotted abbreviations such as "J." or "JR."
            if word.endswith('.') and len(word) <= 3:
                continue
            # Hyphenated names contribute each part separately; the filter
            # drops the empty strings a leading/trailing hyphen would produce
            processed_words.extend(part for part in word.split('-') if part.strip())

        return ' '.join(processed_words) if processed_words else cleaned

    def check_name_similarity(self, csv_name: str, website_name: str) -> str:
        """Compare the CSV name with the name found on the website.

        Returns:
            "Not Found" when ``website_name`` is the literal "Not Found";
            "No Match" when either name is empty; "Yes" on an exact match
            after cleaning/normalizing or when the best fuzzy score reaches
            ``config.name_similarity_required``; otherwise "No (NN.N%)" with
            the best score.
        """
        if website_name == "Not Found":
            return "Not Found"

        if not csv_name or not website_name:
            return "No Match"

        # Clean both names before comparison
        cleaned_csv = self._clean_name_for_matching(csv_name)
        cleaned_website = self._clean_name_for_matching(website_name)

        # Normalize both names to standard format
        norm_csv = self.normalize_employee_name(cleaned_csv)
        norm_website = self.normalize_employee_name(cleaned_website)

        # Quick exact match check on normalized names
        if norm_csv == norm_website:
            return "Yes"

        # Try direct comparison of cleaned names (without format conversion)
        if cleaned_csv.upper() == cleaned_website.upper():
            return "Yes"

        # Several scorers are tried because word order and partial names vary
        similarity_methods = [
            fuzz.token_set_ratio(norm_website, norm_csv),
            fuzz.token_sort_ratio(cleaned_website.upper(), cleaned_csv.upper()),
            fuzz.partial_ratio(cleaned_website.upper(), cleaned_csv.upper()),
            fuzz.ratio(cleaned_website.upper(), cleaned_csv.upper())
        ]

        # Use the highest similarity score
        similarity = max(similarity_methods)

        if similarity >= self.config.name_similarity_required:
            return "Yes"
        return f"No ({similarity:.1f}%)"

    def calculate_expiry_status(self, csv_expiry: str, web_expiry: str) -> str:
        """Classify a licence from its website expiry and the CSV expiry.

        Returns:
            "No License" when either date is missing or the website value is
            "Not Found"; "Active"/"Expired" when the two dates agree;
            "Active - Date Wrong"/"Expired - Date Wrong" when they differ;
            "Date Error: ..." (first 15 characters of the error) when a date
            cannot be parsed or compared.
        """
        if not csv_expiry or not web_expiry or web_expiry == "Not Found":
            return "No License"

        try:
            # Parse dates using dateutil parser for flexibility (day-first)
            csv_date = parser.parse(csv_expiry, dayfirst=True)
            web_date = parser.parse(web_expiry, dayfirst=True)

            # NOTE(review): this compares against the current date *and time*,
            # so a date-only expiry equal to today presumably already counts
            # as expired - confirm that is the intended rule.
            is_expired = web_date < datetime.today()
            dates_match = (csv_date.date() == web_date.date())

            state = "Expired" if is_expired else "Active"
            return state if dates_match else f"{state} - Date Wrong"

        except Exception as e:
            return f"Date Error: {str(e)[:15]}"

    def search_single_license(self, employee: EmployeeRecord) -> SearchResult:
        """Search for one employee's licence, using the cache when possible.

        On a cache hit a *copy* of the cached result is returned with the
        name match and expiry status recomputed for this employee, so the
        shared cache entry is never mutated and per-employee fields are never
        inherited from whoever was looked up first. Only successful results
        are cached.
        """
        # The cache is read under the same lock that guards writes
        with self.cache_lock:
            cached_result = self.search_cache.get(employee.license_number)

        if cached_result is not None:
            import copy  # local import: not imported at file level

            result = copy.copy(cached_result)
            result.name_matches = self.check_name_similarity(
                employee.employee_name, result.found_name
            )
            # Expiry status depends on this employee's CSV date as well
            result.expiry_status = self.calculate_expiry_status(
                employee.csv_expiry, result.license_expiry
            )
            result.was_cached = True
            return result

        # Do the actual search with retries
        result = self._search_with_retries(employee)

        # Cache successful results
        if result.is_successful():
            with self.cache_lock:
                self.search_cache[employee.license_number] = result

        return result

    def _search_with_retries(self, employee: EmployeeRecord) -> SearchResult:
        """Run the web search up to ``config.max_retry_attempts`` times.

        Returns:
            The first successful result, enriched with name match, expiry
            status, retry count and search time. A default ``SearchResult()``
            for a blank licence number. Otherwise a ``SearchResult`` whose
            ``error_message`` states the last failure reason.
        """
        if not employee.license_number.strip():
            return SearchResult()

        last_error = ""
        max_attempts = self.config.max_retry_attempts

        for attempt in range(max_attempts):
            try:
                browser, wait_helper = self.browser_manager.get_browser()

                try:
                    start_time = time.time()
                    result = self._do_selenium_search(browser, wait_helper, employee)
                    search_time = time.time() - start_time

                    if result.is_successful():
                        result.name_matches = self.check_name_similarity(
                            employee.employee_name, result.found_name
                        )
                        result.expiry_status = self.calculate_expiry_status(
                            employee.csv_expiry, result.license_expiry
                        )
                        result.how_many_retries = attempt
                        result.search_time_seconds = search_time
                        return result

                    # No exception but nothing usable: remember why, so the
                    # final message is not "Failed after N attempts: " + ""
                    last_error = getattr(result, "error_message", "") or "No matching licence found"

                finally:
                    self.browser_manager.return_browser(browser)

            except Exception as e:
                last_error = str(e)
                if attempt < max_attempts - 1:
                    time.sleep(0.5 * (attempt + 1))  # Wait longer each retry

        # All attempts failed
        return SearchResult(
            error_message=f"Failed after {max_attempts} attempts: {last_error}",
            how_many_retries=max_attempts
        )

    def _do_selenium_search(self, browser: webdriver.Chrome, wait_helper: WebDriverWait,
                            employee: EmployeeRecord) -> SearchResult:
        """Drive the search form for one licence number and parse the result grid.

        The licence number is reduced to its digits before being typed into
        the form. CAPTCHA checks are interleaved between the steps.

        Raises:
            Exception: any Selenium or parsing failure, re-raised with a
                descriptive message and the original error chained as cause.
        """
        try:
            # Handle CAPTCHA if needed before searching
            self.browser_manager.handle_captcha_if_needed()

            # Navigate to search page (already initialized)
            browser.get(self.config.website_url)

            # Check for CAPTCHA after navigation
            self.browser_manager.check_for_captcha_during_processing()

            # Set radio options again (in case page refreshed)
            wait_helper.until(EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_rblLicenceTypeOptions_3"]')
            )).click()

            wait_helper.until(EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_rblLicenceTypeCategoryOptions_1"]')
            )).click()

            # Check for CAPTCHA after setting options
            self.browser_manager.check_for_captcha_during_processing()

            # Enter search term (clean license number - digits only)
            search_input = wait_helper.until(EC.presence_of_element_located(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_txtLicenceNumberSearch"]')
            ))

            # Clear the input and enter clean license number
            browser.execute_script("arguments[0].value = '';", search_input)
            clean_licence = ''.join(filter(str.isdigit, employee.license_number))
            search_input.send_keys(clean_licence)

            search_button = wait_helper.until(EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_btnFilterMainGrid"]')
            ))

            # Take the baseline row count BEFORE clicking. Taking it after the
            # click races with the page update and can waste the full wait.
            rows_before_search = len(browser.find_elements(By.XPATH, self._RESULT_ROWS_XPATH))

            # Click search button
            browser.execute_script("arguments[0].click();", search_button)

            # Check for CAPTCHA after clicking search
            self.browser_manager.check_for_captcha_during_processing()

            # Wait for results to load
            self._wait_for_results(browser, wait_helper, initial_rows=rows_before_search)

            # Check for CAPTCHA after results load
            self.browser_manager.check_for_captcha_during_processing()

            # Parse results
            return self._parse_search_results(browser, employee.license_number)

        except TimeoutException as e:
            raise Exception(f"Website timeout for license: {employee.license_number}") from e
        except NoSuchElementException as e:
            raise Exception(f"Page element not found: {str(e)}") from e
        except WebDriverException as e:
            raise Exception(f"WebDriver error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Selenium search error: {str(e)}") from e

    def _wait_for_results(self, browser: webdriver.Chrome, wait_helper: WebDriverWait,
                          initial_rows: Optional[int] = None):
        """Wait (up to 10 seconds) for the result grid's row count to change.

        Args:
            browser: Driver showing the search page.
            wait_helper: Unused here; kept for interface compatibility.
            initial_rows: Row count captured before the search was triggered.
                When omitted, the current row count is used as the baseline
                (the original behaviour).

        A timeout is not an error: the caller parses whatever is on the page.
        """
        try:
            if initial_rows is None:
                initial_rows = len(browser.find_elements(By.XPATH, self._RESULT_ROWS_XPATH))

            def table_updated(driver):
                current_rows = len(driver.find_elements(By.XPATH, self._RESULT_ROWS_XPATH))
                return current_rows != initial_rows

            WebDriverWait(browser, 10).until(table_updated)

        except TimeoutException:
            # Continue anyway, results might be there
            pass

    def _parse_search_results(self, browser: webdriver.Chrome, licence_number: str) -> SearchResult:
        """Find the grid row for ``licence_number`` and extract its fields.

        A row matches when its licence cell contains the licence number as
        given, or its digits-only form (the form that was typed into the
        search box). Rows that cannot be read are skipped.

        Returns:
            A ``SearchResult`` with name, licence type and expiry (each
            "Not Found" when blank), or a default ``SearchResult()`` when no
            row matches.

        Raises:
            Exception: if the grid itself cannot be read.
        """
        try:
            rows = browser.find_elements(By.XPATH, self._RESULT_ROWS_XPATH)

            if len(rows) <= 1:  # Only header row
                return SearchResult()

            # The search is submitted with digits only, so accept that form too
            digits_only = ''.join(filter(str.isdigit, licence_number))

            for row in rows[1:]:  # Skip header
                try:
                    cells = row.find_elements(By.TAG_NAME, "td")
                    if len(cells) < 4:
                        continue

                    licence_cell = cells[1].text.strip()  # License number is in second column
                    matches_raw = licence_number in licence_cell
                    # Guard on non-empty: "" is a substring of every string
                    matches_digits = bool(digits_only) and digits_only in licence_cell
                    if not (matches_raw or matches_digits):
                        continue

                    name = cells[0].text.strip()  # Name in first column
                    expiry = cells[2].text.strip()  # Expiry in third column

                    # License type might be in a nested div
                    try:
                        licence_type_div = cells[3].find_element(By.TAG_NAME, "div")
                        licence_type = licence_type_div.text.strip()
                    except Exception:
                        licence_type = cells[3].text.strip()

                    return SearchResult(
                        found_name=name if name else "Not Found",
                        license_type=licence_type if licence_type else "Not Found",
                        license_expiry=expiry if expiry else "Not Found"
                    )

                except Exception:
                    # Skip this row and continue
                    continue

            # No matching license found
            return SearchResult()

        except Exception as e:
            raise Exception(f"Error parsing search results: {str(e)}") from e


# ================================================================================================
# CSV FILE HANDLER - Reads employee data from CSV files
# ================================================================================================

class SimpleCSVHandler:
    """Loads employee records from a CSV file."""

    @staticmethod
    def load_employee_data(file_path: str, config: SimpleConfig) -> List[EmployeeRecord]:
        """Read the CSV at ``file_path`` and return de-duplicated employee records.

        Every cell is read as a string. Header names are stripped and any
        byte-order-mark character is removed before the required columns are
        checked. Rows are turned into ``EmployeeRecord`` objects, cleaned, and
        kept only if ``has_required_data()`` is true. In test mode loading
        stops once ``config.test_record_limit`` records were kept. Records
        repeating an already-seen licence number are dropped afterwards.

        Raises:
            ValueError: if required columns are missing or no valid record
                remains. Any error is printed and then re-raised.
        """
        expected_headers = ['Payroll Number', 'Employee Name', 'License Number', 'Expiry/Update  Date']

        try:
            # Quiet load: nothing is printed unless something goes wrong
            frame = pd.read_csv(file_path, dtype=str, na_filter=False)
            frame.columns = frame.columns.str.strip().str.replace('\ufeff', '')

            # Verify the header row before touching any data
            absent = [header for header in expected_headers if header not in frame.columns]

            if absent:
                raise ValueError(f"""
ERROR: Missing required columns: {', '.join(absent)}

Your CSV file MUST have these exact column names:
- Payroll Number
- Employee Name
- License Number
- Expiry/Update  Date

Current columns in your file: {list(frame.columns)}
                """)

            # Build one record per usable row
            loaded = []
            for index, row in frame.iterrows():
                record = EmployeeRecord(
                    payroll_number=row['Payroll Number'],
                    employee_name=row['Employee Name'],
                    license_number=row['License Number'],
                    csv_expiry=row['Expiry/Update  Date'],
                    csv_row_number=index + 2  # +1 for the header row, +1 for 1-based numbering
                )
                record.clean_data()

                if not record.has_required_data():
                    continue

                loaded.append(record)

                if config.test_mode and len(loaded) >= config.test_record_limit:
                    print(f"TEST MODE: Processing only first {config.test_record_limit} records")
                    break

            if not loaded:
                raise ValueError("ERROR: No valid employee records found in CSV file")

            # Keep only the first record seen for each licence number
            distinct = []
            known_numbers = set()
            for record in loaded:
                if record.license_number in known_numbers:
                    continue
                known_numbers.add(record.license_number)
                distinct.append(record)

            # Mention duplicates only when some were actually dropped
            dropped = len(loaded) - len(distinct)
            if dropped > 0:
                print(f"NOTE: Removed {dropped} duplicate license numbers")

            return distinct

        except Exception as e:
            print(f"ERROR reading CSV file: {str(e)}")
            raise


# ================================================================================================
# PROGRESS TRACKER
# ================================================================================================

class SimpleProgressTracker:
    """Shows a single-line progress bar and keeps processing statistics."""

    def __init__(self, total_employees: int):
        """Start the clock and zero all counters for ``total_employees`` records."""
        self.total_employees = total_employees
        self.completed = 0
        self.successful = 0
        self.failed = 0
        self.cached = 0
        self.start_time = time.time()
        self.last_update = 0  # time of the last redraw; 0 forces the first one
        self.lock = threading.Lock()  # guards the counters

    def show_header(self):
        """Print the banner and column headings shown above the progress line."""
        print(f"\n{'=' * 85}")
        print(f"TAS LICENSE CHECKER - PROCESSING {self.total_employees} EMPLOYEES")
        print(f"{'=' * 85}")
        print(f"{'Progress':<15} {'Employee':<25} {'License':<15} {'Status':<12} {'Speed'}")
        print(f"{'-' * 85}")

    def update_progress(self, employee: EmployeeRecord, result: SearchResult):
        """Count one finished record and redraw the progress line.

        The redraw is throttled to once every 0.5 seconds, except for the
        final record, which is always drawn so the bar can reach 100%.
        """
        with self.lock:
            self.completed += 1

            if result.was_cached:
                self.cached += 1
            elif result.is_successful():
                self.successful += 1
            else:
                self.failed += 1

        # Throttle redraws, but never skip the last one
        current_time = time.time()
        is_final_record = self.completed >= self.total_employees
        if not is_final_record and current_time - self.last_update < 0.5:
            return
        self.last_update = current_time

        # Guard against a zero total and clamp so the bar never overflows
        percent = min((self.completed / max(self.total_employees, 1)) * 100, 100.0)
        progress_bar = self._make_progress_bar(percent)

        # Calculate speed
        elapsed = max(current_time - self.start_time, 0.01)
        speed = (self.completed / elapsed) * 60  # Records per minute

        # Truncate long values so the columns stay aligned
        display_name = employee.employee_name[:23] + ".." if len(
            employee.employee_name) > 25 else employee.employee_name
        display_license = employee.license_number[:13] + ".." if len(
            employee.license_number) > 15 else employee.license_number

        # Determine status
        if result.was_cached:
            status = "Cached"
        elif result.is_successful():
            status = "Success"
        else:
            status = "Failed"

        # "\r" with end="" rewrites the same console line
        print(f"\r{progress_bar} {display_name:<25} {display_license:<15} {status:<12} {speed:.0f}/min", end="",
              flush=True)

    def _make_progress_bar(self, percent: float, width: int = 20) -> str:
        """Return an ASCII bar such as ``[XXXXX---------------]  25.0%``."""
        filled = int(width * percent / 100)
        bar = 'X' * filled + '-' * (width - filled)
        return f"[{bar}] {percent:5.1f}%"

    def show_final_summary(self, elapsed_time: float, excel_file_path: str):
        """Print totals, success rate, duration, speed and the report path."""
        # Same floor as update_progress: avoids ZeroDivisionError when
        # elapsed_time is 0
        safe_elapsed = max(elapsed_time, 0.01)

        print(f"\n\n{'=' * 85}")
        print("PROCESSING COMPLETED!")
        print(f"{'=' * 85}")
        print(f"Total employees processed: {self.completed}")
        print(f"Successful searches: {self.successful}")
        print(f"Failed searches: {self.failed}")
        print(f"Cached results: {self.cached}")
        print(f"Success rate: {(self.successful / max(self.completed, 1)) * 100:.1f}%")
        print(f"Total time: {self._format_time(elapsed_time)}")
        print(f"Processing speed: {(self.completed / safe_elapsed) * 60:.1f} employees/minute")
        print(f"Excel report saved: {excel_file_path}")
        print(f"{'=' * 85}")

    def _format_time(self, seconds: float) -> str:
        """Format a duration as "N seconds", "Mm Ss" or "Hh Mm"."""
        if seconds < 60:
            return f"{seconds:.0f} seconds"
        if seconds < 3600:
            minutes = int(seconds // 60)
            secs = int(seconds % 60)
            return f"{minutes}m {secs}s"
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        return f"{hours}h {minutes}m"


# ================================================================================================
# EXCEL REPORT GENERATOR
# ================================================================================================

class SimpleExcelGenerator:
    """Creates the Excel report listing records that need attention."""

    @staticmethod
    def _is_exception(result: Dict) -> bool:
        """Return True when a result row describes a problem worth reporting.

        A row is an exception when the name did not match (including the
        "No Match" and "Error" outcomes), when the licence name or type is
        missing or errored, or when the expiry status is anything other than
        a clean active licence.
        """
        name_match = str(result.get('Name Match', ''))
        licence_name = str(result.get('Licence Name', ''))
        licence_type = str(result.get('Licence Type', ''))
        expiry_status = str(result.get('Expiry Status', ''))

        return (
            # "No Match" is what check_name_similarity returns for blank names
            name_match in ("Not Found", "No License", "No Match", "Error") or
            name_match.startswith("No (") or  # Partial matches like "No (75.2%)"
            licence_name in ("Not Found", "Error") or
            licence_type in ("Not Found", "Error") or
            expiry_status in ("No License", "Unknown", "Error") or
            expiry_status.startswith("Date Error:") or
            "Date Wrong" in expiry_status or
            "Expired" in expiry_status
        )

    @staticmethod
    def create_excel_report(results: List[Dict], original_file_path: str) -> str:
        """Create Excel report - EXCEPTIONS ONLY (problems that need attention).

        The workbook is written next to ``original_file_path`` with a
        timestamped name. When no row is an exception, a single summary row
        is written instead and the file name/sheet name reflect that.

        Args:
            results: One dict per processed employee (report column -> value).
            original_file_path: Path of the input CSV; supplies the output
                directory and file-name stem.

        Returns:
            The path of the written workbook, as a string.

        Raises:
            Exception: any failure is printed and re-raised.
        """
        try:
            original_path = Path(original_file_path)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            excel_path = original_path.parent / f"{original_path.stem}_Exceptions_{timestamp}.xlsx"

            # Filter results to only include exceptions (problems)
            exception_results = [
                result for result in results
                if SimpleExcelGenerator._is_exception(result)
            ]
            has_exceptions = bool(exception_results)

            # If no exceptions found, write a one-row summary instead
            if not has_exceptions:
                summary_result = {
                    'Payroll Number': 'N/A',
                    'Licence Number': 'N/A',
                    'Rolecall Name': 'ALL RECORDS PROCESSED SUCCESSFULLY',
                    'Licence Name': f'{len(results)} records',
                    'Name Match': 'No exceptions',
                    'Licence Type': 'All found',
                    'Licence Expiry': 'All valid',
                    'Rolecall Expiry': 'All dates match',
                    'Expiry Status': 'All active'
                }
                exception_results = [summary_result]
                excel_path = original_path.parent / f"{original_path.stem}_TAS_ALL_SUCCESS_{timestamp}.xlsx"

            df = pd.DataFrame(exception_results)

            with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
                # Decided from the flag, not from row contents, so a real
                # exception row can never be mistaken for the summary
                sheet_name = 'Exceptions' if has_exceptions else 'Processing Summary'
                df.to_excel(writer, index=False, sheet_name=sheet_name)

                workbook = writer.book
                worksheet = writer.sheets[sheet_name]

                # Header style (data cells keep the default format)
                header_color = workbook.add_format({
                    'bold': True,
                    'bg_color': '#8C1E31',  # Custom burgundy background
                    'font_color': 'white',  # White text
                    'border': 1,
                    'align': 'center',
                    'valign': 'vcenter',
                    'text_wrap': True
                })

                for col_num, header in enumerate(df.columns):
                    worksheet.write(0, col_num, header, header_color)

                    # Width follows the longest value, within sensible limits
                    max_length = max(
                        len(str(header)),
                        df[header].astype(str).str.len().max() if not df.empty else 0
                    )
                    col_width = min(max(max_length + 3, 12), 50)  # Min 12, Max 50 characters
                    worksheet.set_column(col_num, col_num, col_width)

                # Taller header row; set_row takes xlsxwriter's row-height
                # units, not pixels
                worksheet.set_row(0, 20)

                # Add filters over the header and all data rows
                worksheet.autofilter(0, 0, len(df), len(df.columns) - 1)

                # Freeze the header row and first column (Payroll Number)
                worksheet.freeze_panes(1, 1)

            return str(excel_path)

        except Exception as e:
            print(f"ERROR creating Excel report: {str(e)}")
            raise


# ================================================================================================
# MAIN APPLICATION
# ================================================================================================

class SeleniumTASLicenseChecker:
    """Application object: wires together config, browser, search and reporting."""

    def __init__(self):
        """Create the configuration and browser manager; progress tracking starts later."""
        self.config = SimpleConfig()
        self.browser_manager = OptimizedBrowserManager(self.config)
        self.progress_tracker = None

    def run(self):
        """Run the whole workflow: load the CSV, search every licence, write the report.

        Ctrl+C and ordinary errors are reported on the console rather than
        propagated; the browser is always cleaned up at the end.
        """
        try:
            self._show_welcome()

            # Locate and load the input data
            csv_file_path = self._get_csv_file_from_user()
            employees = SimpleCSVHandler.load_employee_data(csv_file_path, self.config)

            # Give the user a chance to back out
            if not self._ask_user_to_continue(len(employees)):
                print("Process cancelled by user")
                return

            self._show_captcha_warning()

            # One record at a time (CAPTCHA requirement), then the report
            results = self._process_all_employees_sequential(employees)
            excel_path = SimpleExcelGenerator.create_excel_report(results, csv_file_path)

            # Wrap up with the statistics
            elapsed_time = time.time() - self.progress_tracker.start_time
            self.progress_tracker.show_final_summary(elapsed_time, excel_path)

        except KeyboardInterrupt:
            print("\n\nProcess stopped by user (Ctrl+C pressed)")
        except Exception as e:
            print(f"\nERROR: {str(e)}")
            troubleshooting_lines = (
                "\nTROUBLESHOOTING TIPS:",
                "1. Make sure your CSV file has the exact column names: 'Payroll Number', 'Employee Name', 'License Number', 'Expiry/Update  Date'",
                "2. Check that Chrome browser is installed",
                "3. Make sure you have internet connection",
                "4. Solve any CAPTCHA that appears in the browser window - the script will automatically continue",
                "5. Try running as administrator if you get permission errors",
                "6. If CAPTCHA timeout occurs, the script will continue anyway",
            )
            for line in troubleshooting_lines:
                print(line)
        finally:
            self.browser_manager.cleanup_all_browsers()

    def _show_welcome(self):
        """Print the start-up line."""
        print("TAS License Checker - Starting...")

    def _show_captcha_warning(self):
        """Tell the user that CAPTCHAs may need solving and how long the script waits."""
        divider = "=" * 60

        print("\n" + divider)
        print("⚠️  IMPORTANT: ENHANCED CAPTCHA HANDLING")
        print(divider)
        print("The TAS website uses CAPTCHA protection.")
        # The "be ready" hint only applies when a browser window is shown
        if self.config.show_browser_for_captcha:
            print("A browser window will open - please be ready to solve")
            print("any CAPTCHA that appears during processing.")
        print("The script will automatically detect when CAPTCHA is solved")
        print("and continue processing immediately.")
        print(f"Maximum wait time: {self.config.captcha_timeout_seconds} seconds")
        print(divider)

    def _get_csv_file_from_user(self) -> str:
        """Return the CSV path from the command line or, failing that, from a prompt.

        Surrounding whitespace and quote characters are stripped from the path.

        Raises:
            Exception: if the path is empty or does not exist.
        """
        cli_arguments = sys.argv[1:]

        if cli_arguments:
            # First command-line argument wins
            file_path = cli_arguments[0].strip().strip('"\'')
            print(f"Using file from command line: {file_path}")
        else:
            # Interactive fallback
            prompt_lines = (
                "\nPlease provide your CSV file:",
                "   You can either:",
                "   1. Type the full file path",
                "   2. Drag and drop the file into this window",
                "   3. Copy and paste the file path",
            )
            for line in prompt_lines:
                print(line)

            file_path = input("\nEnter CSV file path: ").strip().strip('"\'')

        # Validate before handing the path back
        if not file_path:
            raise Exception("No file path provided")

        candidate = Path(file_path)
        if not candidate.exists():
            raise Exception(f"File not found: {file_path}")

        print(f"File found: {candidate.name}")
        return file_path

    def _ask_user_to_continue(self, employee_count: int) -> bool:
        """Confirm processing with the user; test mode proceeds without asking.

        Returns:
            True to continue, False when the user declines.
        """
        if self.config.test_mode:
            planned = min(employee_count, self.config.test_record_limit)
            print(f"\nTEST MODE: Will process {planned} employees")
            return True

        print(f"\nReady to process {employee_count} employees")
        print("Will use single browser (required for CAPTCHA handling)")

        # Keep asking until the answer is recognisable
        while True:
            answer = input("\nContinue with processing? (Y/N): ").strip().lower()
            if answer in ('y', 'yes'):
                return True
            if answer in ('n', 'no'):
                return False
            print("Please enter 'y' for yes or 'n' for no")

    def _process_all_employees_sequential(self, employees: List[EmployeeRecord]) -> List[Dict]:
        """Search every employee in order and build one report row per outcome.

        A failure while handling an employee is printed, recorded as an
        all-"Error" row and counted as failed; processing then continues.

        Returns:
            The list of report-row dicts, in processing order.
        """
        # Fresh tracker and searcher for this run
        self.progress_tracker = SimpleProgressTracker(len(employees))
        searcher = SeleniumLicenseSearcher(self.config, self.browser_manager)
        report_rows = []

        print("\nStarting sequential processing (required for CAPTCHA)...")

        for employee in employees:
            try:
                outcome = searcher.search_single_license(employee)

                # Row layout matches the Excel report columns
                report_rows.append({
                    'Payroll Number': employee.payroll_number,
                    'Licence Number': employee.license_number,
                    'Rolecall Name': employee.employee_name,
                    'Licence Name': outcome.found_name,
                    'Name Match': outcome.name_matches,
                    'Licence Type': outcome.license_type,
                    'Licence Expiry': outcome.license_expiry,
                    'Rolecall Expiry': employee.csv_expiry,
                    'Expiry Status': outcome.expiry_status
                })

                self.progress_tracker.update_progress(employee, outcome)

                # Pause between requests, as configured
                time.sleep(self.config.delay_between_requests)

            except Exception as e:
                # Surface the problem right away
                print(f"\nERROR processing {employee.employee_name}: {str(e)}")

                # Placeholder row so the employee still appears in the report
                report_rows.append({
                    'Payroll Number': employee.payroll_number,
                    'Licence Number': employee.license_number,
                    'Rolecall Name': employee.employee_name,
                    'Licence Name': "Error",
                    'Name Match': "Error",
                    'Licence Type': "Error",
                    'Licence Expiry': "Error",
                    'Rolecall Expiry': employee.csv_expiry,
                    'Expiry Status': "Error"
                })

                # Count it as a failure in the progress display
                failure = SearchResult(error_message=str(e))
                self.progress_tracker.update_progress(employee, failure)

        return report_rows

    def _create_notes_for_result(self, result: SearchResult) -> str:
        """Summarise a search result as a " | "-separated note string."""
        fragments = []

        if result.was_cached:
            fragments.append("Cached result (duplicate license)")

        if result.search_time_seconds > 0:
            fragments.append(f"Search time: {result.search_time_seconds:.2f}s")

        if result.how_many_retries > 0:
            fragments.append(f"Retries: {result.how_many_retries}")

        # Exactly one of these two is always present
        fragments.append(
            f"Error: {result.error_message}" if result.error_message else "Selenium scraping"
        )

        if not fragments:
            return "Processed successfully"
        return " | ".join(fragments)


# ================================================================================================
# HELPER FUNCTIONS
# ================================================================================================

def test_selenium_connection(
        website_url: str = "https://occupationallicensing.justice.tas.gov.au/Search/onlinesearch.aspx",
        timeout_seconds: int = 15) -> bool:
    """Check that headless Chrome can open the TAS search page and find its form.

    Starts a throwaway headless Chrome session, loads ``website_url`` and waits
    for the licence-type option element used by the licence search to be
    present in the page. The browser is always closed afterwards.

    Args:
        website_url: Page to load. The default is the same URL as
            ``SimpleConfig.website_url``.
        timeout_seconds: Maximum time to wait for the search element to appear.

    Returns:
        True when the element was found; False when it did not appear in time
        or when the browser/page could not be started or loaded. Failures are
        reported on stdout and never raised to the caller.
    """
    licence_option_xpath = (
        '//*[@id="ctl00_ctl00_ctl00_ctlMainContent_ctlMainContent_MainContent_ctlOnlineSearch_rblLicenceTypeOptions_3"]'
    )

    try:
        # Create test browser
        chrome_options = Options()
        for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
            chrome_options.add_argument(flag)

        driver = webdriver.Chrome(options=chrome_options)

        try:
            # Test navigation to TAS website
            driver.get(website_url)

            # WebDriverWait.until() raises TimeoutException instead of returning
            # a falsy value, so a missing element has to be detected here;
            # otherwise it would be reported as a generic connection failure.
            try:
                WebDriverWait(driver, timeout_seconds).until(
                    EC.presence_of_element_located((By.XPATH, licence_option_xpath))
                )
            except TimeoutException:
                print("License search elements not found - website may have changed")
                return False

            print("Selenium connection test PASSED!")
            return True

        finally:
            # Always release the browser process, whatever the outcome.
            driver.quit()

    except Exception as e:
        # Top-level boundary of a diagnostic helper: report and signal failure.
        print(f"Selenium connection test failed: {str(e)}")
        print("Common fixes:")
        print("1. Install Google Chrome browser")
        print("2. Update Chrome to latest version")
        print("3. Run as administrator")
        print("4. Check internet connection")
        return False


# ================================================================================================
# MAIN PROGRAM ENTRY POINT
# ================================================================================================

def main():
    """Command-line entry point for the TAS License Checker.

    When the first argument is ``--test``, ``-t`` or ``test`` (compared
    case-insensitively), only the Selenium connection test runs. Otherwise the
    connection test runs as a non-blocking pre-check and the checker
    application is started. Ctrl+C and unexpected errors are reported on
    stdout instead of propagating.
    """
    cli_args = sys.argv[1:]

    # Connection-test-only mode
    if cli_args and cli_args[0].lower() in ('--test', '-t', 'test'):
        test_selenium_connection()
        return

    try:
        print("Starting TAS License Checker...")

        # Pre-flight check; a failure is only a warning, the run continues.
        print("\nTesting Selenium connection to TAS website...")
        connection_ok = test_selenium_connection()
        if not connection_ok:
            for warning in (
                "Connection issues detected, but continuing anyway...",
                "If all searches fail, the website may have changed or Chrome needs updating",
            ):
                print(warning)

        SeleniumTASLicenseChecker().run()

    except KeyboardInterrupt:
        print("\nSelenium application stopped by user")

    except Exception as exc:
        print(f"\nFATAL ERROR: {exc!s}")
        for help_line in (
            "\nSELENIUM TROUBLESHOOTING:",
            "1. Check your CSV file has the right columns",
            "2. Install/update Google Chrome browser",
            "3. Run as administrator",
            "4. Check your internet connection",
            "5. Solve any CAPTCHA that appears in the browser - script will auto-continue",
            "6. Run with --test to check Selenium connectivity",
            "7. If CAPTCHA timeout occurs, the script will continue anyway",
        ):
            print(help_line)


# ================================================================================================
# RUN THE PROGRAM
# ================================================================================================

if __name__ == "__main__":
    # Start the checker only when this file is executed as a script,
    # so importing the module does not launch a browser session.
    main()