#!/usr/bin/env python3
"""
Tailwind Site Rebuilder
-----------------------
Creates a blueprint for rebuilding websites with Tailwind CSS.
"""

import os
import re
import json
import shutil
import urllib.request
from html import escape
from urllib.parse import urlparse, urljoin
from datetime import datetime

# Import the OpenAI vision analyzer
try:
    from .openai_analyzer import OpenAILayoutAnalyzer
except ImportError:
    from openai_analyzer import OpenAILayoutAnalyzer

# Optional: PIL for image processing if we want to analyze the screenshots
try:
    from PIL import Image, ImageDraw
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    print("Warning: PIL/Pillow not installed. Some image analysis features will be limited.")
    print("Install with: pip install Pillow")

# BeautifulSoup for HTML parsing
try:
    from bs4 import BeautifulSoup
    BS4_AVAILABLE = True
except ImportError:
    BS4_AVAILABLE = False
    print("Warning: BeautifulSoup not installed. Content extraction will be limited.")
    print("Install with: pip install beautifulsoup4")


class SiteRebuilder:
    """Builds a Tailwind CSS rebuild context"""
    
    def __init__(self, output_dir="site_rebuilder"):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        
        # Paths for output files
        self.context_file = os.path.join(output_dir, "rebuild_context.json")
        
        # Initialize the OpenAI vision analyzer
        try:
            self.openai_analyzer = OpenAILayoutAnalyzer()
            print("✅ OpenAI Vision Analyzer initialized")
        except Exception as e:
            print(f"⚠️  OpenAI Vision Analyzer failed to initialize: {e}")
            self.openai_analyzer = None
    
    def create_rebuild_context(self, screenshot_data, sections, colors, typography, images=None, text_content=None, asr=None):
        """Create rebuild context JSON from screenshot and section data"""
        print("Creating rebuild context JSON...")
        
        # Get domain name from URL
        domain = urlparse(screenshot_data["url"]).netloc
        if domain.startswith("www."):
            domain = domain[4:]
        
        # Normalize dimensions to standard breakpoints
        normalized_dimensions = self._normalize_dimensions(
            width=screenshot_data["width"],
            height=screenshot_data["height"],
            viewport_width=screenshot_data["viewport_width"],
            viewport_height=screenshot_data["viewport_height"]
        )
        
        # Build the rebuild context
        rebuild_context = {
            "site_info": {
                "title": screenshot_data["title"],
                "domain": domain,
                "url": screenshot_data["url"],
                "analyzed_at": datetime.now().isoformat(),
                "screenshot": screenshot_data["filename"]
            },
            "layout": {
                "original_width": screenshot_data["width"],
                "original_height": screenshot_data["height"],
                "original_viewport_width": screenshot_data["viewport_width"],
                "original_viewport_height": screenshot_data["viewport_height"],
                "normalized_width": normalized_dimensions["width"],
                "normalized_height": normalized_dimensions["height"],
                "sections": []
            },
            "design": {
                "colors": {
                    "background": colors["background"][:5],  # Limit to top 5
                    "text": colors["text"][:5],
                    "accent": colors["accent"][:5]
                },
                "typography": {
                    "fonts": typography["fonts"],
                    "headings": typography["headings"],
                    "body": typography["body"]
                }
            },
            "tailwind_rebuild_notes": {
                "general": "This site should be rebuilt using Tailwind CSS utility classes for responsive design",
                "layout": f"Use Tailwind's container (max-width: {normalized_dimensions['width']}px), flex, and grid utilities",
                "components": {
                    "header": "Create a responsive header with Tailwind's flex utilities",
                    "navigation": "Build navigation with Tailwind's flex and spacing utilities",
                    "hero": "Use Tailwind's padding, margin, and text utilities for the hero section",
                    "sections": "Utilize Tailwind's grid and spacing utilities for content sections",
                    "footer": "Create a responsive footer with Tailwind's grid system"
                },
                "responsive_breakpoints": {
                    "sm": "640px",
                    "md": "768px",
                    "lg": "1024px",
                    "xl": "1280px",
                    "2xl": "1440px"
                }
            }
        }
        
        # Add ASR (Annotated Structure Reference) if available
        if asr:
            rebuild_context["asr"] = asr
            
            # Update layout patterns based on ASR analysis
            if asr.get('layout_analysis'):
                layout_analysis = asr['layout_analysis']
                rebuild_context["site_info"]["layout_patterns"] = {
                    "layout_type": layout_analysis.get('layout_type', 'unknown'),
                    "has_overlay_header": layout_analysis.get('has_overlay_header', False),
                    "navigation_style": layout_analysis.get('navigation_style', 'unknown'),
                    "total_sections": layout_analysis.get('total_sections', 0)
                }
        
        # Add collected images if available
        if images:
            rebuild_context["content"] = rebuild_context.get("content", {})
            rebuild_context["content"]["images"] = images
        
        # Add collected text content if available
        if text_content:
            rebuild_context["content"] = rebuild_context.get("content", {})
            rebuild_context["content"]["text"] = text_content
        
        # Process sections from ASR if available, otherwise use legacy sections
        if asr and 'template_structure' in asr:
            # Use ASR template structure for better section descriptions
            for i, section_desc in enumerate(asr['template_structure']):
                section_type = section_desc.lower().split('(')[0].strip()
                section_data = {
                    "type": section_type,
                    "description": section_desc,
                    "tailwind_notes": self.get_tailwind_notes_for_section(section_type)
                }
                
                # Add detailed analysis if available
                if i < len(asr.get('detailed_sections', [])):
                    detailed = asr['detailed_sections'][i]
                    section_data.update({
                        "positioning": detailed.get('positioning', {}),
                        "styling": detailed.get('styling', {}),
                        "content_preview": detailed.get('content', {}).get('text_content', '')[:100]
                    })
                
                rebuild_context["layout"]["sections"].append(section_data)
        else:
            # Legacy section processing
            # Find header and navigation patterns to include in site info
            header_section = next((s for s in sections if s["type"] == "header"), None)
            nav_section = next((s for s in sections if s["type"] == "navigation"), None)
            
            # Extract layout patterns for site_info
            layout_patterns = {}
            
            if header_section and "pattern" in header_section:
                layout_patterns["header_pattern"] = header_section["pattern"]
                if "logo_position" in header_section:
                    layout_patterns["logo_position"] = header_section["logo_position"]
            
            if nav_section and "pattern" in nav_section:
                layout_patterns["nav_pattern"] = nav_section["pattern"]
                if "position_in_header" in nav_section:
                    layout_patterns["nav_position"] = nav_section["position_in_header"]
            
            # Add the layout patterns to site_info
            if layout_patterns:
                rebuild_context["site_info"]["layout_patterns"] = layout_patterns
                
                # Add tailwind recommendations based on patterns
                if "header_pattern" in layout_patterns:
                    pattern = layout_patterns["header_pattern"]
                    if pattern == "logo-left-nav-right":
                        rebuild_context["tailwind_rebuild_notes"]["components"]["header"] = "Create header with flex: logo on left (flex-shrink-0) and nav on right (flex-grow justify-end)"
                    elif pattern == "centered-logo":
                        rebuild_context["tailwind_rebuild_notes"]["components"]["header"] = "Create header with logo centered (mx-auto) and flex justify-center"
                    elif pattern == "overlay-hero":
                        rebuild_context["tailwind_rebuild_notes"]["components"]["header"] = "Create transparent header with absolute positioning over hero section"
            
            # Process sections (position data is intentionally dropped)
            for section in sections:
                section_data = {
                    "type": section["type"],
                    "content_preview": section["text"],
                    "tailwind_notes": self.get_tailwind_notes_for_section(section["type"])
                }
                
                # Include pattern information if available
                if "pattern" in section:
                    section_data["pattern"] = section["pattern"]
                    
                # Include special flags for overlay relationships
                if "has_overlay_header" in section:
                    section_data["has_overlay_header"] = section["has_overlay_header"]
                    
                rebuild_context["layout"]["sections"].append(section_data)
        
        # Save to file
        with open(self.context_file, 'w', encoding='utf-8') as f:
            json.dump(rebuild_context, f, indent=2)
            
        print(f"Rebuild context saved to {self.context_file}")
        return rebuild_context
    
    def _normalize_dimensions(self, width, height, viewport_width, viewport_height):
        """Normalize dimensions to a standard desktop width"""
        # Normalize to a 1440px desktop canvas. This falls within Tailwind's
        # default xl range (min-width 1280px); the default 2xl breakpoint
        # starts at 1536px.
        normalized_width = 1440
        
        # Calculate the normalized height based on aspect ratio
        aspect_ratio = height / width if width > 0 else 1
        normalized_height = int(normalized_width * aspect_ratio)
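        # e.g. a 1920x4800 capture (aspect ratio 2.5) normalizes to 1440x3600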
        
        print(f"Normalized dimensions: {normalized_width}px × {normalized_height}px (original: {width}px × {height}px)")
        
        return {
            "width": normalized_width,
            "height": normalized_height,
            "breakpoint": "xl"  # 1440px falls in Tailwind's xl range (1280-1535px)
        }
    
    def get_tailwind_notes_for_section(self, section_type):
        """Get Tailwind-specific notes for a section type"""
        notes = {
            "header": "Use sticky positioning, flex layout, and responsive utilities",
            "navigation": "Implement responsive navigation with flex and hidden utilities",
            "hero": "Create with flex or grid, large text utilities, and padding",
            "main": "Structure with flex or grid depending on content layout",
            "section": "Use container, padding, and margin utilities for spacing",
            "features": "Implement with grid or flex for responsive layouts",
            "testimonials": "Create cards with rounded corners, shadows, and padding",
            "pricing": "Use grid for pricing tables with consistent spacing",
            "contact": "Implement form elements with Tailwind form utilities",
            "footer": "Structure with grid for responsive column layout"
        }
        
        return notes.get(section_type, "Use appropriate flex/grid layout and spacing")
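
# Example usage of SiteRebuilder (a minimal sketch; the dict shapes mirror what
# create_simulation() below passes in, and the values here are illustrative):
#
#   rebuilder = SiteRebuilder("out")
#   rebuilder.create_rebuild_context(
#       screenshot_data={"url": "https://example.com", "title": "Example",
#                        "filename": "shot.png", "width": 1440, "height": 2400,
#                        "viewport_width": 1440, "viewport_height": 900},
#       sections=[{"type": "hero", "text": "Welcome to Example"}],
#       colors={"background": [], "text": [], "accent": []},
#       typography={"fonts": [], "headings": {}, "body": {}},
#   )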


def extract_images_and_text(url, html, output_dir):
    """Extract images and text content from HTML and save to directories"""
    images = []
    text_content = {}
    
    # Create content directories
    content_dir = os.path.join(output_dir, "content")
    images_dir = os.path.join(content_dir, "images")
    text_dir = os.path.join(content_dir, "text")
    
    os.makedirs(content_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(text_dir, exist_ok=True)
    
    if not BS4_AVAILABLE:
        print("BeautifulSoup not available, can't extract content")
        return images, text_content
    
    try:
        soup = BeautifulSoup(html, 'html.parser')
        base_url = url
        
        # Extract all images from img tags
        for idx, img in enumerate(soup.find_all('img')):
            try:
                src = img.get('src', '')
                if src:
                    # Make relative URLs absolute
                    if not src.startswith(('http://', 'https://')):
                        src = urljoin(base_url, src)
                    
                    # Get image dimensions
                    width = img.get('width', 'unknown')
                    height = img.get('height', 'unknown')
                    
                    # Generate a safe filename from the source URL
                    img_filename = f"image_{idx}_{os.path.basename(src)}"
                    # Handle cases where there's no filename or it contains invalid characters
                    img_filename = re.sub(r'[^\w\-\.]', '_', img_filename)
                    if not img_filename.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')):
                        img_filename += '.jpg'  # Default extension
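                    # e.g. basename "photo.png?v=2" becomes "image_0_photo.png_v_2.jpg"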
                    
                    img_path = os.path.join(images_dir, img_filename)
                    local_path = "/".join(("content", "images", img_filename))  # forward slashes so it doubles as a relative URL
                    
                    # Try to download the image
                    downloaded = False
                    try:
                        img_req = urllib.request.Request(
                            src,
                            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
                        )
                        with urllib.request.urlopen(img_req, timeout=10) as response, open(img_path, 'wb') as out_file:
                            out_file.write(response.read())
                            downloaded = True
                            print(f"Downloaded image: {img_filename}")
                    except Exception as e:
                        print(f"Failed to download image {src}: {e}")
                    
                    # Extract image info
                    img_info = {
                        'src': src,
                        'alt': img.get('alt', ''),
                        'dimensions': f"{width}x{height}" if width != 'unknown' and height != 'unknown' else 'unknown',
                        'parent_tag': img.parent.name if img.parent else 'unknown',
                        'local_path': local_path if downloaded else None,
                        'type': 'regular'
                    }
                    
                    # Try to determine context
                    if img.parent and img.parent.name == 'a':
                        img_info['is_link'] = True
                        img_info['link_href'] = img.parent.get('href', '')
                    
                    images.append(img_info)
            except Exception as e:
                print(f"Error extracting image: {e}")
        
        # Extract background images from style attributes
        background_img_count = 0
        background_pattern = re.compile(r'background-image\s*:\s*url\([\'"]?([^\'"]+)[\'"]?\)')
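        # e.g. style="background-image: url('img/hero.png')" captures "img/hero.png"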
        
        for element in soup.find_all(lambda tag: tag.has_attr('style') and 'background-image' in tag.get('style', '')):
            try:
                style = element.get('style', '')
                match = background_pattern.search(style)
                
                if match:
                    src = match.group(1)
                    
                    # Make relative URLs absolute
                    if not src.startswith(('http://', 'https://')):
                        src = urljoin(base_url, src)
                    
                    # Generate a safe filename
                    img_filename = f"bg_image_{background_img_count}_{os.path.basename(src)}"
                    # Handle cases where there's no filename or it contains invalid characters
                    img_filename = re.sub(r'[^\w\-\.]', '_', img_filename)
                    if not img_filename.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')):
                        img_filename += '.jpg'  # Default extension
                    
                    img_path = os.path.join(images_dir, img_filename)
                    local_path = "/".join(("content", "images", img_filename))  # forward slashes so it doubles as a relative URL
                    
                    # Try to download the image
                    downloaded = False
                    try:
                        img_req = urllib.request.Request(
                            src,
                            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
                        )
                        with urllib.request.urlopen(img_req, timeout=10) as response, open(img_path, 'wb') as out_file:
                            out_file.write(response.read())
                            downloaded = True
                            print(f"Downloaded background image: {img_filename}")
                    except Exception as e:
                        print(f"Failed to download background image {src}: {e}")
                    
                    # Element context info
                    element_id = element.get('id', '')
                    element_class = ' '.join(element.get('class', []))
                    element_context = f"{element.name}"
                    if element_id:
                        element_context += f" (id: {element_id})"
                    if element_class:
                        element_context += f" (class: {element_class})"
                    
                    # Extract image info
                    img_info = {
                        'src': src,
                        'element': element_context,
                        'local_path': local_path if downloaded else None,
                        'type': 'background'
                    }
                    
                    images.append(img_info)
                    background_img_count += 1
            except Exception as e:
                print(f"Error extracting background image: {e}")
        
        # Also look for CSS background images in stylesheets
        for style_tag in soup.find_all('style'):
            try:
                css_content = style_tag.string
                if css_content:
                    # Find all background-image declarations in the CSS
                    bg_img_matches = background_pattern.findall(css_content)
                    
                    for idx, src in enumerate(bg_img_matches):
                        if src:
                            # Make relative URLs absolute
                            if not src.startswith(('http://', 'https://')):
                                src = urljoin(base_url, src)
                                
                            # Generate a safe filename
                            img_filename = f"css_bg_{background_img_count}_{os.path.basename(src)}"
                            # Handle cases where there's no filename or it contains invalid characters
                            img_filename = re.sub(r'[^\w\-\.]', '_', img_filename)
                            if not img_filename.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')):
                                img_filename += '.jpg'  # Default extension
                            
                            img_path = os.path.join(images_dir, img_filename)
                            local_path = "/".join(("content", "images", img_filename))  # forward slashes so it doubles as a relative URL
                            
                            # Try to download the image
                            downloaded = False
                            try:
                                img_req = urllib.request.Request(
                                    src,
                                    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
                                )
                                with urllib.request.urlopen(img_req, timeout=10) as response, open(img_path, 'wb') as out_file:
                                    out_file.write(response.read())
                                    downloaded = True
                                    print(f"Downloaded CSS background image: {img_filename}")
                            except Exception as e:
                                print(f"Failed to download CSS background image {src}: {e}")
                            
                            # Extract image info
                            img_info = {
                                'src': src,
                                'local_path': local_path if downloaded else None,
                                'type': 'css_background'
                            }
                            
                            images.append(img_info)
                            background_img_count += 1
            except Exception as e:
                print(f"Error parsing CSS in style tag: {e}")
        
        # Extract text content by section
        # Header content
        header = soup.find('header')
        if header:
            text_content['header'] = header.get_text(strip=True)
            with open(os.path.join(text_dir, "header.txt"), 'w', encoding='utf-8') as f:
                f.write(header.get_text(strip=True))
        
        # Navigation content
        nav = soup.find('nav')
        if nav:
            nav_items = []
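            # Each item keeps the link text plus an absolutized href
            # (e.g. "/about" -> "https://example.com/about")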
            for link in nav.find_all('a'):
                href = link.get('href', '')
                text = link.get_text(strip=True)
                if text:
                    nav_items.append({
                        'text': text,
                        'href': urljoin(base_url, href) if href and not href.startswith(('http://', 'https://')) else href
                    })
            text_content['navigation'] = nav_items
            
            # Save navigation items to file
            with open(os.path.join(text_dir, "navigation.txt"), 'w', encoding='utf-8') as f:
                for item in nav_items:
                    f.write(f"{item['text']} - {item['href']}\n")
        
        # Main content
        main = soup.find('main') or soup.find(id='main') or soup.find(id='content') or soup.find(class_='content')
        if main:
            text_content['main'] = main.get_text(strip=True)
            with open(os.path.join(text_dir, "main_content.txt"), 'w', encoding='utf-8') as f:
                f.write(main.get_text(strip=True))
        
        # All headings
        headings = {}
        with open(os.path.join(text_dir, "headings.txt"), 'w', encoding='utf-8') as f:
            for level in range(1, 7):
                h_tags = soup.find_all(f'h{level}')
                if h_tags:
                    headings[f'h{level}'] = [h.get_text(strip=True) for h in h_tags]
                    f.write(f"--- H{level} Headings ---\n")
                    for h in h_tags:
                        f.write(f"{h.get_text(strip=True)}\n")
                    f.write("\n")
        
        if headings:
            text_content['headings'] = headings
        
        # Paragraphs
        paragraphs = soup.find_all('p')
        if paragraphs:
            text_content['paragraphs'] = [p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)]
            with open(os.path.join(text_dir, "paragraphs.txt"), 'w', encoding='utf-8') as f:
                for p in paragraphs:
                    text = p.get_text(strip=True)
                    if text:
                        f.write(f"{text}\n\n")
        
        # Footer
        footer = soup.find('footer')
        if footer:
            text_content['footer'] = footer.get_text(strip=True)
            with open(os.path.join(text_dir, "footer.txt"), 'w', encoding='utf-8') as f:
                f.write(footer.get_text(strip=True))
        
        # Extract all visible text as fallback
        body = soup.find('body')
        if body:
            # Save full body text in case other sections missed content
            clean_text = []
            for item in body.find_all(string=True):
                if item.strip():
                    parent = item.parent.name
                    if parent not in ['script', 'style', 'meta', 'noscript']:
                        clean_text.append(item.strip())
            
            full_text = ' '.join(clean_text)
            text_content['full_text'] = full_text
            with open(os.path.join(text_dir, "full_text.txt"), 'w', encoding='utf-8') as f:
                f.write(full_text)
        
        # Save a summary of extracted content
        with open(os.path.join(content_dir, "content_summary.txt"), 'w', encoding='utf-8') as f:
            f.write(f"Content extracted from: {url}\n")
            f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            
            f.write("=== STATISTICS ===\n")
            f.write(f"Total images found: {len(images)}\n")
            
            # Count image types
            regular_images = sum(1 for img in images if img.get('type') == 'regular')
            background_images = sum(1 for img in images if img.get('type') == 'background')
            css_background_images = sum(1 for img in images if img.get('type') == 'css_background')
            
            f.write(f"- Regular <img> tags: {regular_images}\n")
            f.write(f"- Inline background images: {background_images}\n")
            f.write(f"- CSS background images: {css_background_images}\n\n")
            
            f.write(f"Text sections found: {len(text_content)}\n")
            f.write("Text content sections:\n")
            for key in text_content.keys():
                if key == "full_text":
                    continue  # Skip full_text in the section list as it's a combination of all text
                f.write(f"- {key}\n")
        
        # Create an organized index.html file
        with open(os.path.join(output_dir, "index.html"), 'w', encoding='utf-8') as f:
            f.write(f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Site Analysis: {url}</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
        }}
        header {{
            background-color: #f8f9fa;
            padding: 20px;
            margin-bottom: 30px;
            border-radius: 5px;
            border-left: 5px solid #0066cc;
        }}
        h1, h2, h3 {{
            color: #0066cc;
        }}
        .section {{
            margin-bottom: 40px;
            padding: 20px;
            background-color: #fff;
            border-radius: 5px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .image-grid {{
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
            gap: 20px;
        }}
        .image-item {{
            border: 1px solid #ddd;
            border-radius: 4px;
            padding: 10px;
            background: #f8f9fa;
        }}
        .image-item img {{
            max-width: 100%;
            height: auto;
            border-radius: 2px;
            margin-bottom: 10px;
        }}
        .image-details {{
            font-size: 12px;
            color: #666;
        }}
        .text-content {{
            white-space: pre-wrap;
            background-color: #f8f9fa;
            padding: 15px;
            border-radius: 5px;
            max-height: 300px;
            overflow-y: auto;
            font-family: monospace;
            font-size: 14px;
        }}
        .footer {{
            margin-top: 50px;
            padding-top: 20px;
            border-top: 1px solid #ddd;
            color: #666;
            font-size: 14px;
            text-align: center;
        }}
    </style>
</head>
<body>
    <header>
        <h1>Tailwind Site Rebuilder</h1>
        <p>Analysis of <a href="{url}" target="_blank">{url}</a></p>
        <p>Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
    </header>
    
    <div class="section">
        <h2>Images ({len(images)})</h2>
        <p>Regular images: {regular_images} | Background images: {background_images} | CSS Background images: {css_background_images}</p>
        
        <h3>Regular &lt;img&gt; Tags</h3>
        <div class="image-grid">
""")
            
            # Add regular images (alt text is escaped so quotes don't break the markup)
            for img in [i for i in images if i.get('type') == 'regular' and i.get('local_path')]:
                local_path = img.get('local_path', '')
                alt = escape(img.get('alt', 'No alt text'), quote=True)
                src = img.get('src', '')
                f.write(f"""
            <div class="image-item">
                <img src="{local_path}" alt="{alt}" loading="lazy">
                <div class="image-details">
                    <strong>Alt:</strong> {alt}<br>
                    <strong>Source:</strong> {os.path.basename(src)}<br>
                </div>
            </div>""")

            f.write("""
        </div>
        
        <h3>Background Images</h3>
        <div class="image-grid">
""")
            
            # Add background images
            for img in [i for i in images if i.get('type') == 'background' and i.get('local_path')]:
                local_path = img.get('local_path', '')
                element = img.get('element', 'Unknown element')
                src = img.get('src', '')
                f.write(f"""
            <div class="image-item">
                <img src="{local_path}" alt="Background image" loading="lazy">
                <div class="image-details">
                    <strong>Element:</strong> {element}<br>
                    <strong>Source:</strong> {os.path.basename(src)}<br>
                </div>
            </div>""")
                
            f.write("""
        </div>
    </div>
    
    <div class="section">
        <h2>Text Content</h2>
""")
            
            # Add headings section
            if 'headings' in text_content:
                f.write("""
        <h3>Headings</h3>
        <div class="text-content">""")
                
                for level in range(1, 7):
                    key = f'h{level}'
                    if key in text_content['headings'] and text_content['headings'][key]:
                        f.write(f"\nH{level}:\n")
                        for heading in text_content['headings'][key]:
                            f.write(f"- {heading}\n")
                            
                f.write("</div>")

            # Add paragraphs section
            if 'paragraphs' in text_content:
                f.write("""
        <h3>Paragraphs</h3>
        <div class="text-content">""")
                
                for p in text_content['paragraphs'][:10]:  # Limit to first 10 paragraphs
                    f.write(f"{p}\n\n")
                    
                if len(text_content['paragraphs']) > 10:
                    f.write(f"\n... and {len(text_content['paragraphs']) - 10} more paragraphs.")
                    
                f.write("</div>")
                
            # Add navigation section
            if 'navigation' in text_content:
                f.write("""
        <h3>Navigation</h3>
        <div class="text-content">""")
                
                for item in text_content['navigation']:
                    f.write(f"- {item['text']} ({item['href']})\n")
                    
                f.write("</div>")
                
            f.write("""
    </div>
    
    <div class="footer">
        Generated by Tailwind Site Rebuilder
    </div>
</body>
</html>
""")
        
        print(f"Extracted {len(images)} images and text content from {len(text_content)} sections")
        print(f"Content organized in {content_dir}")
        
        # Create directories to organize images by type
        regular_dir = os.path.join(images_dir, "regular")
        background_dir = os.path.join(images_dir, "background")
        os.makedirs(regular_dir, exist_ok=True)
        os.makedirs(background_dir, exist_ok=True)
        
        # Copy each image into the folder matching its type
        for img in images:
            if not img.get('local_path'):
                continue
                
            img_type = img.get('type', 'regular')
            img_file = os.path.basename(img.get('local_path', ''))
            
            if not img_file:
                continue
                
            # Copy based on image type
            try:
                src_path = os.path.join(images_dir, img_file)
                if img_type == 'regular':
                    dest_path = os.path.join(regular_dir, img_file)
                else:
                    dest_path = os.path.join(background_dir, img_file)
                
                # Only copy if the source exists (downloads can fail)
                if os.path.exists(src_path) and not os.path.exists(dest_path):
                    # Copy rather than symlink for cross-platform compatibility
                    shutil.copy2(src_path, dest_path)
            except Exception as e:
                print(f"Error organizing image {img_file}: {e}")
        
        return images, text_content
        
    except Exception as e:
        print(f"Error extracting content: {e}")
        return images, text_content


def extract_css_properties(html, url):
    """Extract CSS properties like colors and fonts from HTML"""
    if not BS4_AVAILABLE:
        print("BeautifulSoup not available, can't extract CSS properties")
        return {
            "colors": {
                "background": ["rgb(255, 255, 255)", "rgb(249, 250, 251)"],
                "text": ["rgb(31, 41, 55)", "rgb(55, 65, 81)"],
                "accent": ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
            },
            "fonts": ["Arial, sans-serif", "Helvetica, sans-serif"]
        }
    
    try:
        soup = BeautifulSoup(html, 'html.parser')
        
        # Initialize results with default values to ensure they exist
        colors = {
            "background": ["rgb(255, 255, 255)", "rgb(249, 250, 251)"],
            "text": ["rgb(31, 41, 55)", "rgb(55, 65, 81)"],
            "accent": ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
        }
        fonts = ["Arial, sans-serif", "Helvetica, sans-serif"]
        
        # Color patterns (lookbehind on "color" so "background-color" and
        # "border-color" aren't misclassified as text colors)
        color_patterns = [
            re.compile(r'(?<![-\w])color\s*:\s*([^;]+)'),
            re.compile(r'background-color\s*:\s*([^;]+)'),
            re.compile(r'background\s*:\s*([^;]+)'),
            re.compile(r'border-color\s*:\s*([^;]+)')
        ]
        
        # Font patterns
        font_family_pattern = re.compile(r'font-family\s*:\s*([^;]+)')
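        # e.g. "font-family: 'Open Sans', sans-serif;" captures "'Open Sans', sans-serif"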
        
        # Extract from inline styles
        for element in soup.find_all(lambda tag: tag.has_attr('style')):
            style = element.get('style', '')
            
            # Extract colors
            for pattern in color_patterns:
                matches = pattern.findall(style)
                for match in matches:
                    color = match.strip()
                    if 'background' in pattern.pattern:
                        if color not in colors['background'] and is_valid_color(color):
                            colors['background'].append(color)
                    elif 'border' in pattern.pattern:
                        if color not in colors['accent'] and is_valid_color(color):
                            colors['accent'].append(color)
                    else:  # text color
                        if color not in colors['text'] and is_valid_color(color):
                            colors['text'].append(color)
            
            # Extract fonts
            font_matches = font_family_pattern.findall(style)
            for match in font_matches:
                font = match.strip().strip("'").strip('"')
                if font and font not in fonts:
                    fonts.append(font)
        
        # Extract from style tags
        for style_tag in soup.find_all('style'):
            css_content = style_tag.string
            if css_content:
                # Extract colors
                for pattern in color_patterns:
                    matches = pattern.findall(css_content)
                    for match in matches:
                        color = match.strip()
                        if 'background' in pattern.pattern:
                            if color not in colors['background'] and is_valid_color(color):
                                colors['background'].append(color)
                        elif 'border' in pattern.pattern:
                            if color not in colors['accent'] and is_valid_color(color):
                                colors['accent'].append(color)
                        else:  # text color
                            if color not in colors['text'] and is_valid_color(color):
                                colors['text'].append(color)
                
                # Extract fonts
                font_matches = font_family_pattern.findall(css_content)
                for match in font_matches:
                    font = match.strip().strip("'").strip('"')
                    if font and font not in fonts:
                        fonts.append(font)
        
        # Ensure we have each required key
        for key in ["background", "text", "accent"]:
            if key not in colors:
                colors[key] = []
            
            # Add defaults if empty
            if not colors[key]:
                if key == "background":
                    colors[key] = ["rgb(255, 255, 255)", "rgb(249, 250, 251)"]
                elif key == "text":
                    colors[key] = ["rgb(31, 41, 55)", "rgb(55, 65, 81)"]
                elif key == "accent":
                    colors[key] = ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
        
        return {
            "colors": colors,
            "fonts": fonts
        }
    
    except Exception as e:
        print(f"Error extracting CSS properties: {e}")
        # Return default values if there's an error
        return {
            "colors": {
                "background": ["rgb(255, 255, 255)", "rgb(249, 250, 251)"],
                "text": ["rgb(31, 41, 55)", "rgb(55, 65, 81)"],
                "accent": ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
            },
            "fonts": ["Arial, sans-serif", "Helvetica, sans-serif"]
        }

def is_valid_color(color):
    """Check if a string represents a valid CSS color"""
    # Remove whitespace
    color = color.strip()
    
    # Skip 'inherit', 'transparent', etc.
    if color.lower() in ['inherit', 'transparent', 'initial', 'currentcolor', 'none', 'auto']:
        return False
    
    # Valid formats: #hex, rgb(), rgba(), hsl(), hsla(), named colors
    if color.startswith('#'):
        return len(color) in [4, 5, 7, 9]  # #rgb, #rgba, #rrggbb, #rrggbbaa
    
    if color.startswith(('rgb', 'hsl')):  # covers rgb(), rgba(), hsl(), hsla()
        return '(' in color and ')' in color
    
    # Named colors are purely alphabetic (e.g. "red", "rebeccapurple");
    # this also rejects shorthand values like "url(bg.png) no-repeat"
    return color.isalpha()
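
# What is_valid_color accepts and rejects (illustrative):
#   is_valid_color("#1f2937")                -> True
#   is_valid_color("rgb(37, 99, 235)")       -> True
#   is_valid_color("rebeccapurple")          -> True
#   is_valid_color("inherit")                -> False
#   is_valid_color("url(bg.png) no-repeat")  -> False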

def extract_typography(html):
    """Extract typography information from HTML"""
    if not BS4_AVAILABLE:
        print("BeautifulSoup not available, can't extract typography")
        return {
            "fonts": ["Arial, sans-serif", "Helvetica, sans-serif"],
            "headings": {
                "h1": {"fontSize": "2.25rem", "fontWeight": "700", "color": "rgb(31, 41, 55)"},
                "h2": {"fontSize": "1.875rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h3": {"fontSize": "1.5rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"}
            },
            "body": {
                "paragraph": {"fontSize": "1rem", "fontWeight": "400", "color": "rgb(55, 65, 81)"}
            }
        }
    
    try:
        
        # Initialize result with defaults
        typography = {
            "fonts": [],
            "headings": {
                "h1": {"fontSize": "2.25rem", "fontWeight": "700", "color": "rgb(31, 41, 55)"},
                "h2": {"fontSize": "1.875rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h3": {"fontSize": "1.5rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h4": {"fontSize": "1.25rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h5": {"fontSize": "1.125rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h6": {"fontSize": "1rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"}
            },
            "body": {
                "paragraph": {"fontSize": "1rem", "fontWeight": "400", "color": "rgb(55, 65, 81)"}
            }
        }
        
        # Heading/body styles are static defaults for now; the caller overwrites
        # the fonts list with values from extract_css_properties()
        return typography
    
    except Exception as e:
        print(f"Error extracting typography: {e}")
        return {
            "fonts": ["Arial, sans-serif", "Helvetica, sans-serif"],
            "headings": {
                "h1": {"fontSize": "2.25rem", "fontWeight": "700", "color": "rgb(31, 41, 55)"},
                "h2": {"fontSize": "1.875rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h3": {"fontSize": "1.5rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"}
            },
            "body": {
                "paragraph": {"fontSize": "1rem", "fontWeight": "400", "color": "rgb(55, 65, 81)"}
            }
        }

def create_simulation(url, output_dir="site_rebuilder"):
    """Create a simulated rebuild context without using a browser"""
    
    print(f"Creating simulation for {url}")
    
    os.makedirs(output_dir, exist_ok=True)
    screenshots_dir = os.path.join(output_dir, "screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)
    
    # Add scheme if missing (must happen before parsing, or netloc is empty)
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    
    # Parse domain from URL
    parsed_url = urlparse(url)
    domain = parsed_url.netloc
    if domain.startswith("www."):
        domain = domain[4:]
    
    # Fetch the actual HTML content
    images = []
    text_content = {}
    html_content = ""
    title = f"{domain.title()} Website"  # Default title
    asr = None  # Annotated Structure Reference
    
    try:
        print(f"Fetching content from {url}")
        req = urllib.request.Request(
            url, 
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        )
        with urllib.request.urlopen(req, timeout=15) as response:
            html_content = response.read().decode('utf-8', errors='replace')
            
            # Extract title (DOTALL in case it spans multiple lines)
            title_match = re.search(r'<title>(.*?)</title>', html_content, re.IGNORECASE | re.DOTALL)
            if title_match:
                title = title_match.group(1).strip()
            
            # Extract images and text content
            images, text_content = extract_images_and_text(url, html_content, output_dir)
            
            # Extract CSS properties including colors and fonts
            css_properties = extract_css_properties(html_content, url)
            
            # Create color data from extracted colors
            colors = css_properties["colors"]
            
            # Create typography data
            typography = extract_typography(html_content)
            typography["fonts"] = css_properties["fonts"]
            
    except Exception as e:
        print(f"Error fetching content: {e}")
        print("Continuing with simulation using defaults")
        
        # Create simulated color data as fallback
        colors = {
            "background": ["rgb(255, 255, 255)", "rgb(249, 250, 251)", "rgb(243, 244, 246)"],
            "text": ["rgb(31, 41, 55)", "rgb(55, 65, 81)", "rgb(107, 114, 128)"],
            "accent": ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
        }
        
        # Create simulated typography data as fallback
        typography = {
            "fonts": ["Arial, sans-serif", "Helvetica, sans-serif"],
            "headings": {
                "h1": {"fontSize": "2.25rem", "fontWeight": "700", "color": "rgb(31, 41, 55)"},
                "h2": {"fontSize": "1.875rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"},
                "h3": {"fontSize": "1.5rem", "fontWeight": "600", "color": "rgb(31, 41, 55)"}
            },
            "body": {
                "paragraph": {"fontSize": "1rem", "fontWeight": "400", "color": "rgb(55, 65, 81)"}
            }
        }
    
    # Create a timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Create a fake screenshot filename
    screenshot_filename = f"{domain.replace('.', '_')}_{timestamp}_screenshot.png"
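    # e.g. "example_com_20240315_142530_screenshot.png"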
    
    # Ensure the colors structure is complete with all required fields
    for key in ["background", "text", "accent"]:
        if key not in colors:
            if key == "background":
                colors[key] = ["rgb(255, 255, 255)", "rgb(249, 250, 251)"]
            elif key == "text":
                colors[key] = ["rgb(31, 41, 55)", "rgb(55, 65, 81)"]
            elif key == "accent":
                colors[key] = ["rgb(37, 99, 235)", "rgb(59, 130, 246)"]
    
    # Try to capture a real screenshot first
    screenshot_path = os.path.join(screenshots_dir, screenshot_filename)
    screenshot_captured = False
    
    print("Attempting to capture real website screenshot...")
    
    # Try real screenshot capture
    if capture_real_screenshot(url, screenshot_path, 1440, 900):
        screenshot_captured = True
        print("Real screenshot captured successfully!")
    elif capture_screenshot_selenium(url, screenshot_path, 1440, 900):
        screenshot_captured = True
        print("Screenshot captured using Selenium!")
    else:
        print("Failed to capture real screenshot, creating placeholder...")
        
        # Create a placeholder image as fallback (using PIL if available)
        if PIL_AVAILABLE:
            try:
                # Create a blank image
                img = Image.new('RGB', (1440, 900), color=(248, 250, 252))
                draw = ImageDraw.Draw(img)
                
                # Add some text
                draw.text((50, 50), f"Screenshot placeholder for: {url}", fill=(30, 41, 59))
                draw.text((50, 100), f"Generated: {timestamp}", fill=(30, 41, 59))
                draw.text((50, 150), "Real screenshot capture failed - using placeholder", fill=(220, 38, 127))
                draw.text((50, 200), "Note: Color detection will work on this placeholder", fill=(220, 38, 127))
                
                # Draw some rectangles to simulate sections
                section_layouts = [
                    ("header", 0, 240, 1440, 80),
                    ("hero", 0, 320, 1440, 300),
                    ("features", 0, 620, 1440, 200),
                    ("footer", 0, 820, 1440, 80)
                ]
                
                section_colors = {
                    "header": (37, 99, 235),  # blue-600
                    "hero": (59, 130, 246),   # blue-500
                    "features": (96, 165, 250), # blue-400
                    "footer": (37, 99, 235)   # blue-600
                }
                
                for section_type, x, y, w, h in section_layouts:
                    draw.rectangle([(x, y), (x + w, y + h)], outline=(0, 0, 0), fill=section_colors.get(section_type, (200, 200, 200)))
                    draw.text((x + 10, y + 10), section_type.upper(), fill=(255, 255, 255))
                
                # Save the image
                img.save(screenshot_path)
                print(f"Created placeholder screenshot at {screenshot_path}")
                screenshot_captured = True
            except Exception as e:
                print(f"Could not create placeholder image: {e}")
                # Just create an empty file as fallback
                with open(screenshot_path, 'w') as f:
                    f.write("Screenshot placeholder")
        else:
            # Just create an empty file
            with open(screenshot_path, 'w') as f:
                f.write("Screenshot placeholder")
    
    # Use OpenAI Vision to analyze the screenshot and generate the ASR
    if screenshot_captured:
        try:
            print("🔍 Analyzing screenshot with OpenAI Vision...")
            openai_analyzer = OpenAILayoutAnalyzer()
            asr = openai_analyzer.analyze_screenshot(screenshot_path, url)
            
            if asr:
                print(f"✅ ASR Generated: {len(asr.get('template_structure', []))} sections detected")
                if asr.get('template_structure'):
                    for i, structure in enumerate(asr['template_structure']):
                        print(f"  {i+1}. {structure}")
            else:
                print("❌ ASR generation failed")
        except Exception as e:
            print(f"⚠️  OpenAI Vision analysis failed: {e}")
            asr = None
    else:
        print("⚠️  No screenshot available for OpenAI Vision analysis")
        asr = None
    
    # Legacy sections creation (fallback if ASR fails)
    sections = []
    
    if not asr:
        print("ASR generation failed, using legacy section detection...")
        # Use real sections from content if available, otherwise use simulated sections
        if text_content:
            # Check if we found a header
            if 'header' in text_content:
                sections.append({
                    "type": "header",
                    "text": text_content['header'][:100] if len(text_content['header']) > 100 else text_content['header'],
                    "pattern": "logo-left-nav-right",
                    "logo_position": "left"
                })
            else:
                # Add default header
                sections.append({
                    "type": "header",
                    "text": "Main navigation",
                    "pattern": "logo-left-nav-right",
                    "logo_position": "left"
                })
            
            # Check if we found navigation
            if 'navigation' in text_content:
                nav_text = " ".join([item['text'] for item in text_content['navigation']])
                sections.append({
                    "type": "navigation",
                    "text": nav_text[:100] if len(nav_text) > 100 else nav_text,
                    "pattern": "horizontal",
                    "position_in_header": "right"
                })
            else:
                # Add default navigation
                sections.append({
                    "type": "navigation",
                    "text": "HOME ABOUT SERVICES CONTACT",
                    "pattern": "horizontal",
                    "position_in_header": "right"
                })
            
            # Add a hero section (usually h1 or first heading)
            hero_text = ""
            if 'headings' in text_content and 'h1' in text_content['headings'] and text_content['headings']['h1']:
                hero_text = text_content['headings']['h1'][0]
            elif 'headings' in text_content:
                for level in range(1, 7):
                    key = f'h{level}'
                    if key in text_content['headings'] and text_content['headings'][key]:
                        hero_text = text_content['headings'][key][0]
                        break
            
            sections.append({
                "type": "hero",
                "text": hero_text if hero_text else "Welcome to our website!",
                "has_overlay_header": True
            })
            
            # Add content sections
            if 'paragraphs' in text_content and text_content['paragraphs']:
                # Group paragraphs into features and testimonials
                mid_point = len(text_content['paragraphs']) // 2
                features_text = ' '.join(text_content['paragraphs'][:mid_point])
                testimonials_text = ' '.join(text_content['paragraphs'][mid_point:])
                
                sections.append({
                    "type": "features",
                    "text": features_text[:100] + "..." if len(features_text) > 100 else features_text
                })
                
                sections.append({
                    "type": "testimonials",
                    "text": testimonials_text[:100] + "..." if len(testimonials_text) > 100 else testimonials_text
                })
            else:
                # Add default content sections
                sections.append({
                    "type": "features",
                    "text": "Our features and services"
                })
                
                sections.append({
                    "type": "testimonials",
                    "text": "What our customers say"
                })
            
            # Check if we found a footer
            if 'footer' in text_content:
                sections.append({
                    "type": "footer",
                    "text": text_content['footer'][:100] if len(text_content['footer']) > 100 else text_content['footer']
                })
            else:
                # Add default footer
                sections.append({
                    "type": "footer",
                    "text": "Contact information and links"
                })
        else:
            # Use simulated sections if no content was found
            sections = [
                {
                    "type": "header",
                    "text": "Main navigation",
                    "pattern": "logo-left-nav-right",
                    "logo_position": "left"
                },
                {
                    "type": "navigation",
                    "text": "HOME ABOUT SERVICES CONTACT",
                    "pattern": "horizontal",
                    "position_in_header": "right" 
                },
                {
                    "type": "hero",
                    "text": "Welcome to our website!",
                    "has_overlay_header": True
                },
                {
                    "type": "features",
                    "text": "Our features and services"
                },
                {
                    "type": "testimonials",
                    "text": "What our customers say"
                },
                {
                    "type": "footer",
                    "text": "Contact information and links"
                }
            ]
    
    # Create the screenshot data
    screenshot_data = {
        "url": url,
        "title": title,
        "timestamp": timestamp,
        "filename": screenshot_filename,
        "path": screenshot_path,
        "width": 1440,
        "height": 900,
        "viewport_width": 1440,
        "viewport_height": 900,
        "is_real_screenshot": screenshot_captured,
        "screenshot_type": "real" if screenshot_captured else "placeholder"
    }
    
    # Create the rebuilder and generate the context
    rebuilder = SiteRebuilder(output_dir)
    rebuilder.create_rebuild_context(screenshot_data, sections, colors, typography, images, text_content, asr)
    
    # Save raw HTML for reference
    if html_content:
        with open(os.path.join(output_dir, "raw_html.txt"), 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Raw HTML saved to {os.path.join(output_dir, 'raw_html.txt')}")
    
    # Save ASR separately for easy access
    if asr:
        with open(os.path.join(output_dir, "asr.json"), 'w', encoding='utf-8') as f:
            json.dump(asr, f, indent=2)
        print(f"ASR saved to {os.path.join(output_dir, 'asr.json')}")
        
        # Create human-readable ASR summary
        with open(os.path.join(output_dir, "asr_summary.txt"), 'w', encoding='utf-8') as f:
            f.write(f"Annotated Structure Reference (ASR) for {url}\n")
            f.write("=" * 60 + "\n\n")
            
            f.write("Template Structure:\n")
            for structure in asr.get('template_structure', []):
                f.write(f"- {structure}\n")
            
            f.write(f"\nLayout Analysis:\n")
            layout_analysis = asr.get('layout_analysis', {})
            f.write(f"- Layout Type: {layout_analysis.get('layout_type', 'unknown')}\n")
            f.write(f"- Total Sections: {layout_analysis.get('total_sections', 0)}\n")
            f.write(f"- Has Overlay Header: {layout_analysis.get('has_overlay_header', False)}\n")
            f.write(f"- Navigation Style: {layout_analysis.get('navigation_style', 'unknown')}\n")
        
        print(f"ASR summary saved to {os.path.join(output_dir, 'asr_summary.txt')}")
    
    print("\nSimulation complete!")
    print(f"Check {os.path.join(output_dir, 'rebuild_context.json')} for the generated rebuild context.")
    if asr:
        print(f"Check {os.path.join(output_dir, 'asr_summary.txt')} for the Annotated Structure Reference.")

def capture_screenshot_selenium(url, output_path, width=1440, height=900):
    """
    Capture a full-page screenshot using Selenium WebDriver
    """
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.chrome.service import Service
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support.ui import WebDriverWait
        from selenium.webdriver.support import expected_conditions as EC
        from webdriver_manager.chrome import ChromeDriverManager
        import time
        
        print("Setting up Chrome WebDriver for screenshot capture...")
        
        # Setup Chrome options
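        # Headless mode plus the sandbox/GPU flags make Chrome usable in
        # containers and CI environments; the certificate/SSL flags allow
        # capturing sites with misconfigured TLS, and the desktop user agent
        # avoids trivial headless-browser detection.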
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--disable-web-security')
        chrome_options.add_argument('--allow-running-insecure-content')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--ignore-ssl-errors')
        chrome_options.add_argument('--ignore-certificate-errors-spki-list')
        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
        chrome_options.add_argument(f'--window-size={width},{height}')
        
        # Try to create driver using webdriver-manager
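        # webdriver-manager downloads a chromedriver binary that matches the
        # installed Chrome version, so no manual driver setup is required.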
        try:
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=chrome_options)
        except Exception as e:
            print(f"Failed to create Chrome driver with webdriver-manager: {e}")
            # Fallback to system chromedriver
            try:
                driver = webdriver.Chrome(options=chrome_options)
            except Exception as e2:
                print(f"Failed to create Chrome driver from PATH: {e2}")
                return False
        
        try:
            print(f"Navigating to {url}...")
            # Navigate to URL
            driver.get(url)
            
            # Wait up to 15 seconds for the <body> element to be present
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            
            # Wait a bit more for dynamic content
            time.sleep(3)
            
            # Get full page dimensions
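            # document.body.parentNode is the <html> element; its scrollHeight
            # reflects the full rendered page height, including content below
            # the initial viewport.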
            total_width = driver.execute_script("return document.body.offsetWidth")
            total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
            
            print(f"Page dimensions: {total_width}x{total_height}")
            
            # Resize the window to the full document size; in headless Chrome
            # the screenshot covers the whole window, so this captures the
            # entire page rather than just the initial viewport
            driver.set_window_size(total_width, total_height)
            
            # Wait a moment for resize
            time.sleep(1)
            
            # Take screenshot
            success = driver.save_screenshot(output_path)
            
            if success:
                print(f"Full-page screenshot saved to: {output_path}")
                return True
            else:
                print("Failed to save screenshot")
                return False
            
        finally:
            driver.quit()
            
    except ImportError:
        print("Selenium not available. Install with: pip install selenium webdriver-manager")
        return False
    except Exception as e:
        print(f"Error with Selenium screenshot: {e}")
        return False


def capture_real_screenshot(url, output_path, width=1440, height=900):
    """
    Capture a real screenshot of a website using local tools
    """
    print("Attempting to capture screenshot using Selenium WebDriver...")
    
    # Try Selenium first (most reliable)
    if capture_screenshot_selenium(url, output_path, width, height):
        return True
    
    print("Selenium failed, trying external service as fallback...")
    
    try:
        # Fallback to external service
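        # image.thum.io renders the page server-side and returns an image;
        # the /width/ and /crop/ path segments request the output dimensions.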
        screenshot_url = f"https://image.thum.io/get/width/{width}/crop/{height}/{url}"
        
        print(f"Attempting to capture screenshot from: {screenshot_url}")
        
        # Create request with proper headers
        req = urllib.request.Request(
            screenshot_url,
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
        )
        
        # Download the screenshot
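        # Note: urlopen() raises HTTPError for non-2xx responses, so the
        # status check below is defensive rather than the primary error path.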
        with urllib.request.urlopen(req, timeout=30) as response:
            if response.getcode() == 200:
                with open(output_path, 'wb') as f:
                    f.write(response.read())
                print(f"External service screenshot saved to: {output_path}")
                return True
            else:
                print(f"External service returned status: {response.getcode()}")
                return False
                
    except Exception as e:
        print(f"Error with external screenshot service: {e}")
        return False
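

# Minimal usage sketch (illustrative only; the URL and output path are
# placeholder values, not part of the pipeline above):
#
#     if capture_real_screenshot("https://example.com", "screenshot.png"):
#         print("Screenshot captured")
#     else:
#         print("Both Selenium and the external service failed")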