#!/usr/bin/env python3
"""
OpenAI Vision-Based Layout Analyzer
-----------------------------------
Uses OpenAI's GPT-4 Vision API to analyze website screenshots and generate
accurate Annotated Structure References (ASR).
"""

import base64
import json
import logging
import mimetypes
import os

import openai
from dotenv import load_dotenv

# Load environment variables (e.g. from a .env file, if present) at import
# time, before OpenAILayoutAnalyzer reads OPENAI_API_KEY from the environment
load_dotenv()

# Module-level logger named after this module; used for error reporting
logger = logging.getLogger(__name__)

class OpenAILayoutAnalyzer:
    """OpenAI Vision-based layout structure detection and analysis"""
    
    def __init__(self):
        self.api_key = os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables. Please set it in your .env file.")
        
        # Initialize OpenAI client
        self.client = openai.OpenAI(api_key=self.api_key)
        
        # ASR generation prompt template
        self.asr_prompt = """
You MUST analyze this website screenshot and provide a response in the EXACT format shown below. Do NOT deviate from this format.

REQUIRED OUTPUT FORMAT:

Annotated Structure Reference

Template Structure:

- Header
  Type: [Positioning details like "Transparent, Absolute Positioning" or "Fixed, Solid Background"]
  Overlay Behavior: [How it interacts with other sections, e.g., "Overlaps the hero section"]
  Contents:
    [Specific elements you can see, e.g., "Company logo (top-left)"]
    [Navigation items, e.g., "Navigation menu (Home, About, Services, Contact)"]
    [Other visible elements, e.g., "Contact information (top-right)"]

- Hero Section
  Type: [Height specification like "Full Height" or "Half Height"]
  Background: [What you see, e.g., "Dark background image", "Video background", "Solid color"]
  Content:
    [Specific text you can read, e.g., "Main headline: 'Welcome to our company'"]
    [Subtext and descriptions]
    [Call-to-action buttons and their text]
    [Any overlay elements]

- Navigation
  Location: [Where it appears, e.g., "Inside Header", "Separate Section", "Sidebar"]
  Style: [How it looks, e.g., "Horizontally aligned, white text over dark background"]
  Sticky/Behavior: [Any special behavior you can infer]

Main Content

- Section 1: [Section Name/Purpose]
  Title: [Actual title text you can see]
  Layout: [How content is arranged, e.g., "Two-column layout", "Grid of cards", "Single column"]
  Content:
    [Specific content elements you observe]
    [Images, text blocks, buttons]
    [Any special styling or positioning]

- Section 2: [Section Name/Purpose]
  Title: [Actual title text you can see]
  Format: [Layout description, e.g., "Grid of 4 cards", "Image gallery", "Feature list"]
  Elements:
    [Specific elements you can identify]
    [Card contents, image descriptions]
    [Button text and placement]

[Continue for all major content sections you can identify]

Footer
  Content Blocks:
    [Specific footer elements you observe]
    [Contact information, links, social media]
    [Newsletter signup, company info]
    [Any multi-column layout details]

Assets
  Logo: [Where logos appear and their positioning]
  Image Assets: [Types and locations of images you see]
  Videos: [Any video content you can identify]
  Buttons/CTAs: [Call-to-action elements throughout the page]

CRITICAL INSTRUCTIONS:
1. You MUST use the exact format above with "- Header", "- Hero Section", etc.
2. You MUST include the nested details like "Type:", "Overlay Behavior:", "Contents:"
3. You MUST start with "Annotated Structure Reference" as the first line
4. You MUST include "Template Structure:" as a section header
5. You MUST include "Main Content" as a section divider
6. DO NOT use tree symbols like ├─ or └─
7. DO NOT just list items without the detailed structure
8. BE SPECIFIC about positioning, styling, and visual characteristics
9. ONLY describe what you can actually SEE in the screenshot
10. Use proper indentation with spaces, not tabs

EXAMPLE OF WHAT NOT TO DO:
├─ Section 1: Example Information
├─ Text block with description
└─ Footer

EXAMPLE OF WHAT TO DO:
- Header
  Type: Transparent, Absolute Positioning
  Overlay Behavior: Overlaps the hero section
  Contents:
    Company logo (top-left)
    Navigation menu (Home, About, Contact)
"""
    
    def analyze_screenshot(self, screenshot_path, url):
        """
        Analyze a screenshot using OpenAI Vision API and generate ASR.

        Args:
            screenshot_path: Path of the screenshot image on disk.
            url: URL the screenshot belongs to; forwarded to the parser and
                stored in the result under 'analyzed_url'.

        Returns:
            The dict produced by ``_parse_asr_response``, or ``None`` when the
            analysis fails for any reason (missing file, API error, empty
            reply). Failures are logged and printed, never raised.
        """
        try:
            print("Analyzing screenshot with OpenAI Vision API...")

            # Check if screenshot exists
            if not os.path.exists(screenshot_path):
                raise FileNotFoundError(f"Screenshot not found: {screenshot_path}")

            # Encode image to base64
            encoded_image = self._encode_image(screenshot_path)

            # Label the data URL with the file's actual image type instead of
            # always claiming PNG; fall back to PNG when the type is unknown
            mime_type, _ = mimetypes.guess_type(str(screenshot_path))
            if not mime_type or not mime_type.startswith('image/'):
                mime_type = 'image/png'

            # Prepare the API request
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": self.asr_prompt
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{encoded_image}",
                                    "detail": "high"
                                }
                            }
                        ]
                    }
                ],
                max_tokens=1000,
                temperature=0.1  # Low temperature for consistent analysis
            )

            # Extract the ASR from the response
            choice = response.choices[0]
            asr_text = choice.message.content

            # The content can be missing (e.g. a refusal); fail with a clear
            # message rather than an AttributeError from inside the parser
            if not asr_text:
                raise ValueError("OpenAI returned an empty response")

            # A reply cut off at max_tokens still parses, but later sections
            # (Footer, Assets) may be missing, so make that visible
            if getattr(choice, 'finish_reason', None) == 'length':
                logger.warning("OpenAI response was truncated at max_tokens; ASR may be incomplete")
                print("⚠️  OpenAI response was truncated; ASR may be incomplete")

            # Parse the ASR text into structured data
            asr_data = self._parse_asr_response(asr_text, url)

            print(f"✅ ASR generated successfully with {len(asr_data.get('template_structure', []))} sections")

            return asr_data

        except Exception as e:
            # Deliberate best-effort boundary: report the failure (with
            # traceback in the log) and signal it to the caller via None
            logger.exception("Error analyzing screenshot with OpenAI: %s", e)
            print(f"❌ OpenAI Vision analysis failed: {e}")
            return None
    
    def _encode_image(self, image_path):
        """Encode image to base64"""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    
    def _parse_asr_response(self, asr_text, url):
        """
        Convert the model's ASR text into the structured result dict.

        The text is scanned line by line. "Template Structure:" and
        "Main Content" switch on the regions in which "- " bullets open a new
        section; standalone "Footer" / "Assets" lines open their own section.
        Within a section, "Key: value" lines (indented less than four spaces)
        become details and other indented lines become content items.

        Returns a dict with the keys 'template_structure',
        'layout_analysis', 'detailed_sections', 'raw_response',
        'analyzed_url' and 'analysis_method'.
        """
        section_labels = []
        section_records = []

        inside_template = False
        inside_main = False
        active_name = None
        active_data = {}

        def flush():
            # Record the section being built, if there is one
            if active_name:
                section_labels.append(self._format_section_description(active_name, active_data))
                section_records.append(self._create_detailed_section(active_name, active_data))

        for raw in asr_text.split('\n'):
            text = raw.strip()

            # Start of template structure
            if 'Template Structure:' in text:
                inside_template = True
                continue

            # "Main Content" divider: close the open section, switch regions
            if text.startswith('Main Content'):
                flush()
                inside_template = False
                inside_main = True
                active_name, active_data = None, {}
                continue

            # Standalone "Footer" / "Assets" headings (not "Footer:" details)
            standalone = None
            for label in ('Footer', 'Assets'):
                if text.startswith(label) and not text.startswith(label + ':'):
                    standalone = label
                    break
            if standalone is not None:
                flush()
                inside_main = False
                active_name = standalone
                active_data = {
                    'name': standalone,
                    'type': standalone.lower(),
                    'details': {}
                }
                continue

            # "- Something" bullets open a section only inside the template
            # or main-content regions; elsewhere they are ignored entirely
            if text.startswith('- '):
                if inside_template or inside_main:
                    flush()
                    active_name = text[2:].strip()
                    active_data = {
                        'name': active_name,
                        'type': self._determine_section_type(active_name),
                        'details': {}
                    }
                continue

            # Everything below only applies while a section is open
            if not active_name:
                continue

            if ':' in text and not raw.startswith('    '):
                # "Key: value" detail line; lines with an empty key or value
                # (such as "Contents:") are dropped
                key, value = (part.strip() for part in text.split(':', 1))
                if key and value:
                    active_data['details'][key.lower().replace(' ', '_')] = value
            elif text and raw.startswith('  '):
                # Indented free-text line belonging to the open section
                active_data.setdefault('content_items', []).append(text)

        # Save the last section
        flush()

        # Nothing matched the expected format: fall back to a bullet scan
        if not section_labels:
            print("⚠️  No structured template found, parsing entire response...")
            section_labels = self._extract_fallback_structure(asr_text)

        return {
            'template_structure': section_labels,
            'layout_analysis': self._analyze_layout_characteristics(asr_text, section_labels),
            'detailed_sections': section_records,
            'raw_response': asr_text,
            'analyzed_url': url,
            'analysis_method': 'openai_vision'
        }
    
    def _format_section_description(self, section_name, section_data):
        """
        Build the one-line description used in 'template_structure'.

        The result is the section name, followed (when any apply) by a
        comma-separated list of characteristics in parentheses: the raw
        'type' and 'overlay_behavior' details, then the normalized tags
        "Background Image", "Full Height" and "Inside Header".

        A normalized tag is skipped when an earlier characteristic already
        contains it verbatim, so a type of "Full Height" yields
        "Hero (Full Height)" rather than "Hero (Full Height, Full Height)".

        Args:
            section_name: Display name of the section.
            section_data: Section dict; only its 'details' mapping is read.

        Returns:
            The formatted description string.
        """
        details = section_data.get('details', {})
        characteristics = []

        def add_tag(tag):
            # Avoid repeating a tag that a raw detail value already spells out
            if not any(tag in existing for existing in characteristics):
                characteristics.append(tag)

        # Raw detail values come first, exactly as the model wrote them
        if 'type' in details:
            characteristics.append(details['type'])

        if 'overlay_behavior' in details:
            characteristics.append(details['overlay_behavior'])

        # Normalized tags derived from the details
        if 'image' in details.get('background', '').lower():
            add_tag("Background Image")

        if 'full height' in details.get('type', '').lower():
            add_tag("Full Height")

        if 'inside header' in details.get('location', '').lower():
            add_tag("Inside Header")

        if characteristics:
            return f"{section_name} ({', '.join(characteristics)})"

        return section_name
    
    def _create_detailed_section(self, section_name, section_data):
        """
        Assemble the per-section record stored in 'detailed_sections'.

        Copies name, type, details and content items from section_data
        (with defaults for missing keys) and adds the positioning and styling
        dicts derived from the details.
        """
        details = section_data.get('details', {})
        record = {'name': section_name}
        record['type'] = section_data.get('type', 'section')
        record['details'] = details
        record['content_items'] = section_data.get('content_items', [])
        record['positioning'] = self._extract_positioning_from_details(details)
        record['styling'] = self._extract_styling_from_details(details)
        record['detection_method'] = 'openai_vision_detailed'
        return record
    
    def _determine_section_type(self, section_name):
        """
        Classify a section by keywords found in its name.

        Matching is case-insensitive substring search; the first category
        (in the order listed below) with a matching keyword wins. Names that
        match nothing are classified as 'section'.
        """
        lowered = section_name.lower()

        # Ordered: earlier rows take precedence over later ones
        keyword_table = (
            ('header', ('header', 'top', 'navbar')),
            ('hero', ('hero', 'banner', 'jumbotron')),
            ('navigation', ('nav', 'navigation', 'menu')),
            ('footer', ('footer', 'bottom')),
            ('main', ('main', 'content')),
            ('sidebar', ('sidebar', 'aside')),
        )

        for kind, keywords in keyword_table:
            for keyword in keywords:
                if keyword in lowered:
                    return kind

        return 'section'
    
    def _extract_positioning_from_details(self, details):
        """
        Derive positioning flags from a section's 'type' and
        'overlay_behavior' details.

        Keyword checks are case-insensitive substring matches on the 'type'
        text: "absolute", "fixed", "sticky" and "transparent". "overlaps" in
        the 'overlay_behavior' text marks the section as an overlay.

        'position' precedence is fixed > absolute > sticky > static, so
        results for inputs without "sticky" are the same as before the
        sticky flag was wired up.

        Args:
            details: Mapping of normalized detail keys to text values;
                missing or None values are treated as empty text.

        Returns:
            Dict with 'position' and the boolean flags 'is_fixed',
            'is_absolute', 'is_sticky', 'is_overlay', 'is_transparent'.
        """
        positioning = {
            'position': 'static',
            'is_fixed': False,
            'is_absolute': False,
            'is_sticky': False,
            'is_overlay': False,
            'is_transparent': False
        }

        type_info = (details.get('type') or '').lower()
        overlay_info = (details.get('overlay_behavior') or '').lower()

        if 'absolute' in type_info:
            positioning['position'] = 'absolute'
            positioning['is_absolute'] = True
            positioning['is_overlay'] = True

        # Checked after "absolute" so that fixed wins when both are present
        if 'fixed' in type_info:
            positioning['position'] = 'fixed'
            positioning['is_fixed'] = True
            positioning['is_overlay'] = True

        # Previously 'is_sticky' could never become True
        if 'sticky' in type_info:
            positioning['is_sticky'] = True
            # Only claim the position when nothing stronger already did
            if positioning['position'] == 'static':
                positioning['position'] = 'sticky'

        if 'transparent' in type_info:
            positioning['is_transparent'] = True

        if 'overlaps' in overlay_info:
            positioning['is_overlay'] = True

        return positioning
    
    def _extract_styling_from_details(self, details):
        """
        Derive styling flags from a section's details.

        Reads the 'type', 'background', 'layout' and 'format' texts
        (case-insensitively). 'layout_type' is 'columns' when the layout text
        mentions "column", otherwise 'grid' when the layout or format text
        mentions "grid", otherwise 'default'. 'is_full_width' is always
        False here.
        """
        def text_of(key):
            return details.get(key, '').lower()

        kind = text_of('type')
        background = text_of('background')
        layout = text_of('layout')
        fmt = text_of('format')

        # "column" in the layout text overrides any grid match
        if 'column' in layout:
            layout_type = 'columns'
        elif 'grid' in layout or 'grid' in fmt:
            layout_type = 'grid'
        else:
            layout_type = 'default'

        return {
            'is_full_height': 'full height' in kind,
            'is_full_width': False,
            'has_background_image': 'image' in background,
            'has_background_video': 'video' in background,
            'layout_type': layout_type
        }
    
    def _extract_fallback_structure(self, asr_text):
        """
        Last-resort structure extraction from an unstructured reply.

        Collects every line that (after trimming) starts with "-" and is
        longer than three characters, with the leading dash removed. When no
        such line exists, a generic three-part layout is returned.
        """
        trimmed_rows = (row.strip() for row in asr_text.split('\n'))
        bullets = [
            row[1:].strip()
            for row in trimmed_rows
            if row.startswith('-') and len(row) > 3
        ]
        return bullets or ["Header", "Main Content", "Footer"]
    
    def _analyze_layout_characteristics(self, asr_text, template_structure):
        """
        Summarize page-level layout traits from the raw reply and the
        section descriptions.

        All checks are case-insensitive substring matches, either against
        the whole reply text or against the individual section descriptions.
        Returns a dict with 'total_sections', 'has_overlay_header',
        'has_hero_section', 'has_sidebar', 'navigation_style' and
        'layout_type'.
        """
        reply = asr_text.lower()
        lowered_sections = [entry.lower() for entry in template_structure]

        def any_section_has(token):
            return any(token in entry for entry in lowered_sections)

        section_count = len(template_structure)
        overlay_header = 'overlaps' in reply and 'header' in reply
        hero = any_section_has('hero')
        sidebar = any_section_has('sidebar')

        # Navigation style: reply-wide phrases win over section names
        if 'inside header' in reply:
            nav_style = 'header_integrated'
        elif 'sidebar navigation' in reply:
            nav_style = 'sidebar'
        elif any_section_has('nav'):
            nav_style = 'separate'
        else:
            nav_style = 'unknown'

        # Overall layout type, most specific rule first
        if hero and overlay_header:
            layout_type = 'landing_page'
        elif sidebar:
            layout_type = 'blog_layout'
        elif section_count > 5:
            layout_type = 'multi_section'
        else:
            layout_type = 'simple_layout'

        return {
            'total_sections': section_count,
            'has_overlay_header': overlay_header,
            'has_hero_section': hero,
            'has_sidebar': sidebar,
            'navigation_style': nav_style,
            'layout_type': layout_type
        }