import os
import json
import pandas as pd
import time
import random
from django.conf import settings
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
from shaoApp.models import DevelopmentApplication
import openai
from shaoApp.prompts import EXTRACT_STRUCTURED_DATA_PROMPT, TITLE_SEARCH_EXTRACTION_PROMPT, APPLICATION_FORM_EXTRACTION_PROMPT, APPLICATION_FORM_FALLBACK_PROMPT
import re
import pdfplumber
from openai import RateLimitError, APIError, APIConnectionError, APITimeoutError


# Module-level OpenAI client, configured once from Django settings.
client = openai.OpenAI(api_key=settings.OPENAI_API_KEY)


def parse_date(date_str):
    """Convert *date_str* to a ``datetime.date`` via pandas parsing.

    Returns ``None`` when the value cannot be interpreted as a date.
    """
    try:
        result = pd.to_datetime(date_str).date()
    except Exception:
        result = None
    return result


def safe_get(val):
    """Normalize a raw cell value to a stripped string.

    Returns ``None`` for missing values (``None`` / NaN) and for strings
    that are empty or whitespace-only; otherwise returns ``str(val).strip()``.

    Note: the previous version checked ``val == ''`` *before* stripping, so
    whitespace-only input leaked through as ``""`` — now normalized to None,
    matching the documented contract (both are falsy for callers).
    """
    # Check None first so pd.isna never sees it (and to short-circuit cheaply).
    if val is None or pd.isna(val):
        return None
    text = str(val).strip()
    return text if text else None


def validate_structured_data(data: dict) -> dict:
    """Coerce an OpenAI JSON payload into the expected flat schema.

    Unknown keys are dropped; ``number_of_*`` fields are coerced to int
    (0 on failure) and text fields to stripped strings. A blank
    ``land_use`` is reconstructed from a ``land_uses`` list of dicts
    when one is present.
    """
    if not isinstance(data, dict):
        return {}

    defaults = {
        "development_type": "",
        "land_use": "",
        "number_of_dwellings": 0,
        "number_of_storeys": 0,
        "number_of_places": 0,
        "number_of_units": 0,
        "number_of_lots": 0,
    }

    cleaned = {}

    for key, fallback in defaults.items():
        raw = data.get(key, fallback)

        # Fall back to flattening a "land_uses" list when land_use is blank.
        if key == "land_use" and not raw:
            uses = data.get("land_uses")
            if isinstance(uses, list) and uses:
                names = [
                    str(entry.get("land_use", "")).strip()
                    for entry in uses
                    if isinstance(entry, dict) and entry.get("land_use")
                ]
                raw = ", ".join(names) if names else fallback

        if key.startswith("number_of_"):
            # Route through float() so values like "3.0" still coerce to 3.
            try:
                cleaned[key] = int(float(str(raw))) if raw not in (None, "") else 0
            except (ValueError, TypeError):
                cleaned[key] = 0
        else:
            cleaned[key] = str(raw).strip() if raw else ""

    return cleaned


def make_openai_request_with_retry(prompt: str, max_retries: int = 3, base_delay: float = 1.0):
    """Call the chat-completions API with exponential backoff and retry.

    Rate-limit and transient API/connection/timeout errors are retried up
    to ``max_retries`` times with jittered exponential backoff; any other
    exception fails immediately.

    Args:
        prompt: User message sent as a single-turn conversation.
        max_retries: Total number of attempts before giving up.
        base_delay: Initial backoff delay in seconds (doubled per attempt).

    Returns:
        tuple: ``(content, None)`` on success, ``(None, error_message)`` on failure.
    """
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-2024-05-13",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2,
                max_tokens=1000
            )

            content = response.choices[0].message.content
            # The API may return a null message body; report that as an
            # error instead of crashing on .strip() (previously the
            # AttributeError was swallowed by the generic handler below).
            if content is None:
                return None, "Empty message content in OpenAI response"
            return content.strip(), None

        except RateLimitError as e:
            if attempt < max_retries - 1:
                # Exponential backoff with jitter to spread out retries
                delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                print(f"Rate limit hit, retrying in {delay:.2f} seconds... (attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
                continue
            else:
                return None, f"Rate limit exceeded after {max_retries} attempts: {str(e)}"

        except (APIError, APIConnectionError, APITimeoutError) as e:
            if attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                print(f"API error, retrying in {delay:.2f} seconds... (attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
                continue
            else:
                return None, f"API error after {max_retries} attempts: {str(e)}"

        except Exception as e:
            return None, f"Unexpected error: {str(e)}"

    # Defensive: only reachable if max_retries <= 0.
    return None, "Max retries exceeded"


def extract_structured_data(description: str) -> dict:
    """Extract structured development data from a free-text description.

    Sends the (truncated) description to OpenAI and returns the validated
    flat dict produced by ``validate_structured_data``; returns ``{}`` on
    any failure (empty input, API error, or unparseable response).
    """
    # Validate input
    if not description or not description.strip():
        return {}

    # Truncate description if too long (OpenAI has token limits)
    max_length = 4000  # Conservative limit
    if len(description) > max_length:
        description = description[:max_length] + "..."

    prompt = EXTRACT_STRUCTURED_DATA_PROMPT.format(description=description)

    # Make API request with retry logic
    content, error = make_openai_request_with_retry(prompt)

    if error:
        print(f"OpenAI request failed: {error}")
        return {}

    if not content:
        print("Empty response from OpenAI")
        return {}

    try:
        # Strip a markdown code fence (```json ... ```) if present.
        # NOTE: the previous str.rstrip("```") failed to remove the closing
        # fence whenever it was followed by a newline (rstrip stops at the
        # first non-backtick char), breaking json.loads — use regexes.
        if content.startswith("```"):
            content = re.sub(r"^```[a-z]*\s*", "", content)
            content = re.sub(r"\s*```\s*$", "", content)

        # Parse JSON and normalize to the expected schema
        data = json.loads(content)
        return validate_structured_data(data)

    except json.JSONDecodeError as e:
        print(f"JSON decoding error: {e}")
        print(f"Raw content: {content}")
        return {}
    except Exception as e:
        print(f"Unexpected error in data processing: {e}")
        return {}


def detect_pdf_type(pdf_path: str) -> dict:
    """Classify a PDF as "digital", "mixed", or "scanned".

    Heuristic: count pages carrying more than a trivial amount of
    extractable text, then compare text density and average characters
    per page against fixed thresholds. Any failure yields an "unknown"
    result with zeroed metrics instead of raising.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            page_count = len(pdf.pages)
            text_lengths = []

            for page in pdf.pages:
                extracted = page.extract_text()
                # Ignore near-empty pages (stray artifacts, page numbers)
                if extracted and len(extracted.strip()) > 10:
                    text_lengths.append(len(extracted))

            textual_pages = len(text_lengths)
            char_total = sum(text_lengths)
            density = textual_pages / page_count if page_count > 0 else 0
            avg_chars = char_total / page_count if page_count > 0 else 0

            # Threshold ladder: dense text -> digital, sparse -> mixed,
            # essentially none -> scanned.
            if density >= 0.7 and avg_chars > 100:
                verdict, confidence = "digital", "high"
            elif density >= 0.3 and avg_chars > 50:
                verdict, confidence = "digital", "medium"
            elif density > 0 and avg_chars > 10:
                verdict, confidence = "mixed", "low"
            else:
                verdict, confidence = "scanned", "high"

            return {
                "type": verdict,
                "confidence": confidence,
                "total_pages": page_count,
                "pages_with_text": textual_pages,
                "text_density": round(density, 3),
                "avg_text_per_page": round(avg_chars, 1),
                "total_text_length": char_total,
            }

    except Exception as e:
        return {
            "type": "unknown",
            "confidence": "low",
            "error": str(e),
            "total_pages": 0,
            "pages_with_text": 0,
            "text_density": 0,
            "avg_text_per_page": 0,
            "total_text_length": 0,
        }


def extract_text_from_pdf(pdf_path: str) -> dict:
    """Extract text from a PDF, short-circuiting for scanned documents.

    Runs ``detect_pdf_type`` first; a confidently scanned PDF returns an
    empty-text result immediately. Otherwise every page's text is joined
    and the detection metadata is echoed back alongside it.
    """
    try:
        pdf_info = detect_pdf_type(pdf_path)

        # Nothing to extract from a confirmed scan — bail out early.
        if pdf_info["type"] == "scanned" and pdf_info["confidence"] == "high":
            return {
                "text": "",
                "pdf_type": "scanned",
                "confidence": "high",
                "reason": "PDF appears to be scanned/image-based with no extractable text"
            }

        # Collect non-empty page texts for digital/mixed PDFs
        with pdfplumber.open(pdf_path) as pdf:
            chunks = [
                page_text
                for page_text in (page.extract_text() for page in pdf.pages)
                if page_text
            ]

        extracted_text = "\n".join(chunks).strip()

        if not extracted_text:
            return {
                "text": "",
                "pdf_type": "scanned",
                "confidence": "high",
                "reason": "No text could be extracted despite PDF type detection"
            }

        return {
            "text": extracted_text,
            "pdf_type": pdf_info["type"],
            "confidence": pdf_info["confidence"],
            "text_length": len(extracted_text),
            "pages_processed": pdf_info["pages_with_text"]
        }

    except Exception as e:
        return {
            "text": "",
            "pdf_type": "error",
            "confidence": "low",
            "error": str(e)
        }


def detect_document_type(text: str) -> str:
    """Classify text as "title_search", "application_form", or "unknown".

    Counts case-insensitive keyword hits for each category; three or
    more hits wins outright, otherwise the category with strictly more
    hits wins, with ties reported as "unknown".
    """
    haystack = text.lower()

    title_keywords = (
        "title search", "title", "certificate of title", "land description",
        "registered proprietor", "encumbrances", "activity in the last 125 days",
        "administrative notices", "title reference", "volume", "folio"
    )

    form_keywords = (
        "application form", "planning permit", "proposed use", "description",
        "for what use", "development or other matter", "permit required",
        "online form application", "planning permit application"
    )

    title_hits = sum(kw in haystack for kw in title_keywords)
    form_hits = sum(kw in haystack for kw in form_keywords)

    # Strong signal: three or more hits decides immediately.
    if title_hits >= 3:
        return "title_search"
    if form_hits >= 3:
        return "application_form"
    # Weak signal: whichever side leads; ties are inconclusive.
    if title_hits > form_hits:
        return "title_search"
    if form_hits > title_hits:
        return "application_form"
    return "unknown"


def extract_structured_data_from_text(text: str) -> dict:
    """Extract structured data from raw PDF text via OpenAI.

    The prompt is chosen by ``detect_document_type``; unknown documents
    fall back to the application-form fallback prompt. Returns the parsed
    JSON dict (tagged with ``detected_type`` when the model omitted a
    ``document_type``), or an ``{"error": ...}`` dict on any failure.
    """
    # Validate input
    if not text or not text.strip():
        return {"error": "Empty or invalid text input"}

    # Truncate text if too long
    max_length = 8000  # Conservative limit for PDF text
    if len(text) > max_length:
        text = text[:max_length] + "..."

    # Detect document type using improved detection
    document_type = detect_document_type(text)

    if document_type == "title_search":
        prompt = TITLE_SEARCH_EXTRACTION_PROMPT.format(text=text)
    elif document_type == "application_form":
        prompt = APPLICATION_FORM_EXTRACTION_PROMPT.format(text=text)
    else:
        # Use new application form fallback instead of general extraction
        prompt = APPLICATION_FORM_FALLBACK_PROMPT.format(text=text)

    # Make API request with retry logic
    content, error = make_openai_request_with_retry(prompt)

    if error:
        return {"error": f"OpenAI request failed: {error}"}

    if not content:
        return {"error": "Empty response from OpenAI"}

    try:
        # Strip a markdown code fence (```json ... ```) if present.
        # NOTE: the previous str.rstrip("```") failed to remove the closing
        # fence whenever it was followed by a newline (rstrip stops at the
        # first non-backtick char), breaking json.loads — use regexes.
        if content.startswith("```"):
            content = re.sub(r"^```[a-z]*\s*", "", content)
            content = re.sub(r"\s*```\s*$", "", content)

        # Parse JSON
        data = json.loads(content)

        # Basic validation
        if not isinstance(data, dict):
            return {"error": "Invalid response format"}

        # Record the heuristic classification when the model omitted one
        if "document_type" not in data:
            data["detected_type"] = document_type

        return data

    except json.JSONDecodeError as e:
        print(f"JSON decoding error in PDF extraction: {e}")
        print(f"Raw content: {content}")
        return {"error": f"Failed to parse OpenAI response: {str(e)}"}
    except Exception as e:
        return {"error": f"Unexpected error in PDF data processing: {str(e)}"}


def process_batch_with_delay(items, process_func, batch_size=10, delay_between_batches=2.0):
    """Apply *process_func* to every item, pausing between batches.

    Per-item exceptions are captured as ``{"error": ...}`` entries rather
    than aborting the run. The delay is skipped after the final batch so
    the call does not end with a pointless sleep.
    """
    results = []
    total = len(items)

    for start in range(0, total, batch_size):
        for item in items[start:start + batch_size]:
            try:
                results.append(process_func(item))
            except Exception as e:
                print(f"Error processing item: {e}")
                results.append({"error": str(e)})

        # Throttle between batches, except after the last one.
        if start + batch_size < total:
            print(f"Processed batch {start//batch_size + 1}, waiting {delay_between_batches} seconds...")
            time.sleep(delay_between_batches)

    return results


def save_pdf_data_to_database(application_id, pdf_path, extraction_result, extracted_data):
    """
    Save PDF extraction data to the database.

    Creates (or reuses) the DevelopmentApplication row, records a
    PDFDocument for this file, and — when extraction succeeded — stores
    the structured fields in an ExtractedPDFData row.

    Args:
        application_id (str): The application ID
        pdf_path (str): Path to the PDF file
        extraction_result (dict): Result from extract_text_from_pdf function
        extracted_data (dict): Structured data extracted from the PDF

    Returns:
        dict: Result with success status and any errors
    """
    try:
        # Imported locally — presumably to avoid an import cycle at module
        # load time; keep it that way.
        from shaoApp.models import DevelopmentApplication, PDFDocument, ExtractedPDFData

        # get_or_create already applies the lookup kwargs to a new row, so
        # application_id does not need to be repeated in defaults.
        application, _ = DevelopmentApplication.objects.get_or_create(
            application_id=application_id,
            defaults={
                'council_name': 'Unknown'  # placeholder; can be updated later
            }
        )

        # Create PDF document record (status reflects extraction outcome)
        pdf_document = PDFDocument.objects.create(
            application=application,
            file_path=pdf_path,
            file_name=os.path.basename(pdf_path),
            document_type=extracted_data.get('document_type', 'Unknown'),
            pdf_type=extraction_result.get('pdf_type', 'unknown'),
            confidence=extraction_result.get('confidence', 'low'),
            text_length=extraction_result.get('text_length', 0),
            pages_processed=extraction_result.get('pages_processed', 0),
            extraction_status='success' if extracted_data and not extracted_data.get('error') else 'failed',
            error_message=extracted_data.get('error') if extracted_data and extracted_data.get('error') else None
        )

        # If extraction was successful, save the structured data
        if extracted_data and not extracted_data.get('error'):
            # Extract development summary if it exists
            development_summary = extracted_data.get('development_summary', {})

            ExtractedPDFData.objects.create(
                pdf_document=pdf_document,

                # Basic document information
                land_description=extracted_data.get('land_description', ''),
                registered_proprietor=extracted_data.get('registered_proprietor', ''),
                encumbrances=extracted_data.get('encumbrances', ''),
                activity_last_125_days=extracted_data.get('activity_last_125_days', ''),
                administrative_notices=extracted_data.get('administrative_notices', ''),

                # Application-specific fields
                proposed_use=extracted_data.get('proposed_use', ''),
                description=extracted_data.get('description', ''),

                # Contact information
                applicant_name=extracted_data.get('applicant_name', ''),
                contact_name=extracted_data.get('contact_name', ''),
                contact_address=extracted_data.get('contact_address', ''),
                contact_email=extracted_data.get('contact_email', ''),
                contact_phone=extracted_data.get('contact_phone', ''),
                applicant_address=extracted_data.get('applicant_address', ''),
                applicant_email=extracted_data.get('applicant_email', ''),
                applicant_phone=extracted_data.get('applicant_phone', ''),

                # Development summary fields
                lot_size=development_summary.get('lot_size', ''),
                site_coverage=development_summary.get('site_coverage', ''),
                total_area=development_summary.get('total_area', ''),
                ground_floor_area=development_summary.get('ground_floor_area', ''),
                first_floor_area=development_summary.get('first_floor_area', ''),
                pos=development_summary.get('pos', ''),
                spos=development_summary.get('spos', ''),

                # Store raw data for debugging
                raw_extracted_data=extracted_data
            )

        return {
            'success': True,
            'application_id': application_id,
            'pdf_document_id': pdf_document.id,
            'extraction_status': pdf_document.extraction_status,
            'message': 'PDF data saved successfully'
        }

    # NOTE(review): effectively unreachable since get_or_create creates the
    # row; kept so any future switch back to .get() keeps its handling.
    except DevelopmentApplication.DoesNotExist:
        return {
            'success': False,
            'application_id': application_id,
            'error': 'Development application not found'
        }
    except Exception as e:
        return {
            'success': False,
            'application_id': application_id,
            'error': f'Database error: {str(e)}'
        }


def process_and_save_pdf_data(application_id, pdf_path):
    """
    Run the full PDF pipeline: extract text, structure it, persist it.

    Args:
        application_id (str): The application ID
        pdf_path (str): Path to the PDF file

    Returns:
        dict: Result with processing status and database save status
    """
    try:
        extraction_result = extract_text_from_pdf(pdf_path)

        text = extraction_result.get("text")
        if not text:
            # Persist the failed attempt so it is still auditable.
            failure_reason = extraction_result.get("reason", "No text extracted from PDF")
            save_result = save_pdf_data_to_database(
                application_id,
                pdf_path,
                extraction_result,
                {"error": failure_reason}
            )
            return {
                "application_id": application_id,
                "pdf_path": pdf_path,
                "extraction_success": False,
                "reason": failure_reason,
                "database_save": save_result
            }

        # Structure the text, then persist both raw metadata and fields.
        extracted_data = extract_structured_data_from_text(text)

        save_result = save_pdf_data_to_database(
            application_id,
            pdf_path,
            extraction_result,
            extracted_data
        )

        return {
            "application_id": application_id,
            "pdf_path": pdf_path,
            "extraction_success": True,
            "pdf_type": extraction_result.get("pdf_type"),
            "confidence": extraction_result.get("confidence"),
            "text_length": extraction_result.get("text_length", 0),
            "pages_processed": extraction_result.get("pages_processed", 0),
            "extracted_data": extracted_data,
            "database_save": save_result
        }

    except Exception as e:
        return {
            "application_id": application_id,
            "pdf_path": pdf_path,
            "extraction_success": False,
            "error": str(e),
            "database_save": {
                'success': False,
                'error': f'Processing error: {str(e)}'
            }
        }