import os
import json
import time

import pandas as pd

from django.conf import settings
from django.db.models import Q
from django.http import JsonResponse
from django.shortcuts import render
from django.views.decorators.http import require_http_methods

from rest_framework import status
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
from rest_framework.response import Response

from shaoApp.functions import *  # expected to provide extract_structured_data, process_and_save_pdf_data
from .models import DevelopmentApplication, PDFDocument, ExtractedPDFData
from .rag_service import RAGChatbotService


@require_http_methods(["GET"])
def mergeCsvFilesView(request):
    """
    Merges two CSV exports on their exactly matching column names only,
    drops duplicate rows, and writes both the merged output and the
    duplicates to separate files in MEDIA_ROOT.
    """
    try:
        # File paths
        file1 = os.path.join(settings.MEDIA_ROOT, 'Application Tables - 2025-07-20 08_21_55.csv')
        file2 = os.path.join(settings.MEDIA_ROOT, 'historical_scrape.csv')
        merged_file = os.path.join(settings.MEDIA_ROOT, 'merged_output.csv')
        dup_file = os.path.join(settings.MEDIA_ROOT, 'duplicate_entries.csv')

        # Load files
        df1 = pd.read_csv(file1)
        df2 = pd.read_csv(file2)

        # Drop pandas index-artifact columns ("Unnamed: 0" and the like)
        df1 = df1.loc[:, ~df1.columns.str.contains("^Unnamed", case=False)]
        df2 = df2.loc[:, ~df2.columns.str.contains("^Unnamed", case=False)]

        # Keep only the columns whose names match exactly in both files
        common_columns = df1.columns.intersection(df2.columns)
        df1 = df1[common_columns]
        df2 = df2[common_columns]

        # Concatenate and drop duplicates
        combined_df = pd.concat([df1, df2], ignore_index=True)
        duplicate_rows = combined_df[combined_df.duplicated(keep=False)]
        unique_df = combined_df.drop_duplicates(ignore_index=True)

        # Save clean files
        unique_df.to_csv(merged_file, index=False)
        duplicate_rows.to_csv(dup_file, index=False)

        return JsonResponse({
            'message': 'Merge completed successfully using exact column matches',
            'output_file': request.build_absolute_uri(settings.MEDIA_URL + 'merged_output.csv'),
            'duplicates_file': request.build_absolute_uri(settings.MEDIA_URL + 'duplicate_entries.csv'),
            'num_duplicates': int(len(duplicate_rows)),
            'num_final_rows': int(len(unique_df))
        })

    except Exception as e:
        return JsonResponse({'error': f'Merge failed: {str(e)}'}, status=500)
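
# Illustrative sketch of the exact-column merge above (not wired to any URL;
# the frames are made up):
#
#   a = pd.DataFrame({'id': [1], 'name': ['x']})
#   b = pd.DataFrame({'id': [2], 'name': ['y'], 'extra': ['z']})
#   common = a.columns.intersection(b.columns)             # Index(['id', 'name'])
#   pd.concat([a[common], b[common]], ignore_index=True)   # 'extra' never appears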


@require_http_methods(["GET"])
def enrichMergedFileView(request):
    """
    Left-joins decision, date, cost, and council details from the
    determined-applications export onto the merged CSV.
    """
    try:
        merged_path = os.path.join(settings.MEDIA_ROOT, 'merged_output.csv')
        determined_path = os.path.join(settings.MEDIA_ROOT, 'cumulative_applicationsdetermined.csv')
        enriched_output_path = os.path.join(settings.MEDIA_ROOT, 'enriched_merged_output.csv')

        merged_df = pd.read_csv(merged_path)
        determined_df = pd.read_csv(determined_path)

        # Keep only necessary columns
        cols = ['application_id', 'decision', 'lodgement_date', 'determined_date', 'cost', 'council_name']
        determined_df = determined_df[cols]

        enriched_df = pd.merge(merged_df, determined_df, on='application_id', how='left')
        enriched_df.to_csv(enriched_output_path, index=False)

        enriched_count = enriched_df['decision'].notna().sum()

        return JsonResponse({
            'message': 'Enrichment completed successfully',
            'enriched_file': request.build_absolute_uri(settings.MEDIA_URL + 'enriched_merged_output.csv'),
            'num_records_enriched': int(enriched_count)
        })

    except Exception as e:
        return JsonResponse({'error': f'Enrichment failed: {str(e)}'}, status=500)
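
# Note on the left join above: merged rows with no match in the determined
# export keep NaN in the appended columns, so the notna() count on 'decision'
# approximates how many rows actually gained determination data.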


def parse_date(date_str):
    """Parse a value into a datetime.date, returning None for anything unparseable."""
    try:
        date = pd.to_datetime(date_str, errors='coerce')
        if pd.isnull(date):  # covers NaT as well as NaN/None
            return None
        return date.date()
    except Exception:
        return None


def safe_get(val):
    """Return None for NaN or empty-string values so they map cleanly to NULL fields."""
    return None if pd.isna(val) or val == '' else val
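
# Illustrative behaviour of the helpers above (inputs are made up):
#   parse_date('2024-07-01')  -> datetime.date(2024, 7, 1)
#   parse_date('not a date')  -> None
#   safe_get(float('nan'))    -> None
#   safe_get('Approved')      -> 'Approved'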

@require_http_methods(["GET"])
def importEnrichedDataView(request):
    """
    Imports the enriched CSV into the database, processing rows in small
    batches with per-row error handling and a delay between batches.
    """
    try:
        file_path = os.path.join(settings.MEDIA_ROOT, 'enriched_merged_output.csv')
        if not os.path.exists(file_path):
            return JsonResponse({'error': 'enriched_merged_output.csv not found.'}, status=404)

        df = pd.read_csv(file_path)
        created, updated, failed = 0, 0, 0
        
        # Process in batches to avoid overwhelming the API
        batch_size = 5  # Small batch size to avoid rate limits
        total_rows = len(df)
        
        for i in range(0, total_rows, batch_size):
            batch_df = df.iloc[i:i + batch_size]
            print(f"Processing batch {i//batch_size + 1}/{(total_rows + batch_size - 1)//batch_size}")
            
            for _, row in batch_df.iterrows():
                try:
                    description = safe_get(row.get('description', ''))

                    # Only call the extractor when we have a description
                    if description:
                        structured = extract_structured_data(description)
                    else:
                        structured = {}
                    print("extracted", structured)
                    # Create or update the record
                    obj, is_created = DevelopmentApplication.objects.update_or_create(
                        application_id=safe_get(row.get('application_id')),
                        defaults={
                            "application_url": safe_get(row.get("application_web_url")),
                            "decision": safe_get(row.get("decision")),
                            "lodgement_date": parse_date(row.get("lodgement_date")),
                            "determined_date": parse_date(row.get("determined_date")),
                            "cost": safe_get(row.get("cost")),
                            "council_name": safe_get(row.get("council_name_x")),
                            "development_type": safe_get(structured.get("development_type")),
                            "land_use": safe_get(structured.get("land_use")),
                            # NOTE: category currently mirrors development_type
                            "category": safe_get(structured.get("development_type")),
                            "number_of_dwellings": safe_get(structured.get("number_of_dwellings")),
                            "number_of_storeys": safe_get(structured.get("number_of_storeys")),
                            "number_of_places": safe_get(structured.get("number_of_places")),
                            "number_of_apartments": safe_get(structured.get("number_of_units")),
                            "number_of_subdivisions": safe_get(structured.get("number_of_lots")),
                        }
                    )

                    if is_created:
                        created += 1
                    else:
                        updated += 1
                        
                except Exception as e:
                    print(f"Error processing row: {e}")
                    failed += 1
                    continue
            
            # Add delay between batches to avoid rate limits
            if i + batch_size < total_rows:
                time.sleep(2)  # 2 second delay between batches

        return JsonResponse({
            'message': 'Data import completed',
            'created': created,
            'updated': updated,
            'failed': failed,
            'total_processed': created + updated + failed
        })

    except Exception as e:
        return JsonResponse({'error': f'Data import failed: {str(e)}'}, status=500)
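
# extract_structured_data() is pulled in via the wildcard import from
# shaoApp.functions. A sketch of the return shape assumed by the view above
# (keys inferred from the reads; the actual implementation may differ):
#
#   {"development_type": "Dwelling", "land_use": "Residential",
#    "number_of_dwellings": 2, "number_of_storeys": 1, "number_of_places": None,
#    "number_of_units": None, "number_of_lots": None}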


@require_http_methods(["GET"])
def mergeWithNameFromScraperView(request):
    """
    Merges enriched data with the Name field from the scraper document,
    joined on Application ID. Rows with a missing Application ID are dropped
    before merging, and Windows-style file paths are normalized to
    /media/... form.
    """
    try:
        # File paths
        enriched_path = os.path.join(settings.MEDIA_ROOT, 'enriched_merged_output.csv')
        scraper_docs_path = os.path.join(settings.MEDIA_ROOT, 'Scraper Documents - 2025-07-20 08_21_54.csv')
        output_path = os.path.join(settings.MEDIA_ROOT, 'enriched_with_name.csv')

        # Check files
        if not os.path.exists(enriched_path) or not os.path.exists(scraper_docs_path):
            return JsonResponse({'error': 'One or both source files are missing.'}, status=404)

        # Load data
        enriched_df = pd.read_csv(enriched_path)
        scraper_df = pd.read_csv(scraper_docs_path)

        # Rename 'Application ID' to match enriched_df
        scraper_df.rename(columns={'Application ID': 'application_id'}, inplace=True)

        # Drop missing or blank application_id
        enriched_df = enriched_df.dropna(subset=['application_id'])
        enriched_df = enriched_df[enriched_df['application_id'].astype(str).str.strip() != '']

        # Merge on application_id
        merged_df = pd.merge(
            enriched_df,
            scraper_df[['application_id', 'Name', 'Path']],
            on='application_id',
            how='left'
        )

        # Normalize each path: convert backslashes, drop drive letters and
        # short all-caps segments (e.g. state abbreviations), then prefix
        # the remaining parts with /media/. Non-string values (NaN from the
        # left join) become an empty string.
        merged_df['Path'] = merged_df['Path'].apply(
            lambda p: '/media/' + '/'.join([
                part for part in p.replace('\\', '/').split('/')
                if part and not (':' in part or (part.isupper() and len(part) <= 4))
            ]) if isinstance(p, str) and p.strip() else ''
        )

        # Save merged file
        merged_df.to_csv(output_path, index=False)

        return JsonResponse({
            'message': 'Merge with Name completed successfully',
            'output_file': request.build_absolute_uri(settings.MEDIA_URL + 'enriched_with_name.csv'),
            'num_records': int(len(merged_df)),
            'num_with_name': int(merged_df['Name'].notna().sum())
        })

    except Exception as e:
        return JsonResponse({'error': f'Merge failed: {str(e)}'}, status=500)
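
# Illustrative path rewrite performed above (the input path is made up):
#   'C:\\Scrapes\\NSW\\PDFs\\doc1.pdf' -> '/media/Scrapes/PDFs/doc1.pdf'
# 'C:' is dropped because it contains ':', and 'NSW' because it is all-caps
# with at most four letters; 'PDFs' survives thanks to its lowercase 's'.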


@require_http_methods(["GET"])
def checkExistingPdfFilesView(request):
    """
    Checks the existence of PDF files specified in the 'Path' column
    of enriched_with_name.csv and returns a list of application IDs
    for which the file exists.
    """
    try:
        # File path
        input_path = os.path.join(settings.MEDIA_ROOT, 'enriched_with_name.csv')

        # Check if file exists
        if not os.path.exists(input_path):
            return JsonResponse({'error': 'Merged file not found.'}, status=404)

        # Load CSV
        df = pd.read_csv(input_path)
        # Ensure necessary columns exist
        if 'application_id' not in df.columns or 'Path' not in df.columns:
            return JsonResponse({'error': 'Required columns are missing in the CSV.'}, status=400)

        # Normalize paths and check existence
        existing_ids = []
        for _, row in df.iterrows():
            relative_path = row['Path']
            # Skip rows with no recorded path (NaN would break lstrip below)
            if pd.isna(relative_path) or not str(relative_path).strip():
                continue
            full_path = os.path.join(settings.BASE_DIR, str(relative_path).lstrip('/'))
            if os.path.exists(full_path):
                existing_ids.append(row['application_id'])

        return JsonResponse({
            'message': 'File existence check complete.',
            'num_files_found': len(existing_ids),
            'application_ids': existing_ids
        })

    except Exception as e:
        return JsonResponse({'error': f'File check failed: {str(e)}'}, status=500)
    

@require_http_methods(["GET"])
def extractPdfData(request):
    """
    Extracts data from the PDF files listed in enriched_with_name.csv and
    saves the results to the database, processing files in small batches.
    """
    try:
        input_csv = os.path.join(settings.MEDIA_ROOT, 'enriched_with_name.csv')
        if not os.path.exists(input_csv):
            return JsonResponse({'error': 'enriched_with_name.csv not found.'}, status=404)
            
        df = pd.read_csv(input_csv)

        if 'application_id' not in df.columns or 'Path' not in df.columns:
            return JsonResponse({"error": "CSV missing required columns"}, status=400)

        result = []
        processed = 0
        failed = 0
        saved_to_db = 0
        
        # Filter rows that have valid paths
        valid_rows = []
        for _, row in df.iterrows():
            app_id = row['application_id']
            rel_path = row['Path']
            
            if pd.isna(rel_path) or rel_path == '':
                continue
                
            pdf_path = os.path.join(settings.BASE_DIR, rel_path.lstrip('/'))
            if os.path.exists(pdf_path):
                valid_rows.append((app_id, pdf_path))
            else:
                result.append({
                    "application_id": app_id,
                    "error": "PDF file not found",
                    "path": rel_path
                })
                failed += 1

        # Process valid PDFs in batches
        batch_size = 3  # Small batch size for PDF processing
        total_pdfs = len(valid_rows)
        
        for i in range(0, total_pdfs, batch_size):
            batch = valid_rows[i:i + batch_size]
            print(f"Processing PDF batch {i//batch_size + 1}/{(total_pdfs + batch_size - 1)//batch_size}")
            
            for app_id, pdf_path in batch:
                try:
                    # Process PDF and save to database
                    processing_result = process_and_save_pdf_data(app_id, pdf_path)
                    
                    if processing_result.get("extraction_success"):
                        result.append({
                            "application_id": app_id,
                            "data": processing_result.get("extracted_data"),
                            "path": pdf_path,
                            "pdf_type": processing_result.get("pdf_type"),
                            "confidence": processing_result.get("confidence"),
                            "text_length": processing_result.get("text_length", 0),
                            "pages_processed": processing_result.get("pages_processed", 0),
                            "database_save": processing_result.get("database_save")
                        })
                        processed += 1
                        
                        # Check if database save was successful
                        if processing_result.get("database_save", {}).get("success"):
                            saved_to_db += 1
                    else:
                        result.append({
                            "application_id": app_id,
                            "error": processing_result.get("reason") or processing_result.get("error"),
                            "path": pdf_path,
                            "database_save": processing_result.get("database_save")
                        })
                        failed += 1
                    
                except Exception as e:
                    result.append({
                        "application_id": app_id,
                        "error": str(e),
                        "path": pdf_path
                    })
                    failed += 1
            
            # Add delay between batches
            if i + batch_size < total_pdfs:
                time.sleep(3)  # 3 second delay between PDF batches

        # Save the result to a JSON file
        output_path = os.path.join(settings.MEDIA_ROOT, 'extracted_applications.json')
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        return JsonResponse({
            "message": "PDF extraction and database save completed",
            "output_file": output_path,
            "total_pdfs": total_pdfs,
            "successfully_processed": processed,
            "successfully_saved_to_db": saved_to_db,
            "failed": failed,
            "total_records": len(result)
        })

    except Exception as e:
        return JsonResponse({'error': f'PDF extraction failed: {str(e)}'}, status=500)
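
# process_and_save_pdf_data() also comes from shaoApp.functions. The keys read
# above imply a result dict roughly like the following (an assumed sketch, not
# the actual contract):
#
#   {"extraction_success": True, "extracted_data": {...},
#    "pdf_type": "planning_report", "confidence": 0.92,
#    "text_length": 15324, "pages_processed": 12,
#    "database_save": {"success": True}}
#
# On failure, "extraction_success" is falsy and "reason" or "error" explains why.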


@require_http_methods(["GET"])
def getPdfDataFromDatabase(request):
    """
    Retrieve PDF data from the database for a specific application or all applications.
    """
    try:
        application_id = request.GET.get('application_id')
        
        if application_id:
            # Get data for specific application
            try:
                application = DevelopmentApplication.objects.get(application_id=application_id)
                pdf_documents = application.pdf_documents.all()
                
                result = {
                    'application_id': application_id,
                    'application_data': {
                        'council_name': application.council_name,
                        'decision': application.decision,
                        'lodgement_date': application.lodgement_date,
                        'determined_date': application.determined_date,
                        'cost': application.cost,
                        'development_type': application.development_type,
                        'number_of_dwellings': application.number_of_dwellings,
                        'number_of_storeys': application.number_of_storeys,
                        'number_of_places': application.number_of_places,
                        'number_of_apartments': application.number_of_apartments,
                        'number_of_subdivisions': application.number_of_subdivisions,
                    },
                    'pdf_documents': []
                }
                
                for pdf_doc in pdf_documents:
                    pdf_data = {
                        'id': pdf_doc.id,
                        'file_name': pdf_doc.file_name,
                        'file_path': pdf_doc.file_path,
                        'document_type': pdf_doc.document_type,
                        'pdf_type': pdf_doc.pdf_type,
                        'confidence': pdf_doc.confidence,
                        'text_length': pdf_doc.text_length,
                        'pages_processed': pdf_doc.pages_processed,
                        'extraction_status': pdf_doc.extraction_status,
                        'error_message': pdf_doc.error_message,
                        'created_at': pdf_doc.created_at.isoformat(),
                    }
                    
                    # Add extracted data if available
                    if hasattr(pdf_doc, 'extracted_data'):
                        extracted = pdf_doc.extracted_data
                        pdf_data['extracted_data'] = {
                            'land_description': extracted.land_description,
                            'registered_proprietor': extracted.registered_proprietor,
                            'encumbrances': extracted.encumbrances,
                            'activity_last_125_days': extracted.activity_last_125_days,
                            'administrative_notices': extracted.administrative_notices,
                            'proposed_use': extracted.proposed_use,
                            'description': extracted.description,
                            'applicant_name': extracted.applicant_name,
                            'contact_name': extracted.contact_name,
                            'contact_address': extracted.contact_address,
                            'contact_email': extracted.contact_email,
                            'contact_phone': extracted.contact_phone,
                            'applicant_address': extracted.applicant_address,
                            'applicant_email': extracted.applicant_email,
                            'applicant_phone': extracted.applicant_phone,
                            'lot_size': extracted.lot_size,
                            'site_coverage': extracted.site_coverage,
                            'total_area': extracted.total_area,
                            'ground_floor_area': extracted.ground_floor_area,
                            'first_floor_area': extracted.first_floor_area,
                            'pos': extracted.pos,
                            'spos': extracted.spos,
                        }
                    
                    result['pdf_documents'].append(pdf_data)
                
                return JsonResponse(result)
                
            except DevelopmentApplication.DoesNotExist:
                return JsonResponse({'error': f'Application {application_id} not found'}, status=404)
        else:
            # Get summary of all applications with PDF data
            applications_with_pdfs = DevelopmentApplication.objects.filter(pdf_documents__isnull=False).distinct()
            
            summary = []
            for app in applications_with_pdfs:
                pdf_count = app.pdf_documents.count()
                successful_extractions = app.pdf_documents.filter(extraction_status='success').count()
                
                summary.append({
                    'application_id': app.application_id,
                    'council_name': app.council_name,
                    'total_pdfs': pdf_count,
                    'successful_extractions': successful_extractions,
                    'failed_extractions': pdf_count - successful_extractions
                })
            
            return JsonResponse({
                'message': 'PDF data summary',
                'total_applications_with_pdfs': len(summary),
                'applications': summary
            })
            
    except Exception as e:
        return JsonResponse({'error': f'Database query failed: {str(e)}'}, status=500)
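
# Example requests (URL patterns assumed; wire up in urls.py as appropriate):
#   GET /api/pdf-data/?application_id=DA-2025-0001  -> full detail for one application
#   GET /api/pdf-data/                              -> per-application extraction summary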


@api_view(['POST'])
@permission_classes([AllowAny])
def chat_endpoint(request):
    """
    Main chat endpoint for the RAG chatbot
    """
    try:
        data = request.data
        message = data.get('message', '').strip()
        
        if not message:
            return Response({
                'error': 'Message is required'
            }, status=status.HTTP_400_BAD_REQUEST)
        
        # Initialize chatbot service
        chatbot = RAGChatbotService()
        
        # Get response
        result = chatbot.chat(message)
        
        return Response({
            'success': True,
            'response': result['response'],
            'sources': result['sources'],
            'confidence': result['confidence']
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
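
# Example request (endpoint path assumed):
#   curl -X POST http://localhost:8000/api/chat/ \
#        -H 'Content-Type: application/json' \
#        -d '{"message": "How many dwellings were approved in 2024?"}'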


@api_view(['GET'])
@permission_classes([AllowAny])
def chatbot_stats(request):
    """
    Get statistics about the chatbot knowledge base
    """
    try:
        chatbot = RAGChatbotService()
        stats = chatbot.get_statistics()
        
        return Response({
            'success': True,
            'statistics': stats
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


@api_view(['POST'])
@permission_classes([AllowAny])
def rebuild_vector_store(request):
    """
    Rebuild the FAISS vector store from database
    """
    try:
        chatbot = RAGChatbotService()
        chatbot.build_vector_store()
        
        return Response({
            'success': True,
            'message': 'Vector store rebuilt successfully'
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
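
# Design note: each endpoint above builds a fresh RAGChatbotService per
# request. If construction loads the FAISS index from disk, caching a single
# module-level instance would avoid repeating that cost on every call.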


@api_view(['GET'])
@permission_classes([AllowAny])
def search_applications(request):
    """
    Search development applications by various criteria
    """
    try:
        query = request.GET.get('q', '')
        council = request.GET.get('council', '')
        decision = request.GET.get('decision', '')
        development_type = request.GET.get('development_type', '')
        
        applications = DevelopmentApplication.objects.all()
        
        if query:
            applications = applications.filter(
                Q(application_id__icontains=query) |
                Q(council_name__icontains=query) |
                Q(development_type__icontains=query)
            )
        
        if council:
            applications = applications.filter(council_name__icontains=council)
        
        if decision:
            applications = applications.filter(decision__icontains=decision)
        
        if development_type:
            applications = applications.filter(development_type__icontains=development_type)
        
        # Limit results
        applications = applications[:50]
        
        results = []
        for app in applications:
            results.append({
                'application_id': app.application_id,
                'council_name': app.council_name,
                'decision': app.decision,
                'lodgement_date': app.lodgement_date,
                'determined_date': app.determined_date,
                'cost': app.cost,
                'development_type': app.development_type,
                'number_of_dwellings': app.number_of_dwellings,
                'number_of_storeys': app.number_of_storeys,
            })
        
        return Response({
            'success': True,
            'applications': results,
            'count': len(results)
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
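
# Example search (URL path assumed); the filters combine with AND and each
# uses a case-insensitive substring match:
#   GET /api/applications/search/?q=dwelling&council=monash&decision=approved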


@api_view(['GET'])
@permission_classes([AllowAny])
def get_application_details(request, application_id):
    """
    Get detailed information about a specific application
    """
    try:
        application = DevelopmentApplication.objects.get(application_id=application_id)
        
        # Get related PDF documents
        pdf_documents = []
        for pdf in application.pdf_documents.all():
            pdf_data = {
                'file_name': pdf.file_name,
                'document_type': pdf.document_type,
                'pdf_type': pdf.pdf_type,
                'confidence': pdf.confidence,
                'text_length': pdf.text_length,
                'pages_processed': pdf.pages_processed,
                'extraction_status': pdf.extraction_status,
            }
            
            # Add extracted data if available
            if hasattr(pdf, 'extracted_data') and pdf.extracted_data:
                extracted = pdf.extracted_data
                pdf_data['extracted_data'] = {
                    'land_description': extracted.land_description,
                    'registered_proprietor': extracted.registered_proprietor,
                    'proposed_use': extracted.proposed_use,
                    'description': extracted.description,
                    'applicant_name': extracted.applicant_name,
                    'contact_name': extracted.contact_name,
                    'contact_email': extracted.contact_email,
                    'contact_phone': extracted.contact_phone,
                    'lot_size': extracted.lot_size,
                    'site_coverage': extracted.site_coverage,
                    'total_area': extracted.total_area,
                }
            
            pdf_documents.append(pdf_data)
        
        result = {
            'application_id': application.application_id,
            'application_url': application.application_url,
            'council_name': application.council_name,
            'decision': application.decision,
            'lodgement_date': application.lodgement_date,
            'determined_date': application.determined_date,
            'cost': application.cost,
            'development_type': application.development_type,
            'number_of_dwellings': application.number_of_dwellings,
            'number_of_storeys': application.number_of_storeys,
            'number_of_places': application.number_of_places,
            'number_of_apartments': application.number_of_apartments,
            'number_of_subdivisions': application.number_of_subdivisions,
            'pdf_documents': pdf_documents,
        }
        
        return Response({
            'success': True,
            'application': result
        })
        
    except DevelopmentApplication.DoesNotExist:
        return Response({
            'error': 'Application not found'
        }, status=status.HTTP_404_NOT_FOUND)
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


@api_view(['GET'])
@permission_classes([AllowAny])
def get_councils(request):
    """
    Get list of all councils in the database
    """
    try:
        councils = DevelopmentApplication.objects.values_list('council_name', flat=True).distinct()
        councils = [council for council in councils if council]
        
        return Response({
            'success': True,
            'councils': councils
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


@api_view(['GET'])
@permission_classes([AllowAny])
def get_development_types(request):
    """
    Get list of all development types in the database
    """
    try:
        development_types = DevelopmentApplication.objects.values_list('development_type', flat=True).distinct()
        development_types = [dev_type for dev_type in development_types if dev_type]
        
        return Response({
            'success': True,
            'development_types': development_types
        })
        
    except Exception as e:
        return Response({
            'error': f'An error occurred: {str(e)}'
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


def chatbot_interface(request):
    """
    Serve the chatbot interface
    """
    return render(request, 'chatbot.html')
