import json
import logging
import os
import pickle
from typing import List, Dict, Any, Optional

import faiss
import numpy as np
from django.conf import settings
from django.db.models import Q
from langchain.docstore.document import Document
from langchain.schema import HumanMessage, SystemMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from sentence_transformers import SentenceTransformer

from .models import DevelopmentApplication, PDFDocument, ExtractedPDFData


class RAGChatbotService:
    """Retrieval-augmented chatbot over development-application records.

    Database rows are rendered to text, split into chunks, embedded into a
    FAISS index stored under ``MEDIA_ROOT/faiss_index`` and retrieved at
    question time as context for the chat model.
    """

    # Embedding model shared by indexing and querying; both sides must use
    # the same model for similarity scores to be meaningful.
    EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

    # Files written by ``FAISS.save_local`` when the default index name is used.
    INDEX_FILES = ('index.faiss', 'index.pkl')

    _logger = logging.getLogger(__name__)

    def __init__(self):
        # NOTE(review): no method of this class reads ``embedding_model``;
        # it is kept because it is a public attribute other code may rely on.
        self.embedding_model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-05-13",
            temperature=0.7,
            api_key=settings.OPENAI_API_KEY
        )
        self.vector_store_path = os.path.join(settings.MEDIA_ROOT, 'faiss_index')
        self.vector_store = None
        # Filled lazily by _get_embeddings() so the model is wrapped only once.
        self._embeddings = None
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _display(value, default: str) -> str:
        """Return ``value`` as text, or ``default`` when it is None or blank.

        Unlike ``value or default`` this keeps legitimate falsy values such
        as ``0`` instead of reporting them as missing.
        """
        if value is None:
            return default
        text = str(value)
        return text if text.strip() else default

    @staticmethod
    def _render(sections) -> str:
        """Render sections of ``(label, value)`` pairs as ``Label: value`` lines.

        Lines inside a section are newline-separated; sections are separated
        by one blank line. No indentation is emitted, so the chunk budget is
        not spent on whitespace.
        """
        return "\n\n".join(
            "\n".join(f"{label}: {value}" for label, value in section)
            for section in sections
        )

    @staticmethod
    def _sources_from_docs(docs) -> List[Dict[str, Any]]:
        """Build one source-description dict per retrieved document."""
        return [
            {
                'application_id': doc.metadata.get('application_id', 'Unknown'),
                'council_name': doc.metadata.get('council_name', 'Unknown'),
                'document_type': doc.metadata.get('document_type', 'Unknown'),
                'source_type': doc.metadata.get('type', 'Unknown'),
            }
            for doc in docs
        ]

    def _index_exists(self) -> bool:
        """Return True when every FAISS index file is present on disk."""
        return all(
            os.path.isfile(os.path.join(self.vector_store_path, file_name))
            for file_name in self.INDEX_FILES
        )

    def _get_embeddings(self):
        """Return the embedding wrapper, creating it on first use."""
        if getattr(self, '_embeddings', None) is None:
            self._embeddings = HuggingFaceEmbeddings(
                model_name=self.EMBEDDING_MODEL_NAME,
                model_kwargs={'device': 'cpu'}
            )
        return self._embeddings

    # ------------------------------------------------------------------
    # Document creation
    # ------------------------------------------------------------------

    def _application_document(self, app) -> "Document":
        """Render one application's own fields as a metadata document."""
        unknown = 'Unknown'
        cost_str = f"${app.cost:,.2f}" if app.cost is not None else "Not specified"
        fields = [
            ('Application ID', self._display(app.application_id, unknown)),
            ('Council', self._display(app.council_name, unknown)),
            ('Decision', self._display(app.decision, unknown)),
            ('Lodgement Date', self._display(app.lodgement_date, unknown)),
            ('Determined Date', self._display(app.determined_date, unknown)),
            ('Cost', cost_str),
            ('Development Type', self._display(app.development_type, unknown)),
            ('Number of Dwellings', self._display(app.number_of_dwellings, unknown)),
            ('Number of Storeys', self._display(app.number_of_storeys, unknown)),
            ('Number of Places', self._display(app.number_of_places, unknown)),
            ('Number of Apartments', self._display(app.number_of_apartments, unknown)),
            ('Number of Subdivisions', self._display(app.number_of_subdivisions, unknown)),
        ]
        return Document(
            page_content=self._render([fields]),
            metadata={
                'source': 'application_metadata',
                'application_id': app.application_id,
                'council_name': app.council_name,
                'type': 'metadata'
            }
        )

    def _pdf_document(self, app, pdf_doc) -> "Optional[Document]":
        """Render a PDF's extracted data, or return None when it has none."""
        # A reverse one-to-one accessor raises an AttributeError subclass when
        # the related row is missing, so getattr's default covers that case.
        # NOTE(review): assumes ``extracted_data`` is a one-to-one relation.
        extracted = getattr(pdf_doc, 'extracted_data', None)
        if not extracted:
            return None

        missing = 'Not available'
        sections = [
            [
                ('Document', self._display(pdf_doc.file_name, 'Unknown')),
                ('Document Type', self._display(pdf_doc.document_type, 'Unknown')),
            ],
            [
                ('Land Description', self._display(extracted.land_description, missing)),
                ('Registered Proprietor', self._display(extracted.registered_proprietor, missing)),
                ('Encumbrances', self._display(extracted.encumbrances, missing)),
            ],
            [
                ('Proposed Use', self._display(extracted.proposed_use, missing)),
                ('Description', self._display(extracted.description, missing)),
            ],
            [
                ('Applicant', self._display(extracted.applicant_name, missing)),
                ('Contact', self._display(extracted.contact_name, missing)),
                ('Contact Address', self._display(extracted.contact_address, missing)),
                ('Contact Email', self._display(extracted.contact_email, missing)),
                ('Contact Phone', self._display(extracted.contact_phone, missing)),
            ],
            [
                ('Lot Size', self._display(extracted.lot_size, missing)),
                ('Site Coverage', self._display(extracted.site_coverage, missing)),
                ('Total Area', self._display(extracted.total_area, missing)),
                ('Ground Floor Area', self._display(extracted.ground_floor_area, missing)),
                ('First Floor Area', self._display(extracted.first_floor_area, missing)),
                ('Private Open Space', self._display(extracted.pos, missing)),
                ('Secluded Private Open Space', self._display(extracted.spos, missing)),
            ],
        ]
        return Document(
            page_content=self._render(sections),
            metadata={
                'source': 'pdf_extracted_data',
                'application_id': app.application_id,
                # Included so chat() can report the council for PDF sources too.
                'council_name': app.council_name,
                'pdf_file': pdf_doc.file_name,
                'document_type': pdf_doc.document_type,
                'type': 'extracted_data'
            }
        )

    def create_documents_from_database(self) -> "List[Document]":
        """Create documents from the database for vectorization.

        Returns:
            One metadata document per application, plus one document for each
            of its PDFs that has extracted data. PDFs that cannot be rendered
            are skipped and logged rather than aborting the whole run.
        """
        documents = []

        applications = DevelopmentApplication.objects.select_related().prefetch_related(
            'pdf_documents__extracted_data'
        )

        for app in applications:
            documents.append(self._application_document(app))

            for pdf_doc in app.pdf_documents.all():
                try:
                    pdf_document = self._pdf_document(app, pdf_doc)
                except Exception:
                    # Best effort: one bad PDF must not block indexing, but
                    # the failure should be visible in the logs.
                    self._logger.warning(
                        "Skipping a PDF document of application %r",
                        app.application_id,
                        exc_info=True,
                    )
                    continue
                if pdf_document is not None:
                    documents.append(pdf_document)

        return documents

    # ------------------------------------------------------------------
    # Vector store management
    # ------------------------------------------------------------------

    def build_vector_store(self):
        """Build the FAISS vector store from the database and save it to disk.

        Leaves ``self.vector_store`` untouched when the database yields no
        documents.
        """
        self._logger.info("Building vector store from database...")

        documents = self.create_documents_from_database()
        if not documents:
            self._logger.warning("No documents found in database")
            return

        self._logger.info("Splitting %d documents into chunks...", len(documents))
        split_docs = self.text_splitter.split_documents(documents)
        self._logger.info("Created %d chunks", len(split_docs))

        self.vector_store = FAISS.from_documents(split_docs, self._get_embeddings())

        os.makedirs(self.vector_store_path, exist_ok=True)
        self.vector_store.save_local(self.vector_store_path)
        self._logger.info("Vector store saved to %s", self.vector_store_path)

    def load_vector_store(self):
        """Load the existing FAISS vector store.

        Returns:
            True when an index was loaded, False when no complete index is
            present on disk (the caller is then expected to build one).
        """
        # Checking the index files rather than the directory means an empty
        # or half-written directory triggers a rebuild instead of an error.
        if not self._index_exists():
            return False

        # SECURITY NOTE(review): load_local unpickles ``index.pkl``. The index
        # lives under MEDIA_ROOT; confirm that location cannot be written by
        # untrusted users, otherwise this allows arbitrary code execution.
        self.vector_store = FAISS.load_local(
            self.vector_store_path,
            self._get_embeddings(),
            allow_dangerous_deserialization=True
        )
        self._logger.info("Vector store loaded successfully")
        return True

    # ------------------------------------------------------------------
    # Retrieval and generation
    # ------------------------------------------------------------------

    def search_similar_documents(self, query: str, k: int = 5) -> "List[Document]":
        """Search for similar documents in the vector store.

        Args:
            query: Free-text question to match against the index.
            k: Maximum number of documents to return.

        Returns:
            The matching documents, or an empty list when the query is blank,
            ``k`` is not positive, or no index could be loaded or built.
        """
        # Nothing meaningful can be retrieved for these inputs; return before
        # paying for index loading or building.
        if not query or not query.strip() or k <= 0:
            return []

        if self.vector_store is None and not self.load_vector_store():
            self.build_vector_store()

        if self.vector_store is None:
            return []

        return self.vector_store.similarity_search(query, k=k)

    def generate_response(self, query: str, context_docs: "List[Document]") -> str:
        """Generate response using LLM with retrieved context.

        Args:
            query: The user's question, sent as the human message.
            context_docs: Retrieved documents whose text is placed in the
                system message as context.

        Returns:
            The text content of the model's reply.
        """
        context = "\n\n".join([doc.page_content for doc in context_docs])

        system_message = SystemMessage(content=f"""
        You are a helpful assistant that provides information about development applications and planning data.
        Use the following context to answer the user's question. If the context doesn't contain relevant information,
        say so politely. Always provide accurate information based on the available data.
        
        Context:
        {context}
        """)

        user_message = HumanMessage(content=query)

        response = self.llm.invoke([system_message, user_message])
        return response.content

    def chat(self, message: str) -> Dict[str, Any]:
        """Main chat method that combines retrieval and generation.

        Returns:
            A dict with ``response`` (text), ``sources`` (one entry per
            retrieved document) and ``confidence`` (``'high'``, ``'medium'``,
            ``'low'`` or ``'error'``). Exceptions are logged and reported in
            the returned dict instead of being raised.
        """
        try:
            similar_docs = self.search_similar_documents(message, k=5)

            if not similar_docs:
                return {
                    'response': "I apologize, but I couldn't find any relevant information in the database to answer your question. Please try rephrasing your query or ask about development applications, planning data, or council information.",
                    'sources': [],
                    'confidence': 'low'
                }

            response = self.generate_response(message, similar_docs)

            return {
                'response': response,
                'sources': self._sources_from_docs(similar_docs),
                'confidence': 'high' if len(similar_docs) >= 3 else 'medium'
            }

        except Exception as e:
            self._logger.exception("Chat request failed")
            return {
                'response': f'I encountered an error while processing your request: {str(e)}',
                'sources': [],
                'confidence': 'error'
            }

    def get_statistics(self) -> Dict[str, Any]:
        """Get statistics about the knowledge base.

        Returns:
            Row counts, the distinct council names and whether a complete
            index exists on disk; or ``{'error': message}`` if a query fails.
        """
        try:
            total_applications = DevelopmentApplication.objects.count()
            total_pdfs = PDFDocument.objects.count()
            total_extracted = ExtractedPDFData.objects.count()

            # Explicit ordering on the selected column keeps distinct() from
            # being widened by any default model ordering.
            councils = (
                DevelopmentApplication.objects
                .order_by('council_name')
                .values_list('council_name', flat=True)
                .distinct()
            )

            return {
                'total_applications': total_applications,
                'total_pdf_documents': total_pdfs,
                'total_extracted_data': total_extracted,
                'councils': list(councils),
                'vector_store_exists': self._index_exists()
            }
        except Exception as e:
            self._logger.exception("Could not collect knowledge-base statistics")
            return {'error': str(e)}