import os
import json
import pickle
from typing import List, Dict, Any, Optional
import numpy as np
import faiss
from django.conf import settings
from django.db.models import Q, Sum, Count, Avg, Max, Min
from sentence_transformers import SentenceTransformer
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from .models import DevelopmentApplication, PDFDocument, ExtractedPDFData


class RAGChatbotService:
    def __init__(self):
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-05-13",
            temperature=0.8,
            api_key=settings.OPENAI_API_KEY
        )
        self.vector_store_path = os.path.join(settings.MEDIA_ROOT, 'faiss_index')
        self.vector_store = None
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        
    def create_documents_from_database(self) -> List[Document]:
        """Create documents from the database for vectorization"""
        documents = []
                
        # Get all development applications with their related data - optimized for comprehensive coverage
        applications = DevelopmentApplication.objects.select_related().prefetch_related(
            'pdf_documents__extracted_data'
        ).all().order_by('application_id')
        
        for app in applications:
            # Create document from application metadata
            # Handle cost formatting safely
            cost_str = f"${app.cost:,.2f}" if app.cost is not None else "Not specified"
            
            app_metadata = f"""
            **Development Application Summary**
            
            **Application Details:**
            • **Application ID:** {app.application_id or 'Unknown'}
            • **Council:** {app.council_name or 'Unknown'}
            • **Decision:** {app.decision or 'Unknown'}
            • **Lodgement Date:** {app.lodgement_date or 'Unknown'}
            • **Determined Date:** {app.determined_date or 'Unknown'}
            • **Project Cost:** {cost_str}
            
            **Development Specifications:**
            • **Category:** {app.category or 'Unknown'}
            • **Land Use:** {app.land_use or 'Unknown'}
            • **Development Type:** {app.development_type or 'Unknown'}
            • **Number of Dwellings:** {app.number_of_dwellings or 'Unknown'}
            • **Number of Storeys:** {app.number_of_storeys or 'Unknown'}
            • **Number of Places:** {app.number_of_places or 'Unknown'}
            • **Number of Apartments:** {app.number_of_apartments or 'Unknown'}
            • **Number of Subdivisions:** {app.number_of_subdivisions or 'Unknown'}
            
            **Additional Information:**
            • **Application URL:** {app.application_url or 'Not available'}
            """
            
            documents.append(Document(
                page_content=app_metadata,
                metadata={
                    'source': 'application_metadata',
                    'application_id': app.application_id,
                    'council_name': app.council_name,
                    'type': 'metadata'
                }
            ))
            
            # Add PDF document data
            for pdf_doc in app.pdf_documents.all():
                try:
                    extracted = pdf_doc.extracted_data
                    if extracted:
                        # Create comprehensive document from extracted data
                        pdf_content = f"""
                        **PDF Document Information**
                        
                        **Document Details:**
                        • **File Name:** {pdf_doc.file_name or 'Unknown'}
                        • **Document Type:** {pdf_doc.document_type or 'Unknown'}
                        
                        **Property Information:**
                        • **Land Description:** {extracted.land_description or 'Not available'}
                        • **Registered Proprietor:** {extracted.registered_proprietor or 'Not available'}
                        • **Encumbrances:** {extracted.encumbrances or 'Not available'}
                        
                        **Development Details:**
                        • **Proposed Use:** {extracted.proposed_use or 'Not available'}
                        • **Description:** {extracted.description or 'Not available'}
                        
                        **Contact Information:**
                        • **Applicant:** {extracted.applicant_name or 'Not available'}
                        • **Contact Person:** {extracted.contact_name or 'Not available'}
                        • **Contact Address:** {extracted.contact_address or 'Not available'}
                        • **Contact Email:** {extracted.contact_email or 'Not available'}
                        • **Contact Phone:** {extracted.contact_phone or 'Not available'}
                        
                        **Site Specifications:**
                        • **Lot Size:** {extracted.lot_size or 'Not available'}
                        • **Site Coverage:** {extracted.site_coverage or 'Not available'}
                        • **Total Area:** {extracted.total_area or 'Not available'}
                        • **Ground Floor Area:** {extracted.ground_floor_area or 'Not available'}
                        • **First Floor Area:** {extracted.first_floor_area or 'Not available'}
                        • **Private Open Space:** {extracted.pos or 'Not available'}
                        • **Secluded Private Open Space:** {extracted.spos or 'Not available'}
                        """
                        
                        documents.append(Document(
                            page_content=pdf_content,
                            metadata={
                                'source': 'pdf_extracted_data',
                                'application_id': app.application_id,
                                'pdf_file': pdf_doc.file_name,
                                'document_type': pdf_doc.document_type,
                                'type': 'extracted_data'
                            }
                        ))
                except Exception as e:
                    # Skip PDF documents without extracted data
                    continue
        
        return documents
    
    def build_vector_store(self):
        """Build and save the FAISS vector store"""
        print("Building vector store from database...")
        
        # Create documents from database
        documents = self.create_documents_from_database()
        
        if not documents:
            print("No documents found in database")
            return
        
        # Split documents into chunks
        print(f"Splitting {len(documents)} documents into chunks...")
        split_docs = self.text_splitter.split_documents(documents)
        print(f"Created {len(split_docs)} chunks")
        
        # Create embeddings and vector store
        embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'}
        )
        
        # Create FAISS vector store
        self.vector_store = FAISS.from_documents(split_docs, embeddings)
        
        # Save the vector store
        os.makedirs(self.vector_store_path, exist_ok=True)
        self.vector_store.save_local(self.vector_store_path)
        print(f"Vector store saved to {self.vector_store_path}")
    
    def load_vector_store(self):
        """Load the existing FAISS vector store"""
        if os.path.exists(self.vector_store_path):
            embeddings = HuggingFaceEmbeddings(
                model_name="all-MiniLM-L6-v2",
                model_kwargs={'device': 'cpu'}
            )
            self.vector_store = FAISS.load_local(
                self.vector_store_path, 
                embeddings,
                allow_dangerous_deserialization=True
            )
            print("Vector store loaded successfully")
            return True
        return False
    
    def search_similar_documents(self, query: str, k: int = 5) -> List[Document]:
        """Search for similar documents in the vector store"""
        if not self.vector_store:
            if not self.load_vector_store():
                self.build_vector_store()
        
        if not self.vector_store:
            return []
        
        # NOTE: Check for specific property queries and adjust search strategy
        query_lower = query.lower()
        
        # For comprehensive queries, get ALL documents instead of limiting
        if any(word in query_lower for word in ['how many', 'count', 'total', 'statistics', 'summary', 'overview', 'list all', 'show all', 'all application', 'application ids']):
            # Get maximum possible documents (effectively all)
            k = 1000  # Set to a very high number to get all documents
        # For specific property queries, increase search results to find more relevant matches
        elif any(indicator in query_lower for indicator in ['avenue', 'street', 'road', 'drive', 'court', 'place', 'crescent', 'boulevard', 'way', 'lane', 'close', 'terrace', 'grove', 'parade', 'waverley', 'ivanhoe', 'darebin', 'monash', 'stonnington', 'wyndham', 'dandenong', 'banyule', 'address', 'property', 'site', 'location']):
            # Increase search results for property queries to find more relevant matches
            k = 20
        
        # Search for similar documents
        similar_docs = self.vector_store.similarity_search(query, k=k)
        return similar_docs
    
    def generate_response(self, query: str, context_docs: List[Document]) -> str:
        """Generate response using LLM with retrieved context"""
        
        # Prepare context from retrieved documents
        context = "\n\n".join([doc.page_content for doc in context_docs])
        
        # Create system message with enhanced formatting instructions
        system_message = SystemMessage(content=f"""
        You are a knowledgeable and friendly urban planning assistant specializing in development applications and council planning data. Your role is to provide clear, engaging, and well-structured responses about development projects, planning decisions, and council information.

        **Response Guidelines:**
        1. **Be conversational and engaging** - Use a friendly, professional tone
        2. **Structure your responses clearly** - Use bullet points, numbered lists, and sections when appropriate
        3. **Highlight key information** - Use **bold** for important details like costs, dates, and decisions
        4. **Provide context** - Explain what the information means in practical terms
        5. **Format numbers nicely** - Use commas for large numbers and proper currency formatting
        6. **Be comprehensive** - Include relevant details from the context
        7. **Suggest follow-up questions** - End with a helpful suggestion for what they might want to know next
        8. **Handle specific properties** - When asked about specific addresses or properties, focus on that property's details

        **Response Structure:**
        - Start with a direct answer to their question
        - Provide supporting details with clear formatting
        - Use bullet points for lists of applications, types, or statistics
        - Highlight key metrics and important dates
        - For property-specific queries, organize information by property details, development specs, contact info, etc.
        - End with a helpful suggestion or related information

        **Property Query Handling:**
        - When asked about specific properties (addresses, street names, etc.), prioritize that property's information
        - Include all available details: land description, development type, number of apartments/dwellings, GFA, bedroom configurations, etc.
        - If specific details aren't available, clearly state what information is missing
        - Focus on the most relevant and detailed information for that specific property

        **Context Information:**
        {context}

        Remember: You're helping people understand complex planning information in an accessible way. Make the information engaging and easy to understand!
        """)
        
        # Create user message
        user_message = HumanMessage(content=query)
        
        # Generate response
        response = self.llm.invoke([system_message, user_message])
        return response.content
    
    def chat(self, message: str) -> Dict[str, Any]:
        """Main chat method that combines retrieval and generation"""
        try:
            # Check if this is a statistical question that needs direct database access
            if self._is_statistical_question(message):
                return self._handle_statistical_question(message)
            
            # Check if this is a bulk listing request
            if self._is_bulk_listing_request(message):
                return self._handle_bulk_listing_request(message)
            
            # Search for relevant documents
            similar_docs = self.search_similar_documents(message, k=5)
            
            if not similar_docs:
                return {
                    'response': """I apologize, but I couldn't find any relevant information in my database to answer your question. 🤔

                    **Here are some suggestions to help you get better results:**

                    • **Try rephrasing your question** - Use different keywords or be more specific
                    • **Ask about development applications** - I have data on planning applications, council decisions, and development types
                    • **Query specific councils** - Try asking about "City of Darebin" or other councils in the database
                    • **Ask about development types** - I can help with residential, commercial, industrial, or subdivision projects
                    • **Request statistics** - Ask about application counts, costs, or decision trends

                    **💡 Example questions you can try:**
                    - "How many development applications are in the database?"
                    - "What types of developments are most common?"
                    - "Tell me about applications in City of Darebin"
                    - "What are the recent planning decisions?"
                    - "Show me applications with high costs"

                    Feel free to try again with a different approach! I'm here to help you explore the planning data. 🏗️""",
                    'sources': [],
                    'confidence': 'low'
                }
            
            # Generate response
            response = self.generate_response(message, similar_docs)
            
            # Extract sources from metadata
            sources = []
            for doc in similar_docs:
                source_info = {
                    'application_id': doc.metadata.get('application_id', 'Unknown'),
                    'council_name': doc.metadata.get('council_name', 'Unknown'),
                    'document_type': doc.metadata.get('document_type', 'Unknown'),
                    'source_type': doc.metadata.get('type', 'Unknown')
                }
                sources.append(source_info)
            
            return {
                'response': response,
                'sources': sources,
                'confidence': 'high' if len(similar_docs) >= 3 else 'medium'
            }
            
        except Exception as e:
            return {
                'response': f"""I'm sorry, but I encountered a technical issue while processing your request. 😔

**What happened:** {str(e)}

**What you can do:**
• **Try again** - Sometimes temporary issues resolve themselves
• **Rephrase your question** - Use simpler or different wording
• **Ask something else** - Try a different type of question about development applications
• **Contact support** - If the issue persists, there might be a system problem

I'm here to help you explore the planning data, so please don't hesitate to try again! 🏗️""",
                'sources': [],
                'confidence': 'error'
            }
    
    def _is_statistical_question(self, message: str) -> bool:
        """Check if the question is asking for statistics or counts"""
        message_lower = message.lower()
        
        # NOTE: Check for specific property queries first to avoid false positives
        property_indicators = [
            'avenue', 'street', 'road', 'drive', 'court', 'place', 'crescent',
            'boulevard', 'way', 'lane', 'close', 'terrace', 'grove', 'parade',
            'waverley', 'ivanhoe', 'darebin', 'monash', 'stonnington', 'wyndham',
            'dandenong', 'banyule', 'address', 'property', 'site', 'location'
        ]
        
        # If the message contains property indicators, it's likely a specific property query
        if any(indicator in message_lower for indicator in property_indicators):
            return False
        
        # Statistical keywords that indicate database-wide queries
        statistical_keywords = [
            'how many', 'count', 'total', 'statistics', 'summary', 
            'overview', 'applications', 'projects', 'developments', 'councils',
            'list all', 'show all', 'all application', 'all applications', 'application ids'
        ]
        
        # Only trigger on statistical keywords if they're not part of a specific property query
        return any(keyword in message_lower for keyword in statistical_keywords)
    
    def _handle_statistical_question(self, message: str) -> Dict[str, Any]:
        """Handle statistical questions with direct database queries"""
        try:
            message_lower = message.lower()
            
            # Check for specific types of statistical questions
            if 'application id' in message_lower or 'application ids' in message_lower or 'list all' in message_lower:
                return self._get_all_application_ids()
            elif 'council' in message_lower or 'councils' in message_lower:
                return self._get_council_statistics()
            elif 'development type' in message_lower or 'types of development' in message_lower:
                return self._get_development_type_statistics()
            elif 'decision' in message_lower or 'approval' in message_lower:
                return self._get_decision_statistics()
            elif 'cost' in message_lower or 'value' in message_lower or 'budget' in message_lower:
                return self._get_cost_statistics()
            else:
                # Default comprehensive statistics
                return self._get_comprehensive_statistics()
            
        except Exception as e:
            return {
                'response': f"I encountered an error while retrieving statistics: {str(e)}",
                'sources': [],
                'confidence': 'error'
            }
    
    def _get_comprehensive_statistics(self) -> Dict[str, Any]:
        """Get comprehensive database statistics"""
        total_applications = DevelopmentApplication.objects.count()
        total_pdfs = PDFDocument.objects.count()
        total_extracted = ExtractedPDFData.objects.count()
        
        councils = DevelopmentApplication.objects.values_list('council_name', flat=True).distinct()
        council_count = len([c for c in councils if c])
        
        development_types = DevelopmentApplication.objects.values_list('development_type', flat=True).distinct()
        development_type_count = len([dt for dt in development_types if dt])
        
        decisions = DevelopmentApplication.objects.values_list('decision', flat=True).distinct()
        decision_count = len([d for d in decisions if d])
        
        applications_with_cost = DevelopmentApplication.objects.filter(cost__isnull=False).count()
        total_cost = DevelopmentApplication.objects.filter(cost__isnull=False).aggregate(total=Sum('cost'))['total'] or 0
        
        response = f"""📊 **Development Applications Database Overview** 🏗️

        **📋 Total Applications:** **{total_applications:,}** development applications

        **🏛️ Council Coverage:** **{council_count}** councils in the database
        **📄 Document Processing:** **{total_pdfs:,}** PDF documents processed
        **📊 Data Extraction:** **{total_extracted:,}** documents with extracted data

        **🏗️ Development Diversity:**
        • **Development Types:** **{development_type_count}** different types of developments
        • **Decision Categories:** **{decision_count}** different decision outcomes
        • **Cost Data:** **{applications_with_cost:,}** applications with cost information
        • **Total Project Value:** **${total_cost:,.2f}** across all projects

        **💡 What you can explore:**
        • Ask about specific councils (e.g., "Tell me about City of Darebin")
        • Query development types (e.g., "Show me residential projects")
        • Check decision trends (e.g., "What are the recent approvals?")
        • Explore high-value projects (e.g., "Show me projects over $1 million")

        This database contains comprehensive planning information to help you understand development patterns and trends! 🚀"""

        return {
            'response': response,
            'sources': [{'application_id': 'Database Statistics', 'council_name': 'All Councils', 'document_type': 'Statistical Summary', 'source_type': 'database_query'}],
            'confidence': 'high'
        }
    
    def _get_council_statistics(self) -> Dict[str, Any]:
        """Get council-specific statistics"""
        council_stats = DevelopmentApplication.objects.values('council_name').annotate(
            count=Count('id')
        ).filter(council_name__isnull=False).order_by('-count')
        
        response = "🏛️ **Council Statistics** 📊\n\n"
        response += f"**Total Councils:** {council_stats.count()}\n\n"
        response += "**Applications by Council:**\n"
        
        for council in council_stats[:10]:  # Top 10 councils
            response += f"• **{council['council_name']}:** {council['count']:,} applications\n"
        
        if council_stats.count() > 10:
            response += f"\n... and {council_stats.count() - 10} more councils"
        
        return {
            'response': response,
            'sources': [{'application_id': 'Council Statistics', 'council_name': 'All Councils', 'document_type': 'Statistical Summary', 'source_type': 'database_query'}],
            'confidence': 'high'
        }
    
    def _get_development_type_statistics(self) -> Dict[str, Any]:
        """Get development type statistics"""
        dev_type_stats = DevelopmentApplication.objects.values('development_type').annotate(
            count=Count('id')
        ).filter(development_type__isnull=False).order_by('-count')
        
        response = "🏗️ **Development Type Statistics** 📊\n\n"
        response += f"**Total Development Types:** {dev_type_stats.count()}\n\n"
        response += "**Most Common Development Types:**\n"
        
        for dev_type in dev_type_stats[:10]:  # Top 10 types
            response += f"• **{dev_type['development_type']}:** {dev_type['count']:,} applications\n"
        
        if dev_type_stats.count() > 10:
            response += f"\n... and {dev_type_stats.count() - 10} more types"
        
        return {
            'response': response,
            'sources': [{'application_id': 'Development Type Statistics', 'council_name': 'All Councils', 'document_type': 'Statistical Summary', 'source_type': 'database_query'}],
            'confidence': 'high'
        }
    
    def _get_decision_statistics(self) -> Dict[str, Any]:
        """Get decision statistics"""
        decision_stats = DevelopmentApplication.objects.values('decision').annotate(
            count=Count('id')
        ).filter(decision__isnull=False).order_by('-count')
        
        response = "📋 **Decision Statistics** 📊\n\n"
        response += f"**Total Decision Categories:** {decision_stats.count()}\n\n"
        response += "**Decision Outcomes:**\n"
        
        for decision in decision_stats[:10]:  # Top 10 decisions
            response += f"• **{decision['decision']}:** {decision['count']:,} applications\n"
        
        if decision_stats.count() > 10:
            response += f"\n... and {decision_stats.count() - 10} more decision types"
        
        return {
            'response': response,
            'sources': [{'application_id': 'Decision Statistics', 'council_name': 'All Councils', 'document_type': 'Statistical Summary', 'source_type': 'database_query'}],
            'confidence': 'high'
        }
    
    def _get_cost_statistics(self) -> Dict[str, Any]:
        """Get cost statistics"""
        cost_stats = DevelopmentApplication.objects.filter(cost__isnull=False)
        total_cost = cost_stats.aggregate(total=Sum('cost'))['total'] or 0
        avg_cost = cost_stats.aggregate(avg=Avg('cost'))['avg'] or 0
        max_cost = cost_stats.aggregate(max=Max('cost'))['max'] or 0
        min_cost = cost_stats.aggregate(min=Min('cost'))['min'] or 0
        
        response = "💰 **Cost Statistics** 📊\n\n"
        response += f"**Applications with Cost Data:** {cost_stats.count():,}\n\n"
        response += "**Financial Overview:**\n"
        response += f"• **Total Project Value:** ${total_cost:,.2f}\n"
        response += f"• **Average Project Cost:** ${avg_cost:,.2f}\n"
        response += f"• **Highest Project Cost:** ${max_cost:,.2f}\n"
        response += f"• **Lowest Project Cost:** ${min_cost:,.2f}\n"
        
        return {
            'response': response,
            'sources': [{'application_id': 'Cost Statistics', 'council_name': 'All Councils', 'document_type': 'Statistical Summary', 'source_type': 'database_query'}],
            'confidence': 'high'
        }
    
    def _get_all_application_ids(self) -> Dict[str, Any]:
        """Get all application IDs with comprehensive information using parallel processing"""
        try:
            # Get all applications with comprehensive data
            applications = DevelopmentApplication.objects.select_related().filter(
                application_id__isnull=False
            ).exclude(
                application_id=''
            ).order_by('application_id')
            
            total_count = applications.count()
            
            if total_count == 0:
                return {
                    'response': "❌ **No Application IDs Found**\n\nI couldn't find any application IDs in the database. This might indicate that the data hasn't been imported yet or there's an issue with the data.",
                    'sources': [],
                    'confidence': 'low'
                }
            
            # Create comprehensive response
            response = f"📋 **Complete Development Applications Database** 🏗️\n\n"
            response += f"**📊 Total Applications:** **{total_count:,}** applications\n\n"
            response += "**📋 All Application Details:**\n"
            
            # Process in optimized batches for better performance
            batch_size = 100  # Increased batch size for better performance
            all_apps_data = []
            
            for i in range(0, total_count, batch_size):
                batch = applications[i:i + batch_size]
                for app in batch:
                    cost_str = f"${app.cost:,.2f}" if app.cost else "Not specified"
                    council = app.council_name or "Unknown"
                    decision = app.decision or "Unknown"
                    dev_type = app.development_type or "Not specified"
                    
                    all_apps_data.append({
                        'id': app.application_id,
                        'council': council,
                        'decision': decision,
                        'cost': cost_str,
                        'type': dev_type
                    })
            
            # Format the response with comprehensive information
            for i, app_data in enumerate(all_apps_data, 1):
                response += f"{i:3d}. **{app_data['id']}**  | {app_data['type']}\n"
                
                # Add line breaks every 15 items for better readability
                if i % 15 == 0:
                    response += "\n"
            
            # Calculate additional statistics
            councils = set(app['council'] for app in all_apps_data)
            decisions = set(app['decision'] for app in all_apps_data)
            dev_types = set(app['type'] for app in all_apps_data)
            
            response += f"\n**📈 Comprehensive Summary:**\n"
            response += f"• **Total Applications:** {len(all_apps_data):,}\n"
            response += f"• **Unique Councils:** {len(councils)}\n"
            response += f"• **Decision Types:** {len(decisions)}\n"
            response += f"• **Development Types:** {len(dev_types)}\n"
            response += f"• **First Application:** {all_apps_data[0]['id'] if all_apps_data else 'N/A'}\n"
            response += f"• **Last Application:** {all_apps_data[-1]['id'] if all_apps_data else 'N/A'}\n"
            
            response += f"\n**💡 Next Steps:**\n"
            response += f"• Ask about specific applications: \"Tell me about {all_apps_data[0]['id'] if all_apps_data else 'D-101-2023'}\"\n"
            response += f"• Get council statistics: \"Show me City of Darebin applications\"\n"
            response += f"• Check development types: \"What residential projects are there?\"\n"
            response += f"• Explore costs: \"Show me high-value projects\"\n"
            
            return {
                'response': response,
                'sources': [{'application_id': 'All Applications', 'council_name': 'All Councils', 'document_type': 'Complete Application Database', 'source_type': 'database_query'}],
                'confidence': 'high'
            }
            
        except Exception as e:
            return {
                'response': f"I encountered an error while retrieving all application IDs: {str(e)}",
                'sources': [],
                'confidence': 'error'
            }
    
    def get_statistics(self) -> Dict[str, Any]:
        """Get statistics about the knowledge base"""
        try:
            total_applications = DevelopmentApplication.objects.count()
            total_pdfs = PDFDocument.objects.count()
            total_extracted = ExtractedPDFData.objects.count()
            
            councils = DevelopmentApplication.objects.values_list('council_name', flat=True).distinct()
            
            return {
                'total_applications': total_applications,
                'total_pdf_documents': total_pdfs,
                'total_extracted_data': total_extracted,
                'councils': list(councils),
                'vector_store_exists': os.path.exists(self.vector_store_path)
            }
        except Exception as e:
            return {'error': str(e)}
    
    def _is_bulk_listing_request(self, message: str) -> bool:
        """Check if the question is asking for bulk listing of data"""
        bulk_keywords = [
            'list all', 'show all', 'all of', 'every', 'complete list', 'full list',
            'entire database', 'all records', 'all entries', 'all items'
        ]
        message_lower = message.lower()
        return any(keyword in message_lower for keyword in bulk_keywords)
    
    def _handle_bulk_listing_request(self, message: str) -> Dict[str, Any]:
        """Handle bulk listing requests with optimized database queries"""
        try:
            message_lower = message.lower()
            
            # Determine what type of bulk listing is requested
            if 'application' in message_lower or 'id' in message_lower:
                return self._get_all_application_ids()
            elif 'council' in message_lower:
                return self._get_all_councils_list()
            elif 'development type' in message_lower or 'type' in message_lower:
                return self._get_all_development_types_list()
            elif 'decision' in message_lower:
                return self._get_all_decisions_list()
            else:
                # Default to comprehensive statistics
                return self._get_comprehensive_statistics()
                
        except Exception as e:
            return {
                'response': f"I encountered an error while processing the bulk listing request: {str(e)}",
                'sources': [],
                'confidence': 'error'
            }
    
    def _get_all_councils_list(self) -> Dict[str, Any]:
        """Get all councils with application counts"""
        try:
            councils = DevelopmentApplication.objects.values('council_name').annotate(
                count=Count('id')
            ).filter(council_name__isnull=False).order_by('-count')
            
            response = "🏛️ **All Councils in Database** 📊\n\n"
            response += f"**Total Councils:** {councils.count()}\n\n"
            response += "**Council List with Application Counts:**\n"
            
            for i, council in enumerate(councils, 1):
                response += f"{i:2d}. **{council['council_name']}** - {council['count']:,} applications\n"
            
            return {
                'response': response,
                'sources': [{'application_id': 'Council List', 'council_name': 'All Councils', 'document_type': 'Council Database', 'source_type': 'database_query'}],
                'confidence': 'high'
            }
        except Exception as e:
            return {
                'response': f"Error retrieving council list: {str(e)}",
                'sources': [],
                'confidence': 'error'
            }
    
    def _get_all_development_types_list(self) -> Dict[str, Any]:
        """Get all development types with counts"""
        try:
            dev_types = DevelopmentApplication.objects.values('development_type').annotate(
                count=Count('id')
            ).filter(development_type__isnull=False).order_by('-count')
            
            response = "🏗️ **All Development Types in Database** 📊\n\n"
            response += f"**Total Development Types:** {dev_types.count()}\n\n"
            response += "**Development Types with Application Counts:**\n"
            
            for i, dev_type in enumerate(dev_types, 1):
                response += f"{i:2d}. **{dev_type['development_type']}** - {dev_type['count']:,} applications\n"
            
            return {
                'response': response,
                'sources': [{'application_id': 'Development Types', 'council_name': 'All Councils', 'document_type': 'Development Type Database', 'source_type': 'database_query'}],
                'confidence': 'high'
            }
        except Exception as e:
            return {
                'response': f"Error retrieving development types: {str(e)}",
                'sources': [],
                'confidence': 'error'
            }
    
    def _get_all_decisions_list(self) -> Dict[str, Any]:
        """Get all decision types with counts"""
        try:
            decisions = DevelopmentApplication.objects.values('decision').annotate(
                count=Count('id')
            ).filter(decision__isnull=False).order_by('-count')
            
            response = "📋 **All Decision Types in Database** 📊\n\n"
            response += f"**Total Decision Types:** {decisions.count()}\n\n"
            response += "**Decision Types with Application Counts:**\n"
            
            for i, decision in enumerate(decisions, 1):
                response += f"{i:2d}. **{decision['decision']}** - {decision['count']:,} applications\n"
            
            return {
                'response': response,
                'sources': [{'application_id': 'Decision Types', 'council_name': 'All Councils', 'document_type': 'Decision Database', 'source_type': 'database_query'}],
                'confidence': 'high'
            }
        except Exception as e:
            return {
                'response': f"Error retrieving decision types: {str(e)}",
                'sources': [],
                'confidence': 'error'
            } 