o
    h                     @   s   d dl Z d dlZd dlZd dlmZmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lm Z  d dl!m"Z" ddl#m$Z$m%Z%m&Z& G dd dZ'dS )    N)ListDictAnyOptional)settings)QSumCountAvgMaxMin)SentenceTransformer)
ChatOpenAI)HumanMessageSystemMessage)RecursiveCharacterTextSplitter)HuggingFaceEmbeddings)FAISS)Document   )DevelopmentApplicationPDFDocumentExtractedPDFDatac                   @   s  e Zd Zdd Zdee fddZdd Zdd	 Zd2de	de
dee fddZde	dee de	fddZde	dee	ef fddZde	defddZde	dee	ef fddZdee	ef fddZdee	ef fddZdee	ef fddZdee	ef fdd Zdee	ef fd!d"Zdee	ef fd#d$Zdee	ef fd%d&Zde	defd'd(Zde	dee	ef fd)d*Zdee	ef fd+d,Zdee	ef fd-d.Zdee	ef fd/d0Zd1S )3RAGChatbotServicec                 C   sH   t d| _tddtjd| _tjtj	d| _
d | _tddtd| _d S )	Nall-MiniLM-L6-v2zgpt-4o-2024-05-13g?)modeltemperatureapi_keyfaiss_index     )
chunk_sizechunk_overlaplength_function)r   embedding_modelr   r   OPENAI_API_KEYllmospathjoin
MEDIA_ROOTvector_store_pathvector_storer   lentext_splitter)self r0   :/home/shobhit/Desktop/shao (1)/shao/shaoApp/rag_service.py__init__   s   
zRAGChatbotService.__init__returnc           
      C   s  g }t d| tj d d}|D ]H}|jdur%d|jdnd}d|jp,d	 d
|j	p2d	 d|j
p8d	 d|jp>d	 d|jpDd	 d| d|jpMd	 d|jpSd	 d|jpYd	 d|jp_d	 d|jped	 d|jpkd	 d|jpqd d}|t|d|j|j	ddd |j D ]}z|j}|rJdg d|jpd	 d|jpd	 d|jpd d |jpd d!|jpd d"|jpd d#|jpd d$|j pd d%|j!pd d&|j"pd d'|j#pd d(|j$pd d)|j%pd d*|j&pd d+|j'pd d,|j(pd d-|j)p d d.|j*p)d d/|j+p2d d0}|t|d1|j|j|jd2d3d W q t,y^ }	 zW Y d}	~	qd}	~	ww q|S )4z4Create documents from the database for vectorizationz================pdf_documents__extracted_dataapplication_idN$,.2fNot specifiedu   
            **Development Application Summary**
            
            **Application Details:**
            • **Application ID:** Unknownu   
            • **Council:** u   
            • **Decision:** u%   
            • **Lodgement Date:** u&   
            • **Determined Date:** u#   
            • **Project Cost:** u`   
            
            **Development Specifications:**
            • **Development Type:** u*   
            • **Number of Dwellings:** u(   
            • **Number of Storeys:** u'   
            • **Number of Places:** u+   
            • **Number of Apartments:** u-   
            • **Number of Subdivisions:** u[   
            
            **Additional Information:**
            • **Application URL:** zNot availablez
            application_metadatametadata)sourcer5   council_nametype)page_contentr;    u   
                        **PDF Document Information**
                        
                        **Document Details:**
                        • **File Name:** u0   
                        • **Document Type:** u~   
                        
                        **Property Information:**
                        • **Land Description:** u8   
                        • **Registered Proprietor:** u/   
                        • **Encumbrances:** uy   
                        
                        **Development Details:**
                        • **Proposed Use:** u.   
                        • **Description:** uv   
                        
                        **Contact Information:**
                        • **Applicant:** u1   
                        • **Contact Person:** u2   
                        • **Contact Address:** u0   
                        • **Contact Email:** u0   
                        • **Contact Phone:** uu   
                        
                        **Site Specifications:**
                        • **Lot Size:** u0   
                        • **Site Coverage:** u-   
                        • **Total Area:** u4   
                        • **Ground Floor Area:** u3   
                        • **First Floor Area:** u5   
                        • **Private Open Space:** u>   
                        • **Secluded Private Open Space:** z
                        pdf_extracted_dataextracted_data)r<   r5   pdf_filedocument_typer>   )-printr   objectsselect_relatedprefetch_relatedallorder_bycostr5   r=   decisionlodgement_datedetermined_datedevelopment_typenumber_of_dwellingsnumber_of_storeysnumber_of_placesnumber_of_apartmentsnumber_of_subdivisionsapplication_urlappendr   pdf_documentsrB   r)   	file_namerD   land_descriptionregistered_proprietorencumbrancesproposed_usedescriptionapplicant_namecontact_namecontact_addresscontact_emailcontact_phonelot_sizesite_coverage
total_areaground_floor_areafirst_floor_areaposspos	Exception)
r/   	documentsapplicationsappcost_strapp_metadatapdf_doc	extractedpdf_contenter0   r0   r1   create_documents_from_database#   s   



	
	






!
4z0RAGChatbotService.create_documents_from_databasec                 C   s   t d |  }|st d dS t dt| d | j|}t dt| d tdd	d
id}t||| _t	j
| jdd | j| j t d| j  dS )z%Build and save the FAISS vector storez&Building vector store from database...zNo documents found in databaseNz
Splitting z documents into chunks...zCreated z chunksr   devicecpu
model_namemodel_kwargsT)exist_okzVector store saved to )rE   rt   r-   r.   split_documentsr   r   from_documentsr,   r'   makedirsr+   
save_local)r/   rk   
split_docs
embeddingsr0   r0   r1   build_vector_store   s    z$RAGChatbotService.build_vector_storec                 C   sB   t j| jrtdddid}tj| j|dd| _td dS dS )	z$Load the existing FAISS vector storer   ru   rv   rw   T)allow_dangerous_deserializationz Vector store loaded successfullyF)	r'   r(   existsr+   r   r   
load_localr,   rE   )r/   r   r0   r0   r1   load_vector_store   s   z#RAGChatbotService.load_vector_store   querykc                    sN   | j s|  s|   | j sg S t fdddD rd}| j j |d}|S )z0Search for similar documents in the vector storec                 3   s    | ]	}|   v V  qd S N)lower).0wordr   r0   r1   	<genexpr>   s    z=RAGChatbotService.search_similar_documents.<locals>.<genexpr>)
how manycounttotal
statisticssummaryoverviewlist allshow allall applicationapplication idsr   r   )r,   r   r   anysimilarity_search)r/   r   r   similar_docsr0   r   r1   search_similar_documents   s   z*RAGChatbotService.search_similar_documentscontext_docsc                 C   sF   d dd |D }td| dd}t|d}| j||g}|jS )z2Generate response using LLM with retrieved context

c                 S   s   g | ]}|j qS r0   )r?   )r   docr0   r0   r1   
<listcomp>   s    z7RAGChatbotService.generate_response.<locals>.<listcomp>ad  
        You are a knowledgeable and friendly urban planning assistant specializing in development applications and council planning data. Your role is to provide clear, engaging, and well-structured responses about development projects, planning decisions, and council information.

        **Response Guidelines:**
        1. **Be conversational and engaging** - Use a friendly, professional tone
        2. **Structure your responses clearly** - Use bullet points, numbered lists, and sections when appropriate
        3. **Highlight key information** - Use **bold** for important details like costs, dates, and decisions
        4. **Provide context** - Explain what the information means in practical terms
        6. **Format numbers nicely** - Use commas for large numbers and proper currency formatting
        7. **Be comprehensive** - Include relevant details from the context
        8. **Suggest follow-up questions** - End with a helpful suggestion for what they might want to know next

        **Response Structure:**
        - Start with a direct answer to their question
        - Provide supporting details with clear formatting
        - Use bullet points for lists of applications, types, or statistics
        - Highlight key metrics and important dates
        - End with a helpful suggestion or related information

        **Context Information:**
        z

        Remember: You're helping people understand complex planning information in an accessible way. Make the information engaging and easy to understand!
        )content)r)   r   r   r&   invoker   )r/   r   r   contextsystem_messageuser_messageresponser0   r0   r1   generate_response   s   
z#RAGChatbotService.generate_responsemessagec              
   C   s  ze|  |r| |W S | |r| |W S | j|dd}|s'dg ddW S | ||}g }|D ]"}|jdd|jdd|jd	d|jd
dd}|| q1||t	|dkraddW S ddW S  t
y } zdt| dg ddW  Y d}~S d}~ww )z7Main chat method that combines retrieval and generationr   r   u)  I apologize, but I couldn't find any relevant information in my database to answer your question. 🤔

**Here are some suggestions to help you get better results:**

• **Try rephrasing your question** - Use different keywords or be more specific
• **Ask about development applications** - I have data on planning applications, council decisions, and development types
• **Query specific councils** - Try asking about "City of Darebin" or other councils in the database
• **Ask about development types** - I can help with residential, commercial, industrial, or subdivision projects
• **Request statistics** - Ask about application counts, costs, or decision trends

**💡 Example questions you can try:**
- "How many development applications are in the database?"
- "What types of developments are most common?"
- "Tell me about applications in City of Darebin"
- "What are the recent planning decisions?"
- "Show me applications with high costs"

Feel free to try again with a different approach! I'm here to help you explore the planning data. 🏗️lowr   sources
confidencer5   r9   r=   rD   r>   r5   r=   rD   source_type   highmediumug   I'm sorry, but I encountered a technical issue while processing your request. 😔

**What happened:** u  

**What you can do:**
• **Try again** - Sometimes temporary issues resolve themselves
• **Rephrase your question** - Use simpler or different wording
• **Ask something else** - Try a different type of question about development applications
• **Contact support** - If the issue persists, there might be a system problem

I'm here to help you explore the planning data, so please don't hesitate to try again! 🏗️errorN)_is_statistical_question_handle_statistical_question_is_bulk_listing_request_handle_bulk_listing_requestr   r   r;   getrV   r-   rj   str)r/   r   r   r   r   r   source_infors   r0   r0   r1   chat   sF   

zRAGChatbotService.chatc                    &   g d}|   t fdd|D S )z8Check if the question is asking for statistics or counts)r   r   r   z	number ofr   r   r   rl   projectsdevelopmentscouncilsr   r   r   zall applicationsr   c                 3       | ]}| v V  qd S r   r0   r   keywordmessage_lowerr0   r1   r   E      z=RAGChatbotService._is_statistical_question.<locals>.<genexpr>r   r   )r/   r   statistical_keywordsr0   r   r1   r   =  s   z*RAGChatbotService._is_statistical_questionc              
   C   s   zR|  }d|v sd|v sd|v r|  W S d|v sd|v r#|  W S d|v s+d|v r0|  W S d|v s8d	|v r=|  W S d
|v sId|v sId|v rN|  W S |  W S  tyo } zdt| g ddW  Y d}~S d}~ww )z9Handle statistical questions with direct database querieszapplication idr   r   councilr   development typeztypes of developmentrL   approvalrK   valuebudgetz4I encountered an error while retrieving statistics: r   r   N)	r   _get_all_application_ids_get_council_statistics _get_development_type_statistics_get_decision_statistics_get_cost_statistics_get_comprehensive_statisticsrj   r   r/   r   r   rs   r0   r0   r1   r   G  s(   





z.RAGChatbotService._handle_statistical_questionc                 C   s  t j }tj }tj }t jjddd }tdd |D }t jjddd }tdd |D }t jjddd }td	d |D }	t jjd
d }
t jjd
dj	t
ddd pad}d|dd| d|dd|dd| d|	 d|
dd|dd}|dddddgd d!S )"z%Get comprehensive database statisticsr=   Tflatc                 S      g | ]}|r|qS r0   r0   )r   cr0   r0   r1   r   i      zCRAGChatbotService._get_comprehensive_statistics.<locals>.<listcomp>rO   c                 S   r   r0   r0   )r   dtr0   r0   r1   r   l  r   rL   c                 S   r   r0   r0   )r   dr0   r0   r1   r   o  r   Fcost__isnullrK   r   r   r   u\   📊 **Development Applications Database Overview** 🏗️

**📋 Total Applications:** **,u=   ** development applications

**🏛️ Council Coverage:** **u<   ** councils in the database
**📄 Document Processing:** **u7   ** PDF documents processed
**📊 Data Extraction:** **ub   ** documents with extracted data

**🏗️ Development Diversity:**
• **Development Types:** **uB   ** different types of developments
• **Decision Categories:** **u4   ** different decision outcomes
• **Cost Data:** **uF   ** applications with cost information
• **Total Project Value:** **$r7   u  ** across all projects

**💡 What you can explore:**
• Ask about specific councils (e.g., "Tell me about City of Darebin")
• Query development types (e.g., "Show me residential projects")
• Check decision trends (e.g., "What are the recent approvals?")
• Explore high-value projects (e.g., "Show me projects over $1 million")

This database contains comprehensive planning information to help you understand development patterns and trends! 🚀zDatabase StatisticsAll CouncilsStatistical Summarydatabase_queryr   r   r   )r   rF   r   r   r   values_listdistinctr-   filter	aggregater   )r/   total_applications
total_pdfstotal_extractedr   council_countdevelopment_typesdevelopment_type_count	decisionsdecision_countapplications_with_cost
total_costr   r0   r0   r1   r   b  s@   


"	

z/RAGChatbotService._get_comprehensive_statisticsc                 C      t jdjtddjddd}d}|d|  d	7 }|d
7 }|dd D ]}|d|d  d|d dd7 }q)| dkrN|d| d  d7 }|dddddgddS )zGet council-specific statisticsr=   idr   Fcouncil_name__isnull-countu%   🏛️ **Council Statistics** 📊

**Total Councils:** r   z**Applications by Council:**
N
      • **:** r   r    applications
	
... and z more councilszCouncil Statisticsr   r   r   r   r   r   r   rF   valuesannotater	   r   rJ   r   )r/   council_statsr   r   r0   r0   r1   r     $   "z)RAGChatbotService._get_council_statisticsc                 C   r   )zGet development type statisticsrO   r   r   Fdevelopment_type__isnullr   u.   🏗️ **Development Type Statistics** 📊

**Total Development Types:** r   z#**Most Common Development Types:**
Nr   r   r   r   r   r   r   z more typeszDevelopment Type Statisticsr   r   r   r   r   r   r   )r/   dev_type_statsr   dev_typer0   r0   r1   r     r   z2RAGChatbotService._get_development_type_statisticsc                 C   r   )zGet decision statisticsrL   r   r   Fdecision__isnullr   u#   📋 **Decision Statistics** 📊

z**Total Decision Categories:** r   z**Decision Outcomes:**
Nr   r   r   r   r   r   r   z more decision typeszDecision Statisticsr   r   r   r   r   r   r   )r/   decision_statsr   rL   r0   r0   r1   r     r   z*RAGChatbotService._get_decision_statisticsc                 C   s   t jjdd}|jtddd pd}|jtddd pd}|jtdd	d
 p*d}|jtddd p6d}d}|d| dd7 }|d7 }|d|dd7 }|d|dd7 }|d|dd7 }|d|dd7 }|dddddgddS )zGet cost statisticsFr   rK   r   r   r   )avgr	  )maxr
  )minr  u   💰 **Cost Statistics** 📊

z!**Applications with Cost Data:** r   r   z**Financial Overview:**
u   • **Total Project Value:** $r7   
u   • **Average Project Cost:** $u   • **Highest Project Cost:** $u   • **Lowest Project Cost:** $zCost Statisticsr   r   r   r   r   r   )	r   rF   r   r   r   r
   r   r   r   )r/   
cost_statsr   avg_costmax_costmin_costr   r0   r0   r1   r     s    z&RAGChatbotService._get_cost_statisticsc                 C   s  z;t j jddjddd}| }|dkr!dg dd	W S d
}|d|dd7 }|d7 }d}g }td||D ]6}||||  }|D ])}|jrRd|jdnd}	|j	pXd}
|j
p]d}|jpbd}||j|
||	|d qFq:t|dD ]/\}}||dd|d  d|d  d|d  d|d  d|d  d7 }|d  dkr|d7 }qvtd!d" |D }td#d" |D }td$d" |D }|d%7 }|d&t|dd7 }|d't| d7 }|d(t| d7 }|d)t| d7 }|d*|r|d d nd+ d7 }|d,|r	|d- d nd+ d7 }|d.7 }|d/|r|d d nd0 d17 }|d27 }|d37 }|d47 }|d5d6d7d8d9gd:d	W S  tyZ } zd;t| g d<d	W  Y d=}~S d=}~ww )>zPGet all application IDs with comprehensive information using parallel processingF)application_id__isnullr@   )r5   r5   r   u   ❌ **No Application IDs Found**

I couldn't find any application IDs in the database. This might indicate that the data hasn't been imported yet or there's an issue with the data.r   r   u=   📋 **Complete Development Applications Database** 🏗️

u   **📊 Total Applications:** **r   z** applications

u"   **📋 All Application Details:**
d   r6   r7   r8   r9   )r   r   rL   rK   r>   r   3d. **r   z** | r   z | rL   rK   r>   r     c                 s       | ]}|d  V  qdS )r   Nr0   r   rm   r0   r0   r1   r     r   z=RAGChatbotService._get_all_application_ids.<locals>.<genexpr>c                 s   r  )rL   Nr0   r  r0   r0   r1   r     r   c                 s   r  )r>   Nr0   r  r0   r0   r1   r     r   u!   
**📈 Comprehensive Summary:**
u   • **Total Applications:** u   • **Unique Councils:** u   • **Decision Types:** u   • **Development Types:** u   • **First Application:** zN/Au   • **Last Application:** u   
**💡 Next Steps:**
u4   • Ask about specific applications: "Tell me about z
D-101-2023z"
uC   • Get council statistics: "Show me City of Darebin applications"
uD   • Check development types: "What residential projects are there?"
u1   • Explore costs: "Show me high-value projects"
zAll Applicationsr   zComplete Application Databaser   r   r   z=I encountered an error while retrieving all application IDs: r   N)r   rF   rG   r   excluderJ   r   rangerK   r=   rL   rO   rV   r5   	enumeratesetr-   rj   r   )r/   rl   total_countr   
batch_sizeall_apps_dataibatchrm   rn   r   rL   r  app_datar   r   	dev_typesrs   r0   r0   r1   r     s   




B ""z*RAGChatbotService._get_all_application_idsc              
   C   s   z)t j }tj }tj }t jjddd }|||t|tj	
| jdW S  tyA } zdt|iW  Y d}~S d}~ww )z'Get statistics about the knowledge baser=   Tr   )r   total_pdf_documentstotal_extracted_datar   vector_store_existsr   N)r   rF   r   r   r   r   r   listr'   r(   r   r+   rj   r   )r/   r   r   r   r   rs   r0   r0   r1   get_statistics<  s   


z RAGChatbotService.get_statisticsc                    r   )z8Check if the question is asking for bulk listing of data)
r   r   zall ofeveryzcomplete listz	full listzentire databasezall recordszall entriesz	all itemsc                 3   r   r   r0   r   r   r0   r1   r   V  r   z=RAGChatbotService._is_bulk_listing_request.<locals>.<genexpr>r   )r/   r   bulk_keywordsr0   r   r1   r   O  s   z*RAGChatbotService._is_bulk_listing_requestc              
   C   s   z5|  }d|v sd|v r|  W S d|v r|  W S d|v s#d|v r(|  W S d|v r1|  W S |  W S  tyR } zdt| g dd	W  Y d
}~S d
}~ww )z<Handle bulk listing requests with optimized database queriesapplicationr   r   r   r>   rL   zBI encountered an error while processing the bulk listing request: r   r   N)r   r   _get_all_councils_list_get_all_development_types_list_get_all_decisions_listr   rj   r   r   r0   r0   r1   r   X  s$   




z.RAGChatbotService._handle_bulk_listing_requestc              
   C      zMt jdjtddjddd}d}|d|  d	7 }|d
7 }t|dD ]\}}||dd|d  d|d dd7 }q)|dddddgddW S  t	yj } zdt
| g ddW  Y d}~S d}~ww )z(Get all councils with application countsr=   r   r   Fr   r   u+   🏛️ **All Councils in Database** 📊

r   r   z***Council List with Application Counts:**
r   2dr  ** - r   r   r   zCouncil Listr   zCouncil Databaser   r   r   r   zError retrieving council list: r   Nr   rF   r   r   r	   r   rJ   r   r  rj   r   )r/   r   r   r   r   rs   r0   r0   r1   r,  q  0   (z(RAGChatbotService._get_all_councils_listc              
   C   r/  )z%Get all development types with countsrO   r   r   Fr  r   u4   🏗️ **All Development Types in Database** 📊

r  r   z/**Development Types with Application Counts:**
r   r0  r  r1  r   r   r   zDevelopment Typesr   zDevelopment Type Databaser   r   r   r   z$Error retrieving development types: r   Nr2  )r/   r#  r   r   r  rs   r0   r0   r1   r-    r3  z1RAGChatbotService._get_all_development_types_listc              
   C   r/  )z"Get all decision types with countsrL   r   r   Fr  r   u.   📋 **All Decision Types in Database** 📊

z**Total Decision Types:** r   z,**Decision Types with Application Counts:**
r   r0  r  r1  r   r   r   zDecision Typesr   zDecision Databaser   r   r   r   z!Error retrieving decision types: r   Nr2  )r/   r   r   r   rL   rs   r0   r0   r1   r.    r3  z)RAGChatbotService._get_all_decisions_listN)r   )__name__
__module____qualname__r2   r   r   rt   r   r   r   intr   r   r   r   r   boolr   r   r   r   r   r   r   r   r(  r   r   r,  r-  r.  r0   r0   r0   r1   r      s,    h'K
.T	r   )(r'   jsonpickletypingr   r   r   r   numpynpfaissdjango.confr   django.db.modelsr   r   r	   r
   r   r   sentence_transformersr   langchain_openair   langchain.schemar   r   langchain.text_splitterr   langchain_community.embeddingsr    langchain_community.vectorstoresr   langchain.docstore.documentr   modelsr   r   r   r   r0   r0   r0   r1   <module>   s"     