o
    whtx                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d dlZd dlT d dlZd dlmZ d dlmZ d d	lmZ d dlm	Z	 d d
lmZ d dlmZ d dlZd dlmZmZ d dlmZ d dlm Z  d dl!m"Z" ddl#m$Z$ ddl%mZm&Z&m'Z' d dl(m)Z) e	dgdd Z*e	dgdd Z+dd Z,dd Z-e	dgdd Z.e	dgdd  Z/e	dgd!d" Z0e	dgd#d$ Z1e	dgd%d& Z2ed'geegd(d) Z3edgeegd*d+ Z4ed'geegd,d- Z5edgeegd.d/ Z6edgeegd0d1 Z7edgeegd2d3 Z8edgeegd4d5 Z9d6d7 Z:dS )8    N)settings)JsonResponse)require_http_methods)DevelopmentApplication)EXTRACT_STRUCTURED_DATA_PROMPT)*render)csrf_exempt)method_decorator)View)api_viewpermission_classes)AllowAny)Responsestatus   )RAGChatbotServicer   PDFDocumentExtractedPDFData)QGETc              
   C   s~  zt jtjd}t jtjd}t jtjd}t jtjd}t|}t|}|jdd|jj	j
ddd f }|jdd|jj	j
ddd f }|j|j}|| }|| }tj||gd	d
}||jdd }	|jd	d
}
|
j|dd |	j|dd td| tjd | tjd tt|	tt|
dW S  ty } ztddt	| iddW  Y d}~S d}~ww )z
    Merges two CSV files using only exactly matching column names.
    Removes duplicates and prevents creation of unwanted columns.
    z,Application Tables - 2025-07-20 08_21_55.csvzhistorical_scrape.csvmerged_output.csvzduplicate_entries.csvNz^UnnamedF)caseT)ignore_index)keepindexz7Merge completed successfully using exact column matches)messageoutput_fileduplicates_filenum_duplicatesnum_final_rowserrorMerge failed:   r   )ospathjoinr   
MEDIA_ROOTpdread_csvloccolumnsstrcontainsintersectionconcat
duplicateddrop_duplicatesto_csvr   build_absolute_uri	MEDIA_URLintlen	Exception)requestfile1file2merged_filedup_filedf1df2common_columnscombined_dfduplicate_rows	unique_dfe rH   4/home/shobhit/Desktop/shao (1)/shao/shaoApp/views.pymergeCsvFilesView   s8   

""


&rJ   c           
   
   C   s   zSt jtjd}t jtjd}t jtjd}t|}t|}g d}|| }tj||ddd}|j|dd	 |d
 	 
 }td| tjd t|dW S  tyr }	 ztddt|	 iddW  Y d}	~	S d}	~	ww )zE
    Enriches merged CSV data using determined application data.
    r   z%cumulative_applicationsdetermined.csvenriched_merged_output.csv)application_iddecisionlodgement_datedetermined_datecostcouncil_namerL   leftonhowFr   rM   z!Enrichment completed successfully)r    enriched_filenum_records_enrichedr%   zEnrichment failed: r'   r   N)r(   r)   r*   r   r+   r,   r-   merger6   notnasumr   r7   r8   r9   r;   r0   )
r<   merged_pathdetermined_pathenriched_output_path	merged_dfdetermined_dfcolsenriched_dfenriched_countrG   rH   rH   rI   enrichMergedFileViewN   s(   


&rc   c                 C   sJ   zt j| dd}t |st|t jrW d S | W S  ty$   Y d S w )Ncoerce)errors)r,   to_datetimeisnull
isinstanceNaTdater;   )date_strrj   rH   rH   rI   
parse_daten   s   
rl   c                 C   s   t | s	| dkrd S | S )N )r,   isna)valrH   rH   rI   safe_getw   s   rp   c                 C   sJ  zt jtjd}t j|stddiddW S t|}d\}}}d}t	|}t
d||D ]}|j|||  }	td	|| d
  d|| d
 |   |	 D ]\}
}zvt|dd}|rft|}ni }tjjt|dt|dt|dt|dt|dt|dt|dt|dt|dt|dt|dt|dt|ddd\}}|r|d
7 }n|d
7 }W qR ty } ztd|  |d
7 }W Y d}~qRd}~ww || |k rtd q/td ||||| | d!W S  ty$ } ztdd"t| id#dW  Y d}~S d}~ww )$zS
    Imports enriched CSV data into the database with improved error handling.
    zenriched_merged_output2.csvr%   z&enriched_merged_output2.csv not found.  r   )r   r   r      r   zProcessing batch r   /descriptionrm   rL   application_web_urlrM   rN   rO   rP   council_name_xdevelopment_typenumber_of_dwellingsnumber_of_storeysnumber_of_placesnumber_of_unitsnumber_of_lots)application_urlrM   rN   rO   rP   rQ   rw   rx   ry   rz   number_of_apartmentsnumber_of_subdivisions)rL   defaultszError processing row: N   z2Data import completed with improved error handling)r    createdupdatedfailedtotal_processedzData import failed: r'   )r(   r)   r*   r   r+   existsr   r,   r-   r:   rangeilocprintiterrowsrp   getextract_structured_datar   objectsupdate_or_createrl   r;   timesleepr0   )r<   	file_pathdfr   r   r   
batch_size
total_rowsibatch_df_rowrt   
structuredobj
is_createdrG   rH   rH   rI   importEnrichedDataViewz   sr   

(





&r   c              
   C   sf  zt jtjd}t jtjd}t jtjd}t j|r%t j|s.tddiddW S t|}t|}|j	dd	id
d |j
d	gd}||d	 tj dk }tj||g d d	dd}|d tdd |d< |j|dd td| tjd tt|t|d   dW S  ty } ztddt| iddW  Y d}~S d}~ww )z
    Merges enriched data with Name field from the scraper document based on Application ID.
    Filters out rows with missing Application ID before merging.
    Replaces Windows-style file path with /media/PDFs/ format.
    rK   z+Scraper Documents - 2025-07-20 08_21_54.csvenriched_with_name.csvr%   z%One or both source files are missing.rq   r   zApplication IDrL   T)r/   inplace)subsetrm   )rL   NamePathrR   rS   r   c                 S   s<   t | tr|  rdddd | dddD  S dS )Nz/media/rs   c                 S   s0   g | ]}|rd |v s|  rt|dks|qS ):   )isupperr:   ).0partrH   rH   rI   
<listcomp>   s    zBmergeWithNameFromScraperView.<locals>.<lambda>.<locals>.<listcomp>\rm   )rh   r0   stripr*   replacesplit)prH   rH   rI   <lambda>   s   
z.mergeWithNameFromScraperView.<locals>.<lambda>Fr   z&Merge with Name completed successfullyr   )r    r!   num_recordsnum_with_namer&   r'   N)r(   r)   r*   r   r+   r   r   r,   r-   renamedropnaastyper0   r   rX   applyr6   r7   r8   r9   r:   rY   rZ   r;   )r<   enriched_pathscraper_docs_pathoutput_pathra   
scraper_dfr^   rG   rH   rH   rI   mergeWithNameFromScraperView   s>   


	

&r   c           	   
   C   s
  zet jtjd}t j|stddiddW S t|}d|j	vs'd|j	vr0tddid	dW S g }|
 D ]$\}}|d }t jtj|d
}t| t j|rZ||d  q6tdt||dW S  ty } ztddt| iddW  Y d}~S d}~ww )z
    Checks the existence of PDF files specified in the 'Path' column
    of enriched_with_name.csv and returns a list of application IDs
    for which the file exists.
    r   r%   zMerged file not found.rq   r   rL   r   z(Required columns are missing in the CSV.  rs   zFile existence check complete.)r    num_files_foundapplication_idszFile check failed: r'   N)r(   r)   r*   r   r+   r   r   r,   r-   r/   r   BASE_DIRlstripr   appendr:   r;   r0   )	r<   
input_pathr   existing_idsr   r   relative_path	full_pathrG   rH   rH   rI   checkExistingPdfFilesView   s0   

&r   c                 C   s  zkt jtjd}t j|stddiddW S t|}d|j	vs(d|j	vr1tddid	dW S g }d
}d
}d
}g }|
 D ]<\}}	|	d }
|	d }t|sT|dkrUq?t jtj|d}t j|rn||
|f q?||
d|d |d7 }q?d}t|}td
||D ]}||||  }td|| d  d|| d |   |D ]\}
}zYt|
|}|dr||
|d||d|d|dd
|dd
|dd |d7 }|di dr|d7 }n||
|dp|d||dd |d7 }W q ty' } z||
t||d |d7 }W Y d}~qd}~ww || |k r4td qt jtjd}t|dd d!}tj||d"d#d$ W d   n	1 sYw   Y  td%|||||t|d&W S  ty } ztdd't| id(dW  Y d}~S d}~ww ))zm
    Extract data from PDF files and save to database with improved error handling and batch processing.
    r   r%   z!enriched_with_name.csv not found.rq   r   rL   r   zCSV missing required columnsr   r   rm   rs   zPDF file not found)rL   r%   r)   r      zProcessing PDF batch extraction_successextracted_datapdf_type
confidencetext_lengthpages_processeddatabase_save)rL   datar)   r   r   r   r   r   successreason)rL   r%   r)   r   Nzextracted_applications.jsonwzutf-8)encodingr   F)indentensure_asciiz*PDF extraction and database save completed)r    r!   
total_pdfssuccessfully_processedsuccessfully_saved_to_dbr   total_recordszPDF extraction failed: r'   )r(   r)   r*   r   r+   r   r   r,   r-   r/   r   rn   r   r   r   r:   r   r   process_and_save_pdf_datar   r;   r0   r   r   openjsondump)r<   	input_csvr   result	processedr   saved_to_db
valid_rowsr   r   app_idrel_pathpdf_pathr   r   r   batchprocessing_resultrG   r   frH   rH   rI   extractPdfData)  s   

(




	


&r   c                 C   sn  zddl m}m}m} | jd}|rz|jj|d}|j }||j	|j
|j|j|j|j|j|j|j|j|jdg d}|D ]}|j|j|j|j|j|j|j|j|j|j|j d}	t |dr|j!}
i d	|
j"d
|
j#d|
j$d|
j%d|
j&d|
j'd|
j(d|
j)d|
j*d|
j+d|
j,d|
j-d|
j.d|
j/d|
j0d|
j1d|
j2|
j3|
j4|
j5|
j6|
j7d|	d< |d 8|	 q@t9|W W S  |j:y   t9dd| didd  Y W S w |jj;d!d"< }g }|D ]}|j= }|jj;d#d$= }|8|j>|j	|||| d% qt9d&t?||d'W S  t@y6 } zt9dd(tA| id)d W  Y d*}~S d*}~ww )+z]
    Retrieve PDF data from the database for a specific application or all applications.
    r   r   rL   rL   )rQ   rM   rN   rO   rP   rw   rx   ry   rz   r~   r   )rL   application_datapdf_documents)id	file_namer   document_typer   r   r   r   extraction_statuserror_message
created_atr   land_descriptionregistered_proprietorencumbrancesactivity_last_125_daysadministrative_noticesproposed_usert   applicant_namecontact_namecontact_addresscontact_emailcontact_phoneapplicant_addressapplicant_emailapplicant_phonelot_sizesite_coverage)
total_areaground_floor_areafirst_floor_areapossposr   r%   zApplication z
 not foundrq   r   F)pdf_documents__isnullr   )r   )rL   rQ   r   successful_extractionsfailed_extractionszPDF data summary)r    total_applications_with_pdfsapplicationszDatabase query failed: r'   N)BshaoApp.modelsr   r   r   r   r   r   r   allrQ   rM   rN   rO   rP   rw   rx   ry   rz   r~   r   r   r   r   r   r   r   r   r   r   r   r   	isoformathasattrr   r   r   r   r   r   r   rt   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r   r   DoesNotExistfilterdistinctcountrL   r:   r;   r0   )r<   r   r   r   rL   applicationr   r   pdf_docpdf_data	extractedapplications_with_pdfssummaryapp	pdf_countr  rG   rH   rH   rI   getPdfDataFromDatabase  s   

	



&r  POSTc              
   C   s   z/| j }|dd }|stdditjdW S t }||}td|d |d |d	 d
W S  tyO } ztddt	| itj
dW  Y d}~S d}~ww )z0
    Main chat endpoint for the RAG chatbot
    r    rm   r%   zMessage is requiredr   Tresponsesourcesr   )r   r  r  r   An error occurred: N)r   r   r   r   r   HTTP_400_BAD_REQUESTr   chatr;   r0   HTTP_500_INTERNAL_SERVER_ERROR)r<   r   r    chatbotr   rG   rH   rH   rI   chat_endpoint  s2   

r#  c              
   C   s`   zt  }| }td|dW S  ty/ } ztddt| itjdW  Y d}~S d}~ww )z9
    Get statistics about the chatbot knowledge base
    T)r   
statisticsr%   r  r   N)r   get_statisticsr   r;   r0   r   r!  )r<   r"  statsrG   rH   rH   rI   chatbot_stats&     
r'  c              
   C   s`   zt  }|  tdddW S  ty/ } ztddt| itjdW  Y d}~S d}~ww )z6
    Rebuild the FAISS vector store from database
    Tz!Vector store rebuilt successfully)r   r    r%   r  r   N)r   build_vector_storer   r;   r0   r   r!  )r<   r"  rG   rH   rH   rI   rebuild_vector_store;  r(  r*  c           	      C   s<  z}| j dd}| j dd}| j dd}| j dd}tj }|r6|t|dt|dB t|dB }|r>|j|d}|rF|j|d	}|rN|j|d}|d
d }g }|D ]}||j|j	|j
|j|j|j|j|j|jd	 qXtd|t|dW S  ty } ztddt| itjdW  Y d
}~S d
}~ww )z=
    Search development applications by various criteria
    qrm   councilrM   rw   )application_id__icontains)council_name__icontains)development_type__icontains)decision__icontainsN2   )	rL   rQ   rM   rN   rO   rP   rw   rx   ry   T)r   r	  r  r%   r  r   )r   r   r   r   r  r  r   r   rL   rQ   rM   rN   rO   rP   rw   rx   ry   r   r:   r;   r0   r   r!  )	r<   queryr,  rM   rw   r	  resultsr  rG   rH   rH   rI   search_applicationsP  s^   


r4  c           	      C   sL  zst jj|d}g }|j D ]>}|j|j|j|j|j	|j
|jd}t|drH|jrH|j}|j|j|j|j|j|j|j|j|j|j|jd|d< || q|j|j|j|j|j|j|j |j!|j"|j#|j$|j%|j&|d}t'd|dW S  t j(y   t'dd	it)j*d
 Y S  t+y } zt'ddt,| it)j-d
W  Y d}~S d}~ww )z?
    Get detailed information about a specific application
    r   )r   r   r   r   r   r   r   r   )r   r   r   rt   r   r   r   r   r   r   r   )rL   r}   rQ   rM   rN   rO   rP   rw   rx   ry   rz   r~   r   r   T)r   r  r%   zApplication not foundr   r  N).r   r   r   r   r  r   r   r   r   r   r   r   r  r   r   r   r   rt   r   r   r   r   r   r   r   r   rL   r}   rQ   rM   rN   rO   rP   rw   rx   ry   rz   r~   r   r   r  r   HTTP_404_NOT_FOUNDr;   r0   r!  )	r<   rL   r  r   pdfr  r  r   rG   rH   rH   rI   get_application_details  sx   


r7  c              
   C   t   zt jjddd }dd |D }td|dW S  ty9 } ztddt| itjd	W  Y d
}~S d
}~ww )z2
    Get list of all councils in the database
    rQ   Tflatc                 S      g | ]}|r|qS rH   rH   )r   r,  rH   rH   rI   r         z get_councils.<locals>.<listcomp>)r   councilsr%   r  r   N	r   r   values_listr  r   r;   r0   r   r!  )r<   r=  rG   rH   rH   rI   get_councils     
r@  c              
   C   r8  )z;
    Get list of all development types in the database
    rw   Tr9  c                 S   r;  rH   rH   )r   dev_typerH   rH   rI   r     r<  z)get_development_types.<locals>.<listcomp>)r   development_typesr%   r  r   Nr>  )r<   rC  rG   rH   rH   rI   get_development_types  rA  rD  c                 C   s
   t | dS )z%
    Serve the chatbot interface
    zchatbot.htmlr   )r<   rH   rH   rI   chatbot_interface  s   
rE  );r(   r   pandasr,   django.confr   django.httpr   django.views.decorators.httpr   r
  r   openaishaoApp.promptsr   reshaoApp.functionsr   django.shortcutsr	   django.views.decorators.csrfr
   django.utils.decoratorsr   django.viewsr   rest_framework.decoratorsr   r   rest_framework.permissionsr   rest_framework.responser   rest_frameworkr   rag_servicer   modelsr   r   django.db.modelsr   rJ   rc   rl   rp   r   r   r   r   r  r#  r'  r*  r4  r7  r@  rD  rE  rH   rH   rH   rI   <module>   s    
/
	
J
:
(
l
m 9G