3
hx                 @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d dlZd dlT d dlZd dlmZ d dlmZ d d	lmZ d dlm	Z	 d d
lmZ d dlmZ d dlZd dlmZmZ d dlmZ d dlm Z  d dl!m"Z" ddl#m$Z$ ddl%mZm&Z&m'Z' d dl(m)Z) e	dgdd Z*e	dgdd Z+dd Z,dd Z-e	dgdd Z.e	dgdd  Z/e	dgd!d" Z0e	dgd#d$ Z1e	dgd%d& Z2ed'geegd(d) Z3edgeegd*d+ Z4ed'geegd,d- Z5edgeegd.d/ Z6edgeegd0d1 Z7edgeegd2d3 Z8edgeegd4d5 Z9d6d7 Z:dS )8    N)settings)JsonResponse)require_http_methods)DevelopmentApplication)EXTRACT_STRUCTURED_DATA_PROMPT)*)render)csrf_exempt)method_decorator)View)api_viewpermission_classes)AllowAny)Response)status   )RAGChatbotService)r   PDFDocumentExtractedPDFData)QGETc             C   s|  y<t jjtjd}t jjtjd}t jjtjd}t jjtjd}tj|}tj|}|jdd|jj	j
ddd f }|jdd|jj	j
ddd f }|jj|j}|| }|| }tj||gd	d
}||jdd }	|jd	d
}
|
j|dd |	j|dd td| jtjd | jtjd tt|	tt|
dS  tk
rv } ztddt	| iddS d}~X nX dS )z
    Merges two CSV files using only exactly matching column names.
    Removes duplicates and prevents creation of unwanted columns.
    z,Application Tables - 2025-07-20 08_21_55.csvzhistorical_scrape.csvzmerged_output.csvzduplicate_entries.csvNz^UnnamedF)caseT)Zignore_index)Zkeep)indexz7Merge completed successfully using exact column matches)messageoutput_fileZduplicates_fileZnum_duplicatesZnum_final_rowserrorzMerge failed: i  )r   )ospathjoinr   
MEDIA_ROOTpdread_csvloccolumnsstrcontainsintersectionconcatZ
duplicatedZdrop_duplicatesto_csvr   build_absolute_uri	MEDIA_URLintlen	Exception)requestZfile1Zfile2Zmerged_fileZdup_fileZdf1Zdf2Zcommon_columnsZcombined_dfZduplicate_rowsZ	unique_dfe r0   2/home/developer/Desktop/shao/shao/shaoApp/views.pymergeCsvFilesView   s2    

""
r2   c       
      C   s   yt jjtjd}t jjtjd}t jjtjd}tj|}tj|}dddddd	g}|| }tj||dd
d}|j|dd |d j	 j
 }td| jtjd t|dS  tk
r }	 ztddt|	 iddS d}	~	X nX dS )zE
    Enriches merged CSV data using determined application data.
    zmerged_output.csvz%cumulative_applicationsdetermined.csvzenriched_merged_output.csvapplication_iddecisionlodgement_datedetermined_datecostcouncil_nameleft)onhowF)r   z!Enrichment completed successfully)r   Zenriched_fileZnum_records_enrichedr   zEnrichment failed: i  )r   N)r   r   r   r   r   r    r!   merger(   notnasumr   r)   r*   r+   r-   r$   )
r.   Zmerged_pathZdetermined_pathZenriched_output_path	merged_dfZdetermined_dfcolsenriched_dfZenriched_countr/   r0   r0   r1   enrichMergedFileViewN   s"    

rB   c             C   sJ   y0t j| dd}t j|s&t|t jr*d S |j S  tk
rD   d S X d S )Ncoerce)errors)r    Zto_datetimeisnull
isinstanceZNaTdater-   )Zdate_strrG   r0   r0   r1   
parse_daten   s    rH   c             C   s   t j| s| dkrd S | S )N )r    isna)valr0   r0   r1   safe_getw   s    rL   c             C   sp  y0t jjtjd}t jj|s0tddiddS tj|}d%\}}}d}t	|}xt
d||D ]}|j|||  }	td|| d	  d
|| d	 |   xZ|	j D ]L\}
}yt|jdd}|rt|}ni }td| tjjt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jdt|jddd\}}|r|d	7 }n|d	7 }W q tk
r } ztd|  |d	7 }wW Y dd}~X qX qW || |k r`tjd  q`W td!||||| | d"S  tk
rj } ztdd#t| id$dS d}~X nX dS )&zS
    Imports enriched CSV data into the database with improved error handling.
    zenriched_merged_output.csvr   z%enriched_merged_output.csv not found.i  )r   r      zProcessing batch r   /descriptionrI   	extractedr3   Zapplication_web_urlr4   r5   r6   r7   Zcouncil_name_xdevelopment_typeland_usenumber_of_dwellingsnumber_of_storeysnumber_of_placesZnumber_of_unitsZnumber_of_lots)application_urlr4   r5   r6   r7   r8   rQ   rR   rS   rT   rU   number_of_apartmentsnumber_of_subdivisions)r3   defaultszError processing row: N   z2Data import completed with improved error handling)r   createdupdatedfailedZtotal_processedzData import failed: i  )r   r   r   )r   r   r   r   r   existsr   r    r!   r,   rangeZilocprintiterrowsrL   getZextract_structured_datar   objectsupdate_or_createrH   r-   timesleepr$   )r.   	file_pathdfr[   r\   r]   
batch_sizeZ
total_rowsiZbatch_df_rowrO   Z
structuredobjZ
is_createdr/   r0   r0   r1   importEnrichedDataViewz   sd    

(


rn   c             C   sh  y(t jjtjd}t jjtjd}t jjtjd}t jj| sPt jj| r`tddiddS tj|}tj|}|j	dd	id
d |j
d	gd}||d	 jtjj dk }tj||d	ddg d	dd}|d jtjdd |d< |j|dd td| jtjd tt|t|d j j dS  tk
rb } ztddt| iddS d}~X nX dS )z
    Merges enriched data with Name field from the scraper document based on Application ID.
    Filters out rows with missing Application ID before merging.
    Replaces Windows-style file path with /media/PDFs/ format.
    zenriched_merged_output.csvz+Scraper Documents - 2025-07-20 08_21_54.csvzenriched_with_name.csvr   z%One or both source files are missing.i  )r   zApplication IDr3   T)r#   Zinplace)subsetrI   NamePathr9   )r:   r;   c             S   s<   t | tr8| j r8ddjdd | jddjdD  S dS )Nz/media/rN   c             S   s2   g | ]*}|rd |kp&|j  o&t|dk r|qS ):   )isupperr,   ).0partr0   r0   r1   
<listcomp>   s    zBmergeWithNameFromScraperView.<locals>.<lambda>.<locals>.<listcomp>\rI   )rF   r$   stripr   replacesplit)pr0   r0   r1   <lambda>   s    z.mergeWithNameFromScraperView.<locals>.<lambda>F)r   z&Merge with Name completed successfully)r   r   Znum_recordsZnum_with_namezMerge failed: i  N)r   r   r   r   r   r^   r   r    r!   renameZdropnaZastyper$   ry   r<   applyr(   r)   r*   r+   r,   r=   r>   r-   )r.   Zenriched_pathZscraper_docs_pathoutput_pathrA   Z
scraper_dfr?   r/   r0   r0   r1   mergeWithNameFromScraperView   s4    


r   c       	      C   s  yt jjtjd}t jj|s.tddiddS tj|}d|j	ksLd|j	kr\tddid	dS g }xT|j
 D ]H\}}|d }t jjtj|jd
}t| t jj|rj|j|d  qjW tdt||dS  tk
r  } ztddt| iddS d}~X nX dS )z
    Checks the existence of PDF files specified in the 'Path' column
    of enriched_with_name.csv and returns a list of application IDs
    for which the file exists.
    zenriched_with_name.csvr   zMerged file not found.i  )r   r3   rq   z(Required columns are missing in the CSV.i  rN   zFile existence check complete.)r   Znum_files_foundZapplication_idszFile check failed: i  N)r   r   r   r   r   r^   r   r    r!   r#   ra   BASE_DIRlstripr`   appendr,   r-   r$   )	r.   Z
input_pathrh   Zexisting_idsrk   rl   relative_path	full_pathr/   r0   r0   r1   checkExistingPdfFilesView  s(    

r   c             C   s  yt jjtjd}t jj|s0tddiddS tj|}d|j	ksNd|j	kr^tddid	dS g }d
}d
}d
}g }x|j
 D ]x\}}	|	d }
|	d }tj|s||dkrq|t jjtj|jd}t jj|r|j|
|f q||j|
d|d |d7 }q|W d}t|}x|td
||D ]j}||||  }td|| d  d|| d |   x|D ]\}
}yt|
|}|jdr|j|
|jd||jd|jd|jdd
|jdd
|jdd |d7 }|jdi jdr|d7 }n4|j|
|jdp |jd||jdd |d7 }W nB tk
r^ } z$|j|
t||d |d7 }W Y dd}~X nX qZW || |k rtjd qW t jjtjd}t|dd d!}tj||d"d#d$ W dQ R X td%|||||t|d&S  tk
r } ztdd't| id(dS d}~X nX dS ))zm
    Extract data from PDF files and save to database with improved error handling and batch processing.
    zenriched_with_name.csvr   z!enriched_with_name.csv not found.i  )r   r3   rq   zCSV missing required columnsi  r   rI   rN   zPDF file not found)r3   r   r   r      zProcessing PDF batch Zextraction_successextracted_datapdf_type
confidencetext_lengthpages_processeddatabase_save)r3   datar   r   r   r   r   r   successreason)r3   r   r   r   Nzextracted_applications.jsonwzutf-8)encodingrZ   F)indentensure_asciiz*PDF extraction and database save completed)r   r   
total_pdfsZsuccessfully_processedZsuccessfully_saved_to_dbr]   Ztotal_recordszPDF extraction failed: i  )r   r   r   r   r   r^   r   r    r!   r#   ra   rJ   r   r   r   r,   r_   r`   Zprocess_and_save_pdf_datarb   r-   r$   re   rf   openjsondump)r.   Z	input_csvrh   result	processedr]   Zsaved_to_dbZ
valid_rowsrk   rl   Zapp_idrel_pathZpdf_pathri   r   rj   batchZprocessing_resultr/   r   fr0   r0   r1   extractPdfData+  s    

(




 r   c          $   C   s0  yddl m}m}m} | jjd}|r|y |jj|d}|jj }||j	|j
|j|j|j|j|j|j|j|j|jdg d}x|D ]}|j|j|j|j|j|j|j|j|j|j|jj d}	t |dr4|j!}
|
j"|
j#|
j$|
j%|
j&|
j'|
j(|
j)|
j*|
j+|
j,|
j-|
j.|
j/|
j0|
j1|
j2|
j3|
j4|
j5|
j6|
j7d	|	d< |d
 j8|	 qW t9|S  |j:k
rx   t9dd| diddS X nt|jj;ddj< }g }xH|D ]@}|jj= }|jj;ddj= }|j8|j>|j	|||| d qW t9dt?||dS W n8 t@k
r* } zt9ddtA| iddS d}~X nX dS )z]
    Retrieve PDF data from the database for a specific application or all applications.
    r   )r   r   r   r3   )r3   )r8   r4   r5   r6   r7   rQ   rS   rT   rU   rW   rX   )r3   Zapplication_datapdf_documents)id	file_namerg   document_typer   r   r   r   extraction_statuserror_message
created_atr   )land_descriptionregistered_proprietorencumbrancesactivity_last_125_daysadministrative_noticesproposed_userO   applicant_namecontact_namecontact_addresscontact_emailcontact_phoneapplicant_addressapplicant_emailapplicant_phonelot_sizesite_coverage
total_areaground_floor_areafirst_floor_areapossposr   r   zApplication z
 not foundi  )r   F)Zpdf_documents__isnullr   )r   )r3   r8   r   successful_extractionsZfailed_extractionszPDF data summary)r   Ztotal_applications_with_pdfsapplicationszDatabase query failed: i  N)BshaoApp.modelsr   r   r   r   rb   rc   r   allr8   r4   r5   r6   r7   rQ   rS   rT   rU   rW   rX   r   r   rg   r   r   r   r   r   r   r   r   	isoformathasattrr   r   r   r   r   r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   DoesNotExistfilterdistinctcountr3   r,   r-   r$   )r.   r   r   r   r3   applicationr   r   Zpdf_docpdf_datarP   Zapplications_with_pdfssummaryappZ	pdf_countr   r/   r0   r0   r1   getPdfDataFromDatabase  s    



r   POSTc             C   s   yZ| j }|jddj }|s.tdditjdS t }|j|}td|d |d |d	 d
S  tk
r } ztddt	| itj
dS d}~X nX dS )z0
    Main chat endpoint for the RAG chatbot
    r   rI   r   zMessage is required)r   Tresponsesourcesr   )r   r   r   r   zAn error occurred: N)r   rb   ry   r   r   ZHTTP_400_BAD_REQUESTr   Zchatr-   r$   HTTP_500_INTERNAL_SERVER_ERROR)r.   r   r   chatbotr   r/   r0   r0   r1   chat_endpoint  s$    

r   c             C   sZ   yt  }|j }td|dS  tk
rT } ztddt| itjdS d}~X nX dS )z9
    Get statistics about the chatbot knowledge base
    T)r   Z
statisticsr   zAn error occurred: )r   N)r   Zget_statisticsr   r-   r$   r   r   )r.   r   statsr/   r0   r0   r1   chatbot_stats(  s    
r   c             C   sZ   yt  }|j  tdddS  tk
rT } ztddt| itjdS d}~X nX dS )z6
    Rebuild the FAISS vector store from database
    Tz!Vector store rebuilt successfully)r   r   r   zAn error occurred: )r   N)r   Zbuild_vector_storer   r-   r$   r   r   )r.   r   r/   r0   r0   r1   rebuild_vector_store=  s    
r   c       	      C   s>   y| j jdd}| j jdd}| j jdd}| j jdd}tjj }|rn|jt|dt|dB t|dB }|r~|j|d}|r|j|d	}|r|j|d}|d
d }g }x<|D ]4}|j|j|j	|j
|j|j|j|j|j|jd	 qW td|t|dS  tk
r8 } ztddt| itjdS d
}~X nX d
S )z=
    Search development applications by various criteria
    qrI   councilr4   rQ   )Zapplication_id__icontains)Zcouncil_name__icontains)Zdevelopment_type__icontains)Zdecision__icontainsN2   )	r3   r8   r4   r5   r6   r7   rQ   rS   rT   T)r   r   r   r   zAn error occurred: )r   )r   rb   r   rc   r   r   r   r   r3   r8   r4   r5   r6   r7   rQ   rS   rT   r   r,   r-   r$   r   r   )	r.   queryr   r4   rQ   r   resultsr   r/   r0   r0   r1   search_applicationsR  sJ    

r   c       	      C   sL  yt jj|d}g }x|jj D ]|}|j|j|j|j|j	|j
|jd}t|dr|jr|j}|j|j|j|j|j|j|j|j|j|j|jd|d< |j| q W |j|j|j|j|j|j|j |j!|j"|j#|j$|j%|j&|d}t'd|dS  t j(k
r   t'dd	it)j*d
S  t+k
rF } zt'ddt,| it)j-d
S d}~X nX dS )z?
    Get detailed information about a specific application
    )r3   )r   r   r   r   r   r   r   r   )r   r   r   rO   r   r   r   r   r   r   r   )r3   rV   r8   r4   r5   r6   r7   rQ   rS   rT   rU   rW   rX   r   T)r   r   r   zApplication not found)r   zAn error occurred: N).r   rc   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rO   r   r   r   r   r   r   r   r   r3   rV   r8   r4   r5   r6   r7   rQ   rS   rT   rU   rW   rX   r   r   r   ZHTTP_404_NOT_FOUNDr-   r$   r   )	r.   r3   r   r   Zpdfr   rP   r   r/   r0   r0   r1   get_application_details  sd    


r   c             C   sn   y0t jjdddj }dd |D }td|dS  tk
rh } ztddt| itjd	S d
}~X nX d
S )z2
    Get list of all councils in the database
    r8   T)flatc             S   s   g | ]}|r|qS r0   r0   )ru   r   r0   r0   r1   rw     s    z get_councils.<locals>.<listcomp>)r   councilsr   zAn error occurred: )r   N)	r   rc   values_listr   r   r-   r$   r   r   )r.   r   r/   r0   r0   r1   get_councils  s    
r   c             C   sn   y0t jjdddj }dd |D }td|dS  tk
rh } ztddt| itjd	S d
}~X nX d
S )z;
    Get list of all development types in the database
    rQ   T)r   c             S   s   g | ]}|r|qS r0   r0   )ru   Zdev_typer0   r0   r1   rw     s    z)get_development_types.<locals>.<listcomp>)r   development_typesr   zAn error occurred: )r   N)	r   rc   r   r   r   r-   r$   r   r   )r.   r   r/   r0   r0   r1   get_development_types  s    
r   c             C   s
   t | dS )z%
    Serve the chatbot interface
    zchatbot.html)r   )r.   r0   r0   r1   chatbot_interface   s    r   );r   r   Zpandasr    django.confr   django.httpr   Zdjango.views.decorators.httpr   r   r   ZopenaiZshaoApp.promptsr   reZshaoApp.functionsre   Zdjango.shortcutsr   django.views.decorators.csrfr	   django.utils.decoratorsr
   Zdjango.viewsr   Zrest_framework.decoratorsr   r   Zrest_framework.permissionsr   Zrest_framework.responser   rest_frameworkr   Zrag_servicer   modelsr   r   django.db.modelsr   r2   rB   rH   rL   rn   r   r   r   r   r   r   r   r   r   r   r   r   r0   r0   r0   r1   <module>   sb   0 	M;)mn!:H