
    hQ                        S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKrS SKJrJrJrJr  S SKrS SKrS SKJrJrJrJr  \R4                  " \R6                  S9rS	 rS
 rS\S\4S jr SS\!S\"S\#4S jjr$S\!S\4S jr%S\!S\4S jr&S\!S\4S jr'S\!S\!4S jr(S\!S\4S jr)SS jr*S r+S r,g)    N)settings)JsonResponse)require_http_methods)DevelopmentApplication)EXTRACT_STRUCTURED_DATA_PROMPTTITLE_SEARCH_EXTRACTION_PROMPT"APPLICATION_FORM_EXTRACTION_PROMPT APPLICATION_FORM_FALLBACK_PROMPT)RateLimitErrorAPIErrorAPIConnectionErrorAPITimeoutError)api_keyc                 l     [         R                  " U 5      R                  5       $ ! [         a     g f = f)N)pdto_datetimedate	Exception)date_strs    6/home/developer/Desktop/shao/shao/shaoApp/functions.py
parse_dater      s1    ~~h',,.. s   #& 
33c                 ~    [         R                  " U 5      (       d	  U S:X  d  U c  g[        U 5      R                  5       $ )z7Safely get value, handling None, NaN, and empty strings N)r   isnastrstrip)vals    r   safe_getr      s-    	wws||sbyCKs8>>    datareturnc           
      &   [        U [        5      (       d  0 $ SSSSSSSS.n0 nUR                  5        GH7  u  p4U R                  X45      nUS:X  a  U(       d  U R                  S5      n[        U[        5      (       a  U(       a  U Vs/ sH[  n[        U[        5      (       d  M  UR                  S5      (       d  M2  [        UR                  SS5      5      R                  5       PM]     nnU(       a  SR                  U5      OUnUR                  S5      (       a+   US;  a  [        [        [        U5      5      5      OSX#'   GM  U(       a  [        U5      R                  5       OSX#'   GM:     U$ s  snf ! [        [        4 a	    SX#'    GM[  f = f)	z7Validate and clean structured data from OpenAI responser   r   )development_typeland_usenumber_of_dwellingsnumber_of_storeysnumber_of_placesnumber_of_unitsnumber_of_lotsr$   	land_usesz, 
number_of_)Nr   )
isinstancedictitemsgetlistr   r   join
startswithintfloat
ValueError	TypeError)	r    expected_fieldsvalidated_datafielddefault_valuevaluer*   itemextracted_usess	            r   validate_structured_datar>   "   sr   dD!!	  O N / 5 5 7. J-I)T**y !*" )!$- :26((:2F :CR01779 )  "
 6D		.1 L))*BGzBYE#e*,=(>_`% ;@CJ$4$4$6RN!+ !8. !" 	* *()%*s$   	E2%E2=-E2'E77FFpromptmax_retries
base_delayc                 j   [        U5       Hg  n [        R                  R                  R	                  SSU S./SSS9nUR
                  S   R                  R                  R                  5       nUS4s  $    g! [         a|  nX1S	-
  :  aU  US
U-  -  [        R                  " SS	5      -   n[        SUS SUS	-    SU S35        [        R                  " U5         SnAM  SSU S[        U5       34s SnAs  $ SnAf[         ["        [$        4 a}  nX1S	-
  :  aV  US
U-  -  [        R                  " SS	5      -   n[        SUS SUS	-    SU S35        [        R                  " U5         SnAGMc  SSU S[        U5       34s SnAs  $ SnAf[&         a  nSS[        U5       34s SnAs  $ SnAff = f)z@Make OpenAI API request with exponential backoff and retry logiczgpt-4o-2024-05-13user)rolecontentg?i  )modelmessagestemperature
max_tokensr   N      zRate limit hit, retrying in z.2fz seconds... (attempt /)zRate limit exceeded after z attempts: zAPI error, retrying in zAPI error after zUnexpected error: )NzMax retries exceeded)rangeclientchatcompletionscreatechoicesmessagerE   r   r   randomuniformprinttimesleepr   r   r   r   r   )r?   r@   rA   attemptresponserE   edelays           r   make_openai_request_with_retryr^   M   s    %	7{{..55)#)f=>	 6 H &&q)1199??AGD=  &D (-  	[q("a7l3fnnQ6JJ4U3K?TU\_`U`Taabcnboopqr

5!9+kRUVWRXQYZZZ,o> 	Qq("a7l3fnnQ6JJ/c{:OPWZ[P[}\]^i]jjklm

5!/}KAxPPP 	7-c!fX666	7sP   A"A88
F2AC9C91F29F2AF	/F	F2	F2F-%F2-F2descriptionc                    U (       a  U R                  5       (       d  0 $ Sn[        U 5      U:  a  U SU S-   n [        R                  " U S9n[	        U5      u  p4U(       a  [        SU 35        0 $ U(       d  [        S5        0 $  UR                  S5      (       a)  [        R                  " SS	U5      nUR                  S5      n[        R                  " U5      n[        S
U5        [        U5      n[        SU5        U$ ! [        R                   a(  n[        SU 35        [        SU 35        0 s SnA$ SnAf[         a  n[        SU 35        0 s SnA$ SnAff = f)zCExtract structured data from description with proper error handlingi  N...)r_   OpenAI request failed: Empty response from OpenAI```^```[a-z]*\n?r   zdata================r8   zJSON decoding error: Raw content: z%Unexpected error in data processing: )r   lenr   formatr^   rW   r2   resubrstripjsonloadsr>   JSONDecodeErrorr   )r_   
max_lengthr?   rE   errorr    r8   r\   s           r   extract_structured_datarq   u   sM    k//11	 J
;*$!+:.6+22{KF 4F;NG'w/0	*+	e$$ff-r7;GnnU+G zz'"$T*1$7~.  %aS)*gY'(	 5aS9:	s1   A9C? ?ED60E6EEEEpdf_pathc           
          [         R                  " U 5       n[        UR                  5      nSnSnUR                   HN  nUR	                  5       nU(       d  M  [        UR                  5       5      S:  d  M;  US-  nU[        U5      -  nMP     US:  a  X2-  OSnUS:  a  XB-  OSnUS:  a  US:  a  Sn	Sn
O&US:  a  US	:  a  Sn	S
n
OUS:  a  US:  a  Sn	Sn
OSn	Sn
U	U
UU[        US5      [        US5      US.sSSS5        $ ! , (       d  f       g= f! [         a  nSS[        U5      SSSSSS.s SnA$ SnAff = f)z>Detect if PDF is digital (text-based) or scanned (image-based)r   
   rJ   gffffff?d   digitalhighg333333?2   mediummixedlowscanned   )type
confidencetotal_pagespages_with_texttext_densityavg_text_per_pagetotal_text_lengthNunknown)r~   r   rp   r   r   r   r   r   )	
pdfplumberopenrg   pagesextract_textr   roundr   r   )rr   pdfr   r   r   page	page_textr   r   pdf_typer   r\   s               r   detect_pdf_typer      sk   2
__X&#cii.KO !		 --/	9Y__%6!7"!<#q(O%Y7%	 " =H!O?8QRLCNQR? 1 ?XY s"'83'>$#
$):R)?$%
!&7"&<""
$#
 !(*#2 %lA 6%*+<a%@%6; '&&N  

V !"!"	
 		


sG   D >DD9BD<	D 
DD D 
D?!D:4D?:D?c                     [        U 5      nUS   S:X  a  US   S:X  a  SSSSS.$ Sn[        R                  " U 5       nUR                   H#  nUR	                  5       nU(       d  M  X%S-   -  nM%     S	S	S	5        UR                  5       nU(       a  UUS   US   [        U5      US
   S.$ SSSSS.$ ! , (       d  f       NF= f! [         a  nSSS[        U5      S.s S	nA$ S	nAff = f)zEExtract text from PDF with enhanced error handling and type detectionr~   r|   r   rw   r   z>PDF appears to be scanned/image-based with no extractable text)textr   r   reason
Nr   )r   r   r   text_lengthpages_processedz5No text could be extracted despite PDF type detectionrp   r{   )r   r   r   rp   )	r   r   r   r   r   r   rg   r   r   )rr   pdf_infor   r   r   r   extracted_textr\   s           r   extract_text_from_pdfr      s   .
"8, Fy(Xl-Cv-M%$Z	  __X&#		 --/	9,,D " '  &$V,&|4">2#+,=#>  %$Q	 % '&2  
V	
 	

sE   #C C %B2'B228C +C 2
C <C 
C'C"C'"C'r   c                    ^ U R                  5       m/ SQn/ SQn[        U4S jU 5       5      n[        U4S jU 5       5      nUS:  a  gUS:  a  gX4:  a  gXC:  a  gg)	z.Detect document type based on content analysis)ztitle searchtitlezcertificate of titlezland descriptionzregistered proprietorencumbranceszactivity in the last 125 dayszadministrative noticesztitle referencevolumefolio)	zapplication formzplanning permitzproposed user_   zfor what usezdevelopment or other matterzpermit requiredzonline form applicationzplanning permit applicationc              3   4   >#    U H  oT;   d  M
  S v   M     g7frJ   N .0keyword
text_lowers     r   	<genexpr>'detect_document_type.<locals>.<genexpr>&  s     _0EWT^I^qq0E   	c              3   4   >#    U H  oT;   d  M
  S v   M     g7fr   r   r   s     r   r   r   '  s     "g4M\fQf114Mr   r}   title_searchapplication_formr   )lowersum)r   title_search_keywordsapplication_form_keywordstitle_search_matchesapplication_form_matchesr   s        @r   detect_document_typer     ss     J! _0E__""g4M"gg q 	!Q	&!		8	!	8!r   c                 >   U (       a  U R                  5       (       d  SS0$ Sn[        U 5      U:  a  U SU S-   n [        U 5      nUS:X  a  [        R                  " U S9nO/US:X  a  [
        R                  " U S9nO[        R                  " U S9n[        U5      u  pEU(       a  SS	U 30$ U(       d  SS
0$  UR                  S5      (       a)  [        R                  " SSU5      nUR                  S5      n[        R                  " U5      n[        U[        5      (       d  SS0$ SU;  a  X&S'   U$ ! [        R                    a6  n[#        SU 35        [#        SU 35        SS[%        U5       30s SnA$ SnAf[&         a  nSS[%        U5       30s SnA$ SnAff = f)z@Extract structured data from PDF text with proper error handlingrp   zEmpty or invalid text inputi@  Nra   r   )r   r   rb   rc   rd   re   r   zInvalid response formatdocument_typedetected_typez'JSON decoding error in PDF extraction: rf   z!Failed to parse OpenAI response: z)Unexpected error in PDF data processing: )r   rg   r   r   rh   r	   r
   r^   r2   ri   rj   rk   rl   rm   r,   r-   rn   rW   r   r   )r   ro   r   r?   rE   rp   r    r\   s           r   !extract_structured_data_from_textr   6  s    tzz||677 J
4y:KZ 5( ).M&/66DA	,	,3::E 288dC 4F;NG25':;;566Oe$$ff-r7;GnnU+G zz'" $%%677 $&$1! G7s;<gY'(<SVHEFF ODSVHMNNOs7   6A-D0 $D0 0F+E5/F5FFFFc           	         / n[        S[        U 5      U5       H  nXXR-    n/ nU H  n U" U5      n	UR                  U	5        M     UR                  U5        XR-   [        U 5      :  d  MR  [	        SXR-  S-    SU S35        [        R                  " U5        M     U$ ! [         a5  n
[	        SU
 35        UR                  S[        U
5      05         Sn
A
M  Sn
A
ff = f)	z9Process items in batches with delays to avoid rate limitsr   zError processing item: rp   NzProcessed batch rJ   z
, waiting z seconds...)	rN   rg   appendr   rW   r   extendrX   rY   )r.   process_func
batch_sizedelay_between_batchesresultsibatchbatch_resultsr<   resultr\   s              r   process_batch_with_delayr   q  s    G1c%j*-'D8%d+$$V,  	}% >CJ&$Q]Q%6$7zBWAXXcdeJJ,-# .& N  8/s34$$gs1v%6778s   B
C(*CCc                 H    SSK JnJnJn  UR                  R                  U U SS.S9u  pxUR                  R                  UU[        R                  R                  U5      UR                  SS5      UR                  SS5      UR                  S	S
5      UR                  SS5      UR                  SS5      U(       a  UR                  S5      (       d  SOSU(       a'  UR                  S5      (       a  UR                  S5      OSS9
n	U(       Ga  UR                  S5      (       Gd  UR                  S0 5      n
UR                  R                  U	UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  S S5      UR                  S!S5      UR                  S"S5      U
R                  S#S5      U
R                  S$S5      U
R                  S%S5      U
R                  S&S5      U
R                  S'S5      U
R                  S(S5      U
R                  S)S5      US*9nS+U U	R                  U	R                  S,S-.$ ! WR                   a	    S.U S/S0.s $ [         a  nS.U S1[        U5       3S0.s SnA$ SnAff = f)2aP  
Save PDF extraction data to the database

Args:
    application_id (str): The application ID
    pdf_path (str): Path to the PDF file
    extraction_result (dict): Result from extract_text_from_pdf function
    extracted_data (dict): Structured data extracted from the PDF

Returns:
    dict: Result with success status and any errors
r   )r   PDFDocumentExtractedPDFDataUnknown)application_idcouncil_name)r   defaultsr   r   r   r   r{   r   r   rp   successfailedN)
application	file_path	file_namer   r   r   r   r   extraction_statuserror_messagedevelopment_summaryland_descriptionr   registered_proprietorr   activity_last_125_daysadministrative_noticesproposed_user_   applicant_namecontact_namecontact_addresscontact_emailcontact_phoneapplicant_addressapplicant_emailapplicant_phonelot_sizesite_coverage
total_areaground_floor_areafirst_floor_areaposspos)pdf_documentr   r   r   r   r   r   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   raw_extracted_dataTzPDF data saved successfully)r   r   pdf_document_idr   rT   Fz!Development application not found)r   r   rp   zDatabase error: )shaoApp.modelsr   r   r   objectsget_or_createrR   ospathbasenamer/   idr   DoesNotExistr   r   )r   rr   extraction_resultextracted_datar   r   r   r   createdr   r   extracted_pdf_datar\   s                r   save_pdf_data_to_databaser     s>   W
XX  6==KK)"0 )  L  
 #**11#gg&&x0(,,_iH&**:yA(,,\5A)--mQ?-112CQG+9.BTBTU\B]B]ick9GNL^L^_fLgLg.,,W5mq 2 
 ."4"4W"="="0"4"45JB"O!1!9!9!@!@) "0!3!34F!K&4&8&89PRT&U+//C'5'9'9:RTV'W'5'9'9:RTV'W ,//C*..}bA  .112BBG+//C . 2 23Db I,00"E,00"E"0"4"45H""M . 2 23Db I . 2 23Db I -00R@155orJ.22<D"5"9"9:Mr"R!4!8!89KR!P'++E26(,,VR8 $2E "A #"L ,+!-!?!?4
 	
 ".. 
,8
 	

  
,'Ax0
 	

s$   K!K$ $L!<	L!LL!L!c           
          [        U5      nUR                  S5      (       d8  [        U UUSUR                  SS5      05      nU USUR                  SS5      US.$ [        US   5      n[        U UUU5      nU USUR                  S5      UR                  S	5      UR                  S
S5      UR                  SS5      UUS.	$ ! [         a*  nU US[        U5      SS[        U5       3S.S.s SnA$ SnAff = f)z
Process a PDF file and save the extracted data to the database

Args:
    application_id (str): The application ID
    pdf_path (str): Path to the PDF file

Returns:
    dict: Result with processing status and database save status
r   rp   r   zNo text extracted from PDFF)r   rr   extraction_successr   database_saveTr   r   r   r   r   )	r   rr   r   r   r   r   r   r   r   zProcessing error: )r   rp   )r   rr   r   rp   r   N)r   r/   r   r   r   r   )r   rr   r   save_resultr   r\   s         r   process_and_save_pdf_datar     s:   6
1(; $$V,,3!+//:VWX	K #1$&++//:VW!,  ;;LV;TU 0	
 - "&)--j9+//=,00B0445FJ,(

 
	
  

, "'V -c!fX6	
 		


s%   AC A%C 
C5C0*C50C5)r}   g      ?)rt   g       @)-r   rl   pandasr   rX   rU   django.confr   django.httpr   django.views.decorators.httpr   r   r   openaishaoApp.promptsr   r   r	   r
   ri   r   r   r   r   r   OpenAIOPENAI_API_KEYrO   r   r   r-   r>   r   r3   r4   r^   rq   r   r   r   r   r   r   r   r   r   r   <module>r     s   	       $ = 1  a  a 	  P P 
x66	7(4 (D (V%(3 %(S %(RW %(P0 0 0f4
c 4
d 4
n0
C 0
D 0
f!s !s !H8OC 8OD 8Ov4d
NA
r   