
    #h:                     ^   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlZd dlmZmZmZ d dlZd dlZd dlmZmZmZmZ  ej        ej                  Zd	 Zd
 ZdedefdZd de de!de"fdZ#de defdZ$de defdZ%de defdZ&de de fdZ'de defdZ(d Z)d!dZ*dS )"    N)settings)JsonResponse)require_http_methods)DevelopmentApplication)EXTRACT_STRUCTURED_DATA_PROMPTTITLE_SEARCH_EXTRACTION_PROMPT"APPLICATION_FORM_EXTRACTION_PROMPT)RateLimitErrorAPIErrorAPIConnectionErrorAPITimeoutError)api_keyc                 r    	 t          j        |                                           S # t          $ r Y d S w xY w)N)pdto_datetimedate	Exception)date_strs    0/home/esfera/Downloads/shao/shaoApp/functions.py
parse_dater      sE    ~h'',,...   tts   %( 
66c                     t          j        |           s| dk    s| dS t          |                                           S )z7Safely get value, handling None, NaN, and empty strings N)r   isnastrstrip)vals    r   safe_getr      s?    	ws|| sby C ts88>>    datareturnc                    t          | t                    si S ddddddd}i }|                                D ]\  }}|                     ||          }|                    d          rX	 ||dk    rd||<   n,t          t          t          |                              ||<   l# t          t          f$ r d||<   Y w xY w||dk    rd||<   t          |          
                                ||<   |S )z7Validate and clean structured data from OpenAI responser   r   )development_typenumber_of_dwellingsnumber_of_storeysnumber_of_placesnumber_of_unitsnumber_of_lots
number_of_)
isinstancedictitemsget
startswithintfloatr   
ValueError	TypeErrorr   )r   expected_fieldsvalidated_datafielddefault_valuevalues         r   validate_structured_datar7   "   sJ   dD!! 	   O N / 5 5 7 7 ; ;}.. L)) 	;* CERK C,-N5)),/c%jj0A0A,B,BN5)	* * * *()u%%%*  ; ;(*u%%(+E

(8(8(:(:u%%s   (:B##B<;B<         ?promptmax_retries
base_delayc                 x   t          |          D ]}	 t          j        j                            dd| dgdd          }|j        d         j        j                                        }|dfc S # t          $ r}||d	z
  k     rT|d
|z  z  t          j        dd	          z   }t          d|dd|d	z    d| d           t          j        |           Y d}~dd| dt          |           fcY d}~c S d}~wt           t"          t$          f$ r}||d	z
  k     rU|d
|z  z  t          j        dd	          z   }t          d|dd|d	z    d| d           t          j        |           Y d}~\dd| dt          |           fcY d}~c S d}~wt&          $ r }ddt          |           fcY d}~c S d}~ww xY wdS )z@Make OpenAI API request with exponential backoff and retry logiczgpt-4o-2024-05-13user)rolecontentg?i  )modelmessagestemperature
max_tokensr   N      zRate limit hit, retrying in z.2fz seconds... (attempt /)zRate limit exceeded after z attempts: zAPI error, retrying in zAPI error after zUnexpected error: )NzMax retries exceeded)rangeclientchatcompletionscreatechoicesmessager@   r   r
   randomuniformprinttimesleepr   r   r   r   r   )r:   r;   r<   attemptresponser@   edelays           r   make_openai_request_with_retryrY   I   s    %%  7  7	7{.55)#)f==>	 6  H &q)19??AAGD=    	[ 	[ 	[q( ["a7l3fnQ6J6JJqUqqqU\_`U`qqcnqqqrrr
5!!!Z+ZZRUVWRXRXZZZZZZZZZZZ,o> 	Q 	Q 	Qq( Q"a7l3fnQ6J6JJllllPWZ[P[ll^illlmmm
5!!!PPPAPPPPPPPPPPP 	7 	7 	76c!ff66666666666	7 ('sP   AA//
F79AC4C4,F74F7AF
,F
F7
F7F2*F72F7descriptionc                    | r|                                  si S d}t          |           |k    r| d|         dz   } t          j        |           }t	          |          \  }}|rt          d|            i S |st          d           i S 	 |                    d          r+t          j        dd	|          }|	                    d          }t          j        |          }t          |          }|S # t          j        $ r0}t          d
|            t          d|            i cY d}~S d}~wt          $ r}t          d|            i cY d}~S d}~ww xY w)zCExtract structured data from description with proper error handlingi  N...rZ   OpenAI request failed: Empty response from OpenAI```^```[a-z]*\n?r   zJSON decoding error: Raw content: z%Unexpected error in data processing: )r   lenr   formatrY   rR   r-   resubrstripjsonloadsr7   JSONDecodeErrorr   )rZ   
max_lengthr:   r@   errorr   r3   rW   s           r   extract_structured_datarm   q   s     k//11 	 J
;*$ 7!+:+.6+2{KKKF 4F;;NGU ///000	 *+++	e$$ 	,f-r7;;GnnU++G z'"" 2$77   )a))***'g''(((						   9a99:::						s1   A$C1 1E %D+%E+E8EEEpdf_pathc           
         	 t          j        |           5 }t          |j                  }d}d}|j        D ]T}|                                }|r<t          |                                          dk    r|dz  }|t          |          z  }U|dk    r||z  nd}|dk    r||z  nd}|dk    r|dk    rd}	d}
n&|dk    r|d	k    rd}	d
}
n|dk    r|dk    rd}	d}
nd}	d}
|	|
||t          |d          t          |d          |dcddd           S # 1 swxY w Y   dS # t          $ r"}ddt          |          ddddddcY d}~S d}~ww xY w)z>Detect if PDF is digital (text-based) or scanned (image-based)r   
   rE   gffffff?d   digitalhighg333333?2   mediummixedlowscannedr8   )type
confidencetotal_pagespages_with_texttext_densityavg_text_per_pagetotal_text_lengthNunknown)ry   rz   rl   r{   r|   r}   r~   r   )	
pdfplumberopenrc   pagesextract_textr   roundr   r   )rn   pdfr{   r|   r   page	page_textr}   r~   pdf_typerz   rW   s               r   detect_pdf_typer      sH   2
_X&& %	#ci..KO !	 8 8 --//	 8Y__%6%6!7!7"!< 8#q(O%Y7% =H!OR?[88QRLCNQR? Y 1K ? ?XY s" $'83'> $$#

$ $):R)? $$%

! $&7"&< $""

$#
 !(*#2 %lA 6 6%*+<a%@%@%6 ;%	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	 %	N  

 

 

VV !"!"	
 	
 		
 		
 		
 		
 		
 		


sA   D C+DD DD DD 
E%E<EEc                    	 t          |           }|d         dk    r|d         dk    rdddddS d}t          j        |           5 }|j        D ] }|                                }|r||dz   z  }!	 d	d	d	           n# 1 swxY w Y   |                                }|r'||d         |d         t          |          |d
         dS dddddS # t          $ r}dddt          |          dcY d	}~S d	}~ww xY w)zEExtract text from PDF with enhanced error handling and type detectionry   rx   rz   rs   r   z>PDF appears to be scanned/image-based with no extractable text)textr   rz   reason
Nr|   )r   r   rz   text_lengthpages_processedz5No text could be extracted despite PDF type detectionrl   rw   )r   r   rz   rl   )	r   r   r   r   r   r   rc   r   r   )rn   pdf_infor   r   r   r   extracted_textrW   s           r   extract_text_from_pdfr      s   .
"8,, Fy( 	Xl-Cv-M 	%$Z	   _X&& 	-#	 - - --//	 -I,,D-	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-   	&$V,&|4">22#+,=#>   %$Q	    
 
 
VV	
 
 	
 	
 	
 	
 	
 	

sL   -C C )A<0C <B  C B ?C C 
C3C.(C3.C3r   c                     |                                  g d}g d}t          fd|D                       }t          fd|D                       }|dk    rdS |dk    rdS ||k    rdS ||k    rdS dS )	z.Detect document type based on content analysis)ztitle searchtitlezcertificate of titlezland descriptionzregistered proprietorencumbranceszactivity in the last 125 dayszadministrative noticesztitle referencevolumefolio)	zapplication formzplanning permitzproposed userZ   zfor what usezdevelopment or other matterzpermit requiredzonline form applicationzplanning permit applicationc              3   $   K   | ]
}|v d V  dS rE   N .0keyword
text_lowers     r   	<genexpr>z'detect_document_type.<locals>.<genexpr>  s.      __WT^I^_q______r   c              3   $   K   | ]
}|v d V  dS r   r   r   s     r   r   z'detect_document_type.<locals>.<genexpr>   s/      "g"gQX\fQf"g1"g"g"g"g"g"gr   r8   title_searchapplication_formr   )lowersum)r   title_search_keywordsapplication_form_keywordstitle_search_matchesapplication_form_matchesr   s        @r   detect_document_typer     s     J  ! ! ! ____0E_____""g"g"g"g4M"g"g"ggg q  	~	!Q	& !!	 8	8 ~	!$8	8 !!yr   c                 Z   | r|                                  sddiS d}t          |           |k    r| d|         dz   } t          |           }|dk    rt          j        |           }n1|dk    rt          j        |           }nt          j        | 	          }t          |          \  }}|rdd
| iS |sddiS 	 |                    d          r+t          j
        dd|          }|                    d          }t          j        |          }t          |t                    sddiS d|vr||d<   |S # t          j        $ rB}t#          d|            t#          d|            ddt%          |           icY d}~S d}~wt&          $ r}ddt%          |           icY d}~S d}~ww xY w)z@Extract structured data from PDF text with proper error handlingrl   zEmpty or invalid text inputi@  Nr\   r   )r   r   r]   r^   r_   r`   ra   r   zInvalid response formatdocument_typedetected_typez'JSON decoding error in PDF extraction: rb   z!Failed to parse OpenAI response: z)Unexpected error in PDF data processing: )r   rc   r   r   rd   r	   r   rY   r-   re   rf   rg   rh   ri   r)   r*   rj   rR   r   r   )r   rk   r   r:   r@   rl   r   rW   s           r   !extract_structured_data_from_textr   /  sQ     8tzz|| 8677 J
4yy: )KZK 5( )..M& I/6DAAA	,	, I3:EEE 064HHH 4F;;NGU <:5::;; 7566Oe$$ 	,f-r7;;GnnU++G z'"" $%% 	8677 $& 	2$1D! G G G;;;<<<'g''(((ESVVEEFFFFFFF O O OMSVVMMNNNNNNNOs7   ;A,D3 (
D3 3F*7E?9F*?F*F%F*%F*c                  "   d} d}t          d           t          |           }t          dt          j        |d                      t          d           t          |          }t          dt          j        |d                      ||fS )	z>Test function to validate extraction with sample document dataz
    TITLE SEARCH DOCUMENT
    Land Description: 3 Hopetoun Street NORTHCOTE VIC 3070
    Registered Proprietor: John Smith and Jane Smith
    Encumbrances: None
    Activity in the last 125 days: No recent activity
    Administrative Notices: None
    a  
    ONLINE FORM APPLICATION FOR PLANNING PERMIT
    Application No: D/30/2024
    Date: 31 January 2024
    
    Land Description: 3 Hopetoun Street NORTHCOTE VIC 3070
    Current Use/Development: Residence
    
    Proposed Use: Residential/Accommodation
    Description: Two (2) Dwellings on Two (2) Lots as shown on the plans accompanying the application
    
    Estimated cost of development: $1,080,000
    z"Testing Title Search extraction...zTitle Search Result: rF   )indentz'
Testing Application Form extraction...zApplication Form Result: )rR   r   rh   dumps)title_search_sampleapplication_form_sampletitle_result
app_results       r    test_extraction_with_sample_datar   j  s     

.///45HIIL	
F$*\!"D"D"D
F
FGGG 

455523JKKJ	
HdjA&F&F&F
H
HIII##r   rp          @c           	         g }t          dt          |           |          D ]}| |||z            }g }|D ]p}	  ||          }	|                    |	           $# t          $ r@}
t	          d|
            |                    dt          |
          i           Y d}
~
id}
~
ww xY w|                    |           ||z   t          |           k     r0t	          d||z  dz    d| d           t          j        |           |S )	z9Process items in batches with delays to avoid rate limitsr   zError processing item: rl   NzProcessed batch rE   z
, waiting z seconds...)	rI   rc   appendr   rR   r   extendrS   rT   )r+   process_func
batch_sizedelay_between_batchesresultsibatchbatch_resultsitemresultrW   s              r   process_batch_with_delayr     sN   G1c%jj*-- . .aJ&' 	8 	8D8%d++$$V,,,, 8 8 8333444$$gs1vv%6777777778 	}%%% z>CJJ& 	.dQ
]Q%6ddBWdddeeeJ,---Ns    A
B""6BB")r8   r9   )rp   r   )+osrh   pandasr   rS   rP   django.confr   django.httpr   django.views.decorators.httpr   shaoApp.modelsr   openaishaoApp.promptsr   r   r	   re   r   r
   r   r   r   OpenAIOPENAI_API_KEYrJ   r   r   r*   r7   r   r.   r/   rY   rm   r   r   r   r   r   r   r   r   r   <module>r      sJ   				                    $ $ $ $ $ $ = = = = = = 1 1 1 1 1 1  ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 				     P P P P P P P P P P P P 
x6	7	7	7    $4 $D $ $ $ $N%( %(3 %(S %(RW %( %( %( %(P- - - - - -`4
c 4
d 4
 4
 4
 4
n0
C 0
D 0
 0
 0
 0
f!s !s ! ! ! !H8OC 8OD 8O 8O 8O 8Ov&$ &$ &$R     r   