a
    Šxd€(  ã                   @   sz  d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
 d dlZddlmZ ddlmZmZ ddlmZmZmZmZmZ ddlmZ e e¡Zeeee d	œd
d„Zeeee dœdd„Zeeef eee dœdd„Zeeee dœdd„Z eeee dœdd„Z!eee dœdd„Z"G dd„ dƒZ#G dd„ de#ƒZ$G dd„ de#ƒZ%G d d!„ d!e#ƒZ&G d"d#„ d#e#ƒZ'dS )$é    N)ÚIteratorÚOptionalÚUnion)Ú	HTTPErroré   )Úutils)ÚFB_MOBILE_BASE_URLÚFB_MBASIC_BASE_URL)ÚURLÚPageÚRawPageÚRequestFunctionÚResponse)Ú
exceptions)ÚhashtagÚ
request_fnÚreturnc              
   K   st   |  dd ¡}|s`t td| › d¡}z||ƒ W n. ty^ } zt |¡ W Y d }~n
d }~0 0 t|t|fi |¤ŽS )NÚ	start_urlz	/hashtag/ú/)	Úpopr   Úurljoinr	   Ú	ExceptionÚloggerÚerrorÚgeneric_iter_pagesÚHashtagPageParser)r   r   Úkwargsr   Úex© r   úW/var/www/html/Ranjet/env/lib/python3.9/site-packages/facebook_scraper/page_iterators.pyÚiter_hashtag_pages   s     r    )Úaccountr   r   c              
   K   sˆ   |  dd ¡}|stt td| › d¡}z||ƒ W nB tyr } z*t |¡ t td| › d¡}W Y d }~n
d }~0 0 t|t|fi |¤ŽS )Nr   r   z/posts/)	r   r   r   r   r   r   r   r   Ú
PageParser)r!   r   r   r   r   r   r   r   Ú
iter_pages    s    
*r#   )Úgroupr   r   c                 K   s8   |  dd ¡}|s$t td| › d¡}t|t|fi |¤ŽS )Nr   zgroups/r   )r   r   r   r   r   ÚGroupPageParser)r$   r   r   r   r   r   r   Úiter_group_pages,   s    r&   )Úwordr   r   c              
   K   s†   |  dd ¡}|srt td| › d¡}z||ƒ W n@ typ } z(t |¡ t td| › ¡}W Y d }~n
d }~0 0 t|t|fi |¤ŽS )Nr   z/search/posts?q=z_&filters=eyJyZWNlbnRfcG9zdHM6MCI6IntcIm5hbWVcIjpcInJlY2VudF9wb3N0c1wiLFwiYXJnc1wiOlwiXCJ9In0%3D)	r   r   r   r   r   r   r   r   ÚSearchPageParser)r'   r   r   r   r   r   r   r   Úiter_search_pages7   s    
þ
(r)   c                 K   s(   t  td| › d¡}t|t|fi |¤ŽS )Nr   z/photos/)r   r   r   r   ÚPhotosPageParser)r!   r   r   r   r   r   r   Úiter_photosG   s    r+   )r   r   c                 k   s‚  | }|  dt¡}|  d¡}|r~|r,||ƒ d}td|d ƒD ]®}zt d|¡ ||ƒ}	W  qîW q> tyê }
 zr|
jjdkrÔ||k rÔ|d }t d|› d	|› d
¡ ||d krÈt d¡ |d  d¡ t	 
|¡ n‚ W Y d }
~
q>d }
~
0 0 q>t d¡ ||	ƒ}| ¡ }t dt|ƒ¡ |V  t d¡ | ¡ }|rn|  di ¡  d¡}|r`| dd|› ¡}t ||¡}qt d¡ d }qd S )NÚbase_urlÚrequest_url_callbacké   r   zRequesting page from: %siô  é   zCaught exception, retry number z. Sleeping for ÚszRequesting noscriptZscraperTzParsing page responsezGot %s raw posts from pagezLooking for next page URLÚoptionsÚposts_per_pageznum_to_fetch=4znum_to_fetch=z&Page parser did not find next page URL)Úgetr   Úranger   Údebugr   ÚresponseÚstatus_codeZset_noscriptÚtimeÚsleepÚget_pageÚlenÚget_next_pageÚreplacer   r   Úinfo)r   Zpage_parser_clsr   r   Znext_urlr,   r-   ZRETRY_LIMITÚretryr6   ÚeZsleep_durationÚparserÚpageÚ	next_pager2   r   r   r   r   L   sJ    

ÿ



r   c                   @   sž   e Zd ZdZdZe d¡Ze d¡Ze d¡Z	e d¡Z
edœdd	„Zed
œdd„Zed
œdd„Zee d
œdd„Zdd„ Zdd„ Zdd„ Zed
œdd„ZdS )r"   z)Class for Parsing a single page on a Pagez	for (;;);zhref[:=]"(/page_content[^"]+)"z!href"[:=]"(\\/page_content[^"]+)"z/href:"(/profile/timeline/stream/\?cursor[^"]+)"z5href\\":\\"\\+(/profile\\+/timeline\\+/stream[^"]+)\")r6   c                 C   s   || _ d | _d | _|  ¡  d S ©N)r6   ÚhtmlÚcursor_blobÚ_parse)Úselfr6   r   r   r   Ú__init__Ž   s    zPageParser.__init__©r   c                 C   s   |   dd¡S )NzG[data-ft*="top_level_post_id"]:not([data-sigil="m-see-translate-link"])Úarticle)Ú	_get_page©rH   r   r   r   r:   •   s    ÿzPageParser.get_pagec                 C   s   | j S rD   )rE   rM   r   r   r   Úget_raw_page›   s    zPageParser.get_raw_pagec                 C   sÎ   | j d usJ ‚| j | j ¡}|r:t | ¡ d ¡ dd¡S | j | j ¡}|r~| ¡ d }t | d¡ 	d¡ dd¡¡ dd¡S | j
 | j ¡}|rœ| ¡ d S | j | jj¡}|rÊ| ¡ d }t dd|¡S d S )	Nr   ú&amp;ú&úutf-8Úunicode_escapeú\/r   z\\+/)rF   Úcursor_regexÚsearchr   ÚunquoteÚgroupsr=   Úcursor_regex_2ÚencodeÚdecodeÚcursor_regex_3Úcursor_regex_4r6   ÚtextÚreÚsub©rH   ÚmatchÚvaluer   r   r   r<   ž   s(    ÿþzPageParser.get_next_pagec                 C   s&   | j j | j¡r|  ¡  n|  ¡  d S rD   )r6   r]   Ú
startswithÚjson_prefixÚ_parse_jsonÚ_parse_htmlrM   r   r   r   rG   ·   s    
zPageParser._parsec                 C   s   | j j| _| j j| _d S rD   )r6   rE   r]   rF   rM   r   r   r   rf   ½   s    
zPageParser._parse_htmlc                 C   sŠ   t | jƒ}t | jj|d … ¡}| d|¡d D ]F}|d dkr`tj|d t	d| _
| j
j
| _q0|d dkr0|d | _q0| j
d us†J ‚d S )	NÚpayloadÚactionsÚcmdr=   rE   )ÚurlÚscriptÚcode)r;   rd   ÚjsonÚloadsr6   r]   r3   r   Zmake_html_elementr   rE   rF   )rH   Zprefix_lengthÚdataÚactionr   r   r   re   Á   s    
zPageParser._parse_jsonc                 C   sn   |   ¡ }| |¡}|sjt d| ¡ t tj¡rjtj|j	ddd„ d}d}t 
d| jj¡ t 
d|||¡ |S )	Nz5No raw posts (<%s> elements) were found in this page.z| c                 S   s   dS )NTr   )Ú_r   r   r   Ú<lambda>Ú   ó    z&PageParser._get_page.<locals>.<lambda>)ÚprefixÚ	predicatez=+------------------------------------------------------------zThe page url is: %szThe page content is:
%s
%s%s
)rN   Úfindr   ÚwarningÚisEnabledForÚloggingÚDEBUGÚtextwrapÚindentr]   r5   r6   rj   )rH   Z	selectionZselection_nameZraw_pageZ	raw_postsÚcontentÚsepr   r   r   rL   Î   s     
ÿýzPageParser._get_pageN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__rd   r^   ÚcompilerT   rX   r[   r\   r   rI   r   r:   r   rN   r   r
   r<   rG   rf   re   rL   r   r   r   r   r"   €   s$   

ÿÿr"   c                       s<   e Zd ZdZe d¡Zee dœ‡ fdd„Z	dd„ Z
‡  ZS )r%   z*Class for parsing a single page of a groupz$href[=:]"(\/groups\/[^"]+bac=[^"]+)"rJ   c                    sZ   t ƒ  ¡ }|r|S | jd us J ‚| j | j¡}|rV| ¡ d }| d¡ d¡ dd¡S d S ©Nr   rQ   rR   rS   r   )	Úsuperr<   rF   r[   rU   rW   rY   rZ   r=   )rH   rC   ra   rb   ©Ú	__class__r   r   r<   è   s    
zGroupPageParser.get_next_pagec                 C   s   |   ¡  d S rD   )rf   rM   r   r   r   rG   ö   s    zGroupPageParser._parse)r   r€   r   r‚   r^   rƒ   r[   r   r
   r<   rG   Ú__classcell__r   r   r†   r   r%   ã   s   
r%   c                       sH   e Zd Ze d¡Ze d¡Zedœ‡ fdd„Ze	e
 dœdd„Z‡  ZS )r*   zhref:"(/photos/pandora/[^"]+)"z%href":"(\\/photos\\/pandora\\/[^"]+)"rJ   c                    s   t ƒ  dd¡S )Nz	div._5v64©r…   rL   rM   r†   r   r   r:   þ   s    zPhotosPageParser.get_pagec                 C   sb   | j d ur^| j | j ¡}|r(| ¡ d S | j | j ¡}|r^| ¡ d }| d¡ d¡ dd¡S d S r„   ©rF   rT   rU   rW   rX   rY   rZ   r=   r`   r   r   r   r<     s    
zPhotosPageParser.get_next_page)r   r€   r   r^   rƒ   rT   rX   r   r:   r   r
   r<   rˆ   r   r   r†   r   r*   ú   s   

r*   c                   @   s2   e Zd Ze d¡Ze d¡Zee dœdd„Z	dS )r(   zhref[:=]"[^"]+(/search/[^"]+)"zhref":"[^"]+(/search/[^"]+)"rJ   c                 C   sb   | j d ur^| j | j ¡}|r(| ¡ d S | j | j ¡}|r^| ¡ d }| d¡ d¡ dd¡S d S r„   rŠ   r`   r   r   r   r<     s    
zSearchPageParser.get_next_pageN)
r   r€   r   r^   rƒ   rT   rX   r   r
   r<   r   r   r   r   r(     s   

r(   c                       s>   e Zd Ze d¡Zedœ‡ fdd„Zee	 dœdd„Z
‡  ZS )r   z<(\/hashtag\/[a-z]+\/\?locale=[a-z_A-Z]+&amp;cursor=[^"]+).*$rJ   c                    s   t ƒ  dd¡S )NrK   r‰   rM   r†   r   r   r:      s    zHashtagPageParser.get_pagec                 C   s>   | j d usJ ‚| j | j ¡}|r:t | ¡ d ¡ dd¡S d S )Nr   rO   rP   )rF   rT   rU   r   rV   rW   r=   )rH   ra   r   r   r   r<   #  s
    zHashtagPageParser.get_next_page)r   r€   r   r^   rƒ   rT   r   r:   r   r
   r<   rˆ   r   r   r†   r   r     s   
r   )(rm   ry   r^   r{   Útypingr   r   r   r8   Zrequests.exceptionsr   ÚwarningsÚ r   Ú	constantsr   r	   Zfb_typesr
   r   r   r   r   r   Ú	getLoggerr   r   Ústrr    r#   Úintr&   r)   r+   r   r"   r%   r*   r(   r   r   r   r   r   Ú<module>   s6   
þþ4c