a
    Šxdkì  ã                   @   sÀ  d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZmZmZmZmZ zd dlm Z  d dl!m"Z" W n e#yæ   dZ Y n0 e $e%¡Z&ee
e'e	f  Z(d"eeeedœdd„Z)d#eeeedœdd„Z*d$eeeedœdd„Z+eeeedœdd„Z,d%eeeedœdd„Z-G dd„ dƒZ.G dd„ de.ƒZ/G dd„ de.ƒZ0G dd„ de.ƒZ1G d d!„ d!e.ƒZ2dS )&é    N)ÚJSONDecodeError)Údatetime)ÚAnyÚDictÚOptional)Úparse_qsÚurlparse)Útqdmé   )ÚutilsÚ
exceptions)ÚFB_BASE_URLÚFB_MOBILE_BASE_URLÚFB_W3_BASE_URL)ÚOptionsÚPostÚRawPostÚRequestFunctionÚResponseÚURL)Ú	YoutubeDL)ÚExtractorError)Úraw_postÚoptionsÚ
request_fnÚreturnc                 C   s   t | |||ƒ ¡ S ©N)ÚPostExtractorÚextract_post©r   r   r   Úfull_post_html© r!   úS/var/www/html/Ranjet/env/lib/python3.9/site-packages/facebook_scraper/extractors.pyr      s    r   c                 C   s   t | |||ƒ ¡ S r   )ÚGroupPostExtractorr   r   r!   r!   r"   Úextract_group_post$   s    r$   c                 C   s   t | |||ƒ ¡ S r   )ÚStoryExtractorr   r   r!   r!   r"   Úextract_story_post*   s    r&   c                 C   s   t | |||ƒ ¡ S r   )ÚPhotoPostExtractorr   r   r!   r!   r"   Úextract_photo_post0   s    r(   c                 C   s   t | |||ƒ ¡ S r   )ÚHashtagPostExtractorr   r   r!   r!   r"   Úextract_hashtag_post6   s    r*   c                   @   s’  e Zd ZdZe dej¡Ze dej¡Ze dej¡Z	e d¡Z
e d¡Ze d¡Ze dej¡Ze d	¡Ze d
¡Ze d¡Ze d¡Ze d¡Ze d¡Ze d¡Ze d¡Ze d¡Zdgdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœd d!„Zedœd"d#„Z edœd$d%„Z!edœd&d'„Z"edœd(d)„Z#edœd*d+„Z$edœd,d-„Z%edœd.d/„Z&e'e(d0œd1d2„Z)edœd3d4„Z*e+j,fd5d6„Z-d7d8„ Z.dhedœd:d;„Z/d<d=„ Z0d>d?„ Z1d@dA„ Z2dBdC„ Z3dDdE„ Z4dFdG„ Z5dHdI„ Z6dJdK„ Z7dLdM„ Z8dNdO„ Z9dPdQ„ Z:dRdS„ Z;dTdU„ Z<dVdW„ Z=e'dXœdYdZ„Z>edœd[d\„Z?edœd]d^„Z@eAeBdœd_d`„ƒZCeAdadb„ ƒZDeAdcdd„ ƒZEdidedf„ZFdS )jr   z/Class for Extracting fields from a FacebookPostzG([\d,.KM]+)\s+(Like|left reaction|others reacted|others left reactions)z([\d,.KM]+)\s+commentz([\d,.KM]+)\s+Sharez;href=\"https:\/\/lm\.facebook\.com\/l\.php\?u=(.+?)\&amp;h=z!href=\"(/[^\"]+/photos/[^\"]+?)\"zhref=\"(/photo.php[^\"]+?)\"z6<a href=\"([^\"]+?)\" target=\"_blank\" class=\"sec\">zurl\('(.+)'\)zbackground: url\('(.+)'\)z/story.php\?story_fbid=z/.+/videos/.+/(.+)/.+z){&quot;videoID&quot;:&quot;([0-9]+)&quot;zf<script nonce=.*>.*bigPipe.onPageletArrive\((?P<data>\{.*RelayPrefetchedStreamCache.*\})\);.*</script>z(?P<prefix>[{,])(?P<key>\w+):u   (?<=â€¦\s)<azhref="(\/story[^"]+)" ariaNc                 C   s(   || _ || _|| _d | _|| _i | _d S r   )Úelementr   ÚrequestÚ_data_ftÚ_full_post_htmlÚ
_live_data)Úselfr+   r   r   r    r!   r!   r"   Ú__init__[   s    zPostExtractor.__init__©r   c              1   C   sf   d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d dd d d d d d d d d d d d d d d dœ0S )NF)0Úpost_idÚtextÚ	post_textÚshared_textÚoriginal_textÚtimeÚ	timestampÚimageÚimage_lowqualityÚimagesÚimages_descriptionÚimages_lowqualityÚimages_lowquality_descriptionÚvideoÚvideo_duration_secondsÚvideo_heightÚvideo_idÚvideo_qualityÚvideo_size_MBÚvideo_thumbnailÚvideo_watchesÚvideo_widthÚlikesÚcommentsÚsharesÚpost_urlÚlinkÚlinksÚuser_idÚusernameÚuser_urlÚsourceÚis_liveÚ	factcheckÚshared_post_idÚshared_timeÚshared_user_idÚshared_usernameÚshared_post_urlÚ	availableÚcomments_fullÚreactorsÚ	w3_fb_urlÚ	reactionsÚreaction_countÚwithÚpage_idÚsharersr!   ©r0   r!   r!   r"   Úmake_new_poste   sb    ÐzPostExtractor.make_new_postc                    sB  ˆ j ˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ j	ˆ j
ˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ jˆ jg}ˆ  ¡ }ˆ j|d< |ˆ _‡ fdd„}|D ]x}z,|ƒ }|du r¨|d|jƒ W q„| |¡ W q„ tjyÊ   ‚ Y q„ tyú } z|d|j|ƒ W Y d}~q„d}~0 0 q„ˆ j ˆ jj ¡}|r0ˆ j!r0ˆ j!j"ddd	|d< ˆ j# $d
¡sLˆ j# $d¡rÚz:ˆ  %¡ }|d r„ˆ j# $d¡dkr„t& '|d ¡|d< W n4 tyº } z|d|ƒ i }W Y d}~n
d}~0 0 |du rÐ|dƒ n
| |¡ ˆ j# $d¡rNz4ˆ  (¡ |d< ˆ j# $d¡dkrt& '|d ¡|d< W n0 tyL } z|d|ƒ W Y d}~n
d}~0 0 ˆ j# $d¡r>z°ˆ  )¡ |d< ˆ j# $d¡dkr
t& '|d ¡|d< |d D ]H}	t& '|	d ¡|	d< |	d D ]}
t& '|
d ¡ q´t& '|	d ¡|	d< q–| $d¡r
| $d¡s
t*| $d¡ƒ|d< W n0 ty< } z|d|ƒ W Y d}~n
d}~0 0 |S )z!Parses the element into self.itemrR   c                    s,   ˆ j  dd¡}tjd| › |g|¢R Ž  d S )Nr3   zunknown postz[%s] )ÚpostÚgetÚloggerÚwarning)ÚmsgÚargsr3   rc   r!   r"   Úlog_warning»   s    z/PostExtractor.extract_post.<locals>.log_warningNz(Extract method %s didn't return anythingzException while running %s: %rú.story_body_containerT©Úfirstr^   r\   Ú	generatorz(Exception while extracting reactions: %rz(Extract reactions didn't return anythingrb   z&Exception while extracting sharers: %rrJ   r[   ÚrepliesÚcomment_reactorsz'Exception while extracting comments: %r)+Úextract_post_urlÚextract_post_idÚextract_textÚextract_timeÚextract_photo_linkÚextract_image_lqÚextract_likesÚextract_commentsÚextract_sharesÚextract_linksÚextract_user_idÚextract_usernameÚextract_videoÚextract_video_thumbnailÚextract_video_idÚextract_video_metaÚextract_is_liveÚextract_factcheckÚextract_share_informationÚextract_availabilityÚextract_listingÚextract_withrd   r+   re   Ú__name__Úupdater   ÚTemporarilyBannedÚ	ExceptionÚmore_url_regexÚsearchÚhtmlr    Úfindr   rf   Úextract_reactionsr   Úsafe_consumeÚextract_sharersÚextract_comments_fullÚlen)r0   Úmethodsre   rk   ÚmethodZpartial_postÚexÚhas_morer^   ÚcommentÚreplyr!   rc   r"   r   ™   s˜    ê
&



 ÿ zPostExtractor.extract_postc                 C   s   d| j  d¡p| j d¡iS )Nr3   Úft_ent_identifierÚtop_level_post_id)Ú	live_datarf   Údata_ftrc   r!   r!   r"   rs   û   s    
þzPostExtractor.extract_post_idc                 C   s@   | j jddd}|r<|j d¡}|r0t t|¡}|j|dœS d S )Nzh3 strong a,a.actor-linkTrm   Úhref©rP   rQ   )r+   r   Úattrsrf   r   Úurljoinr   r4   ©r0   ÚelemÚurlr!   r!   r"   r}     s    zPostExtractor.extract_usernamec                 C   sÈ  | j }| j |j¡}|r.| jr.| jjddd}| d¡}|rxt|ƒdkrxg }g }d}tdd„ t|ƒD ƒdƒ}||d … D ]\}|j	d	krŽd}|j	d
krºt
j|j ddd¡ ddd¡d}|sÌ| |j¡ q|| |j¡ q|d}	|	 t ||¡¡}
|	 |¡}|	 |¡}d }|jddd}|rjg }| d¡D ]4}t
j|j ddd¡ ddd¡d}| |j¡ q*|	 |¡}|
|||dœS |jdddr¢|jdddj}
|
|
dœS t|ƒdkrÄ|d j}
|
|
dœS d S )Nrl   Trm   z"p, header, span[role=presentation]r
   Fc                 s   s    | ]\}}|j d kr|V  qdS )ÚheaderN)Útag)Ú.0ÚiÚnoder!   r!   r"   Ú	<genexpr>  ó    z-PostExtractor.extract_text.<locals>.<genexpr>r¦   Úpu   >â€¦ <z><z>More<Ú ©rŽ   z

zdiv[style="display:none"]zp,span[role=presentation])r4   r5   r6   r7   z.story_body_container>div©r4   r5   r   )r+   rŒ   r   rŽ   r    r   r”   ÚnextÚ	enumerater§   r   Úmake_html_elementÚreplaceÚappendr4   ÚjoinÚ	itertoolsÚchain)r0   r+   r˜   Únodesr5   r6   ZendedZindex_non_headerrª   Zparagraph_separatorr4   r7   Z
hidden_divr!   r!   r"   rt   
  s`    

ÿ

ÿ

ÿ
ü


zPostExtractor.extract_textc              
   C   s8  | j  di ¡}| ¡ D ]X}z8|d d }t dt |¡› ¡ t |¡|dœW   S  ttfyl   Y qY q0 q| j	j
ddd}|d ur´tj|jd	d
}|r¤d|iS t d|j¡ n
t d¡ t | j	j¡}|rØd|iS zL| jj
ddd}t |jd ¡d }t dt |¡› ¡ t |¡|dœW S    Y d S 0 d S )NÚpage_insightsZpost_contextZpublish_timez'Got exact timestamp from publish_time: ©r8   r9   ÚabbrTrm   F©r   r8   zCould not parse date: %sz,Could not find the abbr element for the dateúabbr[data-store*='time']ú
data-storeú+Got exact timestamp from abbr[data-store]: )rž   rf   Úvaluesrg   Údebugr   ÚfromtimestampÚKeyErrorÚ
ValueErrorr+   r   r   Úparse_datetimer4   rh   r    ÚjsonÚloadsr¡   )r0   rº   Úpager9   Údate_elementÚdater8   r!   r!   r"   ru   P  s:    ÿ
ÿzPostExtractor.extract_timec                 C   s   | j d | j  d¡dœS )NZcontent_owner_id_newra   )rO   ra   )rž   rf   rc   r!   r!   r"   r|   w  s    
þzPostExtractor.extract_user_idc                 C   sx  | j  d¡}|s| j  d¡}g }g }g }|D ]Â}d }|j d¡rN|j d¡}n6|j d¡r„| j |j d¡¡}|r„t | ¡ d ¡}|r¸d|vr¸| 	|¡ | 	|j d¡p´|j d¡¡ |j  
¡  
¡  
¡ j d	¡}|r,| 	t d
|¡ d¡¡ q,|rü|d nd }|r|d nd }	||	|||dœ}
|rtd|v rt| j d¡sttt|ƒjƒ d¡}|rt|d }|
 ||gdœ¡ |
S )Nz/div.story_body_container>div .img:not(.profpic)z%.img:not(.profpic), img:not(.profpic)ÚsrcÚstyler   zstatic.xx.fbcdn.netú
aria-labelÚaltrŸ   ú	[=/](\d+)r
   )r;   Úimage_idÚ	image_idsr>   r?   zsafe_image.phpr:   r¥   )r:   r<   )r+   r   r¡   rf   Úimage_regex_lqr   r   Údecode_css_urlÚgroupsrµ   Ú	getparentÚattribÚreÚgroupre   r   r   Úqueryr‰   )r0   Úelemsr<   rÒ   Údescriptionsr¤   r¥   Úmatchr:   rÑ   Úresultr!   r!   r"   rw   }  sD    
ûzPostExtractor.extract_image_lqc                 C   sJ   | j  | jj¡}|r&t | ¡ d ¡}| j d¡}dd„ |D ƒ}||dœS )Nr   z.story_body_container div p ac                 S   s   g | ]}|j d  |jdœ‘qS )rŸ   )rM   r4   )r¡   r4   )r¨   Úar!   r!   r"   Ú
<listcomp>©  r¬   z/PostExtractor.extract_links.<locals>.<listcomp>)rM   rN   )Ú
link_regexr   r+   rŽ   r   ÚunquoterÕ   r   )r0   rM   rN   r!   r!   r"   r{   ¤  s    zPostExtractor.extract_linksc                 C   sî   d}| j  d¡}| j d¡}d }d }|D ]r}|j dd¡}| j |¡}| j |¡}|rjtj	||d} qœq(|r(| 
d¡}	|d u rŒd|	› }q(|› d	|	› }q(| j d
¡}
|d u rÎ|d urÎ|
d urÎ|› d|
› }|d u rÚd S t t|¡}d|iS )N)Z
story_fbidÚidÚaccountrß   rŸ   r®   )Ú	whitelistr
   zwatch?v=z/videos/rœ   ú/posts/rL   )r   rf   r+   r   r¡   Úpost_url_regexrÝ   Úvideo_post_url_regexr   Zfilter_query_paramsrÙ   rž   r¢   r   )r0   Zquery_paramsrä   ÚelementsZvideo_post_matchÚpathr+   rŸ   Z
post_matchZvideo_post_idr3   r¥   r!   r!   r"   rr   ¬  s0    
zPostExtractor.extract_post_urlc                 C   s†   dt  | jd| jt j¡p‚| j d¡p‚| j d¡p‚| jjdddrXt  | jjdddj	¡p‚| jjdddr€t  | jjdddj	¡p‚d	iS )
NrI   ÚfooterZ
like_countÚreactioncountz.likesTrm   z	.like_defr   )
r   Úfind_and_searchr+   Úlikes_regexÚconvert_numeric_abbrr   rf   r   Ú	parse_intr4   rc   r!   r!   r"   rx   Ò  s    ÿ
ý
üù
õòzPostExtractor.extract_likesc                 C   sR   dt  | jd| jt j¡pN| j d¡pN| jjdddrLt  | jjdddj	¡pNdiS )NrJ   rë   Úcomment_countz.cmt_defTrm   r   )
r   rí   r+   Úcomments_regexrï   r   rf   r   rð   r4   rc   r!   r!   r"   ry   ä  s    ÿ
ýú÷zPostExtractor.extract_commentsc                 C   s*   dt  | jd| jt j¡p&| j d¡p&diS )NrK   rë   Úshare_countr   )r   rí   r+   Úshares_regexrï   r   rf   rc   r!   r!   r"   rz   ñ  s    ÿ
ýûzPostExtractor.extract_shares)rŽ   r   c              
   C   sÆ   | j  |¡}|r¾| ¡ d  dd¡}| d¡s:t t|¡}| t td¡¡rºt 	d|› ¡ z.|  
|¡}|jjddd	j d
¡ dd¡}W n. ty¸ } zt |¡ W Y d }~n
d }~0 0 |S d S d S )Nr   z&amp;ú&Úhttpz/photo/view_full_size/ú	Fetching rß   Trm   rŸ   )Úimage_regexr   rÕ   r´   Ú
startswithr   r¢   r   rg   rÂ   r,   rŽ   r   r¡   rf   r‹   Úerror)r0   rŽ   rÝ   r¥   Zredirect_responseÚer!   r!   r"   Úextract_photo_link_HQú  s&    

ÿþÿ z#PostExtractor.extract_photo_link_HQc                 C   s|  | j  dd¡r| j  dd¡s d S g }g }g }| j d¡}g }g }|D ]4}|jd  d¡d }||vrD| |¡ | |¡ qDt|ƒ}	t|ƒdv rÈ|d	 jrÈt|ƒt	|d	 j 
d
¡ƒ d }	t |	› d¡ |D ]l}
|
jd }d|v r|tt|ƒjƒ}|d d }|d d }|› d|› }t d|› ¡ |  |¡}|  d|j¡}t|d  ¡ ƒd d }g }g }|d D ]¼}|d }|d rÈ| |d ¡ | |d p¢|d ¡ | |d d ¡ | |d ¡ nN|d }| tt¡}t d|› ¡ |  |¡}| |  |j¡¡ | |d ¡ | |d ¡ qj|r6|d nd |||rJ|d nd ||r\|d nd |rl|d nd ||dœ	  S t t|¡}t d|› ¡ zh|  |¡}| |  |j¡¡ |jjddd }| |j d!¡pâ|j d"¡¡ | t d#|¡ d¡¡ W qÌ ty8 } zt |¡ |	d8 }	W Y d }~qÌd }~0 0 qÌd}t|ƒ|	k rHd$}|jjd%d&dd'rjd(}|jjd)|› d*dd jd }|  d+¡s t t|¡}t d|› ¡ |  |¡}|  |j¡}||vr$| |¡ |jjddd }| |j d!¡p|j d"¡¡ | t d#|¡ d¡¡ n |d7 }|d,kr@t d-¡ qHq@|rV|d nd }|rh|d nd }|||||d.œS )/NÚallow_extra_requestsTZ	HQ_imagesz‚div.story_body_container>div a[href*='photo.php'], div.story_body_container>div a[href*='/photos/'], div._5v64 a[href*='/photos/']rŸ   ú?r   )é   é   éÿÿÿÿú+r
   z total photos in galleryZphotoset_tokenZ	profileidræ   r÷   Zmtouch_snowflake_paged_queryZquery_resultsZmediaÚedgesrª   Zis_playablerã   Zplayable_url_hdZplayable_urlZfull_width_imageÚurir¥   Zaccessibility_caption)	r:   r<   r=   rÑ   rÒ   r@   rC   Ú	video_idsÚvideosz.img[data-sigil='photo-image']rm   rÏ   rÎ   rÐ   z{"tn":"+>"}rß   zPhotos from)Z
containingrn   z{"tn":"+="}za.touchable[data-gt='z']rö   r   zReached image error limit)r:   r<   r=   rÑ   rÒ   )!r   rf   r+   r   r¡   Úsplitrµ   r”   r4   ÚintÚstriprg   rÂ   r   r   rÚ   r,   Ú	get_jsmodrŽ   ÚlistrÁ   r´   r   r   rü   r   r¢   rØ   r   rÙ   r‹   rú   rù   )r0   r<   rÜ   rÒ   Zraw_photo_linksZphoto_linksZ	seen_urlsrß   Zpartial_urlZtotal_photos_in_galleryrM   r¥   rÚ   Z
profile_idÚtokenÚresponseÚresultsr  r  Úitemrª   r¤   rû   ÚerrorsÚ	directionÚ
photo_linkr:   rÑ   r!   r!   r"   rv     sÆ    ÿÿ
 





÷

 
 ÿ


 

ûz PostExtractor.extract_photo_linkc                 c   sÔ  i }d}t j}|  d¡}|r`| ¡ D ]:\}}|| d  ¡ }	| ¡ D ]}
|	||
d < |
d }qDq$|j d¡D ]z}t 	|j
 d¡¡}d|v r˜t|d ƒ}nt|d	 ƒ}|d
kr®ql|| d  ¡ }	|jdddj
 d¡}t  |¡}|	||< ql| j d¡}d}t|ƒttfv r||k r|}t d|› d¡ t|j d¡ƒ}|D ]}zD|jd|› ddj
 d¡d }| |¡}|s†t d|› ¡ W n‚ ty
   zD|jdddj
 d¡}t  |¡}| |¡}|sÚt d|› ¡ W n( ty   t d|› ¡ d}Y n0 Y n0 |jdddjt  t|jdddj
 d¡¡|dœV  q>|jjddd}|rÐt|ƒ|k rÐt  t|j
 d¡¡}t d|› ¡ z|  |¡}W n> tyØ } z$t |¡ W Y d}~qÐW Y d}~n
d}~0 0 td ƒ}t |j|d… ¡}d}|d! d" D ]Â}|d# d$kr’t j d%|d& › d'td(}| d)¡}|D ]J}|jd|› ddsŽ|jd*ddj
d }|D ]}| !d+¡rv|}qvzD|jd|› ddj
 d¡d }| |¡}|sÐt d|› ¡ W n„ tyV   zD|jdddj
 d¡}t  |¡}| |¡}|s$t d|› ¡ W n* tyP   t d|j› ¡ d}Y n0 Y n0 |jdddjt  t|jdddj
 d¡¡|dœV  qBn6|d# d,krt j d%|d& › d'td(}|jd-dd}qqTdS ).zFetch people reacting to an existing post obtained by `get_posts`.
        Note that this method may raise one more http request per post to get all reactorsZsp_LdwxfpG67BnZUFIReactionIconsÚdisplay_nameZspriteCssClassÚspriteMapCssClassú)span[data-sigil='reaction_profile_sigil']r¿   ÚreactionTypeÚ
reactionIDÚallr©   Trm   rÍ   r\   ç    eÍÍAr÷   z	 reactorsz'div[id^='reaction_profile_browser']>divzdiv>i.Úclassr  zDon't know zdiv>i[style]zNo div>i[style] elem in: NÚstrongrß   rŸ   )ÚnamerM   Útypez!div[id^=reaction_profile_pager] aú	for (;;);ÚpayloadÚactionsÚcmdrµ   z#<div id='reaction_profile_browser'>rŽ   z</div>©r¥   zBdiv#reaction_profile_browser>div,div#reaction_profile_browser1>divz	div>i.imgZsp_r´   zdiv#reaction_profile_pager a)"r   Úemoji_class_lookupr
  ÚitemsÚlowerrÁ   rŽ   r   ÚdemjsonÚdecoder¡   rf   ÚstrZget_background_image_urlr   r  r  Úfloatrg   rÂ   r  rú   ÚAttributeErrorr4   r¢   r   r”   r   r,   r‹   rÇ   rÈ   r³   rù   )r0   r  Úreaction_lookupZemoji_url_lookupr  r#  Zreaction_iconsÚkÚvr  r  ÚsigilÚsingle_reactionZemoji_styleZ	emoji_urlÚreactors_optÚlimitrÛ   r¤   Zemoji_classÚreaction_typeÚmorer¥   rû   Úprefix_lengthÚdataÚactionrŽ   ÚclassesÚcr!   r!   r"   Úextract_reactors  sâ    



ÿþ


ý
$þÿ

ÿþþ
ÿ

ÿûþzPostExtractor.extract_reactorsc              	   c   s¤   d| j  d¡› }|r t d|› ¡ |  |¡}|j d¡}|D ]6}|jdddjt 	t
|jdddj d	¡¡d
œV  q@|jjddd}|rš|j d	¡}qd}qdS )z•Fetch people sharing an existing post obtained by `get_posts`.
        Note that this method may raise more http requests per post to get all sharersz(https://m.facebook.com/browse/shares?id=r3   r÷   zdiv.item:not(#m_more_item)r  Trm   rß   rŸ   ©r  rM   z#m_more_item aN)re   rf   rg   rÂ   r,   rŽ   r   r4   r   r¢   r   r¡   )r0   Z	share_urlr  rÛ   r¤   r3  r!   r!   r"   r’   õ  s    
ÿþ
zPostExtractor.extract_sharersFc                 C   s‚  i }t j}|  d¡}|r(| | d¡¡ | j di ¡ ¡ D ],\}}|d r:|| d  ¡ }|d ||< q:| j d¡}	| j d¡}
|s| j d¡}|
o¨t  	|
¡j
d	d
 ¡ }| j d¡}g }|r²d|› }t d|› ¡ |  |¡}|rî|r¦i }d}	|j d¡D ]}t |j d¡¡}d|v r0t|d ƒ}nt|d ƒ}|jdddj dd¡}t  |¡}|dkrn|}	n"||v r|| d  ¡ }|||< q|	s¦t| ¡ ƒ}	|  ||¡}|rÔ| d¡||	|t ¡ |dœS |
rv|  |¡}|  |j¡D ]„}|d d d d d d d  d! d" }|d# |krð|d$ d% |d d% d&d'„ |d( d) D ƒ|d* d+ |d, t ¡ d-œ  S qðd.t ¡ iS )/aP  Fetch share and reactions information with a existing post obtained by `get_posts`.
        Return a merged post that has some new fields including `reactions`, `w3_fb_url`,
        `fetched_time`, and reactions fields `LIKE`, `ANGER`, `SORRY`, `WOW`, `LOVE`, `HAHA` if
        exist.
        Note that this method will raise one http request per post, use it when you want some more
        information.

        Example:
        ```
        for post in get_posts('fanpage'):
            more_info_post = fetch_share_and_reactions(post)
            print(more_info_post)
        ```
        ZUFIReactionTypesr^   ZreactioncountmapÚdefaultr  rì   rL   r3   zwww.facebook.com)Únetlocr\   zGhttps://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier=r÷   r   r  r¿   r  r  z-span[data-sigil='reaction_profile_tab_count']Trm   zAll r®   r  Úlike)rI   r^   r_   r\   Úfetched_timer]   ZjsmodsZpre_display_requiresé   r
   Z__bboxrÞ   r5  ZfeedbackZsubscription_target_idró   Úcountc                 S   s"   i | ]}|d  d   ¡ |d “qS )rª   r2  r_   )r%  )r¨   Zreactionr!   r!   r"   Ú
<dictcomp>Z  s   ÿz3PostExtractor.extract_reactions.<locals>.<dictcomp>Ztop_reactionsr  rñ   Ztotal_countr¥   )rK   rI   r^   rJ   r]   r>  r>  )r   r+  r
  r‰   rf   r   r$  r%  re   r   Ú_replaceÚgeturlr   rg   rÂ   r,   rŽ   r   r&  r'  r¡   r(  r4   r´   rï   ÚsumrÁ   r9  r   ÚnowÚparse_share_and_reactions)r0   r3   Úforce_parse_HTMLr^   r+  Zreaction_lookup_jsmodr,  r-  r  r_   r¥   r]   r0  r\   Zreaction_urlr  r.  r/  Úrespr  r5  r!   r!   r"   r   
  sŠ    




ÿþ


ú	
ÿþ


þ
÷zPostExtractor.extract_reactionsc           	      C   sð   | j jddd}| j jddd}|r¾| d¡r¾tt|j d¡ƒjƒ}|d d }|| jd	 kr¾t 	d
|› ¡ |  
|¡}t|j| j| j
|jd}d	|i|_| ¡ p¤i }|| ¡  d¡dœ|¥S |d u rÊd S | j d¡ræ|  ¡ }|ræ|S |  |¡S )Nz[data-sigil="inlineVideo"]Trm   za[href*='photoset_token']zi[aria-label='video']rŸ   Zphotor   r3   r÷   )r    r@   )rC   r@   Ú
youtube_dl)r+   r   r   r   r¡   rf   rÚ   re   rg   rÂ   r,   r   rŽ   r   r   r~   Úextract_video_highresÚextract_video_lowres)	r0   Úvideo_data_elementZphotoset_linkrÚ   rC   r  Z
video_postÚmetaZvidr!   r!   r"   r~   d  s2    
ÿ
þýzPostExtractor.extract_videoc              
   C   s€   z0t  |jd  dd¡¡}d| d¡ dd¡iW S  ty` } zt d|¡ W Y d }~n$d }~0  tyz   t d	¡ Y n0 d S )
Nr¿   ú\\ú\r@   rÌ   z\/ú/z!Error parsing data-store JSON: %rzdata-store attribute not found)	r&  r'  r¡   r´   rf   r   rg   rú   rÄ   )r0   rL  r5  r—   r!   r!   r"   rK  €  s     z"PostExtractor.extract_video_lowresc              
   C   s°   t stdƒ‚dddœ}| j d¡r*d|d< zPt |ƒ4}|j| j d¡dd	d
 }d|iW  d   ƒ W S 1 sn0    Y  W n0 tyª } zt d|¡ W Y d }~n
d }~0 0 d S )NzCyoutube-dl must be installed to download videos in high resolution.ÚbestT)ÚformatÚquietZyoutube_dl_verboseFrS  rL   )Údownloadr¥   r@   z*Error extracting video with youtube-dl: %r)	r   ÚModuleNotFoundErrorr   rf   Zextract_infore   r   rg   rú   )r0   Zydl_optsZydlr¥   r—   r!   r!   r"   rJ  Š  s     ÿþ
,"z#PostExtractor.extract_video_highresc                 C   sP   | j jddd}|sd S |j dd¡}| j |¡}|rLdt | ¡ d ¡iS d S )Nzi[data-sigil="playInlineVideo"]Trm   rÍ   r®   rF   r   )	r+   r   r¡   rf   Úvideo_thumbnail_regexr   r   rÔ   rÕ   )r0   Zthumbnail_elementrÍ   rÝ   r!   r!   r"   r      s    z%PostExtractor.extract_video_thumbnailc                 C   s(   | j  | jj¡}|r$d| ¡ d iS d S )NrC   r   )Úvideo_id_regexr   r+   rŽ   rÕ   ©r0   rÝ   r!   r!   r"   r€   ª  s    zPostExtractor.extract_video_idc                 C   sà   | j jddd}|sd S t |j¡}| d¡dkr6d S d}d|v rl|d D ] }| d¡d d	krJ| d
¡}qJd }| d¡rt|d  d¡ƒd }t 	|d ¡}| 
¡ jd d}|t | d¡¡|| d¡| d¡| d¡|dœS )Nz"script[type='application/ld+json']Trm   z@typeZVideoObjectr   ZinteractionStatisticZinteractionTypezhttp://schema.org/WatchActionZuserInteractionCountÚcontentSizeZkBiè  ZdatePublished)ÚtzinfoÚdurationZvideoQualityÚwidthÚheight)r8   rA   rG   rD   rH   rB   rE   )r    r   rÇ   rÈ   r4   rf   r)  r	  r   rÆ   Ú
astimezoner´   Zparse_duration)r0   r¤   rM  ZwatchesZinteractionrY  r8   r!   r!   r"   r   °  s0    
ùz PostExtractor.extract_video_metac                 C   s$   | j  d¡d j}d|v d|v dœS )Nr¦   r   zis livezwas live)rS   Zwas_live)r+   r   Z	full_text)r0   r¦   r!   r!   r"   r‚   Í  s    zPostExtractor.extract_is_livec                 C   sZ   | j jddd}|sd S |j  ¡  ¡ }d}| ¡ D ]}| ¡ dkrDq2||d 7 }q2d|iS )Nzbutton[value="See Why"]Trm   r®   zSee WhyÚ
rT   )r+   r   rÖ   Úitertextr	  )r0   ÚbuttonZfactcheck_divrT   r4   r!   r!   r"   rƒ   Ñ  s    zPostExtractor.extract_factcheckc                 C   s„   | j  d¡sd S t d| jd | j d ¡ | jjddd}t|| j| j	ƒ}| j d | 
¡  d¡| j d | ¡  d	¡| ¡  d
¡dœS )NZoriginal_content_idz%s is a share of %sr3   zCarticle article, .story_body_container .story_body_container headerTrm   r8   Zoriginal_content_owner_idrP   rL   )rU   rV   rW   rX   rY   )rž   rf   rg   rÂ   re   r+   r   r   r   r,   ru   r}   rr   )r0   r   Zshared_postr!   r!   r"   r„   Ý  s    ÿÿûz'PostExtractor.extract_share_informationc                 C   s   dd| j jviS )NrZ   z,>This content isn't available at the moment<)r+   rŽ   rc   r!   r!   r"   r…   ñ  s    ÿz"PostExtractor.extract_availabilityc                 C   s¦  |j  d¡}zv|jddd}|j  d¡p2|j  d¡}| d¡d }t d	|j¡}|r^| d
¡}|j 	¡ j
 d¡}|r€t t|¡}W nP tyÒ   |jdddj}d }d }|jddd}|rÎt t|j  d¡¡}Y n0 |jddd}|jddd}	|	s|jddd}	|	r|	j}
n|j}
d }|r>d|jv r>|j d¡d }|jddd}|r|tj|jdd}|s€t d|j› ¡ nd }|jddd}|r´tt|j d ƒjƒ d¡d }n>|jddd}|rò| j |j d ¡}|ròt | ¡ d ¡}i }| j d| j d¡p| j d¡¡}|rld| jd< |jddd}|rl| j|dd}|dkrlt | dg ¡¡|d< |t t|¡|||||
||| dg ¡| d¡| d¡d œS )!Nrã   z.profpic.imgTrm   rÏ   rÎ   ú,r   zfeed_story_ring(\d+)r
   rŸ   Úh3zh3>azSdiv:not([data-sigil])>a[href]:not([data-click]):not([data-store]):not([data-sigil])z,[data-sigil="comment-body"],div._14ye,div.blzdiv>div>divr_  r¼   r½   zUnable to parse z(a[href^="https://lm.facebook.com/l.php"]Úuzi.img:not(.profpic)[style]rÍ   rq   r^   r\   za[href^="/ufi/reaction/profile/browser/?ft_ent_identifier="] i,a[href^="/ufi/reaction/profile/browser/?ft_ent_identifier="] img)rG  ro   r_   )Ú
comment_idÚcomment_urlÚcommenter_idZcommenter_urlZcommenter_nameÚcommenter_metaZcomment_textZcomment_timeZcomment_imagerq   Zcomment_reactionsZcomment_reaction_count)r¡   rf   r   r  rØ   r   rŽ   rÙ   r+   rÖ   r×   r   r¢   r   r*  r4   rÆ   rg   rÂ   r   r   rÚ   rÓ   rÔ   rÕ   r   r   r‘   )r0   r™   re  Zprofile_picturer  rg  r¥   rM   Z
first_linkZcomment_body_elemr4   rh  rÊ   rË   Z	image_urlrÝ   r^   Zcomment_reactors_optr\   r!   r!   r"   Úparse_commentö  s–    
þÿ ÿ
ý


ôzPostExtractor.parse_commentc                 c   sà  | j  d¡st d|› ¡ zn| jjdddd u}|rz| jjdddjd }t d| jj	¡ 
d¡}| j|d||d	œd
}nd}|  |¡}W nD tjy    ‚ Y n0 tyÎ } zt |¡ W Y d }~d S d }~0 0 |rNtdƒ}t |j|d … ¡}|d d D ],}	|	d dk rþtj|	d td}
 q, qþd}| j  d¡rBd}|
 |¡}nd}|j	 |¡dd … }z|D ]}|  |¡V  qlW nX tjyš   ‚ Y nB tyÚ } z(t d|› d|› d|› ¡ W Y d }~n
d }~0 0 d S )NÚprogressr÷   zinput[name='fb_dtsg']Trm   Úvaluezencrypted":"([^"]+)r
   )Úfb_dtsgZ__a)re   ÚparamsFr  r  r   r!  r´   rŽ   r"  z&div[data-sigil="comment inline-reply"]Únoscriptz#root div[id]údiv[data-sigil="comment"]úUnable to parse comment z	 replies ú: )r   rf   rg   rÂ   r    r   r¡   rØ   r   rŽ   rÙ   r,   r   rŠ   r‹   rú   r”   rÇ   rÈ   r4   r   r³   r   ri  )r0   Úreplies_urlZuse_ajax_postrl  ZencryptedAjaxResponseTokenr  rû   r4  r5  r6  rŽ   Zreply_selectorrp   rš   r!   r!   r"   Úextract_comment_repliesO  s\    ÿþý
þ
z%PostExtractor.extract_comment_repliesc              
      sÈ   ztˆ   |¡}‡ fdd„| d¡D ƒ|d< |jddd}|rpˆ  |jd ¡}|d rht |d |¡|d< n||d< |W S  tjyŠ   ‚ Y n: tyÂ } z"t	 
d	|› d
|› ¡ W Y d }~n
d }~0 0 d S )Nc                    s   g | ]}ˆ   |¡‘qS r!   )ri  )r¨   rš   rc   r!   r"   rà   Œ  s   ÿz>PostExtractor.extract_comment_with_replies.<locals>.<listcomp>z&div[data-sigil='comment inline-reply']rp   z]div.async_elem[data-sigil='replies-see-more'] a[href],div[id*='comment_replies_more'] a[href]Trm   rŸ   rp  rq  )ri  r   rs  r¡   r·   r¸   r   rŠ   r‹   rg   rú   )r0   r™   rÞ   rr  Zreply_generatorrû   r!   rc   r"   Úextract_comment_with_replies‰  s&    

þ
þz*PostExtractor.extract_comment_with_repliesc              
   c   sŠ  | j st d¡ dS d}| j j|dd}|s:t d¡ dS d}| j d¡rT|› d	}t| |¡ƒ}|stt d
¡ dS |D ]}|  |¡}|rx|V  qxd| j	 d¡› d}|j|dd}|sÚd| j	 d¡› d}|j|dd}d}	|r(|j
 d¡r(tt|j
 d¡ƒjƒ}
t|
 d¡d ƒ}||	k r(|}	| j d¡}t|ƒttfv rJ|}	t d|	› d¡ | j d¡rtt|	d}g }| j d¡}d}|rÊ| j d¡rºt | j d¡|j
 d¡¡}n|j
 d¡d }| j d¡rä| j d¡}|r†t|ƒ|	kr†|r|t t|¡ƒ ||v r&t d¡ q†| j d¡r@| d¡ nt d|› ¡ z|  |¡}W nT tjyv   ‚ Y n> ty² } z$t |¡ W Y d}~q†W Y d}~n
d}~0 0 | |¡ |jj|dd}|sât d
¡ q†| |¡}| |¡ |s
t d
¡ q†|D ]}|  |¡}|r|V  q|j|dd}|r~| j d¡rlt | j d¡|j
 d¡¡}n|j
 d¡d }nd}qädS ) z˜Fetch comments for an existing post obtained by `get_posts`.
        Note that this method may raise multiple http requests per post to get all commentsz-Unable to get comments without full post HTMLNzdiv[id^="ufi_"]Trm   zNo comments area foundro  rn  z>div>div:not(id)>divzNo comments found on pagezdiv#see_next_r3   z azdiv#see_prev_r  zdata-ajaxify-hrefr@  r   rJ   zFetching up to z	 commentsrj  )ÚtotalZcomment_request_url_callbackZresponse_urlrŸ   zS&m_entstream_source=video_home&player_suborigin=entry_point&player_format=permalinkZcomment_start_urlzcycle detected, breaké   r÷   ) r    rg   rú   r   r   rf   r  rh   rt  re   r¡   r   r   rÚ   r  r  r)  rÂ   r	   r   Zcombine_url_paramsr”   r¢   r   r‰   r,   r   rŠ   r‹   rµ   rŽ   Úextend)r0   Zcomments_area_selectorr¤   Zcomments_selectorrJ   r™   rÞ   Zmore_selectorr3  r1  Úparsedr@  Zcomments_optZpbarZvisited_urlsZrequest_url_callbackZmore_urlr  rû   Zmore_commentsr!   r!   r"   r“      s¶    






ÿ
ÿÿ


$






ÿ
ÿÿz#PostExtractor.extract_comments_fullr¯   c                 c   s4   | j  |¡}|D ]}| j d|¡}t |¡V  qd S )Nz\g<prefix>"\g<key>":)Úshares_and_reactions_regexÚfindallÚbad_json_key_regexÚsubrÇ   rÈ   )r0   rŽ   Z	bad_jsonsZbad_jsonZ	good_jsonr!   r!   r"   rF  
  s    z'PostExtractor.parse_share_and_reactionsc                 C   sD   | j  d¡}t|ƒdkr@|d  d¡d j|d j|d jdœS d S )	Nz!div[data-ft='{"tn":"H"}']>div>divr?  r   Úspanr  r
   é   )Zlisting_titleZlisting_priceZlisting_location)r+   r   r”   r4   )r0   Zdivsr!   r!   r"   r†     s    ýzPostExtractor.extract_listingc                 C   s°   | j  d¡dd … }|r¬|d j|d jd dœg}|d jd }| d¡r”t d|› ¡ |  |¡}|j d	¡}|D ]}| 	|j|jd dœ¡ qv|| j jd
ddjdœS d S )Nzheader h3 ar
   r   rŸ   r:  r  z/browse/users/r÷   z%#root .item>div>div>a:not(.touchable)z	header h3Trm   )r`   r¦   )
r+   r   r4   r¡   rù   rg   rÂ   r,   rŽ   rµ   )r0   rN   Zpeopler¥   r  rM   r!   r!   r"   r‡     s    

zPostExtractor.extract_withc              
   C   sŒ   | j d ur| j S i | _ z$| jjd  dd¡}t |¡| _ W nJ tyj } zt d|¡ W Y d }~n$d }~0  t	y„   t d¡ Y n0 | j S )Nzdata-ftrN  rO  zError parsing data-ft JSON: %rzdata-ft attribute not found)
r-   r+   r¡   r´   r&  r'  r   rg   rú   rÄ   )r0   Zdata_ft_jsonr—   r!   r!   r"   rž   (  s    
 zPostExtractor.data_ftc              
   C   sÖ   | j d ur| j S | j dd¡rÎ| j d¡rÎ| j d¡}t d|› ¡ z|  |¡}W nT tjy¨ } z:| j d¡ 	t
t¡}t d|› ¡ |  |¡}W Y d }~n
d }~0 0 |j d¡rÀt d¡ |j| _ | j S d S d S )Nrý   Tr3   r÷   rL   zfor (;;)z"full_post_html startswith for (;;))r.   r   rf   re   rg   rÂ   r,   r   ZNotFoundr´   r   r   r4   rù   rh   rŽ   )r0   r¥   r  rû   r!   r!   r"   r    8  s     
 
zPostExtractor.full_post_htmlc              
   C   sR   | j r| j S z|  d¡| _ W n. tyJ } zt |¡ W Y d }~n
d }~0 0 | j S )NZ	MLiveData)r/   r
  r‹   rg   rú   )r0   rû   r!   r!   r"   r   M  s     zPostExtractor.live_datac                 C   sF   |s| j r| j }n| j}t |d |j¡}|r>t | d¡¡S i S d S )Nz[^{]+({.+?})(?:\]\]|,\d)r
   )r    r+   rØ   r   rŽ   r&  r'  rÙ   )r0   r  r+   rÝ   r!   r!   r"   r
  W  s    zPostExtractor.get_jsmod)N)NF)N)Grˆ   Ú
__module__Ú__qualname__Ú__doc__rØ   ÚcompileÚ
IGNORECASErî   rò   rô   rá   r  Zphoto_link_2rø   rÓ   rV  rç   rè   rW  ry  r{  rŒ   Úpost_story_regexr1   r   rd   r   ÚPartialPostrs   r}   rt   ru   r|   rw   r{   rr   rx   ry   rz   r(  r   rü   rv   r   r+  r9  r’   r   r~   rK  rJ  r   r€   r   r‚   rƒ   r„   r…   ri  rs  rt  r“   rF  r†   r‡   ÚpropertyÚdictrž   r    r   r
  r!   r!   r!   r"   r   <   s†   ÿ


þ




ÿ




4b	F''&	ptZ

Y:j


	r   c                   @   s$   e Zd ZdZe d¡Ze d¡ZdS )r#   zAClass for extracting posts from Facebook Groups rather than Pagesz.https://m.facebook.com/groups/[^/]+/permalink/z:href="(https://m.facebook.com/groups/[^/]+/permalink/\d+/)N)rˆ   r  r€  r  rØ   r‚  rç   r„  r!   r!   r!   r"   r#   e  s   
r#   c                   @   sR   e Zd Zedœdd„Zedœdd„Zedœdd„Zedœdd	„Zedœd
d„ZdS )r'   r2   c                 C   s   | j jdddj}||dœS )Nzdiv.msgTrm   r°   )r+   r   r4   )r0   r4   r!   r!   r"   rt   m  s    zPhotoPostExtractor.extract_textc                 C   s$   |   | jj¡}||g|  ¡ d dœS )Nr?   )r:   r<   r=   )rü   r    rŽ   rw   )r0   r:   r!   r!   r"   rv   q  s
    
ýz%PhotoPostExtractor.extract_photo_linkc                 C   s&   t  d| jj¡}|r"d| d¡iS d S )Nzentity_id:(\d+),rO   r
   )rØ   r   r+   rŽ   rÙ   rX  r!   r!   r"   r|   y  s    z"PhotoPostExtractor.extract_user_idc                 C   s   dt  t|  ¡ d ¡iS )NrL   r3   )r   r¢   r   rs   rc   r!   r!   r"   rr   ~  s    z#PhotoPostExtractor.extract_post_urlc                 C   sT   zdt | jd ƒiW S  tyN   t d| jj¡}|rJd| ¡ d i Y S Y n0 d S )Nr3   r›   úft_ent_identifier=(\d+)r   )r(  r   rÄ   rØ   r   r    rŽ   rÕ   rX  r!   r!   r"   rs     s    z"PhotoPostExtractor.extract_post_idN)	rˆ   r  r€  r…  rt   rv   r|   rr   rs   r!   r!   r!   r"   r'   l  s
   r'   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )r)   Nc                    sF   |   |¡}|r0||ƒ}|r0|j d¡d }|j}tƒ  ||||¡ d S )Nz[data-ft*="top_level_post_id"]r   )Úextract_hashtag_post_idrŽ   r   Úsuperr1   )r0   r+   r   r   r    r3   r  ©Ú	__class__r!   r"   r1   ‹  s    
zHashtagPostExtractor.__init__c                 C   s"   t  d|j¡}|r| ¡ d S d S )Nrˆ  r   )rØ   r   rŽ   rÕ   )r0   r+   rÝ   r!   r!   r"   r‰  •  s    z,HashtagPostExtractor.extract_hashtag_post_id)N)rˆ   r  r€  r1   r‰  Ú__classcell__r!   r!   r‹  r"   r)   Š  s   
r)   c                   @   s(   e Zd Zedœdd„Zedœdd„ZdS )r%   r2   c                 C   sR   | j jddd}|rN|jdddjd }|r8t t|¡}|jdddj|dœS d S )Nz#m-stories-card-headerTrm   rß   rŸ   zdiv.overflowTextr    )r+   r   r¡   r   r¢   r   r4   r£   r!   r!   r"   r}     s    zStoryExtractor.extract_usernamec                 C   sJ   | j jddd}t |jd ¡d }t dt |¡› ¡ t |¡|dœS )Nr¾   Trm   r¿   r8   rÀ   r»   )	r+   r   rÇ   rÈ   r¡   rg   rÂ   r   rÃ   )r0   rÊ   r8   r!   r!   r"   ru   ¥  s    zStoryExtractor.extract_timeN)rˆ   r  r€  r…  r}   ru   r!   r!   r!   r"   r%   œ  s   r%   )N)N)N)N)3r·   rÇ   Zdemjson3r&  r   ÚloggingrØ   r   Útypingr   r   r   Úurllib.parser   r   Z	tqdm.autor	   r®   r   r   Ú	constantsr   r   r   Zfb_typesr   r   r   r   r   r   rI  r   Zyoutube_dl.utilsr   ÚImportErrorÚ	getLoggerrˆ   rg   r(  r…  r   r$   r&   r(   r*   r   r#   r'   r)   r%   r!   r!   r!   r"   Ú<module>   sr    

 ÿþ ÿþ ÿþþ ÿþ          3