a
    xdF                     @   sZ  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZmZmZ d dlmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZ dd	lmZ d dlZd dlZd d
l m Z m!Z! d dl"Z"e Z#dd Z$dd Z%d6ddZ&dd Z'dd Z(e)edddZ*ee)e+f e
e, dddZ-e)e
e dddZ.e)edddZ/ee)e+f ed d!d"Z0e)e
e dd#d$Z1d7ee) ee)e+df ee
e)  ee) ee e
e d%d&d'Z2d8e)ee e
e d(d)d*Z3d9e)ee e
e d+d,d-Z4eeej5d.d/d0Z6d:ee) ee)e+df e)e)d1d2d3Z7ej8fd4d5Z9e:e;Z<e<=e>  dS );    N)AnyDictIteratorOptionalSetUnion)cookiejar_from_dict   )DEFAULT_REQUESTS_TIMEOUT)FacebookScraper)CredentialsPostRawPostProfile)html_element_to_stringparse_cookie_file)
exceptions)datetime	timedeltac              
      s   t | tr| dkrBzdd l}|jdd} W q   tdY q0 qzt| } W q ty } ztd| W Y d }~qd }~0 0 nt | t	rt
| } | d urdd | D   fd	dd
D }|rtd| tjj|  t stdd S )NZfrom_browserr   z.facebook.com)Zdomain_namez8browser_cookie3 must be installed to use browser cookiesz"Cookies are in an invalid format: c                 S   s   g | ]
}|j qS  )name.0cr   r   Q/var/www/html/Ranjet/env/lib/python3.9/site-packages/facebook_scraper/__init__.py
<listcomp>-       zset_cookies.<locals>.<listcomp>c                    s   g | ]}| vr|qS r   r   r   Zcookie_namesr   r   r   .   r   )Zc_userxszMissing cookies with name(s): zCookies are not valid)
isinstancestrbrowser_cookie3loadModuleNotFoundErrorr   
ValueErrorr   ZInvalidCookiesdictr   _scrapersessioncookiesupdateZis_logged_in)r(   r!   eZmissing_cookiesr   r   r   set_cookies   s.    
(
r+   c                   C   s   t i tj_d S N)r   r&   r'   r(   r   r   r   r   unset_cookies6   s    r-   Tc                 C   s   t | | d S r,   )r&   	set_proxy)proxyverifyr   r   r   r.   ;   s    r.   c                 C   s   t |  d S r,   )r&   set_user_agent)
user_agentr   r   r   r1   ?   s    r1   c                 C   s   t |  d S r,   )r&   set_noscript)noscriptr   r   r   r3   C   s    r3   )accountreturnc                 K   s8   | dttjd< | dd}t| tj| fi |S )a  Get a Facebook user's profile information
    Args:
        account(str): The account of the profile.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    timeoutr(   N)popr
   r&   requests_kwargsr+   get_profiler5   kwargsr(   r   r   r   r:   G   s    
r:   )post_idr6   c                 K   s8   | dttjd< | dd}t| tj| fi |S )a  Get reactors for a given post ID
    Args:
        post_id(str): The post ID, as returned from get_posts
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    r7   r(   N)r8   r
   r&   r9   r+   get_reactors)r=   r<   r(   r   r   r   r>   W   s    
r>   c                 K   s8   | dttjd< | dd}t| tj| fi |S )zGet a Facebook user's friends
    Args:
        account(str): The account of the profile.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    r7   r(   N)r8   r
   r&   r9   r+   get_friendsr;   r   r   r   r?   g   s    
r?   c                 K   s8   | dttjd< | dd}t| tj| fi |S )zGet a page's information
    Args:
        account(str): The account of the profile.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    r7   r(   N)r8   r
   r&   r9   r+   get_page_infor;   r   r   r   r@   w   s    r@   )groupr6   c                 K   s8   | dttjd< | dd}t| tj| fi |S )zGet a group's profile information
    Args:
        group(str or int): The group name or ID
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    r7   r(   N)r8   r
   r&   r9   r+   get_group_info)rA   r<   r(   r   r   r   rB      s    rB   c                 K   s8   | dttjd< | dd}t| tj| fi |S )zGet a page's shop listings
    Args:
        account(str): The account of the profile.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).
    r7   r(   N)r8   r
   r&   r9   r+   get_shopr;   r   r   r   rC      s    rC   )r5   rA   	post_urlshashtagcredentialsr6   c           	      K   s  t dd | |||fD }|dkr*td|dttjd< |dd}|dur`|dur`tdt| |d	i }t|t	rt
jd
dd dd |D }|d|  d|v rt
jddd |d d|v r|d|d< d|vr|dd|d< |dd|d< |durtj|  | dur4tj| fi |S |durPtj|fi |S |durltj|fi |S |durtj|fi |S tddS )a-  Get posts from a Facebook page or group.

    Args:
        account (str): The account of the page.
        group (int): The group id.
        post_urls ([str]): List of manually specified post URLs.
        credentials (Optional[Tuple[str, str]]): Tuple of email and password to login before scraping.
        timeout (int): Timeout for requests.
        page_limit (int): How many pages of posts to go through.
            Use None to try to get all of them.
        extra_info (bool): Set to True to try to get reactions.
        youtube_dl (bool): Use Youtube-DL for video extraction.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).

    Yields:
        dict: The post representation in a dictionary.
    c                 s   s   | ]}|d uV  qd S r,   r   )r   argr   r   r   	<genexpr>   r   zget_posts.<locals>.<genexpr>r	   z7You need to specify either account, group, or post_urlsr7   r(   N<Can't use cookies and credentials arguments at the same timeoptions,The options argument should be a dictionary.   
stacklevelc                 S   s   i | ]
}|d qS Tr   r   kr   r   r   
<dictcomp>   r   zget_posts.<locals>.<dictcomp>r5   sleep?The sleep parameter has been removed, it won't have any effect.pages
page_limit	reactions
extra_infoF
youtube_dlNo account nor group)sumr$   r8   r
   r&   r9   r+   
setdefaultr   setwarningswarnlogin	get_postsZget_group_postsZget_posts_by_hashtagZget_posts_by_url)	r5   rA   rD   rE   rF   r<   Z
valid_argsr(   rJ   r   r   r   ra      sD    







ra   )r5   rF   r6   c                 K   s  | du rt d|dttjd< |dd}|durF|durFt dt| |di }t|trt	j
ddd	 d
d |D }|d|  d|v rt	j
ddd	 |d d|v r|d|d< |dd|d< |dd|d< |durtj|  tj| fi |S )a  Get photo posts from a Facebook page.

    Args:
        account (str): The account of the page.
        credentials (Optional[Tuple[str, str]]): Tuple of email and password to login before scraping.
        timeout (int): Timeout for requests.
        page_limit (int): How many pages of posts to go through.
            Use None to try to get all of them.
        extra_info (bool): Set to True to try to get reactions.
        youtube_dl (bool): Use Youtube-DL for video extraction.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).

    Yields:
        dict: The post representation in a dictionary.
    NzYou need to specify accountr7   r(   rI   rJ   rK   rL   rM   c                 S   s   i | ]
}|d qS rO   r   rP   r   r   r   rR     r   zget_photos.<locals>.<dictcomp>r5   rS   rT   rU   rV   rX   FrW   rY   )r$   r8   r
   r&   r9   r+   r\   r   r]   r^   r_   r`   
get_photos)r5   rF   r<   r(   rJ   r   r   r   rb      s0    


rb   )wordrF   r6   c                 K   s   | st d|dttjd< |dd}|durB|durBt dt| |di }t|tr|t	j
ddd	 d
d |D }|d|  d|v rt	j
ddd	 |d d|v r|d|d< d|vr|dd|d< |dd|d< |durtj|  | durtj| fi |S t ddS )a  Get posts from a Facebook page or group.
    Args:
        word (str): The word for searching posts.
        group (int): The group id.
        credentials (Optional[Tuple[str, str]]): Tuple of email and password to login before scraping.
        timeout (int): Timeout for requests.
        page_limit (int): How many pages of posts to go through.
            Use None to try to get all of them.
        extra_info (bool): Set to True to try to get reactions.
        youtube_dl (bool): Use Youtube-DL for video extraction.
        cookies (Union[dict, CookieJar, str]): Cookie jar to use.
            Can also be a filename to load the cookies from a file (Netscape format).

    Yields:
        dict: The post representation in a dictionary.
    zYou need to specify wordr7   r(   NrI   rJ   rK   rL   rM   c                 S   s   i | ]
}|d qS rO   r   rP   r   r   r   rR   L  r   z'get_posts_by_search.<locals>.<dictcomp>rc   rS   rT   rU   rV   rW   rX   FrY   rZ   )r$   r8   r
   r&   r9   r+   r\   r   r]   r^   r_   r`   get_posts_by_search)rc   rF   r<   r(   rJ   r   r   r   rd   (  s6    



rd   )postsourcelocationc                 C   s   | d }| d}t d| t||ddH}|d tj| |dtd |d	 |t|d
d W d    n1 s~0    Y  d S )Nr=   z.htmlzWriting post %swt)modez<!--
   )indentdefaultz
-->
T)pretty)	loggerdebugopenjoinpathwritejsondumpr    r   )re   rf   rg   r=   filenamefr   r   r   write_post_to_diski  s    


rw   )r5   rA   ru   encodingc              
      s  | dd}|dur(|jdd d|d< |du rJt| p8|d |d }|du rZt }|d	krjtj}nt|d
d|d}d}| dd}|dd}	t	
 t|	d }
d}|dr$zRtd}|  }W d   n1 s0    Y  td|  |r|}W n ty"   Y n0 fdd}|d ztf | |||d|D ]}|dur| d}zt||| W n ty   td Y n0 n|dr|d j|d< |r
|ddkr|d n* st|  tj| dd}|  n|ddkr$|d d}|d  r|tj|d!|d  tjd"}|d#r|tj|d#|d  tjd"r|d}|r|ddkr r fd$d%| D }t j!||td&d' n
|"| |s|d( r|d( |
k rtd)|d(  d*|	 d+|
 d,  q$d}t#$| qVW n@ t%y:   Y n. tyf } zt&'  W Y d}~n
d}~0 0 |ddkr|d- |rt(d.tj)d/ |*  dS )0ar  Write posts from an account or group to a CSV or JSON file

    Args:
        account (str): Facebook account name e.g. "nike" or "nintendo"
        group (Union[str, int, None]): Facebook group id e.g. 676845025728409
        filename (str): Filename, defaults to <account or group>_posts.csv
        encoding (str): Encoding for the output file, defaults to locale.getpreferredencoding()
        credentials (Optional[Tuple[str, str]]): Tuple of email and password to login before scraping. Defaults to scrape anonymously
        timeout (Optional[int]): Timeout for requests.
        page_limit (Optional[int]): How many pages of posts to go through.
            Use None to try to get all of them.
        extra_info (Optional[bool]): Set to True to try to get reactions.
        dump_location (Optional[pathlib.Path]): Location where to write the HTML source of the posts.
    dump_locationNT)exist_okFZremove_sourcez_posts.format-w )newlinerx   rS   r   
days_limitiB  )daysresume_filerzExisting URL:c                    s@    r<t  d}|| d  W d    n1 s20    Y  d S )Nr}   
)rp   rr   )urlrv   )r   r   r   handle_pagination_url  s    z1write_posts_to_csv.<locals>.handle_pagination_urlkeys)r5   rA   	start_urlZrequest_url_callbackrf   zError writing post to diskrs   z[
ignore)extrasaction,textZmatching)flagsZnot_matchingc                    s   i | ]\}}| v r||qS r   r   )r   rQ   v)r   r   r   rR     r   z&write_posts_to_csv.<locals>.<dictcomp>rj   )rl   rk   timezReached days_limit - z is more than z days old (older than )z
]zCouldn't get any posts.)file)+r8   mkdirr    getlocalegetpreferredencodingsysstdoutrp   r   nowr   readlinestriprn   ro   FileNotFoundErrorra   rw   	Exception	exceptionhtmlrr   listr   csv
DictWriterwriteheaderresearch
IGNORECASEitemsrs   rt   writerowr   rS   KeyboardInterrupt	traceback	print_excprintstderrclose)r5   rA   ru   rx   r<   ry   Zoutput_fileZ
first_postrS   r   Zmax_post_timer   rv   existing_urlr   re   rf   Zdict_writermatchr*   r   )r   r   r   write_posts_to_csvu  s    
*







r   c                 C   s*   t  }||  t| t|  d S r,   )loggingStreamHandlersetLevelrn   
addHandler)levelhandlerr   r   r   enable_logging  s    

r   )T)NNNNN)N)N)NNNN)?r   rs   r   r   pathlibr   r^   typingr   r   r   r   r   r   Zrequests.cookiesr   	constantsr
   Zfacebook_scraperr   Zfb_typesr   r   r   r   utilsr   r   r~   r   r   r   r   r   r   r&   r+   r-   r.   r1   r3   r    r:   intr%   r>   r?   r@   rB   rC   ra   rb   rd   Pathrw   r   DEBUGr   	getLogger__name__rn   r   NullHandlerr   r   r   r   <module>   s    

     
Q = A    |	
