%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /snap/core/17212/usr/lib/python3.5/urllib/__pycache__/
Upload File :
Create Path :
Current File : //snap/core/17212/usr/lib/python3.5/urllib/__pycache__/robotparser.cpython-35.pyc



|�g4�@sddZddlZddlZdgZGdd�d�ZGdd�d�ZGdd�d�ZdS)	a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
�N�RobotFileParserc@s�eZdZdZddd�Zdd�Zdd�Zd	d
�Zdd�Zd
d�Z	dd�Z
dd�Zdd�ZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    �cCs>g|_d|_d|_d|_|j|�d|_dS)NFr)�entries�
default_entry�disallow_all�	allow_all�set_url�last_checked)�self�url�r�(/usr/lib/python3.5/urllib/robotparser.py�__init__s				
zRobotFileParser.__init__cCs|jS)z�Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r	)r
rrr
�mtimeszRobotFileParser.mtimecCsddl}|j�|_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)�timer	)r
rrrr
�modified(szRobotFileParser.modifiedcCs5||_tjj|�dd�\|_|_dS)z,Sets the URL referring to a robots.txt file.��N)r�urllib�parse�urlparseZhost�path)r
rrrr
r0s	zRobotFileParser.set_urlcCs�ytjj|j�}Wnmtjjk
r�}zG|jdkrOd|_n'|jdkrv|jdkrvd|_WYdd}~Xn)X|j	�}|j
|jd�j��dS)	z4Reads the robots.txt URL and feeds it to the parser.��Ti�i�Nzutf-8)rr)
rZrequestZurlopenr�errorZ	HTTPError�coderr�readr�decode�
splitlines)r
�f�err�rawrrr
r5szRobotFileParser.readcCs>d|jkr*|jdkr:||_n|jj|�dS)N�*)�
useragentsrr�append)r
�entryrrr
�
_add_entryBszRobotFileParser._add_entrycCsd}t�}|j�x�|D]�}|sr|dkrJt�}d}n(|dkrr|j|�t�}d}|jd�}|dkr�|d|�}|j�}|s�q |jdd�}t|�dkr |dj�j�|d<tj	j
|dj��|d<|ddkr_|dkrB|j|�t�}|jj|d�d}q |ddkr�|dkr�|j
jt|dd	��d}q |dd
kr |dkr |j
jt|dd��d}q W|dkr�|j|�dS)z�Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr��#N�:z
user-agentZdisallowFZallowT)�Entryrr&�find�strip�split�len�lowerrr�unquoter#r$�	rulelines�RuleLine)r
�lines�stater%�line�irrr
rKsL
	

		
	 
			
zRobotFileParser.parsecCs�|jr
dS|jrdS|js'dStjjtjj|��}tjjdd|j|j	|j
|jf�}tjj|�}|s�d}x-|j
D]"}|j|�r�|j|�Sq�W|jr�|jj|�SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTr�/)rrr	rrrr0�
urlunparserZparamsZqueryZfragment�quoter�
applies_to�	allowancer)r
�	useragentrZ
parsed_urlr%rrr
�	can_fetchs$				zRobotFileParser.can_fetchcCsdjdd�|jD��S)NrcSs g|]}t|�d�qS)�
)�str)�.0r%rrr
�
<listcomp>�s	z+RobotFileParser.__str__.<locals>.<listcomp>)�joinr)r
rrr
�__str__�szRobotFileParser.__str__N)
�__name__�
__module__�__qualname__�__doc__rrrrrr&rr=rCrrrr
rs	
	4c@s:eZdZdZdd�Zdd�Zdd�ZdS)	r2zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCsY|dkr|rd}tjjtjj|��}tjj|�|_||_dS)NrT)rrr8rr9rr;)r
rr;rrr
r�s
zRuleLine.__init__cCs|jdkp|j|j�S)Nr")r�
startswith)r
�filenamerrr
r:�szRuleLine.applies_tocCs|jrdndd|jS)NZAllowZDisallowz: )r;r)r
rrr
rC�szRuleLine.__str__N)rDrErFrGrr:rCrrrr
r2�sr2c@sFeZdZdZdd�Zdd�Zdd�Zdd	�Zd
S)r*z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS)N)r#r1)r
rrr
r�s	zEntry.__init__cCsjg}x'|jD]}|jd|dg�qWx*|jD]}|jt|�dg�q:Wdj|�S)NzUser-agent: r>r)r#�extendr1r?rB)r
Zret�agentr5rrr
rC�sz
Entry.__str__cCs]|jd�dj�}x=|jD]2}|dkr9dS|j�}||kr#dSq#WdS)z2check if this entry applies to the specified agentr7rr"TF)r-r/r#)r
r<rKrrr
r:�szEntry.applies_tocCs.x'|jD]}|j|�r
|jSq
WdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r1r:r;)r
rIr5rrr
r;�szEntry.allowanceN)rDrErFrGrrCr:r;rrrr
r*�s

r*)rGZurllib.parserZurllib.request�__all__rr2r*rrrr
�<module>s
	�

Zerion Mini Shell 1.0