""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
"""

import collections
import urllib.parse
import urllib.request

__all__ = ["RobotFileParser"]

RequestRate = collections.namedtuple("RequestRate", "requests seconds")


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.sitemaps = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400 and err.code < 500:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        self.modified()
        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
                elif line[0] == "crawl-delay":
                    if state != 0:
                        # check the syntax before converting, so a
                        # malformed value cannot raise ValueError
                        if line[1].strip().isdigit():
                            entry.delay = int(line[1])
                        state = 2
                elif line[0] == "request-rate":
                    if state != 0:
                        numbers = line[1].split('/')
                        # check if all values are sane
                        if (len(numbers) == 2 and numbers[0].strip().isdigit()
                                and numbers[1].strip().isdigit()):
                            entry.req_rate = RequestRate(
                                int(numbers[0]), int(numbers[1]))
                        state = 2
                elif line[0] == "sitemap":
                    # the sitemap directive is independent of the
                    # user-agent line, so it does not change the state
                    self.sitemaps.append(line[1])
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # Until the robots.txt file has been read or found not to
        # exist, we must assume that no url is allowable.
        if not self.last_checked:
            return False
        # search for given user agent matches; the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def crawl_delay(self, useragent):
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.delay
        if self.default_entry:
            return self.default_entry.delay
        return None

    def request_rate(self, useragent):
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.req_rate
        if self.default_entry:
            return self.default_entry.req_rate
        return None

    def site_maps(self):
        if not self.sitemaps:
            return None
        return self.sitemaps

    def __str__(self):
        entries = self.entries
        if self.default_entry is not None:
            entries = entries + [self.default_entry]
        return '\n\n'.join(map(str, entries))


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return ("Allow" if self.allowance else "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []
        self.delay = None
        self.req_rate = None

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.append(f"User-agent: {agent}")
        if self.delay is not None:
            ret.append(f"Crawl-delay: {self.delay}")
        if self.req_rate is not None:
            rate = self.req_rate
            ret.append(f"Request-rate: {rate.requests}/{rate.seconds}")
        ret.extend(map(str, self.rulelines))
        return '\n'.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
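
# ------------------------------------------------------------------
# Usage sketch (not part of the stdlib module): a minimal, offline
# demonstration of the parser.  The user-agent name, paths, and
# sitemap URL below are illustrative assumptions; parse() is fed a
# hand-written robots.txt, so no network access is needed.
if __name__ == '__main__':
    rp = RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Disallow: /private/",
        "Crawl-delay: 2",
        "Request-rate: 3/10",
        "Sitemap: https://example.com/sitemap.xml",
    ])
    print(rp.can_fetch("MyCrawler", "/public/index.html"))  # True
    print(rp.can_fetch("MyCrawler", "/private/data.html"))  # False
    print(rp.crawl_delay("MyCrawler"))    # 2
    print(rp.request_rate("MyCrawler"))   # RequestRate(requests=3, seconds=10)
    print(rp.site_maps())                 # ['https://example.com/sitemap.xml']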