
    daf,                     v   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
 d dlmZmZ  G d d      Z G d d	eej                        Z G d
 deej                        Z G d deej                        Z G d deej                        Z G d de      Z G d deej                        Z G d deej                        Z G d de      Z G d deej                        Z G d deej                        Z G d deej                        Z G d deej                        Z G d  d!e      Z G d" d#eej                        Z G d$ d%eej                        Z G d& d'eej                        Z G d( d)eej                        Z  G d* d+eej                        Z! G d, d-eej                        Z" G d. d/e      Z# ejH                  ejJ                  d0       G d1 d2ej                               Z& ejN                          G d3 d4ej                               Z(e)d5k(  r ejT                          yy)6    N)support)socket_helper)threading_helper)BaseHTTPRequestHandler
HTTPServerc                   >    e Zd ZdZdZg Zg ZdZd Zd Z	d Z
d Zd Zy)	BaseRobotTest test_robotparserNc                     t        j                  | j                        j                         }t        j
                  j                         | _        | j                  j                  |       y N)	ioStringIO
robots_txt	readlinesurllibrobotparserRobotFileParserparserparse)selfliness     0/root/Python-3.12.4/Lib/test/test_robotparser.pysetUpzBaseRobotTest.setUp   sE    DOO,668((88:%     c                 P    t        |t              r	|\  }}||fS | j                  |fS r   )
isinstancetupleagentr   urlr   s      r   get_agent_and_urlzBaseRobotTest.get_agent_and_url   s-    c5!JE3#:zz3r   c                     | j                   D ]]  }| j                  |      \  }}| j                  ||      5  | j                  | j                  j                  ||             d d d        _ y # 1 sw Y   jxY wN)r!   r   )goodr"   subTest
assertTruer   	can_fetchr    s      r   test_good_urlszBaseRobotTest.test_good_urls   sw    99 	CC//4JE3#U3 C 5 5eS ABC C	CC C   ,A..A7	c                     | j                   D ]]  }| j                  |      \  }}| j                  ||      5  | j                  | j                  j                  ||             d d d        _ y # 1 sw Y   jxY wr$   )badr"   r&   assertFalser   r(   r    s      r   test_bad_urlszBaseRobotTest.test_bad_urls$   sy    88 	DC//4JE3#U3 D  !6!6uc!BCD D	DD Dr*   c                 l    | j                  | j                  j                         | j                         y r   )assertEqualr   	site_mapsr   s    r   test_site_mapszBaseRobotTest.test_site_maps*   s#    ..0$..Ar   )__name__
__module____qualname__r   r   r%   r,   r1   r   r"   r)   r.   r3    r   r   r	   r	      s7    JED
CI!
CDBr   r	   c                        e Zd ZdZddgZg dZy)UserAgentWildcardTestzUser-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    /
/test.html)/cyberworld/map/index.htmlz/tmp/xxx	/foo.htmlNr4   r5   r6   r   r%   r,   r7   r   r   r9   r9   .   s    J D
ACr   r9   c                       e Zd ZdZg dZdgZy)CrawlDelayAndCustomAgentTestz# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r:   r;   )cybermapperr<   r<   Nr>   r7   r   r   r@   r@   9   s    J ND'
(Cr   r@   c                   &    e Zd ZdZddgZdgZddgZy)SitemapTesta  # robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r:   r;   r<   z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r4   r5   r6   r   r%   r,   r1   r7   r   r   rC   rC   J   s+    	J D'
(CJEGIr   rC   c                       e Zd ZdZg Zg dZy)RejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r<   r:   /tmp/Nr>   r7   r   r   rE   rE   [   s    J
 D
6Cr   rE   c                       e Zd ZdZdZd Zy)BaseRequestRateTestNc                    | j                   }| j                  | j                  z   D ]!  }| j                  |      \  }}| j	                  ||      5  | j                  |j                  |      | j                         |j                  |      }| j                  || j                         | j                  | j                  |t        j                  j                         | j                  |j                  | j                  j                         | j                  |j                  | j                  j                         d d d        $ y # 1 sw Y   0xY wr$   )r   r%   r,   r"   r&   r0   crawl_delayrequest_rateassertIsInstancer   r   RequestRaterequestsseconds)r   r   r!   r   parsed_request_rates        r   test_request_ratez%BaseRequestRateTest.test_request_ratei   s*   99txx' 	C//4JE3#U3   !3!3E!:D<L<LM&,&9&9%&@#  !4d6G6GH$$0))+**66 $$+44))22 $$+33))11 	 s   C/EE	)r4   r5   r6   rK   rJ   rQ   r7   r   r   rH   rH   e   s    LKr   rH   c                       e Zd ZdZdgZy)EmptyFileTestr
   z/fooN)r4   r5   r6   r   r%   r7   r   r   rS   rS      s    J8Dr   rS   c                   ^    e Zd ZdZdZej                  j                  dd      ZdZ	dgZ
g dZy)	CrawlDelayAndRequestRateTestzUser-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    figtree	         )rV   r=   )/tmpz	/tmp.html/tmp/a.html/a%3cd.html/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r4   r5   r6   r   r   r   r   rM   rK   rJ   r%   r,   r7   r   r   rU   rU      s<    J E%%11!R8LK$%D.Cr   rU   c                       e Zd ZdZy)DifferentAgentTestzFigTree Robot libwww-perl/5.04Nr4   r5   r6   r   r7   r   r   r_   r_      s    ,Er   r_   c                   "    e Zd ZdZdgZg dZdZy)InvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rZ   )rF   r[   r\   r]   z	/a/b.htmlz/%7Ejoe/index.htmlrY   N)r4   r5   r6   r   r%   r,   rJ   r7   r   r   rb   rb      s    J 8D!CKr   rb   c                       e Zd ZdZdgZg Zy)InvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r=   Nr>   r7   r   r   rd   rd      s    J
 =D
Cr   rd   c                        e Zd ZdZdZdgZdgZy)AnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    	Googlebot/folder1/myfile.html/folder1/anotherfile.htmlNr4   r5   r6   r   r   r%   r,   r7   r   r   rf   rf      s     J E"#D&
'Cr   rf   c                       e Zd ZdZdZdgZy)UserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rg   z/something.jpgN)r4   r5   r6   r   r   r,   r7   r   r   rl   rl      s    J E
Cr   rl   c                       e Zd ZdZy)UserAgentGoogleMobileTestzGooglebot-MobileNr`   r7   r   r   rn   rn      s    Er   rn   c                        e Zd ZdZdZdgZdgZy)GoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    	googlebotrh   ri   Nrj   r7   r   r   rp   rp      s     J
 E"#D&
'Cr   rp   c                       e Zd ZdZdgZdgZy)DisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    
/some/pathz/some/path?name=valueNr>   r7   r   r   rs   rs      s    J >D"
#Cr   rs   c                       e Zd ZdZdgZdgZy)UseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathrt   Nr>   r7   r   r   rv   rv      s    J D.Cr   rv   c                       e Zd ZdZdgZdgZy)EmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr>   r7   r   r   rx   rx      s    J
 ?D
Cr   rx   c                   Z    e Zd ZdZej
                  j                  dd      ZdZddgZ	dgZ
y)	DefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rY         r:   r;   r<   N)r4   r5   r6   r   r   r   rM   rK   rJ   r%   r,   r7   r   r   rz   rz     s:    J %%11!R8LKD'
(Cr   rz   c                       e Zd ZdZdZd Zy)StringFormattingTestzUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c                 b    | j                  t        | j                        | j                         y r   )r0   strr   expected_outputr2   s    r   test_string_formattingz+StringFormattingTest.test_string_formatting*  s!    T[[)4+?+?@r   N)r4   r5   r6   r   r   r   r7   r   r   r~   r~     s    	JOAr   r~   c                       e Zd Zd Zd Zy)RobotHandlerc                 (    | j                  dd       y )Ni  zForbidden access)
send_errorr2   s    r   do_GETzRobotHandler.do_GET0  s    /0r   c                      y r   r7   )r   formatargss      r   log_messagezRobotHandler.log_message3  s    r   N)r4   r5   r6   r   r   r7   r   r   r   r   .  s    1r   r   z&Socket server requires working socket.c                   <    e Zd Zd Zd Zej                  d        Zy)PasswordProtectedSiteTestCasec                 \   | j                  t        j                  j                         t	        t
        j                  dft              | _        t        j                  d| j                  j                  ddi      | _        d| j                  _        | j                  j                          y )Nr   zHTTPServer servingpoll_intervalg{Gz?)nametargetkwargsT)
addCleanupr   request
urlcleanupr   r   HOSTr   server	threadingThreadserve_forevertdaemonstartr2   s    r   r   z#PasswordProtectedSiteTestCase.setUp=  sw    112 -"4"4a!8,G!!%;;,, $D)+ r   c                     | j                   j                          | j                  j                          | j                   j	                          y r   )r   shutdownr   joinserver_closer2   s    r   tearDownz&PasswordProtectedSiteTestCase.tearDownM  s/      "r   c                 D   | j                   j                  }dt        j                  z   dz   t	        |d         z   }|dz   }t
        j                  j                         }|j                  |       |j                          | j                  |j                  d|             y )Nzhttp://:r|   z/robots.txt*)r   server_addressr   r   r   r   r   r   set_urlreadr-   r(   )r   addrr!   
robots_urlr   s        r   testPasswordProtectedSitez7PasswordProtectedSiteTestCase.testPasswordProtectedSiteR  s    {{))-,,,s2Sa\A=(
##335s))#z:;r   N)r4   r5   r6   r   r   r   reap_threadsr   r7   r   r   r   r   7  s&     #
 ""< #<r   r   c                   Z    e Zd ZdZdj	                  e      Zed        Zd Zd Z	d Z
d Zy)	NetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc                 &   t        j                  d       t        j                  | j                        5  t
        j                  j                  | j                        | _	        | j                  j                          d d d        y # 1 sw Y   y xY w)Nnetwork)r   requiresr   transient_internetbase_urlr   r   r   r   r   r   )clss    r   
setUpClasszNetworkTestCase.setUpClassc  sa    #--cll; 	++;;CNNKCJJJOO	 	 	s   A	BBc                     dj                  | j                  |t        j                  j	                  |      d   sd      S d      S )Nz{}{}{}r|   r:   r
   )r   r   ospathsplitext)r   r   s     r   r!   zNetworkTestCase.urlj  sA    MM4BGG,<,<T,B1,E
 	
KM
 	
r   c                    | j                  | j                  j                         | j                  | j                  j                         | j	                  | j                  j                         d       | j                  | j                  j                  d             | j                  | j                  j                  d             y )Nr   r   )r-   r   disallow_all	allow_allassertGreatermtimerJ   rK   r2   s    r   
test_basiczNetworkTestCase.test_basico  s    112../4;;,,.2005611#67r   c                    | j                  | j                  j                  d| j                  d                   | j	                  | j                  j                  d| j
                               | j	                  | j                  j                  d| j                  d                   | j	                  | j                  j                  d| j                  d                   | j	                  | j                  j                  d| j                  d                   | j                  | j                  j                  d| j
                               y )Nr   	elsewhereNutchbrianwebstats)r'   r   r(   r!   r-   r   r2   s    r   test_can_fetchzNetworkTestCase.test_can_fetchv  s    --c488K3HIJ..wFG..w8IJK..w8LMN..sDHHZ4HIJ--c4==ABr   c                    t         j                  j                  | j                  d            }|j	                          | j                  |j                         | j                  |j                         | j                  |j                         d       | j                  |j                  d             | j                  |j                  d             y )Nzi-robot.txtr   r   )r   r   r   r!   r   r'   r   r-   r   r0   r   assertIsNonerJ   rK   )r   r   s     r   test_read_404zNetworkTestCase.test_read_404~  s    ##33DHH]4KL((),,-+&,,S12&--c23r   N)r4   r5   r6   r   r   r   classmethodr   r!   r   r   r   r7   r   r   r   r   ]  s@     ,H)00:J 

8C4r   r   __main__)+r   r   r   unittesturllib.robotparserr   testr   test.supportr   r   http.serverr   r   r	   TestCaser9   r@   rC   rE   rH   rS   rU   r_   rb   rd   rf   rl   rn   rp   rs   rv   rx   rz   r~   r   
skipUnlesshas_socket_supportr   requires_working_socketr   r4   mainr7   r   r   <module>r      s+   	 	     & ) :B BDBM8+<+< B)=(2C2C )"G-!2!2 G"7-):): 7- 8'):): 
.#68I8I .$-5 -]H,=,=  	M8+<+< 	
(M83D3D 
(M8+<+<  5 
(M8+<+< 
($mX->-> $
M83D3D 
=(*;*; 
)*H,=,= 
)A=(*;*; A4)  ,<H$5$5 <	<D !  "'4h'' '4 #'4R ZHMMO r   