[code.view]

[top] / python / PyMOTW / robotparser / robotparser_simple.py

     #!/usr/bin/env python
     # encoding: utf-8
     #
     # Copyright (c) 2009 Doug Hellmann All rights reserved.
     #
     """Report which paths a site's robots.txt lets a user agent fetch.

     Loads robots.txt from URL_BASE and, for every entry in PATHS, prints
     the can_fetch() result for both the bare path and the full URL.
     """
     #end_pymotw_header
     
     import robotparser
     import urlparse
     
     # User-agent string checked against the rules in robots.txt.
     AGENT_NAME = 'PyMOTW'
     # Site whose robots.txt is downloaded and queried.
     URL_BASE = 'http://www.doughellmann.com/'

     # Handing the URL to the constructor is the same as calling set_url()
     # on a freshly created parser; read() then fetches and parses the file.
     parser = robotparser.RobotFileParser(
         urlparse.urljoin(URL_BASE, 'robots.txt'))
     parser.read()

     # Paths to test, relative to URL_BASE.
     PATHS = [
         '/',
         '/PyMOTW/',
         '/admin/',
         '/downloads/PyMOTW-1.92.tar.gz',
         ]

     for path in PATHS:
         url = urlparse.urljoin(URL_BASE, path)
         # Query the bare path first, then the absolute URL built from it.
         for target in (path, url):
             print('%6s : %s' % (parser.can_fetch(AGENT_NAME, target), target))
         # Blank separator line between paths.
         print('')
     

[top] / python / PyMOTW / robotparser / robotparser_simple.py

contact | logmethods.com