Package gluon :: Module rewrite
[hide private]
[frames] | no frames]

Source Code for Module gluon.rewrite

   1  #!/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3   
   4  """ 
   5  This file is part of the web2py Web Framework 
   6  Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> 
   7  License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) 
   8   
   9  gluon.rewrite parses incoming URLs and formats outgoing URLs for gluon.html.URL. 
  10   
  11  In addition, it rewrites both incoming and outgoing URLs based on the (optional) user-supplied routes.py, 
  12  which also allows for rewriting of certain error messages. 
  13   
  14  routes.py supports two styles of URL rewriting, depending on whether 'routers' is defined. 
  15  Refer to router.example.py and routes.example.py for additional documentation. 
  16   
  17  """ 
  18   
  19  import os 
  20  import re 
  21  import logging 
  22  import traceback 
  23  import threading 
  24  import urllib 
  25  from storage import Storage, List 
  26  from http import HTTP 
  27  from fileutils import abspath, read_file 
  28  from settings import global_settings 
  29   
  30  isdir = os.path.isdir 
  31  isfile = os.path.isfile 
  32  exists = os.path.exists 
  33  pjoin = os.path.join 
  34   
  35  logger = logging.getLogger('web2py.rewrite') 
  36  THREAD_LOCAL = threading.local()  # thread-local storage for routing params 
  37   
  38  regex_at = re.compile(r'(?<!\\)\$[a-zA-Z]\w*') 
  39  regex_anything = re.compile(r'(?<!\\)\$anything') 
  40  regex_redirect = re.compile(r'(\d+)->(.*)') 
  41  regex_full_url = re.compile( 
  42      r'^(?P<scheme>http|https|HTTP|HTTPS)\://(?P<host>[^/]*)(?P<uri>.*)') 
  43  regex_version = re.compile(r'^(_[\d]+\.[\d]+\.[\d]+)$') 
  44  # pattern to replace spaces with underscore in URL 
  45  #   also the html escaped variants '+' and '%20' are covered 
  46  regex_space = re.compile('(\+|\s|%20)+') 
  47   
  48  # pattern to find valid paths in url /application/controller/... 
  49  #   this could be: 
  50  #     for static pages: 
  51  #        /<b:application>/static/<x:file> 
  52  #     for dynamic pages: 
  53  #        /<a:application>[/<c:controller>[/<f:function>[.<e:ext>][/<s:args>]]] 
  54  #   application, controller, function and ext may only contain [a-zA-Z0-9_] 
  55  #   file and args may also contain '-', '=', '.' and '/' 
  56  #   apps in routes_apps_raw must parse raw_args into args 
  57   
  58  regex_url = re.compile('^/((?P<a>\w+)(/(?P<c>\w+)(/(?P<z>(?P<f>\w+)(\.(?P<e>[\w.]+))?(?P<s>.*)))?)?)?$') 
  59  regex_args = re.compile('^[/\w@=-]*(\.[/\w@=-]+)*$') 
def _router_default():
    """Build and return a fresh Storage holding the default base router."""
    router = Storage()
    router.default_application = 'init'
    router.applications = 'ALL'
    router.default_controller = 'default'
    router.controllers = 'DEFAULT'
    router.default_function = 'index'
    router.functions = dict()
    router.default_language = None
    router.languages = None
    router.root_static = ['favicon.ico', 'robots.txt']
    router.map_static = None
    router.domains = None
    router.exclusive_domain = False
    router.map_hyphen = False
    router.acfe_match = r'\w+$'  # legal app/ctlr/fcn/ext
    #
    #  Implementation note:
    #  The file_match & args_match patterns use look-behind to avoid
    #  pathological backtracking from nested patterns.
    #
    # legal static subpath
    router.file_match = r'([-+=@$%\w]|(?<=[-+=@$%\w])[./])*$'
    # legal arg in args
    router.args_match = r'([\w@ -]|(?<=[\w@ -])[.=])*$'
    return router
88
def _params_default(app=None):
    """Build and return a fresh Storage of default rewrite parameters.

    When `app` is given it is used both as the parameter set's name and as
    its default application; otherwise the name is "BASE" and the default
    application is "init".
    """
    ticket_page = (
        '<html><body><h1>Internal error</h1>Ticket issued: <a href="/admin/default/ticket/%(ticket)s" target="_blank">%(ticket)s</a></body><!-- this is junk text else IE does not display the page: '
        + ('x' * 512) + ' //--></html>')
    return Storage(
        name=app or "BASE",
        default_application=app or "init",
        default_controller="default",
        default_function="index",
        routes_app=[],
        routes_in=[],
        routes_out=[],
        routes_onerror=[],
        routes_apps_raw=[],
        error_handler=None,
        error_message='<html><body><h1>%s</h1></body></html>',
        error_message_ticket=ticket_page,
        routers=None,
        logging='off',
    )
# per-application rewrite parameters, keyed by application name
params_apps = {}
# base regex rewrite parameters
params = _params_default(app=None)
# the current thread defaults to the base regex rewrite parameters
THREAD_LOCAL.routes = params
# router-style configuration; stays None until load() installs one
routers = None
def log_rewrite(string):
    """Emit a rewrite log message at the level selected by params.logging.

    'off' (or any false value) suppresses the message, 'print' writes it to
    stdout, 'info'/'warning'/'error'/'critical' use the matching logger
    method, and 'debug' or any unrecognized value logs at debug level.
    """
    level = params.logging
    if level == 'debug':  # catch common cases first
        logger.debug(string)
        return
    if level == 'off' or not level:
        return
    if level == 'print':
        # single parenthesized argument: prints the same text on Python 2 and 3
        print(string)
        return
    if level in ('info', 'warning', 'error', 'critical'):
        # the level name is also the name of the logger method to call
        getattr(logger, level)(string)
        return
    logger.debug(string)
# every key that may appear in a router definition
ROUTER_KEYS = set([
    'default_application', 'applications',
    'default_controller', 'controllers',
    'default_function', 'functions',
    'default_language', 'languages',
    'domain', 'domains', 'root_static', 'path_prefix',
    'exclusive_domain', 'map_hyphen', 'map_static',
    'acfe_match', 'file_match', 'args_match',
])

# keys that are only allowed in the BASE router
ROUTER_BASE_KEYS = set([
    'applications', 'default_application',
    'domains', 'path_prefix',
])
#  The external interface to rewrite consists of:
#
#  load: load routing configuration file(s)
#  url_in: parse and rewrite incoming URL
#  url_out: assemble and rewrite outgoing URL
#
#  THREAD_LOCAL.routes.default_application
#  THREAD_LOCAL.routes.error_message
#  THREAD_LOCAL.routes.error_message_ticket
#  THREAD_LOCAL.routes.try_redirect_on_error
#  THREAD_LOCAL.routes.error_handler
#
#  filter_url: helper for doctest & unittest
#  filter_err: helper for doctest & unittest
#  regex_filter_out: doctest


def fixup_missing_path_info(environ):
    """Fill in missing PATH_INFO / QUERY_STRING / REQUEST_URI / HTTP_HOST.

    `environ` is modified in place:

    - if PATH_INFO is empty but REQUEST_URI is set, PATH_INFO and
      QUERY_STRING are derived from REQUEST_URI;
    - otherwise, if REQUEST_URI is empty, it is rebuilt from PATH_INFO and
      QUERY_STRING;
    - if HTTP_HOST is empty it is set to 'SERVER_NAME:SERVER_PORT'.
    """
    eget = environ.get
    path_info = eget('PATH_INFO')
    request_uri = eget('REQUEST_URI')
    if not path_info and request_uri:
        # for fcgi, get path_info and
        # query_string from request_uri
        # Split on the FIRST '?' only, so a query string that itself
        # contains '?' is kept whole instead of being truncated.
        path_info, _, query_string = request_uri.partition('?')
        environ['PATH_INFO'] = path_info
        environ['QUERY_STRING'] = query_string
    elif not request_uri:
        query_string = eget('QUERY_STRING')
        if query_string:
            # a missing PATH_INFO must not be rendered as the text 'None'
            environ['REQUEST_URI'] = '%s?%s' % (path_info or '', query_string)
        else:
            environ['REQUEST_URI'] = path_info
    if not eget('HTTP_HOST'):
        environ['HTTP_HOST'] = \
            '%s:%s' % (eget('SERVER_NAME'), eget('SERVER_PORT'))
184
def url_in(request, environ):
    """Parse and rewrite an incoming URL.

    Dispatches to map_url_in when routers are configured and to
    regex_url_in otherwise, returning whatever the chosen handler returns.
    """
    handler = map_url_in if routers else regex_url_in
    return handler(request, environ)
191
def url_out(request, environ, application, controller, function,
            args, other, scheme, host, port):
    """Assemble and rewrite an outgoing URL.

    The path is produced by map_url_out when routers are configured and by
    regex_filter_out otherwise. A scheme and host are prepended when an
    absolute URL is requested through `scheme`, `host` or `port`.
    """
    if not routers:
        plain = '/%s/%s/%s%s' % (application, controller, function, other)
        url = regex_filter_out(plain, environ)
    else:
        acf = map_url_out(request, environ, application, controller,
                          function, args, other, scheme, host, port)
        url = '%s%s' % (acf, other)
    #
    #  fill in scheme and host if absolute URL is requested
    #  scheme can be a string, eg 'http', 'https', 'ws', 'wss'
    #
    if host is True:
        host = request.env.http_host
    elif host is None and (scheme or port is not None):
        host = request.env.http_host
    if scheme is True or not scheme:
        if request:
            scheme = request.env.get('wsgi_url_scheme', 'http').lower()
        else:
            scheme = 'http'
    if not host:
        return url
    if port:
        # an explicit port replaces any port already present in host
        host_port = host.split(':', 1)[0] + ':%s' % port
    else:
        host_port = host
    return '%s://%s%s' % (scheme, host_port, url)
216
def try_rewrite_on_error(http_response, request, environ, ticket=None):
    """
    called from main.wsgibase to rewrite the http response.

    Returns a (response, environ) pair. The response is the original
    http_response when nothing applies, a 303 HTTP object when the matching
    routes_onerror target is an absolute http(s) URL, or None when environ
    has been rewritten to point at an internal error-handling path.
    """
    status = int(str(http_response.status).split()[0])
    if not (status >= 399 and THREAD_LOCAL.routes.routes_onerror):
        # do nothing!
        return http_response, environ

    wanted = set(['%s/%s' % (request.application, status),
                  '%s/*' % (request.application),
                  '*/%s' % (status),
                  '*/*'])
    for (key, uri) in THREAD_LOCAL.routes.routes_onerror:
        if key not in wanted:
            continue
        if uri == '!':
            # do nothing!
            return http_response, environ
        path_info, has_query, query_string = uri.partition('?')
        if has_query:
            query_string += '&'
        query_string += \
            'code=%s&ticket=%s&requested_uri=%s&request_url=%s' % \
            (status, ticket, urllib.quote_plus(
                request.env.request_uri), request.url)
        if uri.startswith(('http://', 'https://')):
            # make up a response
            url = path_info + '?' + query_string
            message = 'You are being redirected <a href="%s">here</a>'
            return HTTP(303, message % url, Location=url), environ
        error_raising_path = environ['PATH_INFO']
        # Rewrite routes_onerror path.
        path_info = '/' + path_info.lstrip('/')  # add leading '/' if missing
        environ['PATH_INFO'] = path_info
        error_handling_path = url_in(request, environ)[2]['PATH_INFO']
        # Avoid infinite loop.
        if error_handling_path == error_raising_path:
            continue
        # wsgibase will be called recursively with the routes_onerror path.
        environ['PATH_INFO'] = path_info
        environ['QUERY_STRING'] = query_string
        environ['WEB2PY_STATUS_CODE'] = status
        return None, environ
    # do nothing!
    return http_response, environ
264
def try_redirect_on_error(http_object, request, ticket=None):
    """called from main.wsgibase to rewrite the http response

    Returns a 303 HTTP redirect to the first matching routes_onerror
    target, or http_object unchanged when no entry matches or the matching
    target is '!'.
    """
    status = int(str(http_object.status).split()[0])
    if not (status > 399 and THREAD_LOCAL.routes.routes_onerror):
        return http_object

    wanted = set(['%s/%s' % (request.application, status),
                  '%s/*' % (request.application),
                  '*/%s' % (status),
                  '*/*'])
    for (key, redir) in THREAD_LOCAL.routes.routes_onerror:
        if key not in wanted:
            continue
        if redir == '!':
            return http_object
        if '?' in redir:
            template = '%s&code=%s&ticket=%s&requested_uri=%s&request_url=%s'
        else:
            template = '%s?code=%s&ticket=%s&requested_uri=%s&request_url=%s'
        url = template % (redir, status, ticket,
                          urllib.quote_plus(request.env.request_uri),
                          request.url)
        return HTTP(303, 'You are being redirected <a href="%s">here</a>' % url, Location=url)
    return http_object
290
def load(routes='routes.py', app=None, data=None, rdict=None):
    """
    load: read (if file) and parse routes
    store results in params
    (called from main.py at web2py initialization time)
    If data is present, it's used instead of the routes.py contents.
    If rdict is present, it must be a dict to be used for routers (unit test)

    With app=None the base configuration is (re)loaded, every application
    folder is scanned, and load() is called again for each application that
    has its own routes file. With an app name, only that application's
    parameters are loaded and stored in params_apps.

    Returns None; returns early without changes when the routes file does
    not exist. A SyntaxError in the routes source is logged and re-raised.
    """
    global params
    global routers
    global params_apps
    if app is None:
        # reinitialize
        params_apps = dict()
        params = _params_default(app=None)  # regex rewrite parameters
        THREAD_LOCAL.routes = params  # default to base regex rewrite parameters
        routers = None

    if isinstance(rdict, dict):
        symbols = dict(routers=rdict)
        path = 'rdict'
    else:
        if data is not None:
            path = 'routes'
        else:
            if app is None:
                path = abspath(routes)
            else:
                path = abspath('applications', app, routes)
            if not exists(path):
                return
            data = read_file(path).replace('\r\n', '\n')

        symbols = dict(app=app)
        # NOTE(review): the routes source is executed as Python code, so it
        # must only ever come from a trusted location.
        try:
            # function-call form: valid on both Python 2 and Python 3
            exec(data + '\n', symbols)
        except SyntaxError:
            logger.error(
                '%s has a syntax error and will not be loaded\n' % path
                + traceback.format_exc())
            # bare raise keeps the original traceback ('raise e' loses it
            # on Python 2)
            raise

    p = _params_default(app)

    for sym in ('routes_app', 'routes_in', 'routes_out'):
        if sym in symbols:
            for items in symbols[sym]:
                p[sym].append(compile_regex(*items))
    for sym in ('routes_onerror', 'routes_apps_raw',
                'error_handler', 'error_message', 'error_message_ticket',
                'default_application', 'default_controller', 'default_function',
                'logging'):
        if sym in symbols:
            p[sym] = symbols[sym]
    if 'routers' in symbols:
        p.routers = Storage(symbols['routers'])
        for key in p.routers:
            if isinstance(p.routers[key], dict):
                p.routers[key] = Storage(p.routers[key])

    if app is None:
        params = p  # install base rewrite parameters
        THREAD_LOCAL.routes = params  # install default as current routes
        #
        #  create the BASE router if routers in use
        #
        routers = params.routers  # establish routers if present
        if isinstance(routers, dict):
            routers = Storage(routers)
        if routers is not None:
            router = _router_default()
            if routers.BASE:
                router.update(routers.BASE)
            routers.BASE = router

        #  scan each app in applications/
        #    create a router, if routers are in use
        #    parse the app-specific routes.py if present
        #
        all_apps = []
        apppath = abspath('applications')
        for appname in os.listdir(apppath):
            if not appname.startswith('.') and \
                    isdir(abspath(apppath, appname)) and \
                    isdir(abspath(apppath, appname, 'controllers')):
                all_apps.append(appname)
                if routers:
                    router = Storage(routers.BASE)  # new copy
                    if appname in routers:
                        for key in routers[appname].keys():
                            if key in ROUTER_BASE_KEYS:
                                raise SyntaxError("BASE-only key '%s' in router '%s'" % (key, appname))
                        router.update(routers[appname])
                    routers[appname] = router
                if exists(abspath('applications', appname, routes)):
                    load(routes, appname)

        if routers:
            load_routers(all_apps)

    else:  # app
        params_apps[app] = p
        if routers and p.routers:
            if app in p.routers:
                routers[app].update(p.routers[app])

    log_rewrite('URL rewrite is on. configuration in %s' % path)
399
def compile_regex(k, v, env=None):
    """
    Preprocess and compile the regular expressions in routes_app/in/out
    The resulting regex will match a pattern of the form:

        [remote address]:[protocol]://[host]:[method] [path]

    We allow abbreviated regexes on input; here we try to complete them.

    k is the (possibly abbreviated) pattern, v the replacement string and
    env an optional dict stored alongside them. Returns a tuple
    (compiled_regex, replacement, env_dict). Raises SyntaxError when the
    pattern has ':'-separated parts but no path starting with '/'.
    """
    k0 = k  # original k for error reporting
    # bracket regex in ^...$ if not already done
    # (startswith/endswith also cope with an empty pattern, where k[0]
    # would raise IndexError)
    if not k.startswith('^'):
        k = '^%s' % k
    if not k.endswith('$'):
        k = '%s$' % k
    # if there are no :-separated parts, prepend a catch-all for the IP address
    if ':' not in k:
        k = '^.*?:https?://[^:/]+:[a-z]+ %s' % k[1:]
    # if there's no ://, provide a catch-all for the protocol, host & method
    if '://' not in k:
        i = k.find(':/')
        if i < 0:
            raise SyntaxError("routes pattern syntax error: path needs leading '/' [%s]" % k0)
        k = r'%s:https?://[^:/]+:[a-z]+ %s' % (k[:i], k[i + 1:])
    # Substitute each placeholder exactly where the regex matched it.
    # Textual str.replace() would corrupt a name that has another name as
    # its prefix ($a inside $ab) and would also rewrite escaped \$name.
    # Callables are used as replacements so backslashes are taken literally.
    # $anything -> ?P<anything>.*
    k = regex_anything.sub(lambda m: '(?P<anything>.*)', k)
    # $a (etc) -> ?P<a>\w+
    k = regex_at.sub(lambda m: r'(?P<%s>\w+)' % m.group(0)[1:], k)
    # same for replacement pattern, but with \g
    v = regex_at.sub(lambda m: r'\g<%s>' % m.group(0)[1:], v)
    return (re.compile(k, re.DOTALL), v, env or {})
436
def load_routers(all_apps):
    """load-time post-processing of routers

    Normalizes every router in the module-level `routers` in place:
    validates keys, converts controllers/languages/functions to sets,
    compiles the validation patterns, splits path_prefix into elements and
    rewrites BASE.domains into a dict keyed by (domain, port) tuples.

    `all_apps` is the list of known application names; routers for names
    not in it are appended to it. Raises SyntaxError for BASE-only or
    unknown router keys and for unknown applications in domains.
    """

    for app in routers:
        # initialize apps with routers that aren't present,
        # on behalf of unit tests
        if app not in all_apps:
            all_apps.append(app)
            router = Storage(routers.BASE)  # new copy
            if app != 'BASE':
                keys = set(routers[app]).intersection(ROUTER_BASE_KEYS)
                if keys:
                    raise SyntaxError("BASE-only key(s) %s in router '%s'" % (
                        tuple(keys), app))
            router.update(routers[app])
            routers[app] = router
        router = routers[app]
        keys = set(router).difference(ROUTER_KEYS)
        if keys:
            raise SyntaxError("unknown key(s) %s in router '%s'" % (
                tuple(keys), app))
        if not router.controllers:
            router.controllers = set()
        elif not isinstance(router.controllers, str):
            router.controllers = set(router.controllers)
        if router.languages:
            router.languages = set(router.languages)
        else:
            router.languages = set()
        if router.functions:
            if isinstance(router.functions, (set, tuple, list)):
                functions = set(router.functions)
                if isinstance(router.default_function, str):
                    functions.add(
                        router.default_function)  # legacy compatibility
                router.functions = {router.default_controller: functions}
            for controller in router.functions:
                router.functions[controller] = set(
                    router.functions[controller])
        else:
            router.functions = dict()
        if app != 'BASE':
            for base_only in ROUTER_BASE_KEYS:
                router.pop(base_only, None)
            if 'domain' in router:
                # BASE.domains defaults to None; create the mapping on
                # first use instead of failing with a TypeError
                if not routers.BASE.domains:
                    routers.BASE.domains = dict()
                routers.BASE.domains[router.domain] = app
            if isinstance(router.controllers, str) and router.controllers == 'DEFAULT':
                router.controllers = set()
                if isdir(abspath('applications', app)):
                    cpath = abspath('applications', app, 'controllers')
                    for cname in os.listdir(cpath):
                        if isfile(abspath(cpath, cname)) and cname.endswith('.py'):
                            router.controllers.add(cname[:-3])
            if router.controllers:
                router.controllers.add('static')
                router.controllers.add(router.default_controller)

    if isinstance(routers.BASE.applications, str) and routers.BASE.applications == 'ALL':
        routers.BASE.applications = list(all_apps)
    if routers.BASE.applications:
        routers.BASE.applications = set(routers.BASE.applications)
    else:
        routers.BASE.applications = set()

    for app in routers.keys():
        # set router name
        router = routers[app]
        router.name = app
        # compile URL validation patterns
        router._acfe_match = re.compile(router.acfe_match)
        router._file_match = re.compile(router.file_match)
        if router.args_match:
            router._args_match = re.compile(router.args_match)
        # convert path_prefix to a list of path elements
        if router.path_prefix:
            if isinstance(router.path_prefix, str):
                router.path_prefix = router.path_prefix.strip('/').split('/')

    #  rewrite BASE.domains as tuples
    #
    #      key:    'domain[:port]' -> (domain, port)
    #      value:  'application[/controller] -> (application, controller)
    #      (port and controller may be None)
    #
    domains = dict()
    if routers.BASE.domains:
        # items() works on both Python 2 and Python 3
        for (d, a) in routers.BASE.domains.items():
            (domain, app) = (d.strip(':'), a.strip('/'))
            if ':' in domain:
                (domain, port) = domain.split(':')
            else:
                port = None
            if '/' in app:
                (app, ctlr) = app.split('/', 1)
            else:
                ctlr = None
            if ctlr and '/' in ctlr:
                (ctlr, fcn) = ctlr.split('/')
            else:
                fcn = None
            if app not in all_apps and app not in routers:
                raise SyntaxError("unknown app '%s' in domains" % app)
            domains[(domain, port)] = (app, ctlr, fcn)
    routers.BASE.domains = domains
542
def regex_uri(e, regexes, tag, default=None):
    """filter incoming URI against a list of regexes

    Builds a key of the form 'remote:scheme://host:method path' from the
    environment `e` and returns the rewrite produced by the first matching
    (regex, value, custom_env) entry, after merging its custom_env into
    `e`. Returns `default` when nothing matches. `tag` only labels the log
    output.
    """
    path = e['PATH_INFO']
    host = e.get('HTTP_HOST', e.get('SERVER_NAME', 'localhost')).lower()
    colon = host.find(':')
    if colon > 0:
        # drop the port
        host = host[:colon]
    remote = e.get('REMOTE_ADDR', 'localhost')
    scheme = e.get('wsgi.url_scheme', 'http').lower()
    method = e.get('REQUEST_METHOD', 'get').lower()
    key = '%s:%s://%s:%s %s' % (remote, scheme, host, method, path)
    for (regex, value, custom_env) in regexes:
        if not regex.match(key):
            continue
        e.update(custom_env)
        rewritten = regex.sub(value, key)
        log_rewrite('%s: [%s] [%s] -> %s' % (tag, key, value, rewritten))
        return rewritten
    log_rewrite('%s: [%s] -> %s (not rewritten)' % (tag, key, default))
    return default
563
def regex_select(env=None, app=None, request=None):
    """
    select a set of regex rewrite params for the current request

    Stores the chosen parameters in THREAD_LOCAL.routes and returns the
    application name used for the selection (for doctest).
    """
    if not app and env and params.routes_app:
        if routers:
            map_url_in(request, env, app=True)
        else:
            app = regex_uri(env, params.routes_app, "routes_app")
        selected = params_apps.get(app, params)
    elif app:
        selected = params_apps.get(app, params)
    else:
        selected = params  # default to base rewrite parameters
    THREAD_LOCAL.routes = selected
    log_rewrite("select routing parameters: %s" % THREAD_LOCAL.routes.name)
    return app  # for doctest
581
def regex_filter_in(e):
    """regex rewrite incoming URL

    Records the original URI in e['WEB2PY_ORIGINAL_URI'], applies the
    current thread's routes_in (if any) to PATH_INFO and QUERY_STRING,
    rebuilds REQUEST_URI and returns `e`. Raises HTTP when the rewritten
    value has the '<status>-><location>' form.
    """
    routes = THREAD_LOCAL.routes
    query = e.get('QUERY_STRING', None)
    original_suffix = '?' + query if query else ''
    e['WEB2PY_ORIGINAL_URI'] = e['PATH_INFO'] + original_suffix
    if routes.routes_in:
        path = regex_uri(e, routes.routes_in,
                         "routes_in", e['PATH_INFO'])
        rmatch = regex_redirect.match(path)
        if rmatch:
            raise HTTP(int(rmatch.group(1)), location=rmatch.group(2))
        pieces = path.split('?', 1)
        e['PATH_INFO'] = pieces[0]
        if len(pieces) > 1:
            # a query introduced by the rewrite goes in front of the
            # incoming one
            rewritten_query = pieces[1]
            query = rewritten_query + '&' + query if query else rewritten_query
            e['QUERY_STRING'] = query
    suffix = '?' + query if query else ''
    e['REQUEST_URI'] = e['PATH_INFO'] + suffix
    return e
604
def sluggify(key):
    """Return `key` lower-cased with every '.' replaced by '_'."""
    lowered = key.lower()
    return '_'.join(lowered.split('.'))
608
def invalid_url(routes):
    """Raise an HTTP 400 'invalid request' built from routes.error_message."""
    body = routes.error_message % 'invalid request'
    raise HTTP(400, body, web2py_error='invalid path')
613
def regex_url_in(request, environ):
    """rewrite and parse incoming URL

    Returns a tuple (static_file, version, environ). For a static request
    static_file is the absolute path of the requested file and version is
    the '_x.y.z' path element if one was present; for a dynamic request
    both are None and request.application/controller/function/extension/
    args are filled in. Invalid paths raise HTTP 400 through invalid_url.
    """

    # ##################################################
    # select application
    # rewrite URL if routes_in is defined
    # update request.env
    # ##################################################

    regex_select(env=environ, request=request)
    routes = THREAD_LOCAL.routes
    if routes.routes_in:
        environ = regex_filter_in(environ)
    # items() works on both Python 2 and Python 3
    request.env.update(
        (sluggify(k), v) for k, v in environ.items())

    # ##################################################
    # serve if a static file
    # ##################################################

    path = request.env.path_info.replace('\\', '/') or '/'
    path = regex_space.sub('_', path)
    if path.endswith('/') and len(path) > 1:
        path = path[:-1]
    match = regex_url.match(path)
    if not match:
        invalid_url(routes)
    request.raw_args = (match.group('s') or '')
    if request.raw_args.startswith('/'):
        request.raw_args = request.raw_args[1:]
    if match.group('c') == 'static':
        application = match.group('a')
        version, filename = None, match.group('z')
        if not filename:
            # '/app/static' names no file at all
            invalid_url(routes)
        items = filename.split('/', 1)
        # only strip a version element when a file name follows it
        if len(items) == 2 and regex_version.match(items[0]):
            version, filename = items
        static_folder = os.path.abspath(
            pjoin(request.env.applications_parent,
                  'applications', application, 'static'))
        static_file = os.path.abspath(pjoin(static_folder, filename))
        # The file must live INSIDE the static folder. Comparing against
        # the folder plus a path separator stops '..' sequences from
        # reaching sibling folders whose name merely starts with 'static'.
        if not static_file.startswith(static_folder + os.sep):
            invalid_url(routes)
        return (static_file, version, environ)
    else:
        # ##################################################
        # parse application, controller and function
        # ##################################################
        request.application = match.group('a') or routes.default_application
        request.controller = match.group('c') or routes.default_controller
        request.function = match.group('f') or routes.default_function
        request.raw_extension = match.group('e')
        request.extension = request.raw_extension or 'html'
        if request.application in routes.routes_apps_raw:
            # application is responsible for parsing args
            request.args = None
        elif not regex_args.match(request.raw_args):
            invalid_url(routes)
        elif request.raw_args:
            request.args = List(request.raw_args.split('/'))
        else:
            request.args = List([])
    return (None, None, environ)
675
def regex_filter_out(url, e=None):
    """regex rewrite outgoing URL

    Returns `url` unchanged when routers are in use or when no routes_out
    entry matches; otherwise returns the rewrite produced by the first
    matching entry, with the original query string (if any) re-attached.
    """
    if not hasattr(THREAD_LOCAL, 'routes'):
        regex_select()  # ensure routes is set (for application threads)
    routes = THREAD_LOCAL.routes
    if routers:
        return url  # already filtered
    if routes.routes_out:
        path, question, query = url.partition('?')
        if e:
            host = e.get('http_host', 'localhost').lower()
            colon = host.find(':')
            if colon > 0:
                host = host[:colon]
            key = '%s:%s://%s:%s %s' % \
                (e.get('remote_addr', ''),
                 e.get('wsgi_url_scheme', 'http').lower(), host,
                 e.get('request_method', 'get').lower(), path)
        else:
            key = ':http://localhost:get %s' % path
        for (regex, value, tmp) in routes.routes_out:
            if not regex.match(key):
                continue
            # question is '?' only when url contained one
            rewritten = regex.sub(value, key) + question + query
            log_rewrite('routes_out: [%s] -> %s' % (url, rewritten))
            return rewritten
    log_rewrite('routes_out: [%s] not rewritten' % url)
    return url
704
def filter_url(url, method='get', remote='0.0.0.0',
               out=False, app=False, lang=None,
               domain=(None, None), env=False, scheme=None,
               host=None, port=None):
    """
    doctest/unittest interface to regex_filter_in() and regex_filter_out()

    `url` must be a full http(s) URL. With app=True only the application
    is determined; with out=True the URL is rewritten as an outgoing URL;
    otherwise it is rewritten as an incoming URL and a summary string is
    returned (or request.env when env is true).
    """
    match = regex_full_url.match(url)
    urlscheme = match.group('scheme').lower()
    urlhost = match.group('host').lower()
    uri = match.group('uri')
    if isinstance(domain, str):
        domain = (domain, None)
    # everything before the first '?' is the path, the rest is the query
    path_info, _, query_string = uri.partition('?')
    path_info = urllib.unquote(path_info)   # simulate server
    e = {
        'REMOTE_ADDR': remote,
        'REQUEST_METHOD': method,
        'wsgi.url_scheme': urlscheme,
        'HTTP_HOST': urlhost,
        'REQUEST_URI': uri,
        'PATH_INFO': path_info,
        'QUERY_STRING': query_string,
        #for filter_out request.env use lowercase
        'remote_addr': remote,
        'request_method': method,
        'wsgi_url_scheme': urlscheme,
        'http_host': urlhost
    }

    request = Storage()
    e["applications_parent"] = global_settings.applications_parent
    request.env = Storage(e)
    request.uri_language = lang

    #  determine application only
    #
    if app:
        if routers:
            return map_url_in(request, e, app=True)
        return regex_select(e)

    #  rewrite outbound URL
    #
    if out:
        (request.env.domain_application,
         request.env.domain_controller) = domain
        items = path_info.lstrip('/').split('/')
        if items[-1] == '':
            items.pop()  # adjust trailing empty args
        assert len(items) >= 3, "at least /a/c/f is required"
        a, c, f = items[0], items[1], items[2]
        del items[:3]  # what is left are the args
        if not routers:
            return regex_filter_out(uri, e)
        acf = map_url_out(
            request, None, a, c, f, items, None, scheme, host, port)
        if items:
            url = '%s/%s' % (acf, '/'.join(items))
            if items[-1] == '':
                url += '/'
        else:
            url = acf
        if query_string:
            url += '?' + query_string
        return url

    #  rewrite inbound URL
    #
    (static, version, e) = url_in(request, e)
    if static:
        return static
    parts = ["/%s/%s/%s" % (
        request.application, request.controller, request.function)]
    if request.extension and request.extension != 'html':
        parts.append(".%s" % request.extension)
    if request.args:
        parts.append(" %s" % request.args)
    if e['QUERY_STRING']:
        parts.append(" ?%s" % e['QUERY_STRING'])
    if request.uri_language:
        parts.append(" (%s)" % request.uri_language)
    if env:
        return request.env
    return ''.join(parts)
795
def filter_err(status, application='app', ticket='tkt'):
    """doctest/unittest interface to routes_onerror

    Returns the redirection URL of the first matching routes_onerror
    entry, or `status` itself when no entry applies or the matching target
    is '!'.
    """
    routes = THREAD_LOCAL.routes
    if not (status > 399 and routes.routes_onerror):
        return status  # no action
    wanted = set(['%s/%s' % (application, status),
                  '%s/*' % (application),
                  '*/%s' % (status),
                  '*/*'])
    for (key, redir) in routes.routes_onerror:
        if key not in wanted:
            continue
        if redir == '!':
            return status  # no action
        separator = '&' if '?' in redir else '?'
        return redir + separator + 'code=%s&ticket=%s' % (status, ticket)  # redirection
    return status  # no action
815
816 # router support 817 # 818 819 820 -class MapUrlIn(object):
821 "logic for mapping incoming URLs" 822
823 - def __init__(self, request=None, env=None):
824 "initialize a map-in object" 825 self.request = request 826 self.env = env 827 828 self.router = None 829 self.application = None 830 self.language = None 831 self.controller = None 832 self.function = None 833 self.extension = 'html' 834 835 self.controllers = set() 836 self.functions = dict() 837 self.languages = set() 838 self.default_language = None 839 self.map_hyphen = False 840 self.exclusive_domain = False 841 842 path = self.env['PATH_INFO'] 843 self.query = self.env.get('QUERY_STRING', None) 844 path = path.lstrip('/') 845 self.env['PATH_INFO'] = '/' + path 846 self.env['WEB2PY_ORIGINAL_URI'] = self.env['PATH_INFO'] + ( 847 self.query and ('?' + self.query) or '') 848 849 # to handle empty args, strip exactly one trailing slash, if present 850 # .../arg1// represents one trailing empty arg 851 # 852 if path.endswith('/'): 853 path = path[:-1] 854 self.args = List(path and path.split('/') or []) 855 856 # see http://www.python.org/dev/peps/pep-3333/#url-reconstruction for URL composition 857 self.remote_addr = self.env.get('REMOTE_ADDR', 'localhost') 858 self.scheme = self.env.get('wsgi.url_scheme', 'http').lower() 859 self.method = self.env.get('REQUEST_METHOD', 'get').lower() 860 (self.host, self.port) = (self.env.get('HTTP_HOST'), None) 861 if not self.host: 862 (self.host, self.port) = ( 863 self.env.get('SERVER_NAME'), self.env.get('SERVER_PORT')) 864 if not self.host: 865 (self.host, self.port) = ('localhost', '80') 866 if ':' in self.host: 867 (self.host, self.port) = self.host.rsplit(':',1) # for ipv6 support 868 if not self.port: 869 self.port = '443' if self.scheme == 'https' else '80'
870
871 - def map_prefix(self):
872 "strip path prefix, if present in its entirety" 873 prefix = routers.BASE.path_prefix 874 if prefix: 875 prefixlen = len(prefix) 876 if prefixlen > len(self.args): 877 return 878 for i in xrange(prefixlen): 879 if prefix[i] != self.args[i]: 880 return # prefix didn't match 881 self.args = List(self.args[prefixlen:]) # strip the prefix
882
def map_app(self):
    """
    Determine the application name and select the router that serves it.

    The first (hyphen-mapped) arg is taken as the application when it names
    one; otherwise a (host, port) or (host, None) entry in the base router's
    domains decides; otherwise the base default application is used.
    With exclusive_domain set on the base router, a matching domain entry
    takes precedence over the first arg.

    Raises HTTP 400 if the application name is malformed or unknown.
    """
    base = routers.BASE  # base router
    self.domain_application = None
    self.domain_controller = None
    self.domain_function = None
    arg0 = self.harg0

    # arg0 names an application if it is listed in the base router, or,
    # when no application list is configured, whenever it is non-empty
    if base.applications:
        arg0_is_app = arg0 in base.applications
    else:
        arg0_is_app = bool(arg0)

    if arg0_is_app and not base.exclusive_domain:
        self.application = arg0
    else:
        # prefer a port-specific domain entry over a host-only one
        domain_key = None
        for candidate in ((self.host, self.port), (self.host, None)):
            if candidate in base.domains:
                domain_key = candidate
                break
        if domain_key is not None:
            (self.application, self.domain_controller,
             self.domain_function) = base.domains[domain_key]
            self.env['domain_application'] = self.application
            self.env['domain_controller'] = self.domain_controller
            self.env['domain_function'] = self.domain_function
        elif arg0_is_app:
            self.application = arg0
        else:
            self.application = base.default_application or ''
    self.pop_arg_if(self.application == arg0)

    if not base._acfe_match.match(self.application):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error="invalid application: '%s'" % self.application)

    has_router = self.application in routers
    if not has_router:
        # an application without a router is only acceptable when it is
        # the configured default application and is not 'welcome'
        is_plain_default = (
            self.application == THREAD_LOCAL.routes.default_application
            and self.application != 'welcome')
        if not is_plain_default:
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error="unknown application: '%s'" % self.application)

    # set the application router
    #
    log_rewrite("select application=%s" % self.application)
    self.request.application = self.application
    if has_router:
        self.router = routers[self.application]  # application router
    else:
        self.router = routers.BASE  # support gluon.main.wsgibase init->welcome

    # cache the router settings used by the remaining mapping steps
    self.default_controller = (self.domain_controller or
                               self.router.default_controller)
    for setting in ('controllers', 'functions', 'languages',
                    'default_language', 'map_hyphen', 'exclusive_domain',
                    '_acfe_match', 'file_match', '_file_match',
                    '_args_match'):
        setattr(self, setting, getattr(self.router, setting))
944
def map_root_static(self):
    """
    Handle root-static files (no hyphen mapping).

    A root-static file is one whose incoming URL expects it to be at the
    root, typically robots.txt & favicon.ico.

    Returns (path, None) when the single remaining arg is listed in the
    router's root_static, else (None, None).
    """
    if len(self.args) != 1 or self.arg0 not in self.router.root_static:
        return None, None
    self.controller = self.request.controller = 'static'
    static_path = pjoin(self.request.env.applications_parent,
                        'applications', self.application,
                        self.controller, self.arg0)
    log_rewrite("route: root static=%s" % static_path)
    return static_path, None
961
def map_language(self):
    """
    Handle the language element of the URL (no hyphen mapping).

    Sets self.language to the first arg when it is one of the router's
    languages, otherwise to the router's default language. The first arg
    is consumed only when it equals the selected language.
    """
    candidate = self.arg0  # no hyphen mapping
    if candidate and self.languages and candidate in self.languages:
        self.language = candidate
    else:
        self.language = self.default_language
    if self.language:
        log_rewrite("route: language=%s" % self.language)
        self.pop_arg_if(self.language == candidate)
973
def map_controller(self):
    """
    Identify the controller from the first (hyphen-mapped) arg.

    The arg is used when present and, if the router restricts controllers,
    listed among them; otherwise the default controller (or '') is used.
    The arg is consumed when it equals the selected controller.

    Raises HTTP 400 when the controller name fails the router's pattern.
    """
    requested = self.harg0  # map hyphens
    accepted = bool(requested) and (
        not self.controllers or requested in self.controllers)
    if accepted:
        self.controller = requested
    else:
        self.controller = self.default_controller or ''
    self.pop_arg_if(requested == self.controller)
    log_rewrite("route: controller=%s" % self.controller)
    if self.router._acfe_match.match(self.controller):
        return
    raise HTTP(
        400, THREAD_LOCAL.routes.error_message % 'invalid request',
        web2py_error='invalid controller')
989
def map_static(self):
    """
    Handle static files (file_match validation, no hyphen mapping).

    Returns (None, None) when the controller is not 'static'. Otherwise
    returns (static_file, version), where version is the regex_version
    match object for a leading version element in the args, or None.

    Raises HTTP 400 when no file name is given or the path fails the
    router's file pattern.
    """
    if self.controller != 'static':
        return None, None

    # Only probe for a version prefix when there is a first arg:
    # self.args(0) is None for a bare /app/static request, and handing None
    # to the regex raises TypeError before the HTTP 400 below is reached.
    version = regex_version.match(self.args(0)) if self.args else None
    if version:
        static_path = '/'.join(self.args[1:])
    else:
        static_path = '/'.join(self.args)

    if len(self.args) == 0:
        bad_static = True   # require a file name
    elif '/' in self.file_match:
        # match the path
        bad_static = not self.router._file_match.match(static_path)
    else:
        # match path elements
        bad_static = any(
            name in ('', '.', '..') or
            not self.router._file_match.match(name)
            for name in self.args)
    if bad_static:
        log_rewrite('bad static path=%s' % static_path)
        raise HTTP(400,
                   THREAD_LOCAL.routes.error_message % 'invalid request',
                   web2py_error='invalid static file')
    #
    # support language-specific static subdirectories,
    # eg /appname/en/static/filename => applications/appname/static/en/filename
    # if language-specific file doesn't exist, try same file in static
    #
    if self.language:
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', self.language, static_path)
    if not self.language or not isfile(static_file):
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', static_path)
    self.extension = None
    log_rewrite("route: static=%s" % static_file)
    return static_file, version
1034
def map_function(self):
    """
    Handle the function.extension element of the URL.

    The first (hyphen-mapped) arg is used as the function when present and,
    if the router lists functions for this controller, included in that
    list; it is then split on '.' to pick up an extension. Otherwise the
    default function (domain default first, then router default) is used.

    Raises HTTP 400 when the function or extension fails the router's
    pattern.
    """
    requested = self.harg0  # map hyphens
    known_functions = self.functions.get(self.controller, set())

    router_default = self.router.default_function  # dict, str or None
    if isinstance(router_default, dict):
        router_default = router_default.get(self.controller, None)
    fallback = self.domain_function or router_default

    if requested and not (known_functions and
                          requested not in known_functions):
        pieces = requested.split('.')
        self.function = pieces[0]
        if len(pieces) > 1:
            self.extension = pieces[-1]
        self.pop_arg_if(True)
    else:
        self.function = fallback or ""
        # consume the arg only when it is the default function itself
        self.pop_arg_if(requested and self.function == requested)
    log_rewrite(
        "route: function.ext=%s.%s" % (self.function, self.extension))

    acfe_match = self.router._acfe_match
    if not acfe_match.match(self.function):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid function')
    if self.extension and not acfe_match.match(self.extension):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid extension')
1067
def validate_args(self):
    """
    Check every remaining arg against the router's args pattern.

    Raises HTTP 400 for the first arg that does not match.
    """
    for candidate in self.args:
        if self.router._args_match.match(candidate):
            continue
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid arg <%s>' % candidate)
1077
def sluggify(self):
    """
    Copy self.env into request.env under normalized keys.

    Each key is lower-cased and has '.' replaced by '_'; values are copied
    unchanged.
    """
    def normalized_items():
        for name, value in self.env.iteritems():
            yield name.lower().replace('.', '_'), value

    self.request.env.update(normalized_items())
1082
def update_request(self):
    """
    Update the request from the mapping results.

    Copies application/controller/function/extension/args (and the
    language, when set) onto the request, builds env['REQUEST_URI'],
    then calls sluggify() to refresh the lower-cased entries in
    request.env.
    """
    request = self.request
    request.application = self.application
    request.controller = self.controller
    request.function = self.function
    request.extension = self.extension
    request.args = self.args
    if self.language:
        request.uri_language = self.language

    app = self.application
    path = '/%s/%s' % (self.controller, self.function)
    if self.map_hyphen:
        path = path.replace('_', '-')
        app = app.replace('_', '-')
    if self.extension and self.extension != 'html':
        path += '.' + self.extension
    if self.language:
        path = '/%s%s' % (self.language, path)

    if self.args:
        quoted_args = urllib.quote(
            '/' + '/'.join(str(x) for x in self.args))
    else:
        quoted_args = ''
    if self.query:
        query_string = '?' + self.query
    else:
        query_string = ''

    self.env['REQUEST_URI'] = '/%s%s%s%s' % (
        app, path, quoted_args, query_string)
    self.sluggify()
@property
def arg0(self):
    """First remaining arg, as returned by self.args(0)."""
    first = self.args(0)
    return first
1118 1119 @property
1120 - def harg0(self):
1121 "return first arg with optional hyphen mapping" 1122 if self.map_hyphen and self.args(0): 1123 return self.args(0).replace('-', '_') 1124 return self.args(0)
1125
def pop_arg_if(self, dopop):
    """Remove the first arg from self.args when dopop is true."""
    if not dopop:
        return
    self.args.pop(0)
1130
class MapUrlOut(object):
    """Logic for mapping outgoing URLs."""

    def __init__(self, request, env, application, controller,
                 function, args, other, scheme, host, port):
        """
        Initialize a map-out object.

        Selects the application's router (falling back to the base router)
        and caches the router settings consulted when deciding which URL
        components can be omitted.

        Raises SyntaxError for a cross-domain URL built without a host when
        the router has exclusive_domain set.
        """
        base = routers.BASE
        self.default_application = base.default_application
        self.router = routers[application] if application in routers else base

        self.request = request
        self.env = env
        self.application = application
        self.controller = controller
        self.is_static = (
            controller == 'static' or controller.startswith('static/'))
        self.function = function
        self.args = args
        self.other = other
        self.scheme = scheme
        self.host = host
        self.port = port

        router = self.router
        self.applications = base.applications
        self.controllers = router.controllers
        self.functions = router.functions.get(self.controller, set())
        self.languages = router.languages
        self.default_language = router.default_language
        self.exclusive_domain = router.exclusive_domain
        self.map_hyphen = router.map_hyphen
        self.map_static = router.map_static
        self.path_prefix = base.path_prefix

        self.domain_application = request and self.request.env.domain_application
        self.domain_controller = request and self.request.env.domain_controller

        default_function = router.default_function
        if isinstance(default_function, dict):
            default_function = default_function.get(self.controller, None)
        self.default_function = default_function

        crosses_domain = (self.domain_application and
                          self.domain_application != self.application)
        if router.exclusive_domain and crosses_domain and not self.host:
            raise SyntaxError('cross-domain conflict: must specify host')

        lang = request and request.uri_language
        self.language = None
        if lang and self.languages and lang in self.languages:
            self.language = lang

        self.omit_application = False
        self.omit_language = False
        self.omit_controller = False
        self.omit_function = False

    def omit_lang(self):
        """Omit the language when there is none or it is the default."""
        if self.language and self.language != self.default_language:
            return
        self.omit_language = True

    def omit_acf(self):
        """Omit what we can of application/controller/function."""
        router = self.router

        # Handle the easy no-args case of tail-defaults: /a/c  /a  /
        #
        if not self.args and self.function == self.default_function:
            self.omit_function = True
            if self.controller == router.default_controller:
                self.omit_controller = True
                if self.application == self.default_application:
                    self.omit_application = True

        # omit default application
        # (which might be the domain default application)
        #
        effective_default_app = (self.domain_application or
                                 self.default_application)
        if self.application == effective_default_app:
            self.omit_application = True

        # omit controller if default controller
        #
        if (self.application == self.domain_application and
                self.domain_controller):
            effective_default_ctl = self.domain_controller
        else:
            effective_default_ctl = router.default_controller or ''
        if self.controller == effective_default_ctl:
            self.omit_controller = True

        # omit function if possible
        #
        if (self.functions and self.function in self.functions and
                self.function == self.default_function):
            self.omit_function = True

        # prohibit ambiguous cases
        #
        # because we presume the lang string to be unambiguous,
        # its presence protects application omission
        #
        if self.exclusive_domain:
            applications = [self.domain_application]
        else:
            applications = self.applications
        if self.omit_language:
            if not applications or self.controller in applications:
                self.omit_application = False
            if self.omit_application and (
                    not applications or self.function in applications):
                self.omit_controller = False
        if not self.controllers or self.function in self.controllers:
            self.omit_controller = False
        if self.args and (self.args[0] in self.functions or
                          self.args[0] in self.controllers or
                          self.args[0] in applications):
            self.omit_function = False
        if self.omit_controller and (self.function in self.controllers or
                                     self.function in applications):
            self.omit_controller = False
        if self.omit_application and self.controller in applications:
            self.omit_application = False

        # handle static as a special case
        # (easier for external static handling)
        #
        if self.is_static:
            if not self.map_static:
                self.omit_application = False
                if self.language:
                    self.omit_language = False
            self.omit_controller = False
            self.omit_function = False

    def build_acf(self):
        """
        Build the /a/lang/c/f string from the components not marked omitted.

        Returns '/' when everything is omitted and there are no args.
        """
        if self.map_hyphen:
            self.application = self.application.replace('_', '-')
            self.controller = self.controller.replace('_', '-')
            if not (self.controller == 'static' or
                    self.controller.startswith('static/')):
                self.function = self.function.replace('_', '-')

        segments = []
        if not self.omit_application:
            segments.append(self.application)
        # handle case of flipping lang/static/file to static/lang/file
        # for external rewrite
        if (self.is_static and self.map_static is False and
                not self.omit_language):
            segments.append(self.controller)
            segments.append(self.language)
        else:
            if not self.omit_language:
                segments.append(self.language)
            if not self.omit_controller:
                segments.append(self.controller)
        if not self.omit_function:
            segments.append(self.function)

        acf = ''.join('/' + segment for segment in segments)
        if self.path_prefix:
            acf = '/' + '/'.join(self.path_prefix) + acf
        if self.args:
            return acf
        return acf or '/'

    def acf(self):
        """
        Convert components to /app/lang/controller/function.

        Returns None when no routers are defined (regex filter is used).
        """
        if not routers:
            return None         # use regex filter
        self.omit_lang()        # try to omit language
        self.omit_acf()         # try to omit a/c/f
        return self.build_acf()  # build and return the /a/lang/c/f string
1297
def map_url_in(request, env, app=False):
    """
    Route an incoming URL.

    When app is true, only the application is determined and its name is
    returned. Otherwise returns a 3-tuple (static_file, version, env):
    static_file and version are None unless the URL maps to a static file.

    HTTP 400 may be raised by the individual mapping steps.
    """
    # initialize router-url object
    #
    THREAD_LOCAL.routes = params  # default to base routes
    mapper = MapUrlIn(request=request, env=env)
    mapper.sluggify()
    mapper.map_prefix()  # strip prefix if present
    mapper.map_app()     # determine application

    # configure THREAD_LOCAL.routes for error rewrite
    #
    # NOTE(review): the lookup key is the caller's `app` flag, not the
    # application selected above -- confirm this is the intended key.
    if params.routes_app:
        THREAD_LOCAL.routes = params_apps.get(app, params)

    if app:
        return mapper.application

    # handle root-static files
    root_static_file, version = mapper.map_root_static()
    if root_static_file:
        mapper.update_request()
        return (root_static_file, version, mapper.env)

    # handle mapping of lang/static to static/lang in externally-rewritten
    # URLs in case we have to handle them ourselves
    #
    # The setting lives on the router: on the mapper itself, map_static is
    # the method called below, so comparing it with False could never
    # succeed.
    if (mapper.languages and mapper.router.map_static is False and
            mapper.arg0 == 'static' and
            mapper.args(1) in mapper.languages):
        mapper.map_controller()
        mapper.map_language()
    else:
        mapper.map_language()
        mapper.map_controller()

    static_file, version = mapper.map_static()
    if static_file:
        mapper.update_request()
        return (static_file, version, mapper.env)
    mapper.map_function()
    mapper.validate_args()
    mapper.update_request()
    return (None, None, mapper.env)
1339
def map_url_out(request, env, application, controller,
                function, args, other, scheme, host, port):
    """
    Supply the /a/c/f (or /a/lang/c/f) portion of an outgoing url.

    The basic rule is that we can only make transformations
    that map_url_in can reverse.

    Suppose that the incoming arguments are a,c,f,args,lang
    and that the router defaults are da, dc, df, dl.

    We can perform these transformations trivially if args=[] and
    lang=None or dl:

    /da/dc/df => /
    /a/dc/df => /a
    /a/c/df => /a/c

    We would also like to be able to strip the default application or
    application/controller from URLs with function/args present, thus:

    /da/c/f/args => /c/f/args
    /da/dc/f/args => /f/args

    We use [applications] and [controllers] and {functions} to suppress
    ambiguous omissions.

    We assume that language names do not collide with a/c/f names.
    """
    return MapUrlOut(request, env, application, controller,
                     function, args, other, scheme, host, port).acf()
1371
def get_effective_router(appname):
    """
    Return a private copy of the effective router for the application.

    Returns None when no routers are defined or appname has no router.
    """
    if routers and appname in routers:
        return Storage(routers[appname])  # return a copy
    return None
1378