Package gluon :: Module cache

Source Code for Module gluon.cache

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This file is part of the web2py Web Framework
Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)

Basic caching classes and methods
=================================

- Cache - the generic caching object interfacing with the others
- CacheInRam - provides caching in RAM
- CacheOnDisk - provides caching on disk

Memcache is also available via a different module (see gluon.contrib.memcache)

When web2py is running on Google App Engine,
caching will be provided by the GAE memcache
(see gluon.contrib.gae_memcache)
"""
import traceback
import time
import portalocker
import shelve
import thread
import os
import logging
import re
import hashlib
import datetime
try:
    import settings
    have_settings = True
except ImportError:
    have_settings = False

logger = logging.getLogger("web2py.cache")

__all__ = ['Cache', 'lazy_cache']


DEFAULT_TIME_EXPIRE = 300
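
# Usage sketch (illustrative addition, not part of the original module): inside
# a web2py request, `cache` is an instance of the Cache class defined below and
# exposes cache.ram and cache.disk, both callable as (key, f, time_expire):
#
#     cache = Cache(request)
#     now = cache.ram('now', lambda: time.ctime(), time_expire=5)
#     # the same key within 5 seconds returns the cached string without calling f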


class CacheAbstract(object):
    """
    Abstract class for cache implementations.
    Its main purpose now is to provide reference API documentation.

    Use CacheInRam or CacheOnDisk instead, which are derived from this class.

    Attention, Michele says:

    There are signatures inside gdbm files that are used directly
    by the python gdbm adapter, and they often lag behind the
    detection code in the python part.
    On every occasion that a gdbm store is probed by the python adapter,
    the probe fails, because the gdbm file version is newer.
    Using gdbm directly from C would work, because there is backward
    compatibility, but not from python!
    The .shelve file is discarded and a new one created (with a new
    signature) and it works until it is probed again...
    The possible consequences are memory leaks and broken sessions.
    """

    cache_stats_name = 'web2py_cache_statistics'

    def __init__(self, request=None):
        """
        Parameters
        ----------
        request:
            the global request object
        """
        raise NotImplementedError

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Tries to retrieve the value corresponding to `key` from the cache if
        the object exists and if it did not expire, else it calls the
        function `f` and stores the output in the cache corresponding to
        `key`. In this case the output of the function is returned.

        :param key: the key of the object to be stored or retrieved
        :param f: the function whose output is to be cached
        :param time_expire: expiration of the cache in seconds

        - `time_expire` is used to compare the current time with the time
          when the requested object was last saved in cache. It does not
          affect future requests.
        - Setting `time_expire` to 0 or to a negative value forces the cache
          to refresh.

        If the function `f` is `None` the cache is cleared.
        """
        raise NotImplementedError

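    # Call-protocol sketch (illustrative addition): every backend below follows
    # the (key, f, time_expire) convention documented above, e.g.
    #
    #     value = cache_instance('mykey', lambda: compute(), time_expire=60)
    #     value = cache_instance('mykey', lambda: compute(), time_expire=0)  # force refresh
    #     cache_instance('mykey', None)  # clear this key
    #
    # where `cache_instance` and `compute` are hypothetical placeholders.
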
    def clear(self, regex=None):
        """
        Clears the cache of all keys that match the provided regular expression.
        If no regular expression is provided, it clears all entries in cache.

        Parameters
        ----------
        regex:
            if provided, only keys matching the regex will be cleared.
            Otherwise all keys are cleared.
        """

        raise NotImplementedError

    def increment(self, key, value=1):
        """
        Increments the cached value for the given key by the amount in value

        Parameters
        ----------
        key:
            key for the cached object to be incremented
        value:
            amount of the increment (defaults to 1, can be negative)
        """
        raise NotImplementedError

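    # Usage sketch for increment (illustrative addition): with a Cache
    # instance `cache` available in a web2py request, a simple page counter
    # could look like
    #
    #     hits = cache.ram.increment('page_hits')       # +1, returns new value
    #     cache.ram.increment('page_hits', 10)          # +10
    #     cache.ram.increment('page_hits', -1)          # decrement
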
    def _clear(self, storage, regex):
        """
        Auxiliary function called by `clear` to search and clear cache entries
        """
        r = re.compile(regex)
        for (key, value) in storage.items():
            if r.match(str(key)):
                del storage[key]


class CacheInRam(CacheAbstract):
    """
    Ram based caching

    This is implemented as a global (per process, shared by all threads)
    dictionary.
    A mutex-lock mechanism avoids conflicts.
    """

    locker = thread.allocate_lock()
    meta_storage = {}

    def __init__(self, request=None):
        self.initialized = False
        self.request = request
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        self.locker.acquire()
        request = self.request
        if request:
            app = request.application
        else:
            app = ''
        if not app in self.meta_storage:
            self.storage = self.meta_storage[app] = {
                CacheAbstract.cache_stats_name: {'hit_total': 0, 'misses': 0}}
        else:
            self.storage = self.meta_storage[app]
        self.locker.release()

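    # Note (illustrative addition): meta_storage is a class-level dict, so all
    # CacheInRam instances created in the same process for the same
    # application end up sharing one storage dictionary, e.g.
    #
    #     a = CacheInRam(request); a.initialize()
    #     b = CacheInRam(request); b.initialize()
    #     # a.storage is b.storage  ->  True for the same request.application
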
    def clear(self, regex=None):
        self.initialize()
        self.locker.acquire()
        storage = self.storage
        if regex is None:
            storage.clear()
        else:
            self._clear(storage, regex)

        if not CacheAbstract.cache_stats_name in storage.keys():
            storage[CacheAbstract.cache_stats_name] = {
                'hit_total': 0, 'misses': 0}

        self.locker.release()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 destroyer=None):
        """
        Attention! cache.ram does not copy the cached object. It just stores
        a reference to it. It turns out that deepcopying the object has some
        problems:
        1) it would break backward compatibility
        2) it would be limiting because people may want to cache live objects
        3) it would only work if we deepcopied both on storage and on
           retrieval, which would make things slow.
        Anyway, you can deepcopy explicitly in the function generating the
        value to be cached.
        """
        self.initialize()

        dt = time_expire
        now = time.time()

        self.locker.acquire()
        item = self.storage.get(key, None)
        if item and f is None:
            del self.storage[key]
            if destroyer:
                destroyer(item[1])
        self.storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
        self.locker.release()

        if f is None:
            return None
        if item and (dt is None or item[0] > now - dt):
            return item[1]
        elif item and (item[0] < now - dt) and destroyer:
            destroyer(item[1])
        value = f()

        self.locker.acquire()
        self.storage[key] = (now, value)
        self.storage[CacheAbstract.cache_stats_name]['misses'] += 1
        self.locker.release()
        return value

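    # Sketch (illustrative addition): because only a reference is stored,
    # mutating the returned object also mutates the cached copy. If isolation
    # is needed, copy inside the cached function, e.g.
    #
    #     import copy
    #     rows = cache.ram('rows', lambda: copy.deepcopy(build_rows()), 300)
    #
    # where `build_rows` is a hypothetical value-producing function.
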
    def increment(self, key, value=1):
        self.initialize()
        self.locker.acquire()
        try:
            if key in self.storage:
                value = self.storage[key][1] + value
            self.storage[key] = (time.time(), value)
        except BaseException, e:
            self.locker.release()
            raise e
        self.locker.release()
        return value


class CacheOnDisk(CacheAbstract):
    """
    Disk based cache

    This is implemented as a shelve object and it is shared by multiple web2py
    processes (and threads) as long as they share the same filesystem.
    The file is locked when accessed.

    Disk cache provides persistence when web2py is started/stopped, but it is
    slower than `CacheInRam`.

    Values stored in disk cache must be picklable.
    """

    def _close_shelve_and_unlock(self):
        try:
            if self.storage:
                self.storage.close()
        finally:
            if self.locker and self.locked:
                portalocker.unlock(self.locker)
                self.locker.close()
                self.locked = False

    def _open_shelve_and_lock(self):
        """Open and return a shelf object, obtaining an exclusive lock
        on self.locker first. Replaces the close method of the
        returned shelf instance with one that releases the lock upon
        closing."""

        storage = None
        locker = None
        locked = False
        try:
            locker = open(self.locker_name, 'a')
            portalocker.lock(locker, portalocker.LOCK_EX)
            locked = True
            try:
                storage = shelve.open(self.shelve_name)
            except:
                logger.error('corrupted cache file %s, will try to rebuild it'
                             % (self.shelve_name))
                storage = None
            if not storage and os.path.exists(self.shelve_name):
                os.unlink(self.shelve_name)
                storage = shelve.open(self.shelve_name)
            if not CacheAbstract.cache_stats_name in storage.keys():
                storage[CacheAbstract.cache_stats_name] = {
                    'hit_total': 0, 'misses': 0}
            storage.sync()
        except Exception, e:
            if storage:
                storage.close()
            storage = None
            if locked:
                portalocker.unlock(locker)
                locker.close()
            locked = False
            raise RuntimeError(
                'unable to create/re-create cache file %s' % self.shelve_name)
        self.locker = locker
        self.locked = locked
        self.storage = storage
        return storage

    def __init__(self, request=None, folder=None):
        self.initialized = False
        self.request = request
        self.folder = folder
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        folder = self.folder
        request = self.request

        # Check whether the cache folder exists; if not, create it
        folder = folder or os.path.join(request.folder, 'cache')

        if not os.path.exists(folder):
            os.mkdir(folder)

        ### we need this because of a possible bug in shelve that may
        ### or may not lock
        self.locker_name = os.path.join(folder, 'cache.lock')
        self.shelve_name = os.path.join(folder, 'cache.shelve')

    def clear(self, regex=None):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if regex is None:
                storage.clear()
            else:
                self._clear(storage, regex)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        self.initialize()
        dt = time_expire
        storage = self._open_shelve_and_lock()
        try:
            item = storage.get(key, None)
            storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
            if item and f is None:
                del storage[key]
                storage.sync()
            now = time.time()
            if f is None:
                value = None
            elif item and (dt is None or item[0] > now - dt):
                value = item[1]
            else:
                value = f()
                storage[key] = (now, value)
                storage[CacheAbstract.cache_stats_name]['misses'] += 1
                storage.sync()
        finally:
            self._close_shelve_and_unlock()

        return value

    def increment(self, key, value=1):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if key in storage:
                value = storage[key][1] + value
            storage[key] = (time.time(), value)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()
        return value

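    # Usage sketch (illustrative addition): cache.disk behaves like cache.ram
    # but persists across restarts and is shared between processes; values
    # must be picklable, e.g.
    #
    #     data = cache.disk('report', lambda: build_report(), time_expire=3600)
    #
    # where `build_report` is a hypothetical function returning a picklable value.
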

class CacheAction(object):
    def __init__(self, func, key, time_expire, cache, cache_model):
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__
        self.func = func
        self.key = key
        self.time_expire = time_expire
        self.cache = cache
        self.cache_model = cache_model

    def __call__(self, *a, **b):
        if not self.key:
            key2 = self.__name__ + ':' + repr(a) + ':' + repr(b)
        else:
            key2 = self.key.replace('%(name)s', self.__name__)\
                .replace('%(args)s', str(a)).replace('%(vars)s', str(b))
        cache_model = self.cache_model
        if not cache_model or isinstance(cache_model, str):
            cache_model = getattr(self.cache, cache_model or 'ram')
        return cache_model(key2,
                           lambda a=a, b=b: self.func(*a, **b),
                           self.time_expire)

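    # Key-template sketch (illustrative addition): when a key such as
    # Cache.autokey (':%(name)s:%(args)s:%(vars)s') is used, the placeholders
    # are substituted with the wrapped function name and its call arguments,
    # e.g. a call f(1, x=2) on a function named 'f' yields a key like
    #
    #     ":f:(1,):{'x': 2}"
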

class Cache(object):
    """
    Sets up generic caching, creating an instance of both CacheInRam and
    CacheOnDisk.
    In case of GAE it will make use of gluon.contrib.gae_memcache.

    - self.ram is an instance of CacheInRam
    - self.disk is an instance of CacheOnDisk
    """

    autokey = ':%(name)s:%(args)s:%(vars)s'

    def __init__(self, request):
        """
        Parameters
        ----------
        request:
            the global request object
        """
        # GAE will have a special caching
        if have_settings and settings.global_settings.web2py_runtime_gae:
            from contrib.gae_memcache import MemcacheClient
            self.ram = self.disk = MemcacheClient(request)
        else:
            # Otherwise use ram (and try also disk)
            self.ram = CacheInRam(request)
            try:
                self.disk = CacheOnDisk(request)
            except IOError:
                logger.warning('no cache.disk (IOError)')
            except AttributeError:
                # normally not expected anymore, as GAE has already
                # been accounted for
                logger.warning('no cache.disk (AttributeError)')

    def action(self, time_expire=DEFAULT_TIME_EXPIRE, cache_model=None,
               prefix=None, session=False, vars=True, lang=True,
               user_agent=False, public=True, valid_statuses=None,
               quick=None):
        """
        Experimental!
        Currently only HTTP 1.1 compliant.
        reference: http://code.google.com/p/doctype-mirror/wiki/ArticleHttpCaching

        time_expire: same as @cache
        cache_model: same as @cache
        prefix: adds a prefix to the calculated key
        session: adds response.session_id to the key
        vars: adds request.env.query_string
        lang: adds T.accepted_language
        user_agent: if True, adds is_mobile and is_tablet to the key.
            Pass a dict to use all the needed values (uses str(.items())),
            e.g. user_agent=request.user_agent(). Used only if session is
            not True
        public: if False, forces the Cache-Control to be 'private'
        valid_statuses: by default only status codes starting with 1, 2, 3
            will be cached. Pass an explicit list of statuses for which the
            cache should be enabled
        quick: Session, Vars, Lang, User-agent, Public:
            fast overrides with initial strings, e.g. 'SVLP' or 'VLP'
        """
        from gluon import current
        from gluon.http import HTTP

        def wrap(func):
            def wrapped_f():
                if current.request.env.request_method != 'GET':
                    return func()
                if time_expire:
                    cache_control = 'max-age=%(time_expire)s, s-maxage=%(time_expire)s' % dict(time_expire=time_expire)
                    if quick:
                        session_ = True if 'S' in quick else False
                        vars_ = True if 'V' in quick else False
                        lang_ = True if 'L' in quick else False
                        user_agent_ = True if 'U' in quick else False
                        public_ = True if 'P' in quick else False
                    else:
                        session_, vars_, lang_, user_agent_, public_ = session, vars, lang, user_agent, public
                    if not session_ and public_:
                        cache_control += ', public'
                        expires = (current.request.utcnow + datetime.timedelta(seconds=time_expire)).strftime('%a, %d %b %Y %H:%M:%S GMT')
                        vary = None
                    else:
                        cache_control += ', private'
                        expires = 'Fri, 01 Jan 1990 00:00:00 GMT'
                if cache_model:
                    # figure out the correct cache key
                    cache_key = [current.request.env.path_info, current.response.view]
                    if session_:
                        cache_key.append(current.response.session_id)
                    elif user_agent_:
                        if user_agent_ is True:
                            cache_key.append("%(is_mobile)s_%(is_tablet)s" % current.request.user_agent())
                        else:
                            cache_key.append(str(user_agent_.items()))
                    if vars_:
                        cache_key.append(current.request.env.query_string)
                    if lang_:
                        cache_key.append(current.T.accepted_language)
                    cache_key = hashlib.md5('__'.join(cache_key)).hexdigest()
                    if prefix:
                        cache_key = prefix + cache_key
                    try:
                        # action returns something
                        rtn = cache_model(cache_key, lambda: func(), time_expire=time_expire)
                        http, status = None, current.response.status
                    except HTTP, e:
                        # action raises HTTP (can still be valid)
                        rtn = cache_model(cache_key, lambda: e.body, time_expire=time_expire)
                        http, status = HTTP(e.status, rtn, **e.headers), e.status
                    else:
                        # no HTTP exception was raised
                        http = None
                else:
                    # no server-side cache involved
                    try:
                        # action returns something
                        rtn = func()
                        http, status = None, current.response.status
                    except HTTP, e:
                        # action raises HTTP (can still be valid)
                        status = e.status
                        http = HTTP(e.status, e.body, **e.headers)
                    else:
                        # no HTTP exception was raised
                        http = None
                send_headers = False
                if http and isinstance(valid_statuses, list):
                    if status in valid_statuses:
                        send_headers = True
                elif valid_statuses is None:
                    if str(status)[0] in '123':
                        send_headers = True
                if send_headers:
                    headers = {
                        'Pragma': None,
                        'Expires': expires,
                        'Cache-Control': cache_control
                    }
                    current.response.headers.update(headers)
                if cache_model and not send_headers:
                    # we already cached the value, but the status is not valid,
                    # so we need to delete the cached value
                    cache_model(cache_key, None)
                if http:
                    if send_headers:
                        http.headers.update(current.response.headers)
                    raise http
                return rtn
            wrapped_f.__name__ = func.__name__
            wrapped_f.__doc__ = func.__doc__
            return wrapped_f
        return wrap

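    # Usage sketch (illustrative addition): in a web2py controller, an action
    # can be decorated so that both HTTP caching headers and a server-side
    # cache are used, e.g.
    #
    #     @cache.action(time_expire=300, cache_model=cache.ram, quick='SVLP')
    #     def index():
    #         return dict(message='hello')
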
    def __call__(self,
                 key=None,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 cache_model=None):
        """
        Decorator function that can be used to cache any function/method.

        Example::

            @cache('key', 5000, cache.ram)
            def f():
                return time.ctime()

        When the function f is called, web2py tries to retrieve
        the value corresponding to `key` from the cache if the object exists
        and if it did not expire, else it calls the function `f`
        and stores the output in the cache corresponding to `key`. In this
        case the output of the function is returned.

        :param key: the key of the object to be stored or retrieved
        :param time_expire: expiration of the cache in seconds
        :param cache_model: "ram", "disk", or other
            (like "memcache" if defined). It defaults to "ram".

        Notes
        -----
        `time_expire` is used to compare the current time with the time when
        the requested object was last saved in cache. It does not affect
        future requests.
        Setting `time_expire` to 0 or to a negative value forces the cache to
        refresh.

        If the function `f` is an action, we suggest using
        @cache.action instead
        """

        def tmp(func, cache=self, cache_model=cache_model):
            return CacheAction(func, key, time_expire, self, cache_model)
        return tmp

    @staticmethod
    def with_prefix(cache_model, prefix):
        """
        Allows replacing cache.ram with cache.with_prefix(cache.ram, 'prefix');
        it will add the prefix to all the cache keys used.
        """
        return lambda key, f, time_expire=DEFAULT_TIME_EXPIRE, prefix=prefix:\
            cache_model(prefix + key, f, time_expire)

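    # Usage sketch (illustrative addition): a prefixed cache can be passed
    # anywhere a plain cache model is expected, e.g.
    #
    #     session_cache = cache.with_prefix(cache.ram, 'user_%s_' % 42)
    #     value = session_cache('profile', lambda: load_profile(), 300)
    #     # stores under key 'user_42_profile' in cache.ram
    #
    # where `load_profile` is a hypothetical function.
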


def lazy_cache(key=None, time_expire=None, cache_model='ram'):
    """
    Can be used to cache any function, including functions in modules,
    as long as the cached function is only called within a web2py request.
    If a key is not provided, one is generated from the function name.
    time_expire defaults to None (no cache expiration).
    If cache_model is "ram" then the model is current.cache.ram, etc.
    """
    def decorator(f, key=key, time_expire=time_expire, cache_model=cache_model):
        key = key or repr(f)

        def g(*c, **d):
            from gluon import current
            return current.cache(key, time_expire, cache_model)(f)(*c, **d)
        g.__name__ = f.__name__
        return g
    return decorator
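
# Usage sketch (illustrative addition): inside an application module,
# lazy_cache defers the cache lookup until request time, e.g.
#
#     @lazy_cache('heavy', time_expire=60, cache_model='ram')
#     def heavy():
#         return expensive_computation()
#
# where `expensive_computation` is a hypothetical function.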