Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(8)  |  Call(8)  |  Derive(0)  |  Import(0)

src/m/e/metacourse-HEAD/metated/metaTED/crawler/get_downloadable_talks.py   metacourse(Download)
def get_downloadable_talks(num_workers=None):
    talks_urls = get_talks_urls()
 
    downloadable_talks = cached_storage.get('talks_infos', {})
    new_talks_urls = [url for url in talks_urls if url not in downloadable_talks]

src/m/e/metaTED-2.1.1/metaTED/crawler/get_downloadable_talks.py   metaTED(Download)
def get_downloadable_talks(num_workers=None):
    talks_urls = get_talks_urls()
 
    downloadable_talks = cached_storage.get('talks_infos', {})
    new_talks_urls = [url for url in talks_urls if url not in downloadable_talks]

src/m/e/metacourse-HEAD/metated/metaTED/crawler/get_talks_urls.py   metacourse(Download)
        #    cached number
        logging.info('Found a cached version of talk urls. Validating...')
        num_pages = cached_storage.get('num_of_talk_list_pages')
        if num_pages and num_pages == _get_num_pages():
            num_talks = cached_storage.get('num_of_talks_urls_on_last_page')

src/m/e/metaTED-2.1.1/metaTED/crawler/get_talks_urls.py   metaTED(Download)
        #    cached number
        logging.info('Found a cached version of talk urls. Validating...')
        num_pages = cached_storage.get('num_of_talk_list_pages')
        if num_pages and num_pages == _get_num_pages():
            num_talks = cached_storage.get('num_of_talks_urls_on_last_page')

src/m/e/metacourse-HEAD/metated/metaTED/metalink.py   metacourse(Download)
    # they were physically written to disk
    refresh_date = formatdate()
    first_published_on = cached_storage.get('first_published_on')
    if first_published_on is None:
        cached_storage['first_published_on'] = first_published_on = refresh_date

src/m/e/metaTED-2.1.1/metaTED/metalink.py   metaTED(Download)
    # they were physically written to disk
    refresh_date = formatdate()
    first_published_on = cached_storage.get('first_published_on')
    if first_published_on is None:
        cached_storage['first_published_on'] = first_published_on = refresh_date