• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python scraper.Scraper类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 sunpy.util.scraper.Scraper 的典型用法代码示例。如果您正苦于以下问题：Python Scraper 类的具体用法？Python Scraper 怎么用？Python Scraper 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Scraper类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_filelist_url_missing_directory

def test_filelist_url_missing_directory():
    """Regression check for ticket #2684: a missing remote directory gives no files.

    Data for the year 1960 is requested because, per the original note, that
    request results in a 404, so the file list is expected to be empty.
    """
    url_pattern = 'http://lasp.colorado.edu/eve/data_access/evewebdataproducts/level2/%Y/%j/'
    scraper = Scraper(url_pattern)
    window = TimeRange('1960/01/01 00:00:00', '1960/01/02 00:00:00')
    found = scraper.filelist(window)
    assert len(found) == 0
开发者ID:PritishC,项目名称:sunpy,代码行数:7,代码来源:test_scraper.py


示例2: testURL_patternMilliseconds

def testURL_patternMilliseconds():
    """Check file-name matching against a pattern that ends in the ``%e`` token."""
    scraper = Scraper('fd_%Y%m%d_%H%M%S_%e.fts')
    # NOTE: the original author saw the positive assertion below fail at
    #       random for reasons that were not understood (reportedly a bit
    #       less often when written with `== True`).
    assert scraper._URL_followsPattern('fd_20130410_231211_119.fts')
    # Names lacking the trailing field, or with extra text, must be rejected.
    for rejected in ('fd_20130410_231211.fts.gz',
                     'fd_20130410_ar_231211.fts.gz'):
        assert not scraper._URL_followsPattern(rejected)
开发者ID:Cadair,项目名称:sunpy,代码行数:7,代码来源:test_scraper.py


示例3: _get_url_for_timerange

    def _get_url_for_timerange(self, timerange, **kwargs):
        """
        Build the Fermi/GBM daily-file url(s) for the given time range.

        Parameters
        ----------
        timerange : `sunpy.time.TimeRange`
            The time range for which to download the data.
        **kwargs
            ``detector``, when present, is passed through ``_check_detector``;
            otherwise ``'n5'`` is used. ``resolution``, when present, is
            passed through ``_check_type``; otherwise ``'cspec'`` is used.

        Returns
        -------
        The result of ``Scraper.filelist`` for the GBM url pattern, i.e. the
        url(s) for the time of interest.
        """
        # Detector keyword first; falls back to detector 5.
        detector = _check_detector(kwargs['detector']) if 'detector' in kwargs else 'n5'
        # Resolution keyword selects the data type - either CSPEC or CTIME.
        resolution = _check_type(kwargs['resolution']) if 'resolution' in kwargs else 'cspec'

        url_template = ('https://heasarc.gsfc.nasa.gov/FTP/fermi/data/gbm/daily/'
                        '%Y/%m/%d/current/glg_{data_type}_{det}_%y%m%d_v00.pha')
        scraper = Scraper(url_template, data_type=resolution, det=detector)
        return scraper.filelist(timerange)
开发者ID:Cadair,项目名称:sunpy,代码行数:34,代码来源:fermi_gbm.py


示例4: testFilesRange_sameDirectory_months_remote

def testFilesRange_sameDirectory_months_remote():
    """Two monthly files are expected for a range running from August into September 2007."""
    url_template = ('http://www.srl.caltech.edu/{spacecraft}/DATA/{instrument}/'
                    'Ahead/1minute/AeH%y%b.1m')
    scraper = Scraper(url_template, spacecraft='STEREO', instrument='HET')
    window = TimeRange(parse_time((2007, 8, 1)), parse_time((2007, 9, 10)))
    matches = scraper.filelist(window)
    assert len(matches) == 2
开发者ID:Cadair,项目名称:sunpy,代码行数:8,代码来源:test_scraper.py


示例5: _get_time_for_url

 def _get_time_for_url(self, urls):
     """Return one `TimeRange` per url, starting at the date extracted from it.

     Each range is hard coded to span a full day, as that's the normal case.
     """
     scraper = Scraper(BASEURL)
     return [TimeRange(start, start + datetime.timedelta(days=1))
             for start in map(scraper._extractDateURL, urls)]
开发者ID:bwgref,项目名称:sunpy,代码行数:8,代码来源:eve.py


示例6: _get_time_for_url

 def _get_time_for_url(self, urls):
     freq = urls[0].split('/')[-1][0:3]  # extract the frequency label
     crawler = Scraper(BASEURL, freq=freq)
     times = list()
     for url in urls:
         t0 = crawler._extractDateURL(url)
         # hard coded full day as that's the normal.
         times.append(TimeRange(t0, t0 + TimeDelta(1*u.day)))
     return times
开发者ID:Cadair,项目名称:sunpy,代码行数:9,代码来源:norh.py


示例7: testDirectoryRange_Month

def testDirectoryRange_Month():
    """Count the directories produced for a 20 February - 2 March range.

    The same calendar window is checked for 2008 (12 directories expected)
    and for 2009 (11 directories expected).
    """
    scraper = Scraper('%Y%m/%d/%j_%H.txt')
    for year, expected in ((2008, 12), (2009, 11)):
        begin = parse_time((year, 2, 20, 10))
        finish = parse_time((year, 3, 2, 5))
        directories = scraper.range(TimeRange(begin, finish))
        assert len(directories) == expected
开发者ID:Cadair,项目名称:sunpy,代码行数:10,代码来源:test_scraper.py


示例8: testDirectoryRange_Month

def testDirectoryRange_Month():
    """Count the directories produced for a 20 February - 2 March range.

    Uses plain ``datetime`` endpoints; 2008 is expected to give 12
    directories and 2009 is expected to give 11.
    """
    scraper = Scraper("%Y%m/%d/%j_%H.txt")
    expected_by_year = {2008: 12, 2009: 11}
    for year, expected in expected_by_year.items():
        begin = datetime.datetime(year, 2, 20, 10)
        finish = datetime.datetime(year, 3, 2, 5)
        window = TimeRange(begin, finish)
        assert len(scraper.range(window)) == expected
开发者ID:ZachWerginz,项目名称:sunpy,代码行数:10,代码来源:test_scraper.py


示例9: testExtractDates_usingPattern

def testExtractDates_usingPattern():
    """The date is extracted from a url whose pattern repeats date fields."""
    cases = (
        # Standard pattern
        ('data/%Y/%m/%d/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz',
         'data/2014/05/14/fits/swap/swap_00174_fd_20140514_200135.fts.gz'),
        # Not-full repeated pattern
        ('data/%Y/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz',
         'data/2014/fits/swap/swap_00174_fd_20140514_200135.fts.gz'),
    )
    for pattern, url in cases:
        scraper = Scraper(pattern)
        expected = parse_time((2014, 5, 14, 20, 1, 35))
        assert scraper._extractDateURL(url) == expected
开发者ID:Cadair,项目名称:sunpy,代码行数:11,代码来源:test_scraper.py


示例10: testFilesRange_sameDirectory_local

def testFilesRange_sameDirectory_local():
    """List EIT files under ``rootdir`` through a ``file:`` pattern.

    Three files are expected for the 2004 window and none for the 2010 one.
    """
    # Known failure noted by the original author: an IsADirectoryError,
    # wrapped in a URLError, after `requests` tries to open a directory as a
    # binary file.
    local_pattern = '/'.join(('file:/', rootdir,
                              'EIT', 'efz%Y%m%d.%H%M%S_s.fits'))
    scraper = Scraper(local_pattern)
    windows = (
        ((2004, 3, 1, 4, 0), (2004, 3, 1, 6, 30), 3),
        ((2010, 1, 10, 20, 30), (2010, 1, 20, 20, 30), 0),
    )
    for begin, finish, expected in windows:
        first = parse_time(begin)
        last = parse_time(finish)
        assert len(scraper.filelist(TimeRange(first, last))) == expected
开发者ID:PritishC,项目名称:sunpy,代码行数:11,代码来源:test_scraper.py


示例11: testFilesRange_sameDirectory_local

def testFilesRange_sameDirectory_local():
    """List EIT files under ``rootdir`` through a ``file:`` pattern.

    The scraper's ``pattern`` and ``now`` attributes are printed first; three
    files are then expected for the 2004 window and none for the 2010 one.
    """
    path_parts = ['file:/', rootdir, 'EIT', 'efz%Y%m%d.%H%M%S_s.fits']
    scraper = Scraper('/'.join(path_parts))
    print(scraper.pattern)
    print(scraper.now)

    first = parse_time((2004, 3, 1, 4, 0))
    last = parse_time((2004, 3, 1, 6, 30))
    in_range = scraper.filelist(TimeRange(first, last))
    assert len(in_range) == 3

    first = parse_time((2010, 1, 10, 20, 30))
    last = parse_time((2010, 1, 20, 20, 30))
    out_of_range = scraper.filelist(TimeRange(first, last))
    assert len(out_of_range) == 0
开发者ID:Cadair,项目名称:sunpy,代码行数:11,代码来源:test_scraper.py


示例12: testFilesRange_sameDirectory_remote

def testFilesRange_sameDirectory_remote():
    """List remote SWAP files within a single day directory.

    Two files are expected in the morning window and none in the evening one.
    """
    url_template = (
        "http://solarmonitor.org/data/%Y/%m/%d/"
        "fits/{instrument}/"
        "{instrument}_00174_fd_%Y%m%d_%H%M%S.fts.gz"
    )
    scraper = Scraper(url_template, instrument="swap")
    windows = (
        ((0, 0), (6, 30), 2),
        ((21, 0), (23, 30), 0),
    )
    for begin_hm, finish_hm, expected in windows:
        begin = datetime.datetime(2014, 5, 14, *begin_hm)
        finish = datetime.datetime(2014, 5, 14, *finish_hm)
        window = TimeRange(begin, finish)
        assert len(scraper.filelist(window)) == expected
开发者ID:ZachWerginz,项目名称:sunpy,代码行数:11,代码来源:test_scraper.py


示例13: testExtractDates_notSeparators_andSimilar

def testExtractDates_notSeparators_andSimilar():
    """Extract dates when a literal ``Jun`` sits directly next to the ``%b`` field."""
    scenarios = (
        ('data/%Y/Jun%b%d_%H%M%S', (
            ('data/2014/JunJun14_200135', (2014, 6, 14, 20, 1, 35)),
            ('data/2014/JunMay14_200135', (2014, 5, 14, 20, 1, 35)),
        )),
        # and testing with the month afterwards
        ('data/%Y/%dJun%b_%H%M%S', (
            ('data/2014/14JunJun_200135', (2014, 6, 14, 20, 1, 35)),
        )),
    )
    for pattern, checks in scenarios:
        scraper = Scraper(pattern)
        for url, date_fields in checks:
            expected = parse_time(date_fields)
            assert scraper._extractDateURL(url) == expected
开发者ID:Cadair,项目名称:sunpy,代码行数:13,代码来源:test_scraper.py


示例14: testFilesRange_sameDirectory_remote

def testFilesRange_sameDirectory_remote():
    """List remote SWAP files within a single day directory.

    Two files are expected in the morning window and none in the evening one.
    """
    url_template = ''.join(['http://solarmonitor.org/data/%Y/%m/%d/',
                            'fits/{instrument}/',
                            '{instrument}_00174_fd_%Y%m%d_%H%M%S.fts.gz'])
    scraper = Scraper(url_template, instrument='swap')

    morning = TimeRange(parse_time((2014, 5, 14, 0, 0)),
                        parse_time((2014, 5, 14, 6, 30)))
    assert len(scraper.filelist(morning)) == 2

    evening = TimeRange(parse_time((2014, 5, 14, 21, 0)),
                        parse_time((2014, 5, 14, 23, 30)))
    assert len(scraper.filelist(evening)) == 0
开发者ID:Cadair,项目名称:sunpy,代码行数:13,代码来源:test_scraper.py


示例15: _get_url_for_timerange

    def _get_url_for_timerange(self, timerange, **kwargs):
        """
        Returns list of URLS corresponding to value of input timerange.

        Parameters
        ----------
        timerange: `sunpy.time.TimeRange`
            time range for which data is to be downloaded.

        Returns
        -------
        urls : list
            list of URLs corresponding to the requested time range
        """

        # We allow queries with no Wavelength but error here so that the query
        # does not get passed to VSO and spit out garbage.
        if 'wavelength' not in kwargs.keys() or not kwargs['wavelength']:
            raise ValueError("Queries to NORH should specify either 17GHz or 34GHz as a Wavelength."
                             "see https://solar.nro.nao.ac.jp/norh/doc/manuale/node65.html")
        else:
            wavelength = kwargs['wavelength']

        # If wavelength is a single value GenericClient will have made it a
        # Quantity in the kwargs.
        if not isinstance(wavelength, u.Quantity):
            raise ValueError("Wavelength to NORH must be one value not {}.".format(wavelength))

        wavelength = wavelength.to(u.GHz, equivalencies=u.spectral())
        if wavelength == 34 * u.GHz:
            freq = 'tcz'
        elif wavelength == 17 * u.GHz:
            freq = 'tca'
        else:
            raise ValueError("NORH Data can be downloaded for 17GHz or 34GHz,"
                             " see https://solar.nro.nao.ac.jp/norh/doc/manuale/node65.html")

        # If start of time range is before 00:00, converted to such, so
        # files of the requested time ranger are included.
        # This is done because the archive contains daily files.
        if timerange.start.strftime('%M-%S') != '00-00':
            timerange = TimeRange(timerange.start.strftime('%Y-%m-%d'),
                                  timerange.end)

        norh = Scraper(BASEURL, freq=freq)
        # TODO: warn user that some files may have not been listed, like for example:
        #       tca160504_224657 on ftp://solar-pub.nao.ac.jp/pub/nsro/norh/data/tcx/2016/05/
        #       as it doesn't follow pattern.

        return norh.filelist(timerange)
开发者ID:Cadair,项目名称:sunpy,代码行数:50,代码来源:norh.py


示例16: _get_url_for_timerange

    def _get_url_for_timerange(self, timerange, **kwargs):
        """
        Return the list of URLs that correspond to the input time range.

        Parameters
        ----------
        timerange: `sunpy.time.TimeRange`
            time range for which data is to be downloaded.

        Returns
        -------
        urls : list
            list of URLs corresponding to the requested time range

        """
        # The archive contains daily files, so a start that does not fall on
        # 00:00 is moved back to the beginning of its day to make sure the
        # files of the requested time range are included.
        starts_mid_day = timerange.start.time() != datetime.time(0, 0)
        if starts_mid_day:
            day_start = format(timerange.start, '%Y-%m-%d')
            timerange = TimeRange(day_start, timerange.end)
        return Scraper(BASEURL).filelist(timerange)
开发者ID:bwgref,项目名称:sunpy,代码行数:22,代码来源:eve.py


示例17: testDirectoryRange_single

def testDirectoryRange_single():
    """A two-hour window inside one day is expected to give a single directory."""
    scraper = Scraper('%Y%m%d/%H_%M.csv')
    window = TimeRange(parse_time((2010, 10, 10, 5, 0)),
                       parse_time((2010, 10, 10, 7, 0)))
    directories = scraper.range(window)
    assert len(directories) == 1
开发者ID:Cadair,项目名称:sunpy,代码行数:6,代码来源:test_scraper.py


示例18: testDirectoryRangeHours

def testDirectoryRangeHours():
    """Hourly directories are counted across a year boundary."""
    scraper = Scraper('%Y%m%d_%H/%H%M.csv')
    window = TimeRange('2009-12-31T23:40:00', '2010-01-01T01:15:00')
    hourly_dirs = scraper.range(window)
    # 3 directories (1 per hour)
    assert len(hourly_dirs) == 3
开发者ID:Cadair,项目名称:sunpy,代码行数:4,代码来源:test_scraper.py


示例19: testNoDateDirectory

def testNoDateDirectory():
    """With date fields only in the file name, the one fixed directory is expected."""
    scraper = Scraper('mySpacecraft/myInstrument/xMinutes/aaa%y%b.ext')
    expected = ['mySpacecraft/myInstrument/xMinutes/']
    window = TimeRange('2009/11/20', '2010/01/03')
    assert scraper.range(window) == expected
开发者ID:Cadair,项目名称:sunpy,代码行数:5,代码来源:test_scraper.py


示例20: testDirectoryRangeFalse

def testDirectoryRangeFalse():
    """The computed directory range must differ from a list with wrong entries.

    The last two entries below carry the year 2009 although the requested
    range ends in January 2010, so the comparison is expected to be unequal.
    """
    scraper = Scraper('%Y%m%d/%Y%m%d_%H.fit.gz')
    mismatching = [
        '20091230/', '20091231/', '20100101/',
        '20090102/', '20090103/',
    ]
    window = TimeRange('2009/12/30', '2010/01/03')
    assert scraper.range(window) != mismatching
开发者ID:Cadair,项目名称:sunpy,代码行数:6,代码来源:test_scraper.py



注:本文中的sunpy.util.scraper.Scraper类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python xml.xml_to_dict函数代码示例发布时间:2022-05-27
下一篇:
Python progressbar.TTYProgressBar类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap