Source code for pandasdmx.remote
# encoding: utf-8
# pandaSDMX is licensed under the Apache 2.0 license a copy of which
# is included in the source distribution of pandaSDMX.
# This is notwithstanding any licenses of third-party software included in this distribution.
# (c) 2014, 2015 Dr. Leo <fhaxbox66qgmail.com>, all rights reserved
'''
This module is part of pandaSDMX. It contains
a classes for http access.
'''
import requests
from pandasdmx.utils import DictLike, str_type
from tempfile import SpooledTemporaryFile as STF
from contextlib import closing
try:
import requests_cache
except ImportError:
pass
[docs]def is_url(s):
'''
return True if s (str) is a valid URL, False otherwise.
'''
return bool(requests.utils.urlparse(s).scheme)
[docs]class REST:
"""
Query SDMX resources via REST or from a file
The constructor accepts arbitrary keyword arguments that will be passed
to the requests.get function on each call. This makes the REST class somewhat similar to a requests.Session. E.g., proxies or
authorisation data needs only be provided once. The keyword arguments are
stored in self.config. Modify this dict to issue the next 'get' request with
changed arguments.
"""
max_size = 2 ** 24
'''upper bound for in-memory temp file. Larger files will be spooled from disc'''
def __init__(self, cache, http_cfg):
default_cfg = dict(stream=True, timeout=30.1)
for it in default_cfg.items():
http_cfg.setdefault(*it)
self.config = DictLike(http_cfg)
if cache:
requests_cache.install_cache(**cache)
[docs] def get(self, url, fromfile=None, params={}, headers={}):
'''Get SDMX message from REST service or local file
Args:
url(str): URL of the REST service without the query part
If None, fromfile must be set. Default is None
params(dict): will be appended as query part to the URL after a '?'
fromfile(str): path to SDMX file containing an SDMX message.
It will be passed on to the
reader for parsing.
headers(dict): http headers. Overwrite instance-wide headers.
Default is {}
Returns:
tuple: three objects:
0. file-like object containing the SDMX message
1. the complete URL, if any, including the query part
constructed from params
2. the status code
Raises:
HTTPError if SDMX service responded with
status code 401. Otherwise, the status code
is returned
'''
if fromfile:
try:
# Load data from local file
# json files must be opened in text mode, all others in binary as
# they may be zip files or xml.
if fromfile.endswith('.json'):
mode_str = 'r'
else:
mode_str = 'rb'
source = open(fromfile, mode_str)
except TypeError:
# so fromfile must be file-like
source = fromfile
final_url = resp_headers = status_code = None
else:
source, final_url, resp_headers, status_code = self.request(
url, params=params, headers=headers)
return source, final_url, resp_headers, status_code
[docs] def request(self, url, params={}, headers={}):
"""
Retrieve SDMX messages.
If needed, override in subclasses to support other data providers.
:param url: The URL of the message.
:type url: str
:return: the xml data as file-like object
"""
# Generate current config. Merge in any given headers
cur_config = self.config.copy()
if 'headers' in cur_config:
cur_config['headers'] = cur_config['headers'].copy()
cur_config['headers'].update(headers)
else:
cur_config['headers'] = headers
with closing(requests.get(url, params=params, **cur_config)) as response:
if response.status_code == requests.codes.OK:
# Prepare the temp file. xml content will be
# stored in a binary file, json in a textfile.
if (response.headers.get('Content-Type')
and ('json' in response.headers['Content-Type'])):
enc, fmode = response.encoding, 'w+t'
else:
enc, fmode = None, 'w+b'
# Create temp file ensuring 2to3 compatibility
if str_type == str: # we are on py3
source = STF(
max_size=self.max_size, mode=fmode, encoding=enc)
else:
# On py27 we must omit the 'encoding' kwarg
source = STF(max_size=self.max_size, mode=fmode)
for c in response.iter_content(chunk_size=1000000,
decode_unicode=bool(enc)):
source.write(c)
else:
source = None
code = int(response.status_code)
if 400 <= code <= 499:
raise response.raise_for_status()
return source, response.url, response.headers, code