pandaSDMX: Statistical Data and Metadata eXchange in Python

pandaSDMX is an Apache 2.0-licensed Python package that aims to become the most intuitive and versatile tool for retrieving and acquiring statistical data and metadata disseminated in SDMX format. Out of the box, it supports the SDMX services of the European statistics office (Eurostat), the European Central Bank (ECB), and the French National Institute of Statistics and Economic Studies (INSEE). pandaSDMX can export data and metadata as pandas DataFrames, the gold standard of data analysis in Python. From pandas, you can export data and metadata to Excel, R and friends. As of version 0.4, pandaSDMX can also export data to many other file formats and database backends via Odo.

Main features

  • intuitive API inspired by requests
  • support for many SDMX features including
    • generic datasets
    • data structure definitions, code lists and concept schemes
    • dataflow definitions and content-constraints
    • categorisations and category schemes
  • pythonic representation of the SDMX information model
  • validation of column selections against code lists and content-constraints, if available, when requesting datasets
  • export data and metadata as multi-indexed pandas DataFrames or Series, and to many other formats and database backends via Odo
  • read and write SDMX messages to and from local files
  • configurable HTTP connections
  • support for requests-cache, allowing SDMX messages to be cached in memory, MongoDB, Redis or SQLite
  • extensible through custom readers and writers for alternative input and output formats of data and metadata
  • growing test suite

Example

In [1]: from pandasdmx import Request

# Get recent annual unemployment data on Greece, Ireland and Spain from Eurostat
In [2]: resp = Request('ESTAT').data('une_rt_a', key={'GEO': 'EL+ES+IE'}, params={'startPeriod': '2006'})

# Select data across age groups and write them to pandas DataFrames
In [3]: data = resp.write((s for s in resp.data.series if s.key.AGE == 'TOTAL'))

# Explore the data set. First, show dimension names
In [4]: data.columns.names
Out[4]: FrozenList(['UNIT', 'AGE', 'SEX', 'GEO', 'FREQ'])

# corresponding dimension values
In [5]: data.columns.levels
Out[5]: FrozenList([['PC_ACT', 'PC_POP', 'THS_PER'], ['TOTAL'], ['F', 'M', 'T'], ['EL', 'ES', 'IE'], ['A']])

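# Print aggregate unemployment rates across ages and sexes
# NOTE: the selection below raises a KeyError, because a partial tuple key is
# matched against the column levels from the left, and the first level is UNIT
# (not AGE). Start the key with a UNIT value instead, e.g.
# data.loc[:, ('PC_ACT', 'TOTAL', 'T')]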
In [6]: data.loc[:, ('TOTAL', 'T')]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-6-d7cae304597a> in <module>()
----> 1 data.loc[:, ('TOTAL', 'T')]

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in __getitem__(self, key)
   1292 
   1293         if type(key) is tuple:
-> 1294             return self._getitem_tuple(key)
   1295         else:
   1296             return self._getitem_axis(key, axis=0)

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
    782     def _getitem_tuple(self, tup):
    783         try:
--> 784             return self._getitem_lowerdim(tup)
    785         except IndexingError:
    786             pass

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_lowerdim(self, tup)
    889         # we may have a nested tuples indexer here
    890         if self._is_nested_tuple_indexer(tup):
--> 891             return self._getitem_nested_tuple(tup)
    892 
    893         # we maybe be using a tuple to represent multiple dimensions here

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_nested_tuple(self, tup)
    962 
    963             current_ndim = obj.ndim
--> 964             obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
    965             axis += 1
    966 

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1465         # fall thru to straight lookup
   1466         self._has_valid_type(key, axis)
-> 1467         return self._get_label(key, axis=axis)
   1468 
   1469 

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/indexing.py in _get_label(self, label, axis)
     91             raise IndexingError('no slices here, handle elsewhere')
     92 
---> 93         return self.obj._xs(label, axis=axis)
     94 
     95     def _get_loc(self, key, axis=0):

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/generic.py in xs(self, key, axis, level, copy, drop_level)
   1738 
   1739         if axis == 1:
-> 1740             return self[key]
   1741 
   1742         self._consolidate_inplace()

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
   1993             return self._getitem_frame(key)
   1994         elif is_mi_columns:
-> 1995             return self._getitem_multilevel(key)
   1996         else:
   1997             return self._getitem_column(key)

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_multilevel(self, key)
   2037 
   2038     def _getitem_multilevel(self, key):
-> 2039         loc = self.columns.get_loc(key)
   2040         if isinstance(loc, (slice, Series, np.ndarray, Index)):
   2041             new_columns = self.columns[loc]

/home/docs/checkouts/readthedocs.org/user_builds/pandasdmx/envs/master/lib/python3.4/site-packages/pandas/indexes/multi.py in get_loc(self, key, method)
   1579 
   1580         if start == stop:
-> 1581             raise KeyError(key)
   1582 
   1583         if not follow_key:

KeyError: ('TOTAL', 'T')

Indices and tables