viernes, 14 de octubre de 2016

Collect GDP forecast using OCDE API with python 3.4

Collect GDP forecast using OCDE API with python 2.7

Rafael Valero Fernandez 14/10/2016
Gather data using OCDE API (After writing this entrances I realice a possible easier way, by using padas data reader: https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#oecd)
Summary
Step 2) Read the previous link with python
Step 3) get the json
Step 4) Parse json into dataframe pandas

In [60]:
request_URL = "http://stats.oecd.org/sdmx-json/data/EO/GBR.ET_ANNPCT.A+Q/all?detail=Full&dimensionAtObservation=AllDimensions&startPeriod=2016"

# libraries to obtain the data from web

import urllib
import json


html_text = urllib.urlopen(request_URL)
html_text
Out[60]:
<addinfourl at 146112136L whose fp = <socket._fileobject object at 0x000000000444EF48>>
In [61]:
data = json.load(html_text)
data
Out[61]:
{u'dataSets': [{u'action': u'Information',
   u'observations': {u'0:0:0:0': [1.1540489105753, 0, None, 0, 0, None],
    u'0:0:0:1': [0.581208615963739, 0, None, 0, 0, None],
    u'0:0:1:2': [0.750000000000886, 1, None, 0, 0, None],
    u'0:0:1:3': [0.650000000000173, 1, None, 0, 0, None],
    u'0:0:1:4': [0.619999999999355, 1, None, 0, 0, None],
    u'0:0:1:5': [0.600000000000531, 1, None, 0, 0, None],
    u'0:0:1:6': [0.579999999999496, 1, None, 0, 0, None],
    u'0:0:1:7': [0.56000000000028, 1, None, 0, 0, None],
    u'0:0:1:8': [0.540000000000551, 1, None, 0, 0, None],
    u'0:0:1:9': [0.529999999999949, 1, None, 0, 0, None]}}],
 u'header': {u'id': u'991b95e3-2946-4107-b472-e0fdaa9e6c27',
  u'links': [{u'href': u'http://stats.oecd.org:80/sdmx-json/data/EO/GBR.ET_ANNPCT.A+Q/all?detail=Full&dimensionAtObservation=AllDimensions&startPeriod=2016',
    u'rel': u'request'}],
  u'prepared': u'2016-10-14T08:40:25.356Z',
  u'sender': {u'id': u'OECD',
   u'name': u'Organisation for Economic Co-operation and Development'},
  u'test': False},
 u'structure': {u'annotations': [{u'text': u'',
    u'title': u'Copyright OECD - All rights reserved',
    u'uri': u''},
   {u'text': u'',
    u'title': u'Terms and Conditions',
    u'uri': u'http://www.oecd.org/termsandconditions/'},
   {u'text': u'',
    u'title': u'Privacy Policy',
    u'uri': u'http://www.oecd.org/privacy/'},
   {u'text': u'', u'title': u'MyOECD', u'uri': u'https://www.oecd.org/login'},
   {u'text': u'',
    u'title': u'Contact Us',
    u'uri': u'http://www.oecd.org/contact/'}],
  u'attributes': {u'dataSet': [],
   u'observation': [{u'id': u'TIME_FORMAT',
     u'name': u'Time Format',
     u'values': [{u'id': u'P1Y', u'name': u'Annual'},
      {u'id': u'P3M', u'name': u'Quarterly'}]},
    {u'id': u'OBS_STATUS', u'name': u'Observation Status', u'values': []},
    {u'id': u'UNIT',
     u'name': u'Unit',
     u'role': u'UNIT_MEASURE',
     u'values': [{u'id': u'PC', u'name': u'Percentage'}]},
    {u'default': u'0',
     u'id': u'POWERCODE',
     u'name': u'Unit multiplier',
     u'role': u'UNIT_MULT',
     u'values': [{u'id': u'0', u'name': u'Units'}]},
    {u'id': u'REFERENCEPERIOD',
     u'name': u'Reference period',
     u'role': u'BASE_PER',
     u'values': []}],
   u'series': []},
  u'description': u'Economic Outlook No 99 - June 2016',
  u'dimensions': {u'observation': [{u'id': u'LOCATION',
     u'keyPosition': 0,
     u'name': u'Country',
     u'role': u'REF_AREA',
     u'values': [{u'id': u'GBR', u'name': u'United Kingdom'}]},
    {u'id': u'VARIABLE',
     u'keyPosition': 1,
     u'name': u'Variable',
     u'values': [{u'id': u'ET_ANNPCT', u'name': u'Total employment, growth'}]},
    {u'id': u'FREQUENCY',
     u'keyPosition': 2,
     u'name': u'Frequency',
     u'role': u'FREQ',
     u'values': [{u'id': u'A', u'name': u'Annual'},
      {u'id': u'Q', u'name': u'Quarterly'}]},
    {u'id': u'TIME_PERIOD',
     u'name': u'Time',
     u'role': u'TIME_PERIOD',
     u'values': [{u'id': u'2016', u'name': u'2016'},
      {u'id': u'2017', u'name': u'2017'},
      {u'id': u'2016-Q1', u'name': u'Q1-2016'},
      {u'id': u'2016-Q2', u'name': u'Q2-2016'},
      {u'id': u'2016-Q3', u'name': u'Q3-2016'},
      {u'id': u'2016-Q4', u'name': u'Q4-2016'},
      {u'id': u'2017-Q1', u'name': u'Q1-2017'},
      {u'id': u'2017-Q2', u'name': u'Q2-2017'},
      {u'id': u'2017-Q3', u'name': u'Q3-2017'},
      {u'id': u'2017-Q4', u'name': u'Q4-2017'}]}]},
  u'links': [{u'href': u'http://stats.oecd.org/sdmx-json/dataflow/EO/all',
    u'rel': u'dataflow'}],
  u'name': u'Economic Outlook No 99 - June 2016'}}
In [54]:
type(data)
Out[54]:
dict
In [62]:
values_I_want=data['dataSets'][0]['observations']
values_I_want
Out[62]:
{u'0:0:0:0': [1.1540489105753, 0, None, 0, 0, None],
 u'0:0:0:1': [0.581208615963739, 0, None, 0, 0, None],
 u'0:0:1:2': [0.750000000000886, 1, None, 0, 0, None],
 u'0:0:1:3': [0.650000000000173, 1, None, 0, 0, None],
 u'0:0:1:4': [0.619999999999355, 1, None, 0, 0, None],
 u'0:0:1:5': [0.600000000000531, 1, None, 0, 0, None],
 u'0:0:1:6': [0.579999999999496, 1, None, 0, 0, None],
 u'0:0:1:7': [0.56000000000028, 1, None, 0, 0, None],
 u'0:0:1:8': [0.540000000000551, 1, None, 0, 0, None],
 u'0:0:1:9': [0.529999999999949, 1, None, 0, 0, None]}
In [63]:
for i in values_I_want:
    print(i)
0:0:1:4
0:0:1:8
0:0:1:9
0:0:1:5
0:0:0:1
0:0:0:0
0:0:1:6
0:0:1:7
0:0:1:2
0:0:1:3
In [70]:
import pandas as pd
import numpy as np

auxiliar_1= values_I_want.values()
auxiliar_2 = len(auxiliar_1)

data_values=np.empty([auxiliar_2,1])

for i in range(0,auxiliar_2):
     data_values[i] = auxiliar_1[i][0]
In [71]:
data_values
Out[71]:
array([[ 0.62      ],
       [ 0.54      ],
       [ 0.53      ],
       [ 0.6       ],
       [ 0.58120862],
       [ 1.15404891],
       [ 0.58      ],
       [ 0.56      ],
       [ 0.75      ],
       [ 0.65      ]])
In [85]:
data['structure']['dimensions']['observation'][3]['values'][1]['id']
Out[85]:
u'2017'
In [179]:
auxiliar_1= data['structure']['dimensions']['observation'][3]['values']
auxiliar_2 = len(auxiliar_1)



index_df=[]
for i in range(0,auxiliar_2):
     index_df.append(auxiliar_1[i]['id'].encode('ascii'))


index_df
Out[179]:
['2016',
 '2017',
 '2016-Q1',
 '2016-Q2',
 '2016-Q3',
 '2016-Q4',
 '2017-Q1',
 '2017-Q2',
 '2017-Q3',
 '2017-Q4']
In [185]:
df = pd.DataFrame(data_values,index=index_df)
df.columns=['gdp_projected']
df
Out[185]:

gdp_projected
20160.620000
20170.540000
2016-Q10.530000
2016-Q20.600000
2016-Q30.581209
2016-Q41.154049
2017-Q10.580000
2017-Q20.560000
2017-Q30.750000
2017-Q40.650000



No hay comentarios:

Publicar un comentario