pandas_multiindex_json

Pandas MultiIndex JSON

In [1]:
import pandas as pd
import numpy as np
import json

DataFrame From Dictionary

In [2]:
d = {}
for i in range(2):
    for j,k in enumerate(np.random.rand(2)):
        d[(i,j)] = {'data1':k, 'data2':np.square(k), 'data3':np.sqrt(k), 'data4':np.log(k)}
In [3]:
d
Out[3]:
{(0, 0): {'data1': 0.63969688287219773,
  'data2': 0.40921210195640628,
  'data3': 0.79981052935817099,
  'data4': -0.44676083533423155},
 (0, 1): {'data1': 0.63735524364866158,
  'data2': 0.4062217066064448,
  'data3': 0.79834531604354109,
  'data4': -0.45042809651998916},
 (1, 0): {'data1': 0.79321798000555221,
  'data2': 0.62919476380408867,
  'data3': 0.89062785719151649,
  'data4': -0.23165721491116487},
 (1, 1): {'data1': 0.7730684926552801,
  'data2': 0.5976348943363069,
  'data3': 0.87924313625713346,
  'data4': -0.25738762803893295}}
In [4]:
df = pd.DataFrame.from_dict(d, orient='index')
In [5]:
df.index.names = ['category1', 'category2']
In [6]:
df
Out[6]:
data2 data1 data4 data3
category1 category2
0 0 0.409212 0.639697 -0.446761 0.799811
1 0.406222 0.637355 -0.450428 0.798345
1 0 0.629195 0.793218 -0.231657 0.890628
1 0.597635 0.773068 -0.257388 0.879243

Alternatively, we could have done the reverse, by making columns the multindex.

In [7]:
df2 = pd.DataFrame.from_dict(d, orient='columns')
In [8]:
df2.columns.names = ['category1', 'category2']
In [9]:
df2
Out[9]:
category1 0 1
category2 0 1 0 1
data1 0.639697 0.637355 0.793218 0.773068
data2 0.409212 0.406222 0.629195 0.597635
data3 0.799811 0.798345 0.890628 0.879243
data4 -0.446761 -0.450428 -0.231657 -0.257388

Dictionary From DataFrame

In [10]:
d = df.to_dict()
In [11]:
d
Out[11]:
{'data1': {(0, 0): 0.63969688287219773,
  (0, 1): 0.63735524364866158,
  (1, 0): 0.79321798000555221,
  (1, 1): 0.7730684926552801},
 'data2': {(0, 0): 0.40921210195640628,
  (0, 1): 0.4062217066064448,
  (1, 0): 0.62919476380408867,
  (1, 1): 0.5976348943363069},
 'data3': {(0, 0): 0.79981052935817099,
  (0, 1): 0.79834531604354109,
  (1, 0): 0.89062785719151649,
  (1, 1): 0.87924313625713346},
 'data4': {(0, 0): -0.44676083533423155,
  (0, 1): -0.45042809651998916,
  (1, 0): -0.23165721491116487,
  (1, 1): -0.25738762803893295}}

To get back to original format we had, first transpose, then convert to dictionary.

In [12]:
d = df.T.to_dict()

Of course, since df2 is in fact the "Transpose" of df, doing df2.to_dict() would be the same as the above method.

In [13]:
d
Out[13]:
{(0, 0): {'data1': 0.63969688287219773,
  'data2': 0.40921210195640628,
  'data3': 0.79981052935817099,
  'data4': -0.44676083533423155},
 (0, 1): {'data1': 0.63735524364866158,
  'data2': 0.4062217066064448,
  'data3': 0.79834531604354109,
  'data4': -0.45042809651998916},
 (1, 0): {'data1': 0.79321798000555221,
  'data2': 0.62919476380408867,
  'data3': 0.89062785719151649,
  'data4': -0.23165721491116487},
 (1, 1): {'data1': 0.7730684926552801,
  'data2': 0.5976348943363069,
  'data3': 0.87924313625713346,
  'data4': -0.25738762803893295}}

Write JSON File

JSON only support string keys, and therefore won't accept our tuple from Pandas multiindex. Converting it to a string would work, and below is a full example on how to do this, however, you should probably consider writing as a simply csv.

In [14]:
d = {str(k):v for k,v in d.items()}
In [15]:
d
Out[15]:
{'(0, 0)': {'data1': 0.63969688287219773,
  'data2': 0.40921210195640628,
  'data3': 0.79981052935817099,
  'data4': -0.44676083533423155},
 '(0, 1)': {'data1': 0.63735524364866158,
  'data2': 0.4062217066064448,
  'data3': 0.79834531604354109,
  'data4': -0.45042809651998916},
 '(1, 0)': {'data1': 0.79321798000555221,
  'data2': 0.62919476380408867,
  'data3': 0.89062785719151649,
  'data4': -0.23165721491116487},
 '(1, 1)': {'data1': 0.7730684926552801,
  'data2': 0.5976348943363069,
  'data3': 0.87924313625713346,
  'data4': -0.25738762803893295}}
In [16]:
with open('file.json', 'w') as f:
    f.write(json.dumps(d, indent=4))

Read JSON File

In [17]:
from ast import literal_eval
In [18]:
with open('file.json') as f:
    d = json.loads(f.read())
In [19]:
d = {literal_eval(k):v for k,v in d.items()}
In [20]:
d
Out[20]:
{(0, 0): {'data1': 0.6396968828721977,
  'data2': 0.4092121019564063,
  'data3': 0.799810529358171,
  'data4': -0.44676083533423155},
 (0, 1): {'data1': 0.6373552436486616,
  'data2': 0.4062217066064448,
  'data3': 0.7983453160435411,
  'data4': -0.45042809651998916},
 (1, 0): {'data1': 0.7932179800055522,
  'data2': 0.6291947638040887,
  'data3': 0.8906278571915165,
  'data4': -0.23165721491116487},
 (1, 1): {'data1': 0.7730684926552801,
  'data2': 0.5976348943363069,
  'data3': 0.8792431362571335,
  'data4': -0.25738762803893295}}