Skip to content Skip to sidebar Skip to footer

Python Pandas Fill Dataframe With Missing Values

I have this dataframe as an example import pandas as pd #create dataframe df = pd.DataFrame([['DE', 'Table',201705,201705, 1000], ['DE', 'Table',201705,201704, 1000],\

Solution 1:

Here is my solution.I believe some genius will provide better solution~ Let us waiting for it ~

df1=df.groupby('ISO').agg({'Billed Week' : np.max,'Created Week' : np.min})
df1['ISO']=df1.index

     Created Week  Billed Week ISO
ISO                               
AT         201704201708  AT
DE         201701201705  DE

ISO=[]
BilledWeek=[]
CreateWeek=[]
for i inrange(len(df1)):
    BilledWeek.extend([df1.ix[i,1]]*(df1.ix[i,1]-df1.ix[i,0]+1))
    CreateWeek.extend(list(range(df1.ix[i,0],df1.ix[i,1]+1)))
    ISO.extend([df1.ix[i,2]]*(df1.ix[i,1]-df1.ix[i,0]+1))
DF=pd.DataFrame({'BilledWeek':BilledWeek,'CreateWeek':CreateWeek,'ISO':ISO})
Target=DF.merge(df,left_on=['BilledWeek','CreateWeek','ISO'],right_on=['Billed Week','Created Week','ISO'],how='left')
Target.Billings.fillna(0,inplace=True)
Target=Target.drop(['Billed Week',  'Created Week'],axis=1)
Target['Product']=Target.groupby('ISO')['Product'].ffill()

Out[75]: 
   BilledWeek  CreateWeek ISO Product  Billings
0201708201704  AT   Table    1000.01201708201705  AT   Table    1000.02201708201706  AT   Table    1000.03201708201707  AT   Table       0.04201708201708  AT   Table    1000.05201705201701  DE   Table    1000.06201705201702  DE   Table    1000.07201705201703  DE   Table       0.08201705201704  DE   Table    1000.09201705201705  DE   Table    1000.0

Solution 2:

def seqfix(x):
    s = x['Created Week']
    x = x.set_index('Created Week')
    x = x.reindex(range(min(s), max(s)+1))
    x['Billings'] = x['Billings'].fillna(0)
    x = x.ffill().reset_index()
    return x

df = df.groupby(['ISO', 'Billed Week']).apply(seqfix).reset_index(drop=True)
df[['Billed Week', 'Billings']] = df[['Billed Week', 'Billings']].astype(int)
df = df[['ISO', 'Product', 'Billed Week', 'Created Week', 'Billings']]

print(df)

  ISO Product  Billed Week  Created Week  Billings
0ATTable20170820170410001ATTable20170820170510002ATTable20170820170610003ATTable20170820170704ATTable20170820170810005  DE   Table20170520170110006  DE   Table20170520170210007  DE   Table20170520170308  DE   Table20170520170410009  DE   Table2017052017051000

Solution 3:

Build a MultiIndex with all the gaps in Created Weeks filled and then reindex the original DF.

idx = (df.groupby(['Billed Week'])
       .apply(lambda x: [(x['ISO'].min(),
                          x['Product'].min(),
                          x['Billed Week'].min(),
                          e) for e inrange(x['Created Week'].min(), x['Created Week'].max()+1)])
       .tolist()
)

multi_idx = pd.MultiIndex.from_tuples(sum(idx,[]),names=['ISO','Product','Billed Week','Created Week'])

(df.set_index(['ISO','Product','Billed Week','Created Week'])
     .reindex(multi_idx)
     .reset_index()
     .fillna(0)
)

Out[671]: 
  ISO Product  Billed Week  Created Week  Billings
0  DE   Table2017052017011000.01  DE   Table2017052017021000.02  DE   Table2017052017030.03  DE   Table2017052017041000.04  DE   Table2017052017051000.05ATTable2017082017041000.06ATTable2017082017051000.07ATTable2017082017061000.08ATTable2017082017070.09ATTable2017082017081000.0

Post a Comment for "Python Pandas Fill Dataframe With Missing Values"