Python Pandas Fill Dataframe With Missing Values
I have this dataframe as an example import pandas as pd #create dataframe df = pd.DataFrame([['DE', 'Table',201705,201705, 1000], ['DE', 'Table',201705,201704, 1000],\
Solution 1:
Here is my solution.I believe some genius will provide better solution~ Let us waiting for it ~
df1=df.groupby('ISO').agg({'Billed Week' : np.max,'Created Week' : np.min})
df1['ISO']=df1.index
Created Week Billed Week ISO
ISO
AT 201704201708 AT
DE 201701201705 DE
ISO=[]
BilledWeek=[]
CreateWeek=[]
for i inrange(len(df1)):
BilledWeek.extend([df1.ix[i,1]]*(df1.ix[i,1]-df1.ix[i,0]+1))
CreateWeek.extend(list(range(df1.ix[i,0],df1.ix[i,1]+1)))
ISO.extend([df1.ix[i,2]]*(df1.ix[i,1]-df1.ix[i,0]+1))
DF=pd.DataFrame({'BilledWeek':BilledWeek,'CreateWeek':CreateWeek,'ISO':ISO})
Target=DF.merge(df,left_on=['BilledWeek','CreateWeek','ISO'],right_on=['Billed Week','Created Week','ISO'],how='left')
Target.Billings.fillna(0,inplace=True)
Target=Target.drop(['Billed Week', 'Created Week'],axis=1)
Target['Product']=Target.groupby('ISO')['Product'].ffill()
Out[75]:
BilledWeek CreateWeek ISO Product Billings
0201708201704 AT Table 1000.01201708201705 AT Table 1000.02201708201706 AT Table 1000.03201708201707 AT Table 0.04201708201708 AT Table 1000.05201705201701 DE Table 1000.06201705201702 DE Table 1000.07201705201703 DE Table 0.08201705201704 DE Table 1000.09201705201705 DE Table 1000.0
Solution 2:
def seqfix(x):
s = x['Created Week']
x = x.set_index('Created Week')
x = x.reindex(range(min(s), max(s)+1))
x['Billings'] = x['Billings'].fillna(0)
x = x.ffill().reset_index()
return x
df = df.groupby(['ISO', 'Billed Week']).apply(seqfix).reset_index(drop=True)
df[['Billed Week', 'Billings']] = df[['Billed Week', 'Billings']].astype(int)
df = df[['ISO', 'Product', 'Billed Week', 'Created Week', 'Billings']]
print(df)
ISO Product Billed Week Created Week Billings
0ATTable20170820170410001ATTable20170820170510002ATTable20170820170610003ATTable20170820170704ATTable20170820170810005 DE Table20170520170110006 DE Table20170520170210007 DE Table20170520170308 DE Table20170520170410009 DE Table2017052017051000
Solution 3:
Build a MultiIndex with all the gaps in Created Weeks filled and then reindex the original DF.
idx = (df.groupby(['Billed Week'])
.apply(lambda x: [(x['ISO'].min(),
x['Product'].min(),
x['Billed Week'].min(),
e) for e inrange(x['Created Week'].min(), x['Created Week'].max()+1)])
.tolist()
)
multi_idx = pd.MultiIndex.from_tuples(sum(idx,[]),names=['ISO','Product','Billed Week','Created Week'])
(df.set_index(['ISO','Product','Billed Week','Created Week'])
.reindex(multi_idx)
.reset_index()
.fillna(0)
)
Out[671]:
ISO Product Billed Week Created Week Billings
0 DE Table2017052017011000.01 DE Table2017052017021000.02 DE Table2017052017030.03 DE Table2017052017041000.04 DE Table2017052017051000.05ATTable2017082017041000.06ATTable2017082017051000.07ATTable2017082017061000.08ATTable2017082017070.09ATTable2017082017081000.0
Post a Comment for "Python Pandas Fill Dataframe With Missing Values"