Skip to content Skip to sidebar Skip to footer

How To Split a Column Value In a Dataframe Into Multiple Columns

I need to split a dataframe column into multiple columns so that each cell contains only two digits. The current dataframe looks like: Name | Number

Solution 1:

You can use str.findall("..") to split the values, then join the list on the original df. Use apply to get the complete/incomplete status.

import pandas as pd

# Sample data. Code is stored as an integer, so leading zeros are lost and a
# missing code is simply 0; every non-zero code in this sample has 8 digits.
df = pd.DataFrame({
    "Name": ["Tom", "Nick", "Juli", "June", "Junw"],
    "Number": [78797071, 0, 0, 39797571, 0],
    "Code": [0, 89797071, 57797074, 0, 23000000],
})

# Pad back to 8 digits before cutting the string into two-character pairs.
# Without the padding a shorter code would fill DIV0.. from the left and get
# its missing pairs appended at the end instead of the front.
pairs = df["Code"].astype(str).str.zfill(8).str.findall("..")
divs = pd.DataFrame(pairs.tolist(), index=df.index).add_prefix("DIV")

# Rows with fewer pairs than the longest row hold NaN; treat those as "00".
# Only the new DIV columns are filled, the original columns are left alone.
divs = divs.fillna("00")
df = df.join(divs)

# A row is incomplete as soon as any of its pairs is "00". Working on `divs`
# avoids depending on the positions of the DIV columns inside df.
df["Incomplete"] = divs.apply(
    lambda row: "incomplete" if row.eq("00").any() else "complete", axis=1
)

print(df)

#
   Name    Number      Code DIV0 DIV1 DIV2 DIV3  Incomplete
0   Tom  78797071         0   00   00   00   00  incomplete
1  Nick         0  89797071   89   79   70   71    complete
2  Juli         0  57797074   57   79   70   74    complete
3  June  39797571         0   00   00   00   00  incomplete
4  Junw         0  23000000   23   00   00   00  incomplete

Solution 2:

Try this quick fix.

import pandas as pd
import re

# data-preprocessing
data = {
    'Name': ['Tom', 'Nick', 'Juli', 'June', 'Junw'],
    'Code': ['0', '89797071', '57797074', '0', '23000000'],
}

# The Number key is omitted from data; only Code is split here.

df = pd.DataFrame(data)

print(df)

# find patterns

pattern = r'(\d{2})(\d{2})(\d{2})(\d{2})'  # four consecutive two-digit groups
zero_pattern = r'0{1,}'                    # one or more zeros

col_name = ['DIV1', 'DIV2', 'DIV3', 'DIV4', 'Incomplete']


def split_code(code):
    """Return the row ``[DIV1, DIV2, DIV3, DIV4, status]`` for one code string.

    Exactly one row is returned for every input, so the rows built from a
    column always line up with that column:

    * a code containing eight consecutive digits is cut into four pairs and
      is 'incomplete' when any pair is '00', otherwise 'complete';
    * a code made up only of zeros yields four '0' cells and 'incomplete';
    * anything else yields four ``None`` cells and 'incomplete'.
    """
    found = re.search(pattern, code)
    if found:
        pairs = list(found.groups())
        status = 'incomplete' if '00' in pairs else 'complete'
        return pairs + [status]

    # fullmatch, not match: a code that merely starts with 0 is not "all zeros".
    if re.fullmatch(zero_pattern, code):
        return ['0', '0', '0', '0', 'incomplete']

    # Unrecognised code: still emit a row so later rows do not shift upwards.
    return [None, None, None, None, 'incomplete']


split_data = [split_code(code) for code in df['Code']]

# make right dataframe

df2 = pd.DataFrame(split_data, columns=col_name)

df = df.join(df2)

print(df)

Output

   Name      Code
0   Tom         0
1  Nick  89797071
2  Juli  57797074
3  June         0
4  Junw  23000000

   Name      Code DIV1 DIV2 DIV3 DIV4  Incomplete
0   Tom         0    0    0    0    0  incomplete
1  Nick  89797071   89   79   70   71    complete
2  Juli  57797074   57   79   70   74    complete
3  June         0    0    0    0    0  incomplete
4  Junw  23000000   23   00   00   00  incomplete

Solution 3:

You can do it using the string functions zfill and findall, as shown below.


## the np.str alias was removed from NumPy; the builtin str is the equivalent dtype
df.Code = df.Code.astype(str)

## zfill pads each string with leading zeros to a length of 8,
## findall extracts each pair of digits,
## explode splits each list into rows (explode works with pandas 0.25 and above),
## reshape turns the flat result into 4 columns
arr = df.Code.str.zfill(8).str.findall(r"(\d\d)").explode().values.reshape(-1, 4)

## create new dataframe from arr with given column names
df2 = pd.DataFrame(arr, columns=[f"Div{i+1}" for i in range(arr.shape[1])])

## set "Incomplete" column to incomplete if any column of the row contains "00"
df2["Incomplete"] = np.where(np.any(arr == "00", axis=1), "incomplete", "complete")

## keep the combined frame; a bare expression is only displayed in a notebook
result = pd.concat([df, df2], axis=1)
print(result)


Result

   Name    Number      Code Div1 Div2 Div3 Div4  Incomplete
0   Tom  78797071         0   00   00   00   00  incomplete
1  Nick            89797071   89   79   70   71    complete
2  Juli            57797074   57   79   70   74    complete
3  June  39797571         0   00   00   00   00  incomplete
4  Junw            23000000   23   00   00   00  incomplete

Post a Comment for "How To Split a Column Value In a Dataframe Into Multiple Columns"