[英]Expanding Pandas Column to Separate Rows based on calculation
我有看起來像這樣的 DataFrame。
# Sample input: one row per ID group; 'List' is a list of values, or ''
# when the group has no values.
df1 = pd.DataFrame(
    data=[
        ['A, B', 2, 20, [0, 5]],
        ['C, D', 2, 40, [10, 15, 35]],
        ['E, F', 2, 20, [11, 15]],
        ['G', 1, 10, [1, 5]],
        ['H', 1, 10, ''],
        ['I, J', 2, 20, ''],
    ],
    columns=['ID', 'Divide', 'Object', 'List'],
)
| | ID | Divide | Object | List |
|---:|:-----|---------:|---------:|:-------------|
| 0 | A, B | 2 | 20 | [0, 5] |
| 1 | C, D | 2 | 40 | [10, 15, 35] |
| 2 | E, F | 2 | 20 | [11, 15] |
| 3 | G | 1 | 10 | [1, 5] |
| 4 | H | 1 | 10 | |
| 5 | I, J | 2 | 20 | |
每個ID都需要有自己的行。 但是, List列具有屬於每個ID的數據。 邏輯如下:
所以,最終的表格看起來像這樣:
| | ID | Divide | Object | List |
|---:|:-----|---------:|---------:|:-------|
| 0 | A | 2 | 20 | 0, 5 |
| 1 | B | 2 | 20 | |
| 2 | C | 2 | 40 | 10, 15 |
| 3 | D | 2 | 40 | 35 |
| 4 | E | 2 | 20 | |
| 5 | F | 2 | 20 | 11, 15 |
| 6 | G | 1 | 10 | 1, 5 |
| 7 | H | 1 | 10 | |
| 8 | I | 2 | 20 | |
| 9 | J | 2 | 20 | |
如果是列表,則可以使用 explode 來展平列表。但是我不知道如何套用 DataFrame 中的計算邏輯來解析出細節。謝謝
你可以試試這個:
import pandas as pd
# Sample frame for this answer; rows without values carry '' in 'List'.
df = pd.DataFrame(
    [
        ['A, B', 2, 20, [0, 5]],
        ['C, D', 2, 40, [10, 15, 35]],
        ['E, F', 2, 20, [11, 15]],
        ['G', 1, 10, [1, 5]],
        ['H', 1, 10, ''],
        ['I, J', 2, 20, ''],
    ],
    columns=['ID', 'Divide', 'Object', 'List'],
)
def split_list(lst, limit):
    """Partition ``lst`` into values at or below ``limit`` and values above it.

    Args:
        lst: Iterable of values comparable with ``limit``. An empty iterable,
            including the ``''`` placeholder used for rows without a list,
            produces two empty lists.
        limit: Inclusive upper bound for the first returned list.

    Returns:
        A ``(low, high)`` tuple of lists. ``low`` holds every value with
        ``value <= limit``, ``high`` holds the rest; input order is kept.
    """
    low = []
    high = []
    for value in lst:
        # Each value lands in exactly one of the two buckets.
        (low if value <= limit else high).append(value)
    return low, high
def _lists_by_id(row):
    """Pair each ID of the row with its share of the row's list."""
    at_or_below, above = split_list(row['List'], row['Limit'])
    return dict(zip(row['ID'], (at_or_below, above)))


def _list_for_own_id(row):
    """Look up the list that belongs to the row's (already exploded) ID."""
    return row['List'].get(row['ID'])


# "A, B" -> ["A", "B"], so the column can be exploded further down.
df['ID'] = df['ID'].str.split(', ')
# Values up to Object / Divide go to the first ID, larger ones to the second.
df['Limit'] = df['Object'] / df['Divide']
# Store an {ID: list} mapping per row before exploding, so that every
# exploded row can pick out its own list afterwards.
df['List'] = df.apply(_lists_by_id, axis=1)
df = df.explode('ID')
df['List'] = df.apply(_list_for_own_id, axis=1)
print(df)
# Out[192]:
# ID Divide Object List Limit
# 0 A 2 20 [0, 5] 10.0
# 0 B 2 20 [] 10.0
# 1 C 2 40 [10, 15] 20.0
# 1 D 2 40 [35] 20.0
# 2 E 2 20 [] 10.0
# 2 F 2 20 [11, 15] 10.0
# 3 G 1 10 [1, 5] 10.0
# 4 H 1 10 [] 10.0
# 5 I 2 20 [] 10.0
# 5 J 2 20 [] 10.0
df.ID_r==df["ID"].str.strip().str[:1]
import io
import json

import numpy as np  # required by np.where below
import pandas as pd

# Build the sample frame from the pipe-delimited table text.
df = (pd.read_csv(io.StringIO("""| | ID | Divide | Object | List |
| 0 | A, B | 2 | 20 | [0, 5] |
| 1 | C, D | 2 | 40 | [10, 15, 35] |
| 2 | E, F | 2 | 20 | [11, 15] |
| 3 | G | 1 | 10 | [1, 5] |
| 4 | H | 1 | 10 | |
| 5 | I, J | 2 | 20 | |"""), sep="|")
      # Header cells carry the padding of the table layout.
      .pipe(lambda d: d.rename(columns={c: c.strip() for c in d.columns}))
      # The leading/trailing "|" and the blank index header yield junk columns.
      .pipe(lambda d: d.drop(columns=[c for c in d.columns if "Unnamed" in c or c == ""]))
      # Parse "[...]" cells as JSON; a cell without a list becomes [].
      .assign(List=lambda d: d["List"].apply(lambda cell: json.loads(cell) if "[" in cell else []))
      )
# End of sample-data setup: "List" now always holds a list (empty if none).

# Explode the ID column: one row per individual ID. The join keeps the
# original combined ID and adds the single ID as "ID_r".
df = df.join(
    df["ID"].apply(lambda ids: [token.strip() for token in ids.split(",")]).explode(),
    rsuffix="_r",
)

# Real logic: the first ID of a group keeps the first two list items, any
# other ID gets the remainder.
# NOTE: the split is purely positional; "Divide" and "Object" are not used.
# Comparing against the whole first token (not just its first character)
# keeps this correct for IDs longer than one character.
first_id = df["ID"].str.split(",").str[0].str.strip()
df["List"] = np.where(
    df["ID_r"] == first_id,
    df["List"].apply(lambda items: items[:2]),
    df["List"].apply(lambda items: items[2:]),
)

# Keep the final frame in a name instead of discarding the expression.
result = df.reset_index(drop=True).drop(columns=["ID"]).rename(columns={"ID_r": "ID"})
print(result)
| | Divide | Object | List | ID |
|---:|---------:|---------:|:---------|:-----|
| 0 | 2 | 20 | [0, 5] | A |
| 1 | 2 | 20 | [] | B |
| 2 | 2 | 40 | [10, 15] | C |
| 3 | 2 | 40 | [35] | D |
| 4 | 2 | 20 | [11, 15] | E |
| 5 | 2 | 20 | [] | F |
| 6 | 1 | 10 | [1, 5] | G |
| 7 | 1 | 10 | [] | H |
| 8 | 2 | 20 | [] | I |
| 9 | 2 | 20 | [] | J |
嘗試同時對 ID 和 List 使用 explode 展開,然後根據 ID 出現的順序有條件地過濾。
import pandas as pd
# Sample input; rows without values carry '' in 'List'.
df1 = pd.DataFrame(
    data=[
        ['A, B', 2, 20, [0, 5]],
        ['C, D', 2, 40, [10, 15, 35]],
        ['E, F', 2, 20, [11, 15]],
        ['G', 1, 10, [1, 5]],
        ['H', 1, 10, ''],
        ['I, J', 2, 20, ''],
    ],
    columns=['ID', 'Divide', 'Object', 'List'],
)
# Split the comma-separated ID string into a list of individual IDs
df1['ID'] = df1['ID'].str.split(', ')
# Explode to one row per ID, then reset the index inside each original row
# (groupby(level=0)) so that the new index is the ID's position within its
# row (0 = first ID, 1 = second ID). reset_index() keeps the original row
# number in an 'index' column; droplevel(0) removes the group key level.
df1 = df1.explode('ID') \
.groupby(level=0) \
.apply(lambda x: x.reset_index()) \
.droplevel(0)
# Calculate Cap For Later: the threshold that split_lists compares the list
# values against (values <= cap for the first ID, values > cap otherwise).
# NOTE(review): cap is Object // Divide minus one, so a value equal to
# Object // Divide counts as above the cap - confirm this is intended.
df1['cap'] = df1['Object'] // df1['Divide'] - 1
def split_lists(g):
    """Blank out the list values of one ID group that belong to the other ID.

    Args:
        g: DataFrame with the rows of a single ID after 'List' has been
            exploded. It must provide the columns 'List', 'cap' and
            'level_0' (0 marks the first ID of its original row).

    Returns:
        The same frame ``g``, modified in place. When the group has more
        than one row, 'List' values on the wrong side of 'cap' are replaced
        by NaN; single-row groups are returned untouched.
    """
    # If more than 1 row and non-empty list
    if len(g) > 1 and not g['List'].empty:
        if g['level_0'].iloc[0] == 0:
            # First ID: keep values less than or equal to the cap
            keep = g['List'] <= g['cap']
        else:
            # Any other ID: keep values greater than the cap
            keep = g['List'] > g['cap']
        # The assignment aligns on the index, so rows that were filtered
        # out become NaN instead of being dropped.
        g['List'] = g['List'][keep]
    return g
# Explode the lists, group by ID and filter each group with split_lists,
# then regroup and convert back to lists.
# reset_index() adds a 'level_0' column here (df1 already has a column named
# 'index'); split_lists reads it to tell first IDs from the others.
# explode() leaves the '' placeholder of rows without a list unchanged, so it
# is removed explicitly; with dropna() alone those IDs would end up with ['']
# (which pandas prints as []) instead of an empty list.
# NOTE(review): depending on the installed pandas version, groupby().apply()
# may also add 'ID' as an index level, which can make the second
# groupby('ID') ambiguous - confirm against the pandas version in use.
df2 = df1 \
    .explode('List') \
    .reset_index() \
    .groupby('ID') \
    .apply(split_lists) \
    .groupby('ID')['List'] \
    .apply(lambda x: [v for v in x.dropna() if v != ''])
# Drop the helper columns from df1 and merge the per-ID lists back in
out = df1.drop(columns=['List', 'index', 'cap']) \
    .merge(df2, left_on='ID', right_index=True, how='left') \
    .reset_index(drop=True)
print(out)
輸出:
ID Divide Object List
0 A 2 20 [0, 5]
1 B 2 20 []
2 C 2 40 [10, 15]
3 D 2 40 [35]
4 E 2 20 []
5 F 2 20 [11, 15]
6 G 1 10 [1, 5]
7 H 1 10 []
8 I 2 20 []
9 J 2 20 []
帶有附加列的 DF1
index ID Divide Object List cap
0 0 A 2 20 [0, 5] 9
1 0 B 2 20 [0, 5] 9
0 1 C 2 40 [10, 15, 35] 19
1 1 D 2 40 [10, 15, 35] 19
0 2 E 2 20 [11, 15] 9
1 2 F 2 20 [11, 15] 9
0 3 G 1 10 [1, 5] 9
0 4 H 1 10 9
0 5 I 2 20 9
1 5 J 2 20 9
過濾和重組後的 DF2
ID
A [0, 5]
B []
C [10, 15]
D [35]
E []
F [11, 15]
G [1, 5]
H []
I []
J []
Name: List, dtype: object
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.