[英]Apply string function to data frame
任务是用 HTML 标签把 Excel 文件中的 URL 包裹起来。为此,我写了一个函数,下面的代码可以处理名为 ANSWER 的那一列:
import pandas as pd
import numpy as np
import string
import re
# Matches an http(s) URL up to (but not including) the first single quote,
# comma, closing parenthesis, '<', ';' or whitespace character.
_URL_PATTERN = re.compile(r"https?://[^',)<;\s]+")


def hyperlinksWrapper(myString):
    """Wrap every URL found in *myString* in an HTML ``<a>`` tag.

    Args:
        myString: The cell value to process. Values that are not ``str``
            (numbers, NaN, None, ...) are returned unchanged so the function
            can be applied to every cell of a data frame.

    Returns:
        The input with each URL replaced by
        ``<a href="URL">URL</a>``, or the input itself when it is not a
        string.
    """
    if not isinstance(myString, str):
        return myString
    # A single substitution pass rewrites each match exactly once. Replacing
    # the URLs one by one with str.replace would wrap a link again whenever
    # the same URL occurs twice or one URL is a prefix of another.
    return _URL_PATTERN.sub(r'<a href="\g<0>">\g<0></a>', myString)
# Open the original Excel file
filename = "Excel.xlsx"
df = pd.read_excel(filename)
# Fill the empty cells of the ANSWER column with "n/a" and wrap the URLs of
# every cell. The whole column is assigned back in one step: chained
# assignments such as ``df.ANSWER[i] = ...`` may write to a temporary copy,
# and indexing with range(len(...)) only works for a 0..n-1 index.
df["ANSWER"] = df["ANSWER"].fillna("n/a").apply(hyperlinksWrapper)
# Export to Excel
df.to_excel('Excel_refined.xlsx')
问题是:如何在不指定具体列名的情况下,对 dataframe 中的所有列(即每个单元格)应用该函数,而不是只处理某一列?
也许你正在寻找这样的东西:
import pandas as pd
import numpy as np
import string
import re
# Matches an http(s) URL up to (but not including) the first single quote,
# comma, closing parenthesis, '<', ';' or whitespace character.
_URL_PATTERN = re.compile(r"https?://[^',)<;\s]+")


def hyperlinksWrapper(myString):
    """Wrap every URL found in *myString* in an HTML ``<a>`` tag.

    Args:
        myString: The cell value to process. Values that are not ``str``
            (numbers, NaN, None, ...) are returned unchanged so the function
            can be applied to every cell of a data frame.

    Returns:
        The input with each URL replaced by
        ``<a href="URL">URL</a>``, or the input itself when it is not a
        string.
    """
    if not isinstance(myString, str):
        return myString
    # A single substitution pass rewrites each match exactly once. Replacing
    # the URLs one by one with str.replace would wrap a link again whenever
    # the same URL occurs twice or one URL is a prefix of another.
    return _URL_PATTERN.sub(r'<a href="\g<0>">\g<0></a>', myString)
# dummy dataframe
raw = pd.DataFrame(
    {
        'answer_col1': ['https://example.com', 'https://example.org', np.nan],
        'answer_col2': ['https://example.net', 'Hello', 'World'],
    }
)
# as suggested in the comments (replaces all NaNs in the frame)
raw.fillna("n/a", inplace=True)

# The two options below are alternatives. Each one starts from the unwrapped
# frame: running hyperlinksWrapper on cells that already contain <a> tags
# would match the URLs inside the tags again and corrupt the links.

# option 1
# loops over every column and applies hyperlinksWrapper to every row in it
df = raw.copy()
for col in df.columns:
    df[col] = df[col].apply(hyperlinksWrapper)

# [UPDATED] option 2
# applies hyperlinksWrapper to every element of the frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use the new
# name when it exists and fall back to the old one on earlier versions.
if hasattr(pd.DataFrame, "map"):
    df = raw.map(hyperlinksWrapper)
else:
    df = raw.applymap(hyperlinksWrapper)
df.head()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.