繁体   English   中英

如何从一个文件中读取内容,并且仅当某些值在指定范围内时才写出到另一个文件?

[英]How can I read in from a file, then write out to another file only if certain values are in a range?

这是我正在读取的peaks_ef.xpk文件中的示例。

label dataset sw sf
1H 1H_2
NOESY_F1eF2f.nv
4807.69238281 4803.07373047
600.402832031 600.402832031
1H.L 1H.P 1H.W 1H.B 1H.E 1H.J 1H.U 1H_2.L 1H_2.P 1H_2.W 1H_2.B 1H_2.E 1H_2.J 1H_2.U vol int stat comment flag0 flag8 flag9
0 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H1'} 5.82020 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
1 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H1'} 5.82020 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
2 {1.H3'} 4.70891 0.05000 0.10000 ++ {0.0} {} {1.H8} 8.13712 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
3 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H8} 8.13712 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
4 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H1'} 5.90291 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
5 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H1'} 5.90291 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
6 {2.H3'} 4.60420 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
7 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
8 {1.H3'} 4.70891 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
9 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
10 {3.H5} 5.20481 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0

我想将 1H.P 和 1H_2.P 两列中的值写入另一个文件,但只想保留落在一定范围内的值。我原以为我的代码就是这样做的:mask 变量不是应该把这些值“过滤”出来吗?

这是我的代码:

import pandas as pd
import os
import sys
import re

# Script from the question: scans a peak list for pairs of shift values,
# writes them to tclust.txt, then filters a peak table with a boolean mask
# and writes atom labels (tclust.txt) and label/shift pairs (tclust_ppm.txt).
i=0;
contents_peak=[]  # never used below
peak_lines=[]
with open ("ee_pinkH1.xpk","r") as peakPPM:
    for PPM in peakPPM.readlines():
        # Each match is one whitespace character followed by a number of the
        # form d.ddd whose leading digit is 1-9; the whitespace character is
        # part of the matched text.
        float_num = re.findall("[\s][1-9]{1}\.[0-9]+",PPM)
        # Only lines with at least two such numbers produce a "Peak" entry.
        if (len(float_num)>1):
            i=i+1
            value = ('Peak '+ str(i) + ' ' + str(float_num[0]) + ' 0.05 ' + str(float_num[1]) + ' 0.05' + '\n')
            peak_lines.append(value)
# "w+" truncates tclust.txt; the header line and all peak lines are written.
tclust_peak = open("tclust.txt","w+")
tclust_peak.write("rbclust \n")
for value in peak_lines:
    tclust_peak.write(value)
tclust_peak.close()

result={}
# `text` selects which peak table is read; `filename` selects which shift
# window (mask) is built for it.
text = 'ee'
filename= 'ee_pinkH1.xpk'

if text == 'ee':
    df=pd.read_csv("peaks_ee.xpk",sep=" ", skiprows=5)

    shift1 = df["1H.P"]
    shift2 = df["1H_2.P"]

    # Row-wise boolean mask: both shifts must lie strictly inside their range.
    if filename=='ee_pinkH1.xpk':
        mask = ((shift1>5.1) & (shift1<6)) & ((shift2>7) & (shift2<8.25))
    elif filename == 'ee_pinkH2.xpk':
        mask = ((shift1>3.25)&(shift1<5))&((shift2>7)&(shift2<8.5))

if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)
    shift1=df["1H.P"]
    shift2=df["1H_2.P"]

    if filename == 'ef_blue.xpk':
        mask = ((shift1>5) & (shift1<6)) & ((shift2>7.25) & (shift2<8.25))
    elif filename == 'ef_green.xpk':
        mask = ((shift1>7) & (shift1<9)) & ((shift2>5.25) & (shift2<6.2))
    elif filename == 'ef_orange.xpk':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5.2) & (shift2<6.25))

if text == 'fe':
    df = pd.read_csv('peaks_fe.xpk', sep=" ",skiprows=5)

    shift1= df["1H.P"]
    shift2= df["1H_2.P"]

    if filename == 'fe_yellow.xpk':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5) & (shift2<6))
    elif filename == 'fe_green.xpk':
        mask = ((shift1>5.1) & (shift1<6)) & ((shift2>7) & (shift2<8.25))

# NOTE(review): if no text/filename combination matched above, `mask` (and
# possibly `df`) is undefined here and this line raises NameError.
# Only `result` receives the filtered rows; `df` itself is left unfiltered.
result = df[mask]
result = result[["1H.L","1H_2.L"]]

# Remove the surrounding braces/spaces from the labels, drop repeated label
# pairs and label the remaining rows "Atom 1", "Atom 2", ...
for col in result.columns:
    result[col] = result[col].str.strip("{} ")
result.drop_duplicates(keep='first', inplace=True)
result = result.set_index([['Atom '+str(i) for i in range(1,len(result)+1)]])
# Appends the atom table after the peak lines; this handle is never closed.
tclust_atom=open("tclust.txt","a")
result.to_string(tclust_atom, header = False)
# NOTE(review): df1/df2 are taken from the unfiltered `df`, not from
# `df[mask]`, so tclust_ppm.txt contains the shifts of every row in the peak
# table regardless of the mask.
df1 = df.copy()[['1H.L','1H.P']]
df2 = df.copy()[['1H_2.L','1H_2.P']]

# Stack both label/shift column pairs into one two-column table.
df2.rename(columns={'1H_2.L': '1H.L', '1H_2.P': '1H.P'}, inplace=True)
df = pd.concat([df1,df2])
df['1H.L']=df['1H.L'].apply(lambda row: row.strip('{}'))
df['new']=0.3
df.drop_duplicates(keep='first',inplace=True)

# This handle is not used for writing (to_csv is given the path) and is
# never closed.
tclust_atom=open("tclust_ppm.txt","w+")
df.to_csv("tclust_ppm.txt",sep=" ", index=False, header=False)

我的输出示例是:

5.H3' 4.43488 0.3
6.H2' 4.49744 0.3
7.H1' 5.95115 0.3
6.H3' 4.51612 0.3
8.H5 5.39709 0.3
7.H3' 4.62099 0.3
7.H2 7.67414 0.3
8.H2' 4.31783 0.3
9.H1' 5.91813 0.3
8.H3' 4.45577 0.3
10.H5 5.17157 0.3
9.H3' 4.66179 0.3

根据我的代码,过滤器或“掩码”变量位于if语句中:

# Excerpt of the 'ef' branch: load the peak table, then build the boolean
# mask for the selected peak list (all bounds are exclusive).
if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)
    shift1=df["1H.P"]
    shift2=df["1H_2.P"]

    if filename == 'ef_blue.xpk':
        mask = ((shift1>5) & (shift1<6)) & ((shift2>7.25) & (shift2<8.25))
    elif filename == 'ef_green.xpk':
        mask = ((shift1>7) & (shift1<9)) & ((shift2>5.25) & (shift2<6.2))
    # NOTE(review): this literal has no ".xpk" suffix, unlike the other
    # branches - confirm which spelling `filename` actually uses.
    elif filename == 'ef_orange':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5.2) & (shift2<6.25))

这里应该执行的是 elif filename == 'ef_orange': 这个分支,因此 shift1 和 shift2 都不应大于 6.25,但我的输出中却出现了 7.67414。为什么我的过滤不起作用,该如何解决?

通过使用

# Each assignment selects a single column of df by its label.
shift1=df["1H.P"]
shift2=df["1H_2.P"]

这样做只是把过滤范围缩小到了单独的一列(Series),而你需要的是对整个数据框(DataFrame)进行过滤。把过滤逻辑写成一个独立的函数会更简单:

def fil(df,oneLow,oneHigh,twoLow,twoHigh):
    """Return the rows of ``df`` whose two shift columns lie in open ranges.

    A row is kept only when ``oneLow < df['1H.P'] < oneHigh`` and
    ``twoLow < df['1H_2.P'] < twoHigh``; all four bounds are exclusive.
    """
    first_shift = df['1H.P']
    second_shift = df['1H_2.P']
    first_in_range = (first_shift > oneLow) & (first_shift < oneHigh)
    second_in_range = (second_shift > twoLow) & (second_shift < twoHigh)
    return df[first_in_range & second_in_range]


# 'ef' branch rewritten around fil(): the shift1/shift2 helper columns and
# the explicit mask are no longer needed because fil() reads both shift
# columns itself and returns the filtered frame.
if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)

    # Arguments are (1H.P low, 1H.P high, 1H_2.P low, 1H_2.P high), all
    # exclusive. Reassigning df means everything written afterwards only
    # sees the rows inside the window.
    if filename == 'ef_blue.xpk':
        df = fil(df,5,6,7.25,8.25)
    elif filename == 'ef_green.xpk':
        df = fil(df,7,9,5.25,6.2)
    # The file name needs its ".xpk" suffix like the other branches;
    # comparing against the bare 'ef_orange' would never match it.
    elif filename == 'ef_orange.xpk':
        df = fil(df,3,5,5.2,6.25)

编辑:附上完整代码

import pandas as pd
import os
import sys
import re

def fil(df,oneLow,oneHigh,twoLow,twoHigh):
    """Return the rows of ``df`` whose two shift columns lie in open ranges.

    A row is kept only when ``oneLow < df['1H.P'] < oneHigh`` and
    ``twoLow < df['1H_2.P'] < twoHigh``; all four bounds are exclusive.
    """
    first_shift = df['1H.P']
    second_shift = df['1H_2.P']
    first_in_range = (first_shift > oneLow) & (first_shift < oneHigh)
    second_in_range = (second_shift > twoLow) & (second_shift < twoHigh)
    return df[first_in_range & second_in_range]



# Script body: scan a peak list for shift pairs, filter the matching peak
# table to a chemical-shift window, and write tclust.txt (peaks + atom labels)
# and tclust_ppm.txt (label / shift / tolerance).

# `text` selects the peak table ("peaks_<text>.xpk"); `filename` selects the
# peak list that is scanned for "Peak" lines and the shift window to apply.
text = 'ee'
filename = 'ee_pinkH1.xpk'

# Exclusive (1H.P low, 1H.P high, 1H_2.P low, 1H_2.P high) bounds, keyed by
# data set and then by peak-list file name.
shift_windows = {
    'ee': {
        'ee_pinkH1.xpk': (5.1, 6, 7, 8.25),
        'ee_pinkH2.xpk': (3.25, 5, 7, 8.5),
    },
    'ef': {
        'ef_blue.xpk': (5, 6, 7.25, 8.25),
        'ef_green.xpk': (7, 9, 5.25, 6.2),
        'ef_orange.xpk': (3, 5, 5.2, 6.25),
    },
    'fe': {
        'fe_yellow.xpk': (3, 5, 5, 6),
        'fe_green.xpk': (5.1, 6, 7, 8.25),
    },
}

try:
    window = shift_windows[text][filename]
except KeyError:
    # Fail before any output file is touched instead of crashing later on an
    # unfiltered or undefined result.
    raise ValueError(
        "no shift window defined for text=%r, filename=%r" % (text, filename)
    ) from None

# One whitespace character followed by d.ddd with a leading digit 1-9. The
# whitespace is part of each match and is deliberately kept in the output.
peak_pattern = re.compile(r"[\s][1-9]{1}\.[0-9]+")

peak_lines = []
with open(filename, "r") as peakPPM:
    for PPM in peakPPM:
        float_num = peak_pattern.findall(PPM)
        # Only lines carrying at least two shift values describe a peak.
        if len(float_num) > 1:
            peak_number = len(peak_lines) + 1
            peak_lines.append(
                'Peak ' + str(peak_number) + ' ' + float_num[0]
                + ' 0.05 ' + float_num[1] + ' 0.05' + '\n'
            )

df = pd.read_csv("peaks_" + text + ".xpk", sep=" ", skiprows=5)

# Every output below is derived from the filtered rows only.
filtered = fil(df, *window)

# Atom table: just the two label columns, braces removed, duplicates dropped.
# The explicit copy keeps the assignments below from writing into a slice.
result = filtered[["1H.L", "1H_2.L"]].copy()
for col in result.columns:
    result[col] = result[col].str.strip("{} ")
result = result.drop_duplicates(keep='first')
result.index = ['Atom ' + str(n) for n in range(1, len(result) + 1)]

with open("tclust.txt", "w") as tclust_file:
    tclust_file.write("rbclust \n")
    tclust_file.writelines(peak_lines)
    # to_string() of an empty frame emits an "Empty DataFrame" banner, which
    # must not end up in the cluster file.
    if not result.empty:
        result.to_string(tclust_file, header=False)

# Shift table: stack (label, shift) of both dimensions into two columns,
# using the filtered rows so no out-of-window shift is written.
first_dim = filtered[['1H.L', '1H.P']]
second_dim = filtered[['1H_2.L', '1H_2.P']].rename(
    columns={'1H_2.L': '1H.L', '1H_2.P': '1H.P'}
)
ppm_table = pd.concat([first_dim, second_dim])
ppm_table['1H.L'] = ppm_table['1H.L'].str.strip('{}')
ppm_table['new'] = 0.3
ppm_table = ppm_table.drop_duplicates(keep='first')

# to_csv opens and closes the file itself when given a path.
ppm_table.to_csv("tclust_ppm.txt", sep=" ", index=False, header=False)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM