簡體   English   中英

如何從文件中讀取內容,然後僅在某些值位於指定範圍內時才寫出到另一個文件?

[英]How can I read in from a file, then write out to another file only if certain values are in a range?

這是我正在讀取的peaks_ef.xpk文件中的示例。

label dataset sw sf
1H 1H_2
NOESY_F1eF2f.nv
4807.69238281 4803.07373047
600.402832031 600.402832031
1H.L 1H.P 1H.W 1H.B 1H.E 1H.J 1H.U 1H_2.L 1H_2.P 1H_2.W 1H_2.B 1H_2.E 1H_2.J 1H_2.U vol int stat comment flag0 flag8 flag9
0 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H1'} 5.82020 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
1 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H1'} 5.82020 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
2 {1.H3'} 4.70891 0.05000 0.10000 ++ {0.0} {} {1.H8} 8.13712 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
3 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {1.H8} 8.13712 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
4 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H1'} 5.90291 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
5 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H1'} 5.90291 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
6 {2.H3'} 4.60420 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
7 {2.H2'} 4.55388 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
8 {1.H3'} 4.70891 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
9 {1.H2'} 4.93607 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0
10 {3.H5} 5.20481 0.05000 0.10000 ++ {0.0} {} {2.H8} 7.61004 0.05000 0.10000 ++ {0.0} {} 0.0 100.0000 0 {} 0 0 0

我想將1H.P和1H_2.P列中的值寫入到另一個文件中,但是我只想包含一定范圍內的值。 我以為我的代碼是這樣做的。 mask變量應該“過濾”值嗎?

這是我的代碼:

import pandas as pd
import os
import sys
import re

# Script from the question: builds tclust.txt (peak list + atom labels) and
# tclust_ppm.txt (label / shift / 0.3 triples) from .xpk peak files.

# --- Part 1: extract two shift values per line from the peak list ------------
i=0;
contents_peak=[]
peak_lines=[]
with open ("ee_pinkH1.xpk","r") as peakPPM:
    for PPM in peakPPM.readlines():
        # Matches one whitespace character, a single digit 1-9, a dot and one or
        # more digits. Each match keeps its leading whitespace character, and
        # numbers whose integer part is 0 or has several digits are not matched.
        float_num = re.findall("[\s][1-9]{1}\.[0-9]+",PPM)
        # Only lines with at least two such numbers produce a "Peak" entry.
        if (len(float_num)>1):
            i=i+1
            value = ('Peak '+ str(i) + ' ' + str(float_num[0]) + ' 0.05 ' + str(float_num[1]) + ' 0.05' + '\n')
            peak_lines.append(value)
# "w+" truncates tclust.txt, so every run starts the file from scratch.
tclust_peak = open("tclust.txt","w+")
tclust_peak.write("rbclust \n")
for value in peak_lines:
    tclust_peak.write(value)
tclust_peak.close()

# --- Part 2: pick the peak table and the range mask --------------------------
# result stays a plain dict only until it is rebound to df[mask] below.
result={}
text = 'ee'
filename= 'ee_pinkH1.xpk'

# Each branch reads one peak table (skipping the 5 header lines) and builds a
# boolean mask that is True where both shift columns lie strictly inside the
# window chosen for `filename`. If no filename matches, `mask` is never defined.
if text == 'ee':
    df=pd.read_csv("peaks_ee.xpk",sep=" ", skiprows=5)

    shift1 = df["1H.P"]
    shift2 = df["1H_2.P"]

    if filename=='ee_pinkH1.xpk':
        mask = ((shift1>5.1) & (shift1<6)) & ((shift2>7) & (shift2<8.25))
    elif filename == 'ee_pinkH2.xpk':
        mask = ((shift1>3.25)&(shift1<5))&((shift2>7)&(shift2<8.5))

if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)
    shift1=df["1H.P"]
    shift2=df["1H_2.P"]

    if filename == 'ef_blue.xpk':
        mask = ((shift1>5) & (shift1<6)) & ((shift2>7.25) & (shift2<8.25))
    elif filename == 'ef_green.xpk':
        mask = ((shift1>7) & (shift1<9)) & ((shift2>5.25) & (shift2<6.2))
    elif filename == 'ef_orange.xpk':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5.2) & (shift2<6.25))

if text == 'fe':
    df = pd.read_csv('peaks_fe.xpk', sep=" ",skiprows=5)

    shift1= df["1H.P"]
    shift2= df["1H_2.P"]

    if filename == 'fe_yellow.xpk':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5) & (shift2<6))
    elif filename == 'fe_green.xpk':
        mask = ((shift1>5.1) & (shift1<6)) & ((shift2>7) & (shift2<8.25))

# --- Part 3: filtered atom labels appended to tclust.txt ---------------------
# The mask is applied here and only here: `result` is filtered, `df` is not.
result = df[mask]
result = result[["1H.L","1H_2.L"]]

# Strip the surrounding braces/spaces from the labels, e.g. "{1.H2'}" -> "1.H2'".
for col in result.columns:
    result[col] = result[col].str.strip("{} ")
result.drop_duplicates(keep='first', inplace=True)
# Replace the index with "Atom 1", "Atom 2", ... so to_string prints that prefix.
result = result.set_index([['Atom '+str(i) for i in range(1,len(result)+1)]])
# NOTE(review): this append-mode handle is never closed in the script.
tclust_atom=open("tclust.txt","a")
result.to_string(tclust_atom, header = False)

# --- Part 4: label/shift table written to tclust_ppm.txt ---------------------
# NOTE(review): df1 and df2 are taken from `df`, which is still the complete,
# unfiltered table read above, so every row ends up in tclust_ppm.txt
# regardless of `mask`.
df1 = df.copy()[['1H.L','1H.P']]
df2 = df.copy()[['1H_2.L','1H_2.P']]

# Give both halves the same column names so they can be stacked vertically.
df2.rename(columns={'1H_2.L': '1H.L', '1H_2.P': '1H.P'}, inplace=True)
df = pd.concat([df1,df2])
df['1H.L']=df['1H.L'].apply(lambda row: row.strip('{}'))
# Constant third column written after every label/shift pair.
df['new']=0.3
df.drop_duplicates(keep='first',inplace=True)

# NOTE(review): this handle is opened but never written to or closed; to_csv
# below writes to the same path on its own.
tclust_atom=open("tclust_ppm.txt","w+")
df.to_csv("tclust_ppm.txt",sep=" ", index=False, header=False)

我的輸出示例是:

5.H3' 4.43488 0.3
6.H2' 4.49744 0.3
7.H1' 5.95115 0.3
6.H3' 4.51612 0.3
8.H5 5.39709 0.3
7.H3' 4.62099 0.3
7.H2 7.67414 0.3
8.H2' 4.31783 0.3
9.H1' 5.91813 0.3
8.H3' 4.45577 0.3
10.H5 5.17157 0.3
9.H3' 4.66179 0.3

根據我的代碼,過濾器或“掩碼”變量位於if語句中:

# Excerpt of the 'ef' branch: read the peak table, then build a boolean mask
# that is True where both shift columns lie strictly inside the window chosen
# for `filename`.
if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)
    shift1=df["1H.P"]
    shift2=df["1H_2.P"]

    if filename == 'ef_blue.xpk':
        mask = ((shift1>5) & (shift1<6)) & ((shift2>7.25) & (shift2<8.25))
    elif filename == 'ef_green.xpk':
        mask = ((shift1>7) & (shift1<9)) & ((shift2>5.25) & (shift2<6.2))
    # NOTE(review): the full script above compares against 'ef_orange.xpk';
    # this excerpt omits the extension.
    elif filename == 'ef_orange':
        mask = ((shift1>3) & (shift1<5)) & ((shift2>5.2) & (shift2<6.25))

這裡應該走的是 elif filename == 'ef_orange': 這個分支,因此 shift1 和 shift2 都不應大於 6.25,但我的輸出中卻出現了 7.67414。為什麼我的過濾不起作用,該如何解決?

通過使用

shift1=df["1H.P"]
shift2=df["1H_2.P"]

您只是把過濾條件建立在單獨的 Series(也就是您的列)上,而您真正想要的是對整個數據框進行過濾;把過濾邏輯寫成一個獨立的函數會更容易理解。

def fil(df,oneLow,oneHigh,twoLow,twoHigh):
    """Return the rows of ``df`` whose two shift columns fall inside the windows.

    A row is kept when ``oneLow < df['1H.P'] < oneHigh`` and
    ``twoLow < df['1H_2.P'] < twoHigh``; all four bounds are exclusive.
    The input frame is not modified.
    """
    first_shift = df['1H.P']
    second_shift = df['1H_2.P']
    first_in_window = (first_shift > oneLow) & (first_shift < oneHigh)
    second_in_window = (second_shift > twoLow) & (second_shift < twoHigh)
    return df[first_in_window & second_in_window]


# 'ef' branch rewritten to filter the whole frame through fil() instead of
# building a separate mask from the shift1/shift2 columns.
if text == 'ef':
    df = pd.read_csv('peaks_ef.xpk',sep = " ",skiprows=5)
    # shift1 / shift2 are no longer needed: fil() reads both columns from df.

    if filename == 'ef_blue.xpk':
        # was: mask = ((shift1>5) & (shift1<6)) & ((shift2>7.25) & (shift2<8.25))
        df = fil(df,5,6,7.25,8.25)
    elif filename == 'ef_green.xpk':
        # was: mask = ((shift1>7) & (shift1<9)) & ((shift2>5.25) & (shift2<6.2))
        df = fil(df,7,9,5.25,6.2)
    # Compare against the full file name, as the other branches and the full
    # script do; a bare 'ef_orange' would never match 'ef_orange.xpk'.
    elif filename == 'ef_orange.xpk':
        # was: mask = ((shift1>3) & (shift1<5)) & ((shift2>5.2) & (shift2<6.25))
        df = fil(df,3,5,5.2,6.25)

編輯:附上完整代碼

import pandas as pd
import os
import sys
import re

def fil(df,oneLow,oneHigh,twoLow,twoHigh):
    """Keep only the rows of ``df`` whose shifts lie strictly inside both windows.

    ``df['1H.P']`` must be greater than ``oneLow`` and less than ``oneHigh``;
    ``df['1H_2.P']`` must be greater than ``twoLow`` and less than ``twoHigh``.
    Returns the selected rows; ``df`` itself is left untouched.
    """
    col_one = df['1H.P']
    col_two = df['1H_2.P']
    keep_one = (col_one > oneLow) & (col_one < oneHigh)
    keep_two = (col_two > twoLow) & (col_two < twoHigh)
    selected = df[keep_one & keep_two]
    return selected



# Builds tclust.txt (peak list followed by atom labels) and tclust_ppm.txt
# (label / shift / 0.3 triples) from .xpk peak files. Both outputs that come
# from the peak table use only the rows that pass the range filter.

# --- Part 1: peak list -> tclust.txt -----------------------------------------
# One whitespace character, a single digit 1-9, a dot, then digits. Written as
# a raw string so "\s" is not treated as a (deprecated) string escape. Each
# match keeps its leading whitespace character, exactly as before.
shift_pattern = re.compile(r"\s[1-9]\.[0-9]+")

i = 0
peak_lines = []
with open("ee_pinkH1.xpk", "r") as peakPPM:
    for PPM in peakPPM:
        float_num = shift_pattern.findall(PPM)
        # Only lines carrying at least two shift-like numbers describe a peak.
        if len(float_num) > 1:
            i += 1
            peak_lines.append(
                'Peak ' + str(i) + ' ' + float_num[0] + ' 0.05 ' + float_num[1] + ' 0.05' + '\n'
            )

# "w+" truncates the file so each run starts from scratch; the context manager
# guarantees the handle is closed before the file is reopened for appending.
with open("tclust.txt", "w+") as tclust_peak:
    tclust_peak.write("rbclust \n")
    tclust_peak.writelines(peak_lines)

# --- Part 2: choose the peak table and the shift windows ---------------------
text = 'ee'
filename = 'ee_pinkH1.xpk'

# Peak table to read for each value of `text`.
peak_tables = {
    'ee': 'peaks_ee.xpk',
    'ef': 'peaks_ef.xpk',
    'fe': 'peaks_fe.xpk',
}
# (oneLow, oneHigh, twoLow, twoHigh) passed to fil() for each text/filename pair.
windows = {
    ('ee', 'ee_pinkH1.xpk'): (5.1, 6, 7, 8.25),
    ('ee', 'ee_pinkH2.xpk'): (3.25, 5, 7, 8.5),
    ('ef', 'ef_blue.xpk'): (5, 6, 7.25, 8.25),
    ('ef', 'ef_green.xpk'): (7, 9, 5.25, 6.2),
    ('ef', 'ef_orange.xpk'): (3, 5, 5.2, 6.25),
    ('fe', 'fe_yellow.xpk'): (3, 5, 5, 6),
    ('fe', 'fe_green.xpk'): (5.1, 6, 7, 8.25),
}
# Fail with a clear message instead of continuing with an unfiltered or
# undefined table when the pair is not recognised.
if (text, filename) not in windows:
    raise ValueError('no shift window defined for text=%r, filename=%r' % (text, filename))

df = pd.read_csv(peak_tables[text], sep=" ", skiprows=5)
filtered = fil(df, *windows[(text, filename)])

# --- Part 3: filtered atom labels appended to tclust.txt ---------------------
# Only the two label columns hold strings; calling .str on the numeric columns
# would raise, so select the labels first. The copy avoids writing into a
# slice of the filtered frame.
result = filtered[["1H.L", "1H_2.L"]].copy()
for col in result.columns:
    # "{1.H2'}" -> "1.H2'"
    result[col] = result[col].str.strip("{} ")
result = result.drop_duplicates(keep='first')
# Label the rows "Atom 1", "Atom 2", ... so to_string prints that prefix.
result = result.set_index([['Atom ' + str(n) for n in range(1, len(result) + 1)]])
with open("tclust.txt", "a") as tclust_atom:
    result.to_string(tclust_atom, header=False)

# --- Part 4: label/shift table -> tclust_ppm.txt -----------------------------
# Built from the filtered rows, not from the full table; otherwise shifts
# outside the requested windows would still be written.
df1 = filtered[['1H.L', '1H.P']].copy()
df2 = filtered[['1H_2.L', '1H_2.P']].rename(columns={'1H_2.L': '1H.L', '1H_2.P': '1H.P'})
# Stack both dimensions into a single label/shift table.
df = pd.concat([df1, df2])
df['1H.L'] = df['1H.L'].str.strip('{}')
# Constant third column written after every label/shift pair.
df['new'] = 0.3
df = df.drop_duplicates(keep='first')

# to_csv opens and closes the file itself; no separate handle is needed.
df.to_csv("tclust_ppm.txt", sep=" ", index=False, header=False)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM