繁体   English   中英

使用 SeqinR 基于序列名称提取子集对齐

[英]Extracting subset alignment based in sequence name using SeqinR

我想使用 SeqinR,从一个序列比对(alignment 对象)中按序列名称提取出一部分已对齐的序列。

下面是该 alignment 对象的 dput() 输出(从输出可以看出,它是一个 class 属性为 "alignment" 的列表,而不是 S4 对象):

structure(list(nb = 39, nam = c("Lip4", "pdb|5FRD|A", "pdb|1M33|A", 
"pdb|5H3H|B", "pdb|1HL7|B", "pdb|1A8S|A", "pdb|1WPR|B", "pdb|4JYM|A", 
"pdb|2OCI|A", "pdb|1XRO|A", "pdb|3OOS|A", "pdb|2RHW|A", "pdb|2WUF|B", 
"pdb|1IUP|A", "pdb|4LXI|A", "GBD36331.1", "ADV62823.1", "KRA67074.1", 
"WP_030056103.1", "OHD12261.1", "WP_084594117.1", "WP_091643197.1", 
"OGO52173.1", "KFN38864.1", "GBC76610.1", "PKN85453.1", "ACU30832.1", 
"KPJ59441.1", "WP_028052179.1", "OGN93027.1", "OGO32450.1", "PSN93892.1", 
"ESQ22364.1", "WP_069806679.1", "WP_013336464.1", "WP_066315780.1", 
"KRT77961.1", "OIO07272.1", "WP_009823005.1"), seq = list("ygglgwvdnrggsadlgvsmggaplllplgdpilhf", 
    "-----w-dlpngsay-ghslggapllyplgdpilhf", "wgglgwldlpggsaxagwslggapltlplgdpwsh-", 
    "vggmgfydyrggsadlgfsmggagllldlgdpilhy", "ygglgwvdrrggsadlgfsmgtlglfvplgdpvyhf", 
    "yggfgwvdrrggsadlgfstggvgallplgdpvlhf", "-ggfgwvdyvggsadlghsvgagplmlplcdpllhy", 
    "-gglgwvdnmggtsdlghsvsagpimlphsdplvhh", "ygglgfvdprggsadlgwsdggapmi-plgdpvlhf", 
    "ykptgyvdqfggsvelgssyggaqlviplgdpilhf", "------vnlkggsidlghsaggaqivlpfgdpilhf", 
    "yggmgwvdspgnsaalgnamggaplllkfgdpilhf", "ygglgwvdqpggsaalgnalggapllvplgdpilhf", 
    "ygglgyvdmvggtvhlgnafggasml-plgdpilhf", "ygglgrvdmvggtthlgnamggaplllplgdpighf", 
    "-gglgvvelrgarvdlgvsfggapvalplgdpihhf", "-ggmglvslrggsadlgvsfggvplllplgdpifhc", 
    "ygglgwvfqiggsvylgesfggvp-llplgdpifhf", "ygglgwvylrghssdlgwsyggvglllplgdpllhf", 
    "ygglgwvdnrggsadaglsmggaplllplgdpilhf", "fgglgwvdnrggtadagvsmggaplllplgdpilhf", 
    "ygglgwvdnrggsadagvsmggapmllplgdpllhf", "ygglgwvdnrdgsadlgvsmggapllvplgdpilhf", 
    "ygglgwvdnrggsadlgismggaplllplgdpllhf", "ygglgwtdnrggsadlghsmggaplllplgdpilhf", 
    "ygglgwvdnrggsadlglsmggapllapmadplmhf", "----gwcdnrggsadlgvsmggapvllplgdpiehf", 
    "ygglgwvdnrggtadqgvsvggapllvplgdpilhf", "yggcgwldnrggsadlgismggaplllplgdhilhf", 
    "ygglgfvdnrggsadvgismggaplllplgdpilhf", "yggmgyldnrggsadigasmggaplllplgdsilhf", 
    "ygglgwldnrggsadlgvsmggaplllplgdpilhf", "ygglgwldnrggstdlgvsmggadlllplgdnilhf", 
    "ygglgwldnrggsvdlgvsmggaplllplgdpilhf", "ygglgwldnrggsadlgfsmggaglllptgdpifhh", 
    "ygglgwvdnrggtadqgvsmggapllvklgdpiehf", "ygglgwvdnrggsadlgaslggaplllplgdpilhf", 
    "ygglgwvdnrggsadlgaslggaallpplgdpilhf", "ygglgwvdnrggsgdlgwslgglgallplgdpilhf"), 
    com = NA), class = "alignment")

我正在尝试这种方式:

library(seqinr)

# Read the alignment file into an object of class "alignment".
# NOTE(review): the later answer on this page passes format = "fasta" to
# read.alignment() — confirm whether it is needed here as well.
fas <- read.alignment("data.fa")
# Names of the sequences that should be kept.
n <- c("OGO52173.1", "KFN38864.1", "GBC76610.1")
# `[` is applied to the alignment object itself, i.e. to its top-level list
# elements (nb, nam, seq, com), not to the individual sequences; the dput()
# output that follows shows the result has also lost the "alignment" class.
sub_fas <- fas[ fas$nam %in% n ]

但是我用dput()得到了下面显示的不同类型的对象:

structure(list(39, c("Lip4", "pdb|5FRD|A", "pdb|1M33|A", "pdb|5H3H|B", 
"pdb|1HL7|B", "pdb|1A8S|A", "pdb|1WPR|B", "pdb|4JYM|A", "pdb|2OCI|A", 
"pdb|1XRO|A", "pdb|3OOS|A", "pdb|2RHW|A", "pdb|2WUF|B", "pdb|1IUP|A", 
"pdb|4LXI|A", "GBD36331.1", "ADV62823.1", "KRA67074.1", "WP_030056103.1", 
"OHD12261.1", "WP_084594117.1", "WP_091643197.1", "OGO52173.1", 
"KFN38864.1", "GBC76610.1", "PKN85453.1", "ACU30832.1", "KPJ59441.1", 
"WP_028052179.1", "OGN93027.1", "OGO32450.1", "PSN93892.1", "ESQ22364.1", 
"WP_069806679.1", "WP_013336464.1", "WP_066315780.1", "KRT77961.1", 
"OIO07272.1", "WP_009823005.1"), list("ygglgwvdnrggsadlgvsmggaplllplgdpilhf", 
    "-----w-dlpngsay-ghslggapllyplgdpilhf", "wgglgwldlpggsaxagwslggapltlplgdpwsh-", 
    "vggmgfydyrggsadlgfsmggagllldlgdpilhy", "ygglgwvdrrggsadlgfsmgtlglfvplgdpvyhf", 
    "yggfgwvdrrggsadlgfstggvgallplgdpvlhf", "-ggfgwvdyvggsadlghsvgagplmlplcdpllhy", 
    "-gglgwvdnmggtsdlghsvsagpimlphsdplvhh", "ygglgfvdprggsadlgwsdggapmi-plgdpvlhf", 
    "ykptgyvdqfggsvelgssyggaqlviplgdpilhf", "------vnlkggsidlghsaggaqivlpfgdpilhf", 
    "yggmgwvdspgnsaalgnamggaplllkfgdpilhf", "ygglgwvdqpggsaalgnalggapllvplgdpilhf", 
    "ygglgyvdmvggtvhlgnafggasml-plgdpilhf", "ygglgrvdmvggtthlgnamggaplllplgdpighf", 
    "-gglgvvelrgarvdlgvsfggapvalplgdpihhf", "-ggmglvslrggsadlgvsfggvplllplgdpifhc", 
    "ygglgwvfqiggsvylgesfggvp-llplgdpifhf", "ygglgwvylrghssdlgwsyggvglllplgdpllhf", 
    "ygglgwvdnrggsadaglsmggaplllplgdpilhf", "fgglgwvdnrggtadagvsmggaplllplgdpilhf", 
    "ygglgwvdnrggsadagvsmggapmllplgdpllhf", "ygglgwvdnrdgsadlgvsmggapllvplgdpilhf", 
    "ygglgwvdnrggsadlgismggaplllplgdpllhf", "ygglgwtdnrggsadlghsmggaplllplgdpilhf", 
    "ygglgwvdnrggsadlglsmggapllapmadplmhf", "----gwcdnrggsadlgvsmggapvllplgdpiehf", 
    "ygglgwvdnrggtadqgvsvggapllvplgdpilhf", "yggcgwldnrggsadlgismggaplllplgdhilhf", 
    "ygglgfvdnrggsadvgismggaplllplgdpilhf", "yggmgyldnrggsadigasmggaplllplgdsilhf", 
    "ygglgwldnrggsadlgvsmggaplllplgdpilhf", "ygglgwldnrggstdlgvsmggadlllplgdnilhf", 
    "ygglgwldnrggsvdlgvsmggaplllplgdpilhf", "ygglgwldnrggsadlgfsmggaglllptgdpifhh", 
    "ygglgwvdnrggtadqgvsmggapllvklgdpiehf", "ygglgwvdnrggsadlgaslggaplllplgdpilhf", 
    "ygglgwvdnrggsadlgaslggaallpplgdpilhf", "ygglgwvdnrggsgdlgwslgglgallplgdpilhf"), 
    NA, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), .Names = c("nb", "nam", "seq", "com", 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA))

另外,我没有得到基于列表n的子集。

在这个例子中,我期望得到一个与 fas 同类型的对象,但其中只包含变量 n 列出的 3 个序列。

提前致谢

我相信这可能有更好的方法来做到这一点。
但我用一个技巧解决了。

首先,我改用 read.fasta(而不是 read.alignment)导入文件,使用的仍然是 seqinr。

# Load the FASTA file with seqinr's read.fasta() instead of read.alignment().
# NOTE(review): read.fasta() appears to return each sequence split into single
# characters unless as.string = TRUE is passed — confirm this is intended.
fas <- read.fasta(file = "file.fa")

之后,我用这个列表构建了一个 tibble,对其进行过滤,再使用 as.alignment(alignment 对象的构造函数)生成结果。

# Build a table with the sequences (list-column `fas`) next to their names,
# keep only the rows whose name is listed in `n`, and rebuild an alignment
# from what is left. `fas` is the list returned by read.fasta(); `n` is the
# character vector of wanted sequence names.
tibble(fas) %>%
  cbind(names = names(fas)) %>%
  filter(names %in% n) %>%
  # The braces matter: when `.` only appears inside nested calls, magrittr
  # also passes the table as the first positional argument, and with nb, nam
  # and seq already named it would be bound to as.alignment()'s `com`.
  {
    as.alignment(nb = nrow(.), nam = pull(., names), seq = pull(., fas))
  }

有了这个,我做了我需要的。

欢迎提出任何改进建议。不管怎样,还是要谢谢大家,希望这能对别人有所帮助。

# Read the FASTA alignment; `n` holds the names of the sequences to keep.
align <- read.alignment("fastafile", format = "fasta")

# One logical mask, reused for names and sequences so the two stay in step.
keep <- align$nam %in% n
align.temp.nam <- align$nam[keep]
align.temp.seq <- align$seq[keep]

# Rebuild an alignment object from the retained entries only.
align.sub <- as.alignment(
  nb = length(align.temp.nam),
  nam = align.temp.nam,
  seq = align.temp.seq
)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM