简体   繁体   English

深度哈希反转算法(应使用 Ruby 实现)

[英]Algorithm for Deep Hash Invert (should be in ruby)

I have a hash H (see bottom) and need to perform a deep invert operation on it, such that a new hash H2 is returned where each key K is a value inside the original hash. 我有一个散列H (见下)并需要对它执行深度反转操作,这样就会返回一个新的散列H2 ,其中每个键K都是原始散列内的值。 The keys in H2 map to an array of arrays of all the sequences of keys that, when applied onto the original hash H would give you the key K which is a value in the original hash. H2的键映射到所有键序列的数组数组,当应用到原始哈希H它将为您提供键K ,它是原始哈希中的值。

Perhaps I should use a different data structure for the output, such as a hash of hashes? 也许我应该为输出使用不同的数据结构,例如散列哈希?

I'd like this to work with hashes of arbitrary nesting levels. 我希望这适用于任意嵌套级别的哈希。

I don't know where to start designing an optimal algorithm 我不知道从哪里开始设计最佳算法

Original Hash 原始的哈希

What the input might look like 输入可能是什么样的

{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
  {...}
}

Simplified Output 简化输出

What output would look like 输出会是什么样子

{
  phe: [[:u,:u,:u],[:u,:u,:c]],
  leu: [[:u,:u,:a],[:u,:u,:g]],
  ser: [[:u,:c,:u],[:u,:c,:c],[:u,:c,:a],[:u,:c,:g]],
  tyr: [[:u,:a,:u],[:u,:a,:c]],
  "...": [[...]]
}

Why? 为什么? I'm writing my own bioinformatics library and want to be able to return the possible nucleotide sequences for a given protein, denoted by the three-character :symbols. 我正在编写自己的生物信息学库,希望能够返回给定蛋白质的所有可能核苷酸序列,其中蛋白质用三个字符的 :symbols 表示。

Code

# Deep-inverts a nested hash: every non-Hash value found in +h+ becomes a key
# of the result, mapped to an array holding each key path that leads to it.
#
# h   - the (possibly nested) hash to invert.
# arr - the key path accumulated so far; callers normally omit it.
#
# Returns a new hash of the form { leaf_value => [path, path, ...] }, with
# paths listed in the order they are encountered while walking +h+.
def recurse(h, arr = [])
  inverted = {}
  h.each do |key, value|
    path = arr + [key]
    # A nested hash contributes the inversion of its subtree; anything else is
    # a leaf and contributes the single path that reaches it.
    found = value.is_a?(Hash) ? recurse(value, path) : { value => [path] }
    # When a leaf value was already seen, append the new paths to the old ones.
    inverted.merge!(found) { |_leaf, known, fresh| known + fresh }
  end
  inverted
end

The recursion uses the form of Hash#update (aka merge!) that employs the block { |_,o,n| o+n } to determine the values of keys that are present in both hashes being merged. 递归使用 Hash#update(又名 merge!)的带块形式,通过块 { |_,o,n| o+n } 来确定被合并的两个哈希中同时存在的键的值。

Example 1 例1

h =
{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
}

recurse h
  #=> {:phe=>[[:u, :u, :u], [:u, :u, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :u, :g], [:c, :u, :u],
  #      [:c, :u, :c], [:c, :u, :a], [:c, :u, :g]],
  #    :ser=>[[:u, :c, :u], [:u, :c, :c], [:u, :c, :a], [:u, :c, :g]], 
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c]],
  #    :STOP=>[[:u, :a, :a], [:u, :a, :g], [:u, :g, :a]],
  #    :cys=>[[:u, :g, :u], [:u, :g, :c]],
  #    :trp=>[[:u, :g, :g]],
  #    :pro=>[[:c, :c, :u], [:c, :c, :c], [:c, :c, :a], [:c, :c, :g]], 
  #    :his=>[[:c, :a, :u], [:c, :a, :c]],
  #    :gln=>[[:c, :a, :a], [:c, :a, :g]],
  #    :arg=>[[:c, :g, :u], [:c, :g, :c], [:c, :g, :a], [:c, :g, :g]]}

Example 2 例2

h =
{
  u: {
    u: { u: :phe, a: :leu },
    c: { u: :ser, c: :phe },
    a: { u: :tyr, c: { a: { u: :leu, c: :ser }, u: :tyr } }
  },
  c: {
    u: { u: :leu, c: :pro },
    a: { u: :arg }
  },
}

recurse(h)
  #=> {:phe=>[[:u, :u, :u], [:u, :c, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
  #    :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
  #    :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}

Explanation 说明

Here is the code modified to display the calculations that are being performed: 以下是修改后的代码,用于显示正在执行的计算:

# Tracing variant of the deep hash inversion: maps every non-Hash value in +h+
# to the list of key paths that reach it, printing each step to standard output.
#
# h     - the (possibly nested) hash to invert.
# arr   - the key path accumulated so far; callers normally omit it.
# level - the current recursion depth; each level indents the trace by two
#         more spaces.
#
# Returns the inverted hash { leaf_value => [path, path, ...] }.
def recurse(h, arr = [], level = 0)
  pad = ' ' * (2 * level)
  puts "#{pad}level = #{level}"
  puts "#{pad}h= #{h}"
  puts "#{pad}arr= #{arr}"
  result = {}
  h.each do |key, value|
    puts "#{pad}  level = #{level}"
    puts "#{pad}  k=#{key}"
    puts "#{pad}  v=#{value}"
    puts "#{pad}  g=#{result}"
    addition =
      if value.is_a?(Hash)
        puts "#{pad}  v is Hash"
        # Descend one level, extending the path with the current key.
        recurse(value, arr + [key], level + 1)
      else
        puts "#{pad}  v is not a Hash"
        # A leaf: record the single path that reaches it.
        { value => [arr + [key]] }
      end
    # Paths for an already-seen leaf value are appended to the existing ones.
    result.update(addition) { |_leaf, known, fresh| known + fresh }
  end
  puts "#{pad}return #{result}"
  result
end

The output for recurse h follows, for Example 2 (for diehards only). 下面是例2中 recurse h 的输出(仅供有耐心的读者参考)。

level = 0
h= {:u=>{:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe}, :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}, :c=>{:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}}
arr= []
  level = 0
  k=u
  v={:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
     :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  g={}
  v is Hash
  level = 1
  h= {:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
      :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  arr= [:u]
    level = 1
    k=u
    v={:u=>:phe, :a=>:leu}
    g={}
    v is Hash
    level = 2
    h= {:u=>:phe, :a=>:leu}
    arr= [:u, :u]
      level = 2
      k=u
      v=phe
      g={}
      v is not a Hash
      level = 2
      k=a
      v=leu
      g={:phe=>[[:u, :u, :u]]}
      v is not a Hash
    return {:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    level = 1
    k=c
    v={:u=>:ser, :c=>:phe}
    g={:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    v is Hash
    level = 2
    h= {:u=>:ser, :c=>:phe}
    arr= [:u, :c]
      level = 2
      k=u
      v=ser
      g={}
      v is not a Hash
      level = 2
      k=c
      v=phe
      g={:ser=>[[:u, :c, :u]]}
      v is not a Hash
    return {:ser=>[[:u, :c, :u]], :phe=>[[:u, :c, :c]]}
    level = 1
    k=a
    v={:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a]], :ser=>[[:u, :c, :u]]}
    v is Hash
    level = 2
    h= {:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    arr= [:u, :a]
      level = 2
      k=u
      v=tyr
      g={}
      v is not a Hash
      level = 2
      k=c
      v={:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      g={:tyr=>[[:u, :a, :u]]}
      v is Hash
      level = 3
      h= {:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      arr= [:u, :a, :c]
        level = 3
        k=a
        v={:u=>:leu, :c=>:ser}
        g={}
        v is Hash
        level = 4
        h= {:u=>:leu, :c=>:ser}
        arr= [:u, :a, :c, :a]
          level = 4
          k=u
          v=leu
          g={}
          v is not a Hash
          level = 4
          k=c
          v=ser
          g={:leu=>[[:u, :a, :c, :a, :u]]}
          v is not a Hash
        return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        level = 3
        k=u
        v=tyr
        g={:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        v is not a Hash
      return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]],
              :tyr=>[[:u, :a, :c, :u]]}
    return {:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]], :leu=>[[:u, :a, :c, :a, :u]],
            :ser=>[[:u, :a, :c, :a, :c]]}
  return {:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
          :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  level = 0
  k=c
  v={:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
     :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  v is Hash
  level = 1
  h= {:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  arr= [:c]
    level = 1
    k=u
    v={:u=>:leu, :c=>:pro}
    g={}
    v is Hash
    level = 2
    h= {:u=>:leu, :c=>:pro}
    arr= [:c, :u]
      level = 2
      k=u
      v=leu
      g={}
      v is not a Hash
      level = 2
      k=c
      v=pro
      g={:leu=>[[:c, :u, :u]]}
      v is not a Hash
    return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    level = 1
    k=a
    v={:u=>:arg}
    g={:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    v is Hash
    level = 2
    h= {:u=>:arg}
    arr= [:c, :a]
      level = 2
      k=u
      v=arg
      g={}
      v is not a Hash
    return {:arg=>[[:c, :a, :u]]}
  return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]],
        :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
        :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
        :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
        :pro=>[[:c, :u, :c]],
        :arg=>[[:c, :a, :u]]}
  #=> <the last value returned above> 

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM