简体   繁体   English

EM算法不起作用

[英]EM Algorithm not working

I am trying to implement a simple EM algorithm. 我正在尝试实现一个简单的EM算法。 So far, it seems to be working well except for the small problem that variances quickly shrink to zero, converging around the mean of the data. 到目前为止,除方差迅速缩小至零(收敛于数据均值)这一小问题外,它似乎运行良好。 (If I do not update the variances, it will converge to the mean completely fine!) (如果我不更新方差,它将收敛到均值完全可以!)

As far as I can tell, this is due to "weighting" the points close to the centre too heavily - hence making the algorithm lower the variance and shrink to zero. 据我所知,这是由于过于靠近中心的点“加权”了,因此使算法降低了方差并缩小为零。 When I change the formula from 当我将公式从 第一 to 第二 the algorithm works much better (apart from slightly overestimating variance, which is to be expected). 该算法的效果要好得多(除了方差会被高估之外)。 Is this a problem with my code? 我的代码有问题吗?

class DataPoint {
  int nDims; // Number of dimensions
  float[] data;
  DataPoint(int n) {nDims = n; data = new float[n];}
  DataPoint(float[] d) {nDims = d.length; data = d;}
}

float sum(float[] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += d[i];} return ret;}
float[] sub(float[] f, float[] u) {float[] ret = new float[f.length]; for (int i = 0; i < f.length; ++i) {ret[i] = f[i] - u[i];} return ret;}
float distSq(float[] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += d[i]*d[i];} return ret;}
float distSq(float[][] d) {float ret = 0; for (int i = 0; i < d.length; ++i) {ret += distSq(d[i]);} return ret;}

float det(float[][] mat) {
  if (mat.length == 2 && mat[0].length == 2) {
    float det = (mat[0][0] * mat[1][1]) - (mat[0][1] * mat[1][0]);
    return det;
  }
  throw new RuntimeException("Det has to be 2x2");
}

float[][] inverse(float[][] mat) {
  if (mat.length == 2 && mat[0].length == 2) {
    float det = mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0];
    float[][] ret = {{mat[1][1]/det, -mat[0][1]/det}, {-mat[1][0]/det, mat[0][0]/det}};
    return ret;
  }
  throw new RuntimeException("Inverse has to be 2x2");

}

class GMM {
  int number;
  int dims;
  float[] weights;
  float[][] means;
  float[][][] covariances;
  float[][][] invCov;


  GMM(int gNo, int noDimensions) {
    number = gNo;
    dims = noDimensions;
    weights = new float[gNo];
    means = new float[gNo][noDimensions];
    covariances = new float[gNo][noDimensions][noDimensions];
    invCov      = new float[gNo][noDimensions][noDimensions];

    // Initialise to random values.
    for (int i = 0; i < gNo; ++i) {
      weights[i] = random(0, 1);
      for (int j = 0; j < noDimensions; ++j) {
        means[i][j] = random(-100,100);
        covariances[i][j][j] = 100;
        invCov[i] = inverse(covariances[i]);
      }
    }
    normaliseWeights();
  }

  float[][] EStep(DataPoint[] data) {
    // For each data point, return probablility of each gaussian having generated it
    // Arguments: n-dimensional data
    float[][] ret = new float[number][data.length];

    for (int Gauss = 0; Gauss < number; ++Gauss) {
      for (int i = 0; i < data.length; ++i) {
        ret[Gauss][i] = calculateProbabilityFast(data[i], Gauss);
      }
    }
    return ret;
  }

  void MStep(DataPoint[] data, float[][] dataProbabilities) {
    for (int Gauss = 0; Gauss < number; ++Gauss) {
      means[Gauss] = new float[data[0].nDims]; // Reset dims to zero
      float probSum = 0;
      for (int i = 0; i < dataProbabilities[Gauss].length; ++i) {
        probSum += dataProbabilities[Gauss][i];
        for (int j = 0; j < means[Gauss].length; ++j) {
          means[Gauss][j] += data[i].data[j] * dataProbabilities[Gauss][i];
        }
      }
      for (int i = 0; i < means[Gauss].length; ++i) {
        means[Gauss][i] /= probSum; // Normalise
      }
      // Means[Gauss] has been updated

      // Now for covariance.... :x
      covariances[Gauss] = new float[data[0].nDims][data[0].nDims];
      for (int m = 0; m < data[0].nDims; ++m) {
        for (int n = 0; n < data[0].nDims; ++n) {
          for (int i = 0; i < dataProbabilities[Gauss].length; ++i) {
            covariances[Gauss][m][n] += (data[i].data[m]-means[Gauss][m])*(data[i].data[n]-means[Gauss][n])*dataProbabilities[Gauss][i];
          }
        }
      }
      // Created a triangular matrix, normalise and then update other half too.
      for (int m = 0; m < data[0].nDims; ++m) {
        for (int n = 0; n < data[0].nDims; ++n) {
          covariances[Gauss][m][n] /= probSum;
        }
      }
      // Update inverses
      invCov[Gauss] = inverse(covariances[Gauss]);
      weights[Gauss] = probSum;
    }
    normaliseWeights();
  }

  float calculateProbabilityFast(DataPoint x, int Gauss) {
    float ret = pow(TWO_PI, dims/2.0)*sqrt(det(covariances[Gauss]));
    float exponent = 0;
    for (int i = 0; i < x.nDims; ++i) {
      float temp = 0;
      for (int j = 0; j < x.nDims; ++j) {
        temp += (x.data[j] - means[Gauss][j])*invCov[Gauss][i][j];
      }
      exponent += temp*(x.data[i] - means[Gauss][i]);
    }
    exponent = exp(-0.5*exponent);
    // ==================================================================
    // If I change this line HERE to -0.3*exponent, everything works fine
    // ==================================================================
    //print(exponent); print(","); println(ret);
    return exponent/ret;
  }



  void normaliseWeights() {
    float sum = sum(weights);
    for (int i = 0; i < number; ++i) {weights[i] /= sum;}
  }

  void display() {
    ellipseMode(CENTER);
    for (int i = 0; i < number; ++i) {
      //strokeWeight(weights[i]*100);
      strokeWeight(5);
      stroke(color(255, 0, 0));
      point(means[i][0], means[i][1]);
      noFill();
      strokeWeight(1.5);
      ellipse(means[i][0], means[i][1], (covariances[i][0][0]), (covariances[i][1][1]));
      ellipse(means[i][0], means[i][1], (covariances[i][0][0]*2), (covariances[i][1][1]*2));
      fill(0);
    }
  }
}

DataPoint[] data;

final int size = 10000;

GMM MixModel;

void setup() {
  // Hidden gaussians
  size(800,600);
  MixModel = new GMM(1, 2); // 1 gaussians, 2 dimensions.
  data = new DataPoint[size];
  int gNo = 1;
  float gxMeans[] = new float[gNo];
  float gxVars[]  = new float[gNo];
  float gyMeans[] = new float[gNo];
  float gyVars[]  = new float[gNo];
  float covars[]  = new float[gNo];
  for (int i = 0; i < gNo; ++i) {
    gxMeans[i] = random(-100, 100);
    gxVars[i] =  random(5, 40);
    gyMeans[i] = random(-100, 100);
    gyVars[i] =  random(5, 40); // Actually std. devs!! 
    covars[i] = 0;//random(-1, 1);
    println("Vars: " + str(pow(gxVars[i], 2)) + ", " + str(pow(gyVars[i], 2)));
    println("Covar: " + str(covars[i]));
  }
  for (int i = 0; i < size; ++i) {
    int gauss = (int)random(gNo);
    data[i] = new DataPoint(2);
    data[i].data[0] = randomGaussian()*gxVars[gauss] + gxMeans[gauss];
    data[i].data[1] = (randomGaussian()*gyVars[gauss])*(1-abs(covars[gauss]))+(gyVars[gauss]*covars[gauss]*(data[i].data[0]-gxMeans[gauss])/gxVars[gauss]) + gyMeans[gauss];
  }


  frameRate(5); // Let's see what's happening!
}


void draw() {
  translate(width/2, height/2); // set 0,0 at centre
  background(color(255, 255, 255));
  stroke(0);
  strokeWeight(1);
  for (int i = 0; i < size; ++i) {
    point(data[i].data[0], data[i].data[1]);
  }
  MixModel.display();
  float[][] dataProbs = MixModel.EStep(data);
  MixModel.MStep(data, dataProbs);
  print(MixModel.covariances[0][0][0]); print(", ");
  println(MixModel.covariances[0][1][1]);
}

EDIT: Complete, minimal working example. 编辑:完整,最少的工作示例。 The variance still converges to 0, so this suggests perhaps I'm doing something wrong with the algorithm? 方差仍然收敛到0,所以这表明我的算法做错了吗?

import random, statistics, math

hiddenMu = random.uniform(-100, 100)
hiddenVar = random.uniform(10, 30)
dataLen = 10000

data = [random.gauss(hiddenMu, hiddenVar) for i in range(dataLen)]

hiddenVar **= 2 # Make it the actual variance rather than std. dev.

print("Variance: " + str(hiddenVar) + ", actual: " + str(statistics.variance(data)))
print("Mean    : " + str(hiddenMu ) + ", actual: " + str(statistics.mean    (data)))

guessMu = random.uniform(-100, 100)
guessVar = 100

print("Initial mu guess:  " + str(guessMu))
print("Initial var guess: " + str(guessVar))

# perform iterations

numIters = 100

for i in range(numIters):

    dataProbs = [math.exp(-0.5*((i-guessMu)**2)/guessVar)/((2*math.pi*guessVar)**0.5) for i in data]

    guessMu = sum(map(lambda x: x[0]*x[1], zip(dataProbs, data)))/sum(dataProbs)
    guessVar = sum(map(lambda x: x[0]*((x[1]-guessMu)**2), zip(dataProbs, data)))/sum(dataProbs)

    print(str(i) + " mu guess:  " + str(guessMu))
    print(str(i) + " var guess: " + str(guessVar))
    print()

EDIT 2 : Could I need something like Bessel's correction? 编辑2 :我需要像贝塞尔的更正吗? (multiply the result by n/(n-1)). (将结果乘以n /(n-1))。 If so, how would I go about doing this when the sum of the probabilities themselves may be less than one? 如果是这样,当几率之和可能小于1时,我将如何去做?

For anyone else having the same problem, I now understand the conditional assigning of points to separate Gaussians in a GMM. 对于其他有相同问题的人,我现在了解在GMM中有条件地将点分配给单独的高斯人。 You might see this with NaNs showing up in your programs, such as in question EM algorithm code is not working 您可能会看到NaN出现在程序中,例如EM算法代码无法正常工作

Rather than assigning each point the probability of belonging to the Gaussian with the formula listed above, you need to assign that probability and then normalise over all Gaussians listed - which means that when a point can be assigned to a Gaussian, it becomes completely assigned if that Gaussian is the only one that could have generated it - ie the probability becomes 1, even if it was originally only a very small chance of having come from that distribution. 无需使用上面列出的公式为每个点分配属于高斯的概率,而是需要分配该概率,然后对列出的所有高斯进行归一化 -这意味着当一个点可以分配给高斯时,如果高斯是唯一可以产生它的人-即概率变为1,即使它最初只是从该分布中获得的机会很小。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM