简体   繁体   中英

Java Set containsAll performance

I am trying to compare two HashSets in Java. Both of them contain several characters, but I need to do the comparison a lot of times (10^5 to 10^8), so I am trying to improve the performance. In detail, what I want to check is whether set A contains set B or set B contains set A, and whether their sizes differ by at most 2. Here are the two methods I have come up with:

  1. Use the set's containsAll method.
  2. Since a set can only contain the 26 letters, stop using a HashSet and use bit operations instead: if the virtual set has 'a', I add 1; if it has 'b', I add 1<<1, which is 2; if it has 'c', I add 1<<2, which is 4; the sum of these values represents the set. Then I XOR the two values and count the number of 1 bits in the result.

Which method would be better?

If I understand you correctly, the second approach is the one to use.

I would do it something like this:

/**
 * A compact set of the letters {@code 'a'} through {@code 'z'}, stored as a bit mask in a single
 * {@code int}: bit 0 represents {@code 'a'}, bit 1 {@code 'b'}, and so on up to bit 25 for
 * {@code 'z'}.
 *
 * <p>Every per-character operation validates its argument and throws
 * {@link IllegalArgumentException} for characters outside {@code 'a'..'z'}. The check is explicit
 * rather than an {@code assert} because assertions are disabled by default, and Java masks the
 * shift distance of an {@code int} shift to its low five bits: without the check, an out-of-range
 * character such as {@code 'A'} would silently alias onto a valid letter's bit.
 *
 * <p>This class performs no synchronization.
 */
public class IntBitSet {

  /** Lowest character that can be stored; it maps to bit 0. */
  private static final char FIRST_CHAR = 'a';

  /** Highest character that can be stored; it maps to bit 25. */
  private static final char LAST_CHAR = 'z';

  /** Membership mask: bit {@code c - 'a'} is set exactly when {@code c} is in the set. */
  private int set = 0;

  /**
   * Returns the number of characters in this set.
   *
   * @return the count of set bits in the membership mask
   */
  public int size() {
    return Integer.bitCount(set);
  }

  /**
   * Tells whether this set contains no characters.
   *
   * @return {@code true} if no character has been added (or all were removed)
   */
  public boolean isEmpty() {
    return set == 0;
  }

  /**
   * Tells whether the given character is a member of this set.
   *
   * @param c the character to look up; must be in {@code 'a'..'z'}
   * @return {@code true} if {@code c} is in the set
   * @throws IllegalArgumentException if {@code c} is outside {@code 'a'..'z'}
   */
  public boolean contains(char c) {
    return (set & maskFor(c)) != 0;
  }

  /**
   * Adds the given character to this set. Adding a character that is already present has no
   * effect.
   *
   * @param c the character to add; must be in {@code 'a'..'z'}
   * @throws IllegalArgumentException if {@code c} is outside {@code 'a'..'z'}
   */
  public void add(char c) {
    set |= maskFor(c);
  }

  /**
   * Removes the given character from this set. Removing a character that is not present has no
   * effect.
   *
   * @param c the character to remove; must be in {@code 'a'..'z'}
   * @throws IllegalArgumentException if {@code c} is outside {@code 'a'..'z'}
   */
  public void remove(char c) {
    set &= ~maskFor(c);
  }

  /**
   * Tells whether every character of {@code c} is also in this set.
   *
   * @param c the candidate subset
   * @return {@code true} if {@code c} is a subset of this set (an empty {@code c} always is)
   * @throws NullPointerException if {@code c} is {@code null}
   */
  public boolean containsAll(IntBitSet c) {
    // Intersecting with c leaves c unchanged only when none of c's bits are missing here.
    return (this.set & c.set) == c.set;
  }

  /** Removes all characters from this set. */
  public void clear() {
    set = 0;
  }

  /**
   * Validates {@code c} and returns the single-bit mask that represents it.
   *
   * @param c the character to convert
   * @return an {@code int} with only bit {@code c - 'a'} set
   * @throws IllegalArgumentException if {@code c} is outside {@code 'a'..'z'}
   */
  private static int maskFor(char c) {
    if (c < FIRST_CHAR || c > LAST_CHAR) {
      throw new IllegalArgumentException(c + " is not a valid value.");
    }
    return 1 << (c - FIRST_CHAR);
  }
}

And here is the unit test:

  import org.junit.Test;
  import static org.junit.Assert.*;

  /**
   * JUnit 4 tests for {@code IntBitSet}.
   *
   * <p>Checks that are meant to cover the whole alphabet iterate over {@code 'a'..'z'} instead of
   * listing one assertion per letter, so no letter can be left out by accident.
   */
  public class IntBitSetTest {

    /** size() must equal the number of distinct characters added. */
    @Test
    public void testSize() {
      IntBitSet instance = new IntBitSet();

      int count = 0;
      for (char c = 'a'; c <= 'z'; c += 3) {
        instance.add(c);
        count++;
      }

      assertEquals(count, instance.size());
    }

    /** A new set is empty, and stops being empty once a character is added. */
    @Test
    public void testIsEmpty() {
      IntBitSet instance = new IntBitSet();

      assertTrue(instance.isEmpty());

      instance.add('g');
      assertFalse(instance.isEmpty());
    }

    /** contains() reports characters still present and not the ones that were removed. */
    @Test
    public void testContains() {
      IntBitSet instance = new IntBitSet();

      for (char c = 'a'; c <= 'z'; c++) {
        instance.add(c);
      }

      instance.remove('o');
      instance.remove('u');
      instance.remove('s');

      assertTrue(instance.contains('a'));
      assertTrue(instance.contains('d'));
      assertTrue(instance.contains('i'));

      assertFalse(instance.contains('o'));
      assertFalse(instance.contains('u'));
      assertFalse(instance.contains('s'));
    }

    /** Adding one character makes exactly that character, and no other letter, a member. */
    @Test
    public void testAdd() {
      IntBitSet instance = new IntBitSet();
      instance.add('b');

      for (char c = 'a'; c <= 'z'; c++) {
        assertEquals("contains('" + c + "')", c == 'b', instance.contains(c));
      }
      assertEquals(1, instance.size());
    }

    /** Removing one character from a full set leaves every other letter in place. */
    @Test
    public void testRemove() {
      IntBitSet instance = new IntBitSet();

      for (char c = 'a'; c <= 'z'; c++) {
        instance.add(c);
      }

      instance.remove('e');

      for (char c = 'a'; c <= 'z'; c++) {
        assertEquals("contains('" + c + "')", c != 'e', instance.contains(c));
      }
      assertEquals(25, instance.size());
    }

    /** containsAll() accepts equal sets and subsets and rejects proper supersets. */
    @Test
    public void testContainsAll() {
      IntBitSet instance1 = new IntBitSet();
      IntBitSet instance2 = new IntBitSet();
      IntBitSet instance3 = new IntBitSet();

      for (char c = 'a'; c <= 'z'; c += 3) {
        instance1.add(c);
        instance2.add(c);
        // Only every other visited letter goes into instance3, making it a proper subset.
        if (c % 2 == 0) {
          instance3.add(c);
        }
      }

      assertTrue(instance1.containsAll(instance2));
      assertTrue(instance1.containsAll(instance3));
      assertFalse(instance3.containsAll(instance1));

      // The empty set is a subset of every set.
      assertTrue(instance3.containsAll(new IntBitSet()));
    }

    /** clear() removes every character. */
    @Test
    public void testClear() {
      IntBitSet instance = new IntBitSet();

      instance.add('z');

      instance.clear();
      assertEquals(0, instance.size());
      assertTrue(instance.isEmpty());
    }
  }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM