![](/img/trans.png)
[英]Whats the most efficient way to sort elements of an array based on frequency of elements
[英]Sort the array elements based on the elements frequency in java
我已經編寫了代碼來根據數組中元素的頻率對 java 中的數組進行排序。 我需要更好的代碼或偽代碼(沒有集合框架)。請提供鏈接或代碼幫助。
/**
 * Demo program: orders the elements of a fixed sample array by how often each
 * value occurs, most frequent first, and prints the result.
 */
public class SortByFreq1 {

    /**
     * Counts the occurrences of every value in the sample array, orders the
     * distinct values by descending count and prints each value repeated
     * {@code count} times, every element followed by a single space.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        int[] arr = { 2, 5, 2, 8, 5, 6, 8, 8, 0, -8 };
        int[] nArr = new int[arr.length];

        // One pass over the input builds the value -> count table; no need to
        // rescan the whole list once per distinct value.
        Map<Integer, Integer> map = new HashMap<Integer, Integer>();
        for (int value : arr) {
            map.merge(value, 1, Integer::sum);
        }

        // Order the (value, count) entries by descending count. The sort is
        // stable, so values with equal counts keep the map's iteration order.
        ArrayList<Map.Entry<Integer, Integer>> tempLst =
                new ArrayList<Map.Entry<Integer, Integer>>(map.entrySet());
        Collections.sort(tempLst, (o1, o2) -> o2.getValue().compareTo(o1.getValue()));

        // Expand every entry back into `count` copies of its value.
        int k = 0;
        for (Map.Entry<Integer, Integer> entry : tempLst) {
            int no = entry.getKey();
            int noOfTimes = entry.getValue();
            for (int i = 0; i < noOfTimes; i++) {
                nArr[k++] = no;
            }
        }

        for (int value : nArr) {
            System.out.print(value + " ");
        }
    }
}
它背后的邏輯與Counting Sort非常相似。
注意:我們不會修改傳入的數組。
有兩種不同的方法,同時具有幾乎相同的時間和空間復雜度。
上面提到的 k 是數組中不同數字的數量。
使用Stream
或許我們可以讓這個過程更簡潔一些,盡管 OP 並沒有要求這樣:
/**
 * Returns a new array holding the elements of {@code arr} grouped by value and
 * ordered from the most frequent value to the least frequent one. The input
 * array is not modified.
 *
 * <p>Step 1 counts every value; step 2 copies the counts into a
 * {@link LinkedHashMap} in descending-count order so that iteration order is
 * the output order; step 3 writes each value {@code count} times.
 *
 * @param arr the values to reorder
 * @return a new array of the same length, sorted by descending frequency
 */
private static int[] sortByCounting(int[] arr) {
    Map<Integer, Long> frequencies = Arrays.stream(arr).boxed()
            .collect(Collectors.groupingBy(Integer::intValue, Collectors.counting()));

    // Insertion order of this map is the descending-count order.
    Map<Integer, Long> byDescendingCount = new LinkedHashMap<>();
    frequencies.entrySet().stream()
            .sorted((left, right) -> Long.compare(right.getValue(), left.getValue()))
            .forEachOrdered(e -> byDescendingCount.put(e.getKey(), e.getValue()));

    int[] result = new int[arr.length];
    int next = 0;
    for (Map.Entry<Integer, Long> group : byDescendingCount.entrySet()) {
        int value = group.getKey();
        int end = next + group.getValue().intValue();
        while (next < end) {
            result[next++] = value;
        }
    }
    return result;
}
由於我們無法使用內置的收集方法,因此我們必須記錄數量的計數。
直觀地說,我們可以引入一個自定義的數對(pair)來記錄 number 及其對應的 frequency(也可以稱為 count),作為我們的自定義實現。
/**
 * Returns a new array with the elements of {@code arr} grouped by value and
 * ordered by descending frequency, using {@code MyPair} counters instead of a
 * map. The input array is not modified.
 *
 * @param arr the values to reorder; must contain at least one element
 * @return a new array of the same length, sorted by descending frequency
 * @throws IllegalArgumentException if {@code arr} is empty
 */
private static int[] sortByPlainCounting(int[] arr) {
    if (arr.length == 0) {
        throw new IllegalArgumentException("Array cannot be empty");
    }

    MyPair[] pairs = prepareMyPairs(arr);
    // Highest count first; the object sort is stable, so pairs with equal
    // counts keep the order produced by prepareMyPairs.
    Arrays.sort(pairs, (a, b) -> Integer.compare(b.getCount(), a.getCount()));

    int[] sorted = new int[arr.length];
    int position = 0;
    for (MyPair pair : pairs) {
        for (int repeat = 0; repeat < pair.count; repeat++) {
            sorted[position++] = pair.key;
        }
    }
    return sorted;
}
/**
 * Mutable counter that ties a value ({@code key}) to the number of times it
 * has been seen ({@code count}). A new pair starts with a count of one.
 */
static class MyPair {
    /** The value being counted. */
    int key;
    /** How many times {@link #key} has been seen so far. */
    int count = 1;

    /**
     * Creates a pair for a value that has been seen once.
     *
     * @param value the value being counted
     */
    public MyPair(int value) {
        key = value;
    }

    /** Records one more occurrence of the value. */
    public void inc() {
        count += 1;
    }

    /**
     * @return the number of occurrences recorded so far
     */
    public int getCount() {
        return count;
    }
}
/**
 * Builds one {@code MyPair} per distinct value of {@code arr}, each carrying
 * the number of occurrences of that value. The pairs are returned in
 * descending order of their value. The input array is not modified.
 *
 * @param arr the values to count; must not be empty (an empty array fails
 *            with an {@link ArrayIndexOutOfBoundsException})
 * @return the distinct values with their counts, largest value first
 */
static MyPair[] prepareMyPairs(int[] arr) {
    // Sort a copy ascending and walk it from the back, which visits the
    // values in descending order with equal values adjacent.
    int[] ascending = arr.clone();
    Arrays.sort(ascending);
    int top = ascending.length - 1;

    // First pass: count the distinct values to size the result.
    int distinct = 1;
    int previous = ascending[top];
    for (int i = top - 1; i >= 0; i--) {
        if (ascending[i] != previous) {
            previous = ascending[i];
            distinct++;
        }
    }

    // Second pass: open a new pair whenever the value changes, otherwise
    // bump the pair currently being filled.
    MyPair[] pairs = new MyPair[distinct];
    int slot = -1;
    for (int i = top; i >= 0; i--) {
        int value = ascending[i];
        if (slot >= 0 && pairs[slot].key == value) {
            pairs[slot].inc();
        } else {
            pairs[++slot] = new MyPair(value);
        }
    }
    return pairs;
}
做最后的比較,我們可以證明:
/**
 * Micro-benchmark driver: runs both sorting strategies on 100 random arrays,
 * prints for each round whether their results have the same frequency
 * profile, and finally prints summary statistics of the elapsed nanoseconds
 * of each strategy.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final int rounds = 100;
    int N = 10_000 + new Random().nextInt(100);
    List<Long> list0 = new ArrayList<>();
    List<Long> list1 = new ArrayList<>();

    for (int round = 0; round < rounds; round++) {
        int[] arr = RandomGenerator.generateArrays(N, N, N / 10, N / 5, false);

        long begin = System.nanoTime();
        int[] arr0 = sortByCounting(arr);
        list0.add(System.nanoTime() - begin);

        begin = System.nanoTime();
        int[] arr1 = sortByPlainCounting(arr);
        list1.add(System.nanoTime() - begin);

        System.out.println(isFrequencyEqual(arr0, arr1));
    }

    System.out.println("Collection time cost: "
            + list0.stream().mapToLong(Long::longValue).summaryStatistics());
    System.out.println("Custom time cost: "
            + list1.stream().mapToLong(Long::longValue).summaryStatistics());
}
/**
 * Checks whether two arrays have the same frequency profile: the same number
 * of distinct values and the same sequence of occurrence counts when the
 * values are ordered from most to least frequent. The values themselves are
 * not compared, only their counts.
 *
 * @param arr0 first array
 * @param arr1 second array
 * @return {@code true} if both arrays yield the same descending list of counts
 */
private static boolean isFrequencyEqual(int[] arr0, int[] arr1) {
    List<Long> countList0 = getCountMap(arr0).values().stream().collect(Collectors.toList());
    List<Long> countList1 = getCountMap(arr1).values().stream().collect(Collectors.toList());
    // List.equals checks the sizes and then compares element by element with
    // Long.equals. Comparing the boxed counts with != would test object
    // identity, which is only reliable for small cached values and wrongly
    // reports a mismatch for equal counts above 127.
    return countList0.equals(countList1);
}

/**
 * Counts the occurrences of every value in {@code arr} and returns the counts
 * in a map whose iteration order goes from the most frequent value to the
 * least frequent one.
 *
 * @param arr the values to count
 * @return value-to-count map iterating in descending-count order
 */
private static Map<Integer, Long> getCountMap(int[] arr) {
    return Arrays.stream(arr).boxed()
            .collect(Collectors.groupingBy(Integer::intValue, Collectors.counting()))
            .entrySet().stream()
            .sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue()))
            // Keys are already unique, so the merge function is never needed;
            // LinkedHashMap preserves the sorted encounter order.
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (oldV, newV) -> oldV, LinkedHashMap::new));
}
輔助工具方法:
/**
 * Generates an array of random integers.
 *
 * <p>The length is chosen uniformly from {@code [minSize, maxSize]} and every
 * element is chosen uniformly from {@code [low, high)}.
 *
 * @param minSize  smallest allowed length (inclusive)
 * @param maxSize  largest allowed length (inclusive)
 * @param low      smallest allowed element value (inclusive)
 * @param high     upper bound for element values (exclusive)
 * @param isUnique if {@code true}, no value appears more than once; the
 *                 elements are then returned in the iteration order of a
 *                 {@link HashSet}, not in the order they were drawn
 * @return the generated array
 * @throws IllegalArgumentException if the size range or the value range is
 *         empty, or if {@code isUnique} is set and the chosen length exceeds
 *         the number of distinct values available in {@code [low, high)}
 */
public static int[] generateArrays(int minSize, int maxSize, int low, int high, boolean isUnique) {
    // The no-arg constructor picks a seed that differs between invocations.
    // Seeding with the current millisecond made calls issued within the same
    // millisecond return identical arrays.
    Random random = new Random();
    int N = random.nextInt(maxSize - minSize + 1) + minSize;
    if (isUnique) {
        // Without this guard the rejection loop below can never finish when
        // more unique values are requested than the range contains.
        long distinctAvailable = Math.max((long) high - low, 0L);
        if (N > distinctAvailable) {
            throw new IllegalArgumentException("Cannot draw " + N + " unique values from ["
                    + low + ", " + high + ")");
        }
        Set<Integer> intSet = new HashSet<>();
        while (intSet.size() < N) {
            intSet.add(random.nextInt(high - low) + low);
        }
        return intSet.stream().mapToInt(Integer::intValue).toArray();
    } else {
        int[] arr = new int[N];
        for (int i = 0; i < N; ++i) {
            arr[i] = random.nextInt(high - low) + low;
        }
        return arr;
    }
}
和測試輸出:
Sorted by frequency: true
// ... another 98 same output
Sorted by frequency: true
Collection time cost: LongSummaryStatistics{count=100, sum=273531781, min=466684, average=2735317.810000, max=131741520}
Custom time cost: LongSummaryStatistics{count=100, sum=366417748, min=1733417, average=3664177.480000, max=27617114}
可以使用鴿巢排序在 O(n) 中完成。 偽代碼:
counts = new HashMap<Item, int>();
foreach item in elements:
counts[item] += 1;
buckets = new List<Item>[elements.length+1];
foreach item in counts:
buckets[counts[item]].Append(item)
for i from 1 to elements.length:
bucket = buckets[i]; /* edit: looping over i instead over bucket */
for item in bucket:
/* edit: if the out has to preserve the original number of elements
such as [1,5,5,0,1,9,1] will print
9,0,5,5,1,1,1 instead of 9,0,5,1, then the next line
has to be repeated i times*/
System.out.println(item)
編輯:通過實現哈希表和鏈表,可以在沒有集合框架的情況下編寫相同的內容:
/**
 * Cell of a singly linked list of int values; used to chain together all
 * elements that fall into the same frequency bucket.
 */
class Node {
    /** The element stored in this cell. */
    public int value;
    /** The following cell of the chain, or {@code null} at the end. */
    public Node next;
}
log2count = Math.ceil(Math.log(elements.length) / Math.log(2));
hashSize = (int) Math.Round(Math.Pow(2, log2count) * 2);
/* countsQuadraticProbing[i] is 0 if the hash entry is empty,
otherwise it contains the frequency of the element in
elementsQuadraticProbing[i].
Note that quadratic probing over a hash table of size 2**k,
and probing of (0.5 i + 0.5 i**2) is guaranteed to find an empty
entry if the hash table is not full.
*/
countsQuadraticProbing = new int[hashSize];
elementsQuadraticProbing = new int[hashSize];
foreach item in elements:
for i from 0 to hashSize-1:
index = (item + (i * (i + 1) / 2)) % hashSize;
if countsQuadraticProbing[index] == 0:
countsQuadraticProbing[index] = 1;
elementsQuadraticProbing[index] = item;
break;
if elementsQuadraticProbing[index] == item:
countsQuadraticProbing[index]++;
break;
buckets = new Node[elements.length+1];
for i from 0 to hashSize-1:
count = countsQuadraticProbing[i];
if count != 0:
Node topNode = new Node();
topNode.next = buckets[count];
topNode.value = elementsQuadraticProbing[i];
buckets[count] = topNode;
/* there are O(N) buckets, total of elements in all buckets O(N),
overall complexity of the nested loop O(N)
*/
for i from 1 to elements.length:
node = buckets[i] /* edit: using i for iteration */
while node != null:
/* edit: if the out has to preserve the original number of elements
such as [1,5,5,0,1,9,1] will print
9,0,5,5,1,1,1 instead of 9,0,5,1, then the next line
has to be repeated i times*/
System.out.println(node.value);
node = node.next;
您的解決方案更好,但由於我沒有使用任何集合,所以它會很大。
1. 對列表進行排序 2. 獲取每個元素的頻率 3. 創建一個新的數組列表/數組並將頻率較高的元素存儲到較低頻率。
獲取頻率
/**
 * Counts how often each value occurs in an array whose elements are all in
 * the range {@code 1..n}, using the array itself as the counting table.
 */
class CountFrequencies {

    /**
     * Finds the counts of all elements present in {@code arr[0..n-1]} and
     * prints them, one {@code value->count} line per value from 1 to n,
     * after a header line.
     *
     * <p>The array is overwritten: on return, {@code arr[k]} holds the negated
     * count of the value {@code k + 1} (or 0 if that value never occurred).
     *
     * @param arr the values to count; every element must be in the range
     *            {@code 1..n}, otherwise indexing fails or counts are wrong
     * @param n   number of leading elements of {@code arr} to process
     */
    void findCounts(int arr[], int n) {
        // Traverse all array elements
        int i = 0;
        while (i < n) {
            // If this element is already processed (it is a count slot or an
            // emptied slot), then there is nothing to do
            if (arr[i] <= 0) {
                i++;
                continue;
            }

            // Find the index corresponding to this element.
            // For example, the index for 5 is 4
            int elementIndex = arr[i] - 1;

            if (arr[elementIndex] > 0) {
                // The target slot still holds an unprocessed element: move
                // that element to arr[i] first so that it is not lost ...
                arr[i] = arr[elementIndex];

                // ... and then turn the target slot into a counter holding
                // the initial count (stored negated) of the original arr[i].
                // i is not advanced, so the moved element is handled next.
                arr[elementIndex] = -1;
            } else {
                // This is NOT the first occurrence of arr[i]:
                // increment its (negated) count
                arr[elementIndex]--;

                // And mark arr[i] as 0, meaning the value 'i+1' has not been
                // seen so far
                arr[i] = 0;
                i++;
            }
        }

        System.out.println("Below are counts of all elements");
        for (int j = 0; j < n; j++)
            System.out.println(j+1 + "->" + Math.abs(arr[j]));
    }
}
上面的代碼應該給你一個輸出:
1 -> 3
2 -> 0
3 -> 2
4 -> 0
5 -> 2
6 -> 0
7 -> 2
8 -> 0
9 -> 2
10 -> 0
11 -> 0
我只是很好奇:為什么在這種情況下不能使用經典的冒泡排序,只需對冒泡過程稍作定制?在最壞的情況下,時間復雜度將是 O(n*n),空間復雜度將是 O(3n) :)
純數組實現將類似於:
/**
 * Reorders {@code arr} in place so that equal values are adjacent and the
 * groups appear from the most frequent value to the least frequent one.
 *
 * <p>Uses a two-row table: row 0 holds the distinct values seen so far and
 * row 1 their counts. The table is kept ordered by descending count by
 * bubbling an entry towards the front whenever its count overtakes its
 * left neighbour.
 *
 * @param arr the array to sort by frequency; overwritten with the result
 */
private static void bubbleSortByOccurrences(int[] arr) {
    int[][] counter = new int[2][arr.length];
    int[] values = counter[0];
    int[] counts = counter[1];
    int last = -1; // index of the last used table slot

    for (int value : arr) {
        // Linear search for the slot already tracking this value.
        int slot = 0;
        while (slot <= last && values[slot] != value) {
            slot++;
        }

        if (slot > last) {
            // First sighting: append a new slot with a count of one.
            values[slot] = value;
            counts[slot]++;
            last = slot;
            continue;
        }

        counts[slot]++;
        // Bubble the entry towards the front while it outranks its neighbour.
        while (slot > 0 && counts[slot] > counts[slot - 1]) {
            int bumpedCount = counts[slot];
            values[slot] = values[slot - 1];
            counts[slot] = counts[slot - 1];
            values[slot - 1] = value;
            counts[slot - 1] = bumpedCount;
            slot--;
        }
    }

    fillArrayBackwards(arr, counter, last);
}
/**
 * Writes the counted values into {@code buf} from its last position towards
 * the front, walking the table from slot {@code counterIndex} down to slot 0.
 * Each value {@code counter[0][i]} is written {@code counter[1][i]} times, so
 * the entry in slot 0 ends up at the front of {@code buf}.
 *
 * @param buf          destination array
 * @param counter      row 0: values, row 1: their occurrence counts
 * @param counterIndex index of the last used table slot; -1 writes nothing
 */
private static void fillArrayBackwards(int[] buf, int[][] counter, int counterIndex) {
    int write = buf.length;
    for (int slot = counterIndex; slot >= 0; slot--) {
        for (int remaining = counter[1][slot]; remaining > 0; remaining--) {
            buf[--write] = counter[0][slot];
        }
    }
}
使用 Bubble 類實現的相同算法將如下所示:
/**
 * Reorders {@code arr} in place so that equal values are adjacent and the
 * groups appear from the most frequent value to the least frequent one.
 *
 * <p>Keeps a singly linked list with one {@code Bubble} per distinct value,
 * ordered from the lowest count (head) to the highest count (end). A value
 * seen for the first time is prepended; a value seen again has its count
 * incremented and is then bubbled towards the end of the list while it
 * outranks its successor. The list is finally written into {@code arr} from
 * the back.
 *
 * @param arr the array to sort by frequency; overwritten with the result
 */
private static void bubbleSortByOccurrences(int[] arr) {
    Bubble head = null;
    for (int value : arr) {
        // Find the node tracking this value and remember its predecessor,
        // which is needed to relink the list when the node moves.
        Bubble previous = null;
        Bubble current = head;
        while (current != null && current.getValue() != value) {
            previous = current;
            current = current.getTail();
        }

        if (current == null) {
            // First sighting: a count of one is minimal, so the new node
            // belongs at the front of the list.
            Bubble fresh = new Bubble(value);
            fresh.setTail(head);
            head = fresh;
            continue;
        }

        current.incrementOccurrences();
        while (current.getTail() != null
                && current.getOccurrences() > current.getTail().getOccurrences()) {
            // Swap `current` with its successor.
            Bubble successor = current.getTail();
            current.setTail(successor.getTail());
            successor.setTail(current);
            if (previous == null) {
                head = successor;
            } else {
                previous.setTail(successor);
            }
            // The swapped node is now the predecessor of `current`. This must
            // be recorded even when the swap happened at the head; otherwise
            // a second swap would overwrite `head` again and drop the node
            // that was just moved in front of `current`.
            previous = successor;
        }
    }
    fillArrayBackwards(arr, head);
}
/**
 * Writes the values of the linked list into {@code buf} starting at its last
 * position and moving towards the front. Every node contributes its value
 * as many times as its occurrence count, so the head of the list ends up at
 * the end of {@code buf}.
 *
 * @param buf  destination array
 * @param head first node of the list, or {@code null} to write nothing
 */
private static void fillArrayBackwards(int[] buf, Bubble head) {
    int write = buf.length;
    Bubble node = head;
    while (node != null) {
        for (int remaining = node.getOccurrences(); remaining > 0; remaining--) {
            buf[--write] = node.getValue();
        }
        node = node.getTail();
    }
}
其中自定義定義的氣泡如下:
/**
 * Node of a singly linked list that pairs a value with the number of times
 * it has occurred. A new node starts with one occurrence and no successor.
 */
class Bubble {
    private final int value;
    private int occurrences = 1;
    private Bubble tail;

    /**
     * Creates a node for a value that has been seen once.
     *
     * @param value the value this node counts
     */
    public Bubble(int value) {
        this.value = value;
    }

    /** @return the value this node counts */
    public int getValue() {
        return value;
    }

    /** @return how many times the value has been recorded */
    public int getOccurrences() {
        return occurrences;
    }

    /** Records one more occurrence of the value. */
    public void incrementOccurrences() {
        occurrences += 1;
    }

    /** @return the next node of the list, or {@code null} if there is none */
    public Bubble getTail() {
        return tail;
    }

    /**
     * Sets the next node of the list.
     *
     * @param tail the new successor, or {@code null} to end the list here
     */
    public void setTail(Bubble tail) {
        this.tail = tail;
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.