What is the most efficient way to iterate over array and preprocess it before copy in C++?

Question

I have custom array constructor like below:

rtc::ArrayView<const uint8_t> frame,
rtc::ArrayView<uint8_t> encrypted_frame,
uint8_t unencrypted_bytes = 10;

How can I efficiently loop over these frames and process them? Is a plain for loop the only option? If we just wanted to copy the frame without preprocessing, I know we could simply use std::copy. Is there any way to make this per-element processing more efficient?

  // // Copy rest of frame
  // std::copy(frame.begin() + unencrypted_bytes, frame.begin() +
  // (encrypted_frame.size() - 41),
  //           encrypted_frame.begin() + unencrypted_bytes);

  // XOR every byte of `frame` in the index range
  // [unencrypted_bytes, encrypted_frame.size() - 41) with fake_key_ and store
  // the result at the same index of `encrypted_frame`, logging each byte
  // before and after the XOR.
  // NOTE(review): size() returns size_t, so `encrypted_frame.size() - 41`
  // wraps around to a huge value if the buffer holds fewer than 41 bytes --
  // confirm the caller guarantees at least that many.
  for (size_t i = unencrypted_bytes; i < encrypted_frame.size() - 41; i++) {
    // encrypted_frame[i] = i;
    // NOTE(review): frame[i] is a uint8_t; if RTC_LOG streams like
    // std::ostream this prints a character, not a number -- confirm.
    RTC_LOG(LS_INFO) << "Ivan, original frame Before XOR : " << i << " "
                     << frame[i];
    encrypted_frame[i] = frame[i] ^ fake_key_;
    RTC_LOG(LS_INFO) << "Ivan, encrypted frame After XOR : " << i << " "
                     << encrypted_frame[i];
  }

Below is the header that defines my array view type (rtc::ArrayView):

/*
 *  Copyright 2015 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_ARRAY_VIEW_H_
#define API_ARRAY_VIEW_H_

#include <algorithm>
#include <array>
#include <iterator>
#include <type_traits>

#include "rtc_base/checks.h"
#include "rtc_base/type_traits.h"

namespace rtc {

// tl;dr: rtc::ArrayView is the same thing as gsl::span from the Guideline
//        Support Library.
//
// Many functions read from or write to arrays. The obvious way to do this is
// to use two arguments, a pointer to the first element and an element count:
//
//   bool Contains17(const int* arr, size_t size) {
//     for (size_t i = 0; i < size; ++i) {
//       if (arr[i] == 17)
//         return true;
//     }
//     return false;
//   }
//
// This is flexible, since it doesn't matter how the array is stored (C array,
// std::vector, rtc::Buffer, ...), but it's error-prone because the caller has
// to correctly specify the array length:
//
//   Contains17(arr, arraysize(arr));     // C array
//   Contains17(arr.data(), arr.size());  // std::vector
//   Contains17(arr, size);               // pointer + size
//   ...
//
// It's also kind of messy to have two separate arguments for what is
// conceptually a single thing.
//
// Enter rtc::ArrayView<T>. It contains a T pointer (to an array it doesn't
// own) and a count, and supports the basic things you'd expect, such as
// indexing and iteration. It allows us to write our function like this:
//
//   bool Contains17(rtc::ArrayView<const int> arr) {
//     for (auto e : arr) {
//       if (e == 17)
//         return true;
//     }
//     return false;
//   }
//
// And even better, because a bunch of things will implicitly convert to
// ArrayView, we can call it like this:
//
//   Contains17(arr);                             // C array
//   Contains17(arr);                             // std::vector
//   Contains17(rtc::ArrayView<int>(arr, size));  // pointer + size
//   Contains17(nullptr);                         // nullptr -> empty ArrayView
//   ...
//
// ArrayView<T> stores both a pointer and a size, but you may also use
// ArrayView<T, N>, which has a size that's fixed at compile time (which means
// it only has to store the pointer).
//
// One important point is that ArrayView<T> and ArrayView<const T> are
// different types, which allow and don't allow mutation of the array elements,
// respectively. The implicit conversions work just like you'd hope, so that
// e.g. vector<int> will convert to either ArrayView<int> or ArrayView<const
// int>, but const vector<int> will convert only to ArrayView<const int>.
// (ArrayView itself can be the source type in such conversions, so
// ArrayView<int> will convert to ArrayView<const int>.)
//
// Note: ArrayView is tiny (just a pointer and a count if variable-sized, just
// a pointer if fix-sized) and trivially copyable, so it's probably cheaper to
// pass it by value than by const reference.

namespace impl {

// Sentinel used as the Size template argument to mean "the length is stored
// at run time" rather than being a compile-time constant.
enum : std::ptrdiff_t { kArrayViewVarSize = -4711 };

// Storage for a view whose length is a positive compile-time constant. Only
// the pointer is kept; the length comes from the template argument.
template <typename T, std::ptrdiff_t Size>
class ArrayViewBase {
  static_assert(Size > 0, "ArrayView size must be variable or non-negative");

 public:
  // The run-time length is accepted so that all three variants share one
  // constructor shape, but it is not stored.
  ArrayViewBase(T* data, size_t /*size*/) : data_{data} {}

  T* data() const { return data_; }
  static constexpr bool empty() { return false; }
  static constexpr size_t size() { return static_cast<size_t>(Size); }

 protected:
  static constexpr bool fixed_size() { return true; }

 private:
  T* data_;
};

// Storage for a view of compile-time length zero. Nothing is stored at all:
// both arguments are ignored and data() always yields nullptr.
template <typename T>
class ArrayViewBase<T, 0> {
 public:
  explicit ArrayViewBase(T* /*data*/, size_t /*size*/) {}

  T* data() const { return nullptr; }
  static constexpr bool empty() { return true; }
  static constexpr size_t size() { return 0; }

 protected:
  static constexpr bool fixed_size() { return true; }
};

// Storage for a view whose length is only known at run time: a pointer plus
// an element count.
template <typename T>
class ArrayViewBase<T, impl::kArrayViewVarSize> {
 public:
  // An empty view always stores a null pointer, whatever pointer was passed.
  ArrayViewBase(T* data, size_t size)
      : data_(size != 0 ? data : nullptr), size_(size) {}

  T* data() const { return data_; }
  bool empty() const { return size_ == 0; }
  size_t size() const { return size_; }

 protected:
  static constexpr bool fixed_size() { return false; }

 private:
  T* data_;
  size_t size_;
};

}  // namespace impl

// Non-owning view over a contiguous array of T. `Size` is either a
// compile-time element count or impl::kArrayViewVarSize (the default), in
// which case the element count is stored at run time by the base class.
// All storage and the size()/empty()/data() accessors come from
// impl::ArrayViewBase<T, Size>; this class adds the constructors, indexing,
// iteration and subview operations.
template <typename T, std::ptrdiff_t Size = impl::kArrayViewVarSize>
class ArrayView final : public impl::ArrayViewBase<T, Size> {
 public:
  // Container-style member aliases.
  using value_type = T;
  using const_iterator = const T*;

  // Construct an ArrayView from a pointer and a length.
  // `data` is handed to the base class as a T*, so U* must convert to T*.
  // The RTC_DCHECK_* lines verify that the base stored what was passed; for a
  // fixed-size view that means `size` has to equal Size.
  // NOTE(review): RTC_DCHECK_* come from rtc_base/checks.h; whether they are
  // compiled out in release builds is not visible here -- confirm.
  template <typename U>
  ArrayView(U* data, size_t size)
      : impl::ArrayViewBase<T, Size>::ArrayViewBase(data, size) {
    RTC_DCHECK_EQ(size == 0 ? nullptr : data, this->data());
    RTC_DCHECK_EQ(size, this->size());
    RTC_DCHECK_EQ(!this->data(),
                  this->size() == 0);  // data is null iff size == 0.
  }

  // Construct an empty ArrayView. Note that fixed-size ArrayViews of size > 0
  // cannot be empty.
  ArrayView() : ArrayView(nullptr, 0) {}
  ArrayView(std::nullptr_t)  // NOLINT
      : ArrayView() {}
  // nullptr plus an explicit length: only compiles for zero-size or
  // variable-size views (static_assert), and the length is checked to be 0.
  ArrayView(std::nullptr_t, size_t size)
      : ArrayView(static_cast<T*>(nullptr), size) {
    static_assert(Size == 0 || Size == impl::kArrayViewVarSize, "");
    RTC_DCHECK_EQ(0, size);
  }

  // Construct an ArrayView from a C-style array.
  // For a fixed-size view the array length N must equal Size.
  template <typename U, size_t N>
  ArrayView(U (&array)[N])  // NOLINT
      : ArrayView(array, N) {
    static_assert(Size == N || Size == impl::kArrayViewVarSize,
                  "Array size must match ArrayView size");
  }

  // (Only if size is fixed.) Construct a fixed size ArrayView<T, N> from a
  // non-const std::array instance. For an ArrayView with variable size, the
  // used ctor is ArrayView(U& u) instead.
  template <typename U,
            size_t N,
            typename std::enable_if<
                Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
  ArrayView(std::array<U, N>& u)  // NOLINT
      : ArrayView(u.data(), u.size()) {}

  // (Only if size is fixed.) Construct a fixed size ArrayView<T, N> where T is
  // const from a const(expr) std::array instance. For an ArrayView with
  // variable size, the used ctor is ArrayView(U& u) instead.
  template <typename U,
            size_t N,
            typename std::enable_if<
                Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
  ArrayView(const std::array<U, N>& u)  // NOLINT
      : ArrayView(u.data(), u.size()) {}

  // (Only if size is fixed.) Construct an ArrayView from any type U that has a
  // static constexpr size() method whose return value is equal to Size, and a
  // data() method whose return value converts implicitly to T*. In particular,
  // this means we allow conversion from ArrayView<T, N> to ArrayView<const T,
  // N>, but not the other way around. We also don't allow conversion from
  // ArrayView<T> to ArrayView<T, N>, or from ArrayView<T, M> to ArrayView<T,
  // N> when M != N.
  template <
      typename U,
      typename std::enable_if<Size != impl::kArrayViewVarSize &&
                              HasDataAndSize<U, T>::value>::type* = nullptr>
  ArrayView(U& u)  // NOLINT
      : ArrayView(u.data(), u.size()) {
    static_assert(U::size() == Size, "Sizes must match exactly");
  }
  // Same as above, for const sources.
  template <
      typename U,
      typename std::enable_if<Size != impl::kArrayViewVarSize &&
                              HasDataAndSize<U, T>::value>::type* = nullptr>
  ArrayView(const U& u)  // NOLINT(runtime/explicit)
      : ArrayView(u.data(), u.size()) {
    static_assert(U::size() == Size, "Sizes must match exactly");
  }

  // (Only if size is variable.) Construct an ArrayView from any type U that
  // has a size() method whose return value converts implicitly to size_t, and
  // a data() method whose return value converts implicitly to T*. In
  // particular, this means we allow conversion from ArrayView<T> to
  // ArrayView<const T>, but not the other way around. Other allowed
  // conversions include
  // ArrayView<T, N> to ArrayView<T> or ArrayView<const T>,
  // std::vector<T> to ArrayView<T> or ArrayView<const T>,
  // const std::vector<T> to ArrayView<const T>,
  // rtc::Buffer to ArrayView<uint8_t> or ArrayView<const uint8_t>, and
  // const rtc::Buffer to ArrayView<const uint8_t>.
  template <
      typename U,
      typename std::enable_if<Size == impl::kArrayViewVarSize &&
                              HasDataAndSize<U, T>::value>::type* = nullptr>
  ArrayView(U& u)  // NOLINT
      : ArrayView(u.data(), u.size()) {}
  // Same as above, for const sources.
  template <
      typename U,
      typename std::enable_if<Size == impl::kArrayViewVarSize &&
                              HasDataAndSize<U, T>::value>::type* = nullptr>
  ArrayView(const U& u)  // NOLINT(runtime/explicit)
      : ArrayView(u.data(), u.size()) {}

  // Indexing and iteration. These allow mutation even if the ArrayView is
  // const, because the ArrayView doesn't own the array. (To prevent mutation,
  // use a const element type.)
  // operator[] checks the index against size() and the pointer against null
  // via RTC_DCHECK_* before returning a reference to the element.
  T& operator[](size_t idx) const {
    RTC_DCHECK_LT(idx, this->size());
    RTC_DCHECK(this->data());
    return this->data()[idx];
  }
  // Iterators are plain pointers into the viewed array; the c* variants
  // return pointers to const elements.
  T* begin() const { return this->data(); }
  T* end() const { return this->data() + this->size(); }
  const T* cbegin() const { return this->data(); }
  const T* cend() const { return this->data() + this->size(); }
  // Reverse iteration, built on the forward pointers above.
  std::reverse_iterator<T*> rbegin() const {
    return std::make_reverse_iterator(end());
  }
  std::reverse_iterator<T*> rend() const {
    return std::make_reverse_iterator(begin());
  }
  std::reverse_iterator<const T*> crbegin() const {
    return std::make_reverse_iterator(cend());
  }
  std::reverse_iterator<const T*> crend() const {
    return std::make_reverse_iterator(cbegin());
  }

  // Returns a variable-size view starting at element `offset` and containing
  // at most `size` elements; the length is clamped to the elements that
  // remain after `offset`. If `offset` is not less than size(), the result is
  // an empty view.
  ArrayView<T> subview(size_t offset, size_t size) const {
    return offset < this->size()
               ? ArrayView<T>(this->data() + offset,
                              std::min(size, this->size() - offset))
               : ArrayView<T>();
  }
  // Returns the view from element `offset` through the end (empty if `offset`
  // is not less than size()).
  ArrayView<T> subview(size_t offset) const {
    return subview(offset, this->size());
  }
};

// Equality of ArrayViews is identity of the viewed range: two views are equal
// when they have the same length and start at the same address. The elements
// themselves are never read.
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
bool operator==(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
  if (a.size() != b.size()) {
    return false;
  }
  return a.data() == b.data();
}
// Inequality is the exact negation of operator== on the same pair of views.
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
bool operator!=(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
  const bool same_range = (a == b);
  return !same_range;
}

// Variable-size ArrayViews are the size of two pointers; fixed-size ArrayViews
// are the size of one pointer. (And as a special case, fixed-size ArrayViews
// of size 0 require no storage.)
static_assert(sizeof(ArrayView<int>) == 2 * sizeof(int*), "");
static_assert(sizeof(ArrayView<int, 17>) == sizeof(int*), "");
static_assert(std::is_empty<ArrayView<int, 0>>::value, "");

// Convenience factory: builds a variable-size ArrayView<T> from a pointer and
// an element count, deducing T from the pointer type.
template <typename T>
inline ArrayView<T> MakeArrayView(T* data, size_t size) {
  ArrayView<T> view(data, size);
  return view;
}

// Reinterprets a view over elements of one fundamental type as a view over
// another fundamental type of the same size and alignment; both conditions
// are enforced at compile time. The element count is carried over unchanged.
// The template parameters are ordered (U, T, Size) so that callers name only
// the target type and the rest is deduced:
// reinterpret_array_view<target_type>(array_view).
template <typename U, typename T, std::ptrdiff_t Size>
inline ArrayView<U, Size> reinterpret_array_view(ArrayView<T, Size> view) {
  static_assert(sizeof(U) == sizeof(T) && alignof(U) == alignof(T),
                "ArrayView reinterpret_cast is only supported for casting "
                "between views that represent the same chunk of memory.");
  static_assert(
      std::is_fundamental<T>::value && std::is_fundamental<U>::value,
      "ArrayView reinterpret_cast is only supported for casting between "
      "fundamental types.");
  U* const recast_data = reinterpret_cast<U*>(view.data());
  const size_t element_count = view.size();
  return ArrayView<U, Size>(recast_data, element_count);
}

}  // namespace rtc

#endif  // API_ARRAY_VIEW_H_

Answer 1

This looks like WebRTC code. And if I had to guess, you're encrypting the media bytes of an RTP packet (just a guess). And so you probably want that to be fast.

I'm going to assume you recognize that the RTC_LOG statements in your main loop are likely a bigger performance killer than anything you could optimize in the XOR encryption itself. Logging each individual byte is going to negate whatever optimizations you make. So let's start with this:

  // The question's loop with the per-byte logging removed: XOR each byte of
  // `frame` in [unencrypted_bytes, encrypted_frame.size() - 41) with
  // fake_key_ and store it at the same index of `encrypted_frame`.
  for (size_t i = unencrypted_bytes; i < encrypted_frame.size() - 41; i++) {
    encrypted_frame[i] = frame[i] ^ fake_key_;
  }

The operator overload for [] looks like this:


  // Element access as defined by ArrayView: two RTC_DCHECK_* checks (index in
  // range, pointer non-null) precede the actual read of data()[idx].
  // NOTE(review): whether the checks compile to nothing in release builds
  // depends on rtc_base/checks.h, which is not shown here.
  T& operator[](size_t idx) const {
    RTC_DCHECK_LT(idx, this->size());
    RTC_DCHECK(this->data());
    return this->data()[idx];
  }

So that means every iteration invokes a call to data() for both the source and the destination array, and both do some additional validation checks. In a retail build, the compiler may be able to optimize most of that away. But I don't know that for a fact, because I don't know if the compiler will optimize your ArrayView like it would operations on a std:: collection class. Nor do I know if those RTC_DCHECK macros are no-ops in a release build.

But in a Debug build, it will be really slow. So if we can make debug fast, we can assume it carries over to your release build.

So we can make sure that our primary loop, the one that iterates over the bytes, doesn't make any function calls inside the loop body. That's going to be your biggest speedup. Hence, this will be much faster than what you have:

// Hoist the raw data pointers and the loop bound out of the loop so the hot
// path contains no accessor calls or per-element checks.
// `frame` is an ArrayView<const uint8_t>, so its data() is pointer-to-const.
const uint8_t* frame_data = frame.data();
uint8_t* encrypted_data = encrypted_frame.data();
// NOTE: size() is unsigned, so this wraps around if encrypted_frame holds
// fewer than 41 bytes; the caller must guarantee at least that many.
const size_t stop = encrypted_frame.size() - 41;
// `i` is an index into both buffers (not a pointer), starting just past the
// bytes that are left unencrypted.
for (size_t i = unencrypted_bytes; i < stop; i++) {
    encrypted_data[i] = frame_data[i] ^ fake_key_;
}

You could optionally use std::transform instead of a for-loop, but I think that will be nearly equivalent.

Again, it's entirely possible the compiler will optimize the original loop to be as good as what I just produced. But since ArrayView doesn't compile locally for me (I don't have the WebRTC sources handy), I don't know. Otherwise, if I could, I'd have all my assumptions validated on godbolt.

But I do know from experience that a function call per element in a really tight loop iterating over bytes or words, even if declared inline, is never as fast as manually inlining all the code you need directly into the loop.

Answer 2

For xor-ing a source range to a destination range, I'd use std::transform :

// XOR the bytes of `frame` in [unencrypted_bytes, encrypted_frame.size() - 41)
// with the key and write them to the SAME indices of `encrypted_frame`, which
// is what the question's loop does. The output iterator therefore has to be
// advanced by unencrypted_bytes as well, and the end of the input is derived
// from encrypted_frame.size(), the bound the original loop uses.
// The key is copied into the lambda with an init-capture, which avoids the
// implicit capture of `this` that [=] would perform if fake_key_ is a member.
std::transform(
    frame.cbegin() + unencrypted_bytes,
    frame.cbegin() + (encrypted_frame.size() - 41),
    encrypted_frame.begin() + unencrypted_bytes,
    [key = fake_key_] (const auto byte) -> std::uint8_t { return byte ^ key; });

In C++, it is often the case that clearly expressing the intention of the program with high-level abstractions using the standard library is the best choice. Source code should be used to express what the program needs to do, and avoid dictating how to do it as much as possible*, because that would constrain compiler optimizations from coming up with the best possible approach.

\* Unless you really know what you're doing and you have the benchmarks to prove it.

It would also be nice to make use of std::bit_xor , but that would require two input ranges in order to invoke the overload for std::transform that accepts a binary operator. Assuming the constant fake_key_ is a std::uint8_t , here's the definition for an iterator to model an infinite, filled range :

// Input iterator that models an infinite range in which every element is the
// same value: dereferencing always yields the stored value and incrementing
// is a no-op. Intended as the second input range of the binary std::transform
// overload, which only dereferences and increments it.
template <class T>
struct filled {
  using value_type = T;
  using difference_type = std::ptrdiff_t;
  using reference = const T &;
  using pointer = const T *;
  using iterator_category = std::input_iterator_tag;

  // Copy/move construction, copy/move assignment and destruction are left to
  // the compiler (Rule of Zero); the implicit versions do the right thing.
  // A default-constructed iterator holds a value-initialized T.
  constexpr filled() noexcept = default;
  // Builds an iterator that yields a copy of `v` forever.
  constexpr filled(reference v) : value{v} {}

  // Two iterators never compare equal, so a range delimited by `filled`
  // iterators never ends. operator!= is spelled out so the type is also
  // usable before C++20, where it is not synthesized from operator==.
  constexpr bool operator==(const filled &) const noexcept { return false; }
  constexpr bool operator!=(const filled &) const noexcept { return true; }

  constexpr reference operator*() const noexcept { return value; }
  constexpr pointer operator->() const noexcept {
    return std::addressof(value);
  }

  // Advancing does nothing: every position holds the same value.
  constexpr filled &operator++() noexcept { return *this; }
  // Not noexcept: returns a copy, and copying T may throw.
  constexpr filled operator++(int) { return *this; }

 private:
  // Value-initialized so a default-constructed iterator is safe to
  // dereference.
  value_type value{};
};

static_assert(std::input_iterator<filled<std::uint8_t>>);

Enabling the use of this overload:

// Binary std::transform takes (first1, last1, first2, d_first, op): the
// constant-key iterator is the SECOND INPUT and `encrypted_frame` is the
// OUTPUT. As in the question's loop, the bytes of `frame` in
// [unencrypted_bytes, encrypted_frame.size() - 41) are XORed with the key and
// written to the same indices of `encrypted_frame`, so the output iterator is
// advanced by unencrypted_bytes too.
std::transform(
    frame.cbegin() + unencrypted_bytes,
    frame.cbegin() + (encrypted_frame.size() - 41),
    filled<std::uint8_t>(fake_key_),
    encrypted_frame.begin() + unencrypted_bytes,
    std::bit_xor<std::uint8_t>());

What is the most efficient way to iterate over array and preprocess it before copy in C++?

Question

2 answers

solution1
4 ACCEPTED 2022-08-27 06:33:31

solution2
1 2022-08-27 06:58:31

What is the most efficient way to iterate over array and preprocess it before copy in C++?

Question

2 answers

solution1 4 ACCEPTED 2022-08-27 06:33:31

solution2 1 2022-08-27 06:58:31

solution1
4 ACCEPTED 2022-08-27 06:33:31

solution2
1 2022-08-27 06:58:31