From f1a560b4f86c168c19e1f4928cce4811e40706d2 Mon Sep 17 00:00:00 2001 From: Yaniv Hollander Date: Sun, 3 Oct 2021 16:57:09 -0400 Subject: [PATCH] Update windowed_median.cpp --- probability/windowed_median.cpp | 116 +++++++++++++++++--------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/probability/windowed_median.cpp b/probability/windowed_median.cpp index fcb1895b6..2ceb9d638 100644 --- a/probability/windowed_median.cpp +++ b/probability/windowed_median.cpp @@ -1,18 +1,38 @@ /** - * \file - * \brief An implementation of a median calculation of a sliding window along a data stream + * @file + * @brief An implementation of a median calculation of a sliding window along a data stream + * + * @details + * Given a stream of integers, the algorithm calculates the median of a fixed-size window at the back of the stream. The leading time complexity of this algorithm is + * O(log(N)), and it is inspired by the known algorithm to calculate the median of an infinite stream of values, with the proper modifications to account for the finite + * window size for which the median is needed. + * + * ### Algorithm + * The sliding window is managed by a list, which guarantees O(1) for both pushing and popping. Each new value is pushed to the window back, while a value + * from the front of the window is popped. In addition, the algorithm manages a multi-value binary search tree (BST), implemented by std::multiset. For each new + * value that is inserted to the window, it is also inserted to the BST. When a value is popped from the window, it is also erased from the BST. Both insertion and + * erasure to/from the BST are O(logN) in time, with N the size of the window. Finally, the algorithm keeps a pointer to the root of the BST, and updates its position + * whenever values are inserted or erased to/from the BST. The root of the tree is the median! Hence, median retrieval is always O(1). + * + * Time complexity: O(logN). Space complexity: O(N). 
N - size of window + * @author [Yaniv Hollander](https://github.com/YanivHollander) */ - #include +#include #include #include #include using namespace std; + /** - * \class WindowedMedian - * \brief A class to calculate the median of a leading sliding window at the back of a stream of integer values. Each insertion of a new value - * is O(logN) in time, where N is the size of the sliding window. Each retrieval of median is O(1) in time. Space complexity is O(N) + * @namespace probability + * @brief Probability algorithms + */ +namespace probability { +/** + * @class WindowedMedian + * @brief A class to calculate the median of a leading sliding window at the back of a stream of integer values. */ class WindowedMedian { const int _windowSize; // Sliding window size @@ -21,8 +41,8 @@ class WindowedMedian { multiset::const_iterator _itMedian; // An iterator that points to the root of the multi-value BST /** - * \brief Inserts a value to a sorted multi-value BST - * \param value Value to insert + * @brief Inserts a value to a sorted multi-value BST + * @param value Value to insert */ void insertToSorted(int value) { _sortedValues.insert(value); // Insert value to BST - O(logN) @@ -44,8 +64,8 @@ class WindowedMedian { } /** - * \brief Erases a value to a sorted multi-value BST - * \param value Value to insert + * @brief Erases a value from a sorted multi-value BST + * @param value Value to erase */ void eraseFromSorted(int value) { const auto sz = _sortedValues.size(); @@ -68,14 +88,14 @@ class WindowedMedian { public: /** - * \brief Constructs a WindowedMedian object - * \param windowSize Sliding window size + * @brief Constructs a WindowedMedian object + * @param windowSize Sliding window size */ WindowedMedian(int windowSize) : _windowSize(windowSize) {}; /** - * \brief Insert a new value to the stream - * \param value New value to insert + * @brief Insert a new value to the stream + * @param value New value to insert */ void insert(int value) { @@ -89,8 +109,8 @@ 
public: } /** - * \brief Gets the median of the values in the sliding window - * \return Median of sliding window. For even window size return the average between the two values in the middle + * @brief Gets the median of the values in the sliding window + * @return Median of sliding window. For even window size return the average between the two values in the middle */ float getMedian() const { if (_sortedValues.size() % 2 != 0) @@ -99,8 +119,8 @@ public: } /** - * \brief A naive and inefficient method to obtain the median of the sliding window. Used for testing! - * \return Median of sliding window. For even window size return the average between the two values in the middle + * @brief A naive and inefficient method to obtain the median of the sliding window. Used for testing! + * @return Median of sliding window. For even window size return the average between the two values in the middle */ float getMedianNaive() const { auto window = _window; @@ -111,54 +131,41 @@ public: return 0.5 * median + 0.5 * *next(window.begin(), window.size() / 2 - 1); // O(N) } }; +} // namespace probability #include /** - * \brief A testing function - * \param vals Stream of values - * \param windowSize Size of sliding window + * @brief A testing function + * @param vals Stream of values + * @param windowSize Size of sliding window */ -bool test(const vector &vals, int windowSize) { - WindowedMedian windowedMedian(windowSize); - bool testSucceeded = true; +static void test(const vector &vals, int windowSize) { + probability::WindowedMedian windowedMedian(windowSize); for (int i = 0; i < vals.size(); i++) { windowedMedian.insert(vals[i]); // Comparing medians: efficient function vs. 
Naive one - if (windowedMedian.getMedian() != windowedMedian.getMedianNaive()) { - cout << "i = " << i << ": " << windowedMedian.getMedian() << "!=" << - windowedMedian.getMedianNaive() << endl; - testSucceeded = false; - } + assert(windowedMedian.getMedian() == windowedMedian.getMedianNaive()); } - return testSucceeded; } #include #include +/** + * @brief Main function + * @param argc commandline argument count (ignored) + * @param argv commandline array of arguments (ignored) + * @returns 0 on exit + */ int main(int argc, const char * argv[]) { - cout << "TEST 1" << endl; - if (!test({1, 2, 3, 4, 5, 6, 7, 8, 9}, 3)) - return -1; - cout << "TEST 2" << endl; - if (!test({9, 8, 7, 6, 5, 4, 3, 2, 1}, 3)) - return -1; - cout << "TEST 3" << endl; - if (!test({9, 8, 7, 6, 5, 4, 5, 6}, 4)) - return -1; - cout << "TEST 4" << endl; - if (!test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3)) - return -1; - cout << "TEST 5" << endl; - if (!test({3, 3, 3, 3, -7, 3, 3, 3, 3}, 3)) - return -1; - cout << "TEST 6" << endl; - if (!test({4, 3, 3, -5, 7, 1, 3, 4, 5}, 5)) - return -1; - cout << "TEST 7" << endl; - if (!test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440, 1115438165, 1784484492, - 74243042, 114807987}, 6)) - return -1; + test({1, 2, 3, 4, 5, 6, 7, 8, 9}, 3); + test({9, 8, 7, 6, 5, 4, 3, 2, 1}, 3); + test({9, 8, 7, 6, 5, 4, 5, 6}, 4); + test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3); + test({3, 3, 3, 3, -7, 3, 3, 3, 3}, 3); + test({4, 3, 3, -5, 7, 1, 3, 4, 5}, 5); + test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440, 1115438165, 1784484492, + 74243042, 114807987}, 6); std::srand(static_cast(std::time(nullptr))); for (int i = 8; i < 100; i++) { const auto n = 1 + std::rand() / ((RAND_MAX + 5u) / 20); @@ -166,9 +173,8 @@ int main(int argc, const char * argv[]) { vector vals; for (int i = 0; i < n; i++) vals.push_back(rand() - RAND_MAX); - cout << "TEST " << i << endl; - if (!test(vals, windowSize)) - return -1; + test(vals, windowSize); } return 0; } +