From 4fe430d75fe7711223b7fb82d6fb6a0d60341678 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Mon, 1 Jun 2020 03:10:34 +0000 Subject: [PATCH] formatting source-code for fd695305150777981dc2a1f256aa2be444e4f108 --- machine_learning/adaline_learning.cpp | 285 +++++++++++------------ math/realtime_stats.cpp | 156 ++++++------- sorting/non_recursive_merge_sort.cpp | 151 ++++++------ sorting/quick_sort.cpp | 76 +++--- sorting/shell_sort2.cpp | 46 ++-- strings/brute_force_string_searching.cpp | 38 +-- strings/knuth_morris_pratt.cpp | 74 +++--- strings/rabin_karp.cpp | 141 ++++++----- 8 files changed, 481 insertions(+), 486 deletions(-) diff --git a/machine_learning/adaline_learning.cpp b/machine_learning/adaline_learning.cpp index de5378c19..077b9408e 100644 --- a/machine_learning/adaline_learning.cpp +++ b/machine_learning/adaline_learning.cpp @@ -35,156 +35,153 @@ * \brief Machine learning algorithms */ namespace machine_learning { - class adaline { - public: - /** - * Default constructor - * \param[in] num_features number of features present - * \param[in] eta learning rate (optional, default=0.1) - * \param[in] convergence accuracy (optional, - * default=\f$1\times10^{-5}\f$) - */ - adaline(int num_features, const double eta = 0.01f, - const double accuracy = 1e-5) - : eta(eta), accuracy(accuracy) { - if (eta <= 0) { - std::cerr << "learning rate should be positive and nonzero" - << std::endl; - std::exit(EXIT_FAILURE); +class adaline { + public: + /** + * Default constructor + * \param[in] num_features number of features present + * \param[in] eta learning rate (optional, default=0.1) + * \param[in] convergence accuracy (optional, + * default=\f$1\times10^{-5}\f$) + */ + adaline(int num_features, const double eta = 0.01f, + const double accuracy = 1e-5) + : eta(eta), accuracy(accuracy) { + if (eta <= 0) { + std::cerr << "learning rate should be positive and nonzero" + << std::endl; + std::exit(EXIT_FAILURE); + } + + weights = std::vector( + num_features + + 1); // additional weight is for the constant bias term + + // initialize with random weights in the range [-50, 49] + for (int i = 0; i < weights.size(); i++) weights[i] = 1.f; + // weights[i] = (static_cast(std::rand() % 100) - 50); + } + + /** + * Operator to print the weights of the model + */ + friend std::ostream &operator<<(std::ostream &out, const adaline &ada) { + out << "<"; + for (int i = 0; i < ada.weights.size(); i++) { + out << ada.weights[i]; + if (i < ada.weights.size() - 1) + out << ", "; + } + out << ">"; + return out; + } + + /** + * predict the output of the model for given set of features + * \param[in] x input vector + * \param[out] out optional argument to return neuron output before + * applying activation function (optional, `nullptr` to ignore) \returns + * model prediction output + */ + int predict(const std::vector &x, double *out = nullptr) { + if (!check_size_match(x)) + return 0; + + double y = weights.back(); // assign bias value + + // for (int i = 0; i < x.size(); i++) y += x[i] * weights[i]; + y = std::inner_product(x.begin(), x.end(), weights.begin(), y); + + if (out != nullptr) // if out variable is provided + *out = y; + + return activation(y); // quantizer: apply ADALINE threshold function + } + + /** + * Update the weights of the model using supervised learning for one + * feature vector \param[in] x feature vector \param[in] y known output + * value \returns correction factor + */ + double fit(const std::vector &x, const int &y) { + if 
(!check_size_match(x)) + return 0; + + /* output of the model with current weights */ + int p = predict(x); + int prediction_error = y - p; // error in estimation + double correction_factor = eta * prediction_error; + + /* update each weight, the last weight is the bias term */ + for (int i = 0; i < x.size(); i++) { + weights[i] += correction_factor * x[i]; + } + weights[x.size()] += correction_factor; // update bias + + return correction_factor; + } + + /** + * Update the weights of the model using supervised learning for an + * array of vectors. \param[in] X array of feature vector \param[in] y + * known output value for each feature vector + */ + template + void fit(std::vector const (&X)[N], const int *y) { + double avg_pred_error = 1.f; + + int iter; + for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy); + iter++) { + avg_pred_error = 0.f; + + // perform fit for each sample + for (int i = 0; i < N; i++) { + double err = fit(X[i], y[i]); + avg_pred_error += std::abs(err); } + avg_pred_error /= N; - weights = std::vector( - num_features + - 1); // additional weight is for the constant bias term - - // initialize with random weights in the range [-50, 49] - for (int i = 0; i < weights.size(); i++) weights[i] = 1.f; - // weights[i] = (static_cast(std::rand() % 100) - 50); + // Print updates every 200th iteration + // if (iter % 100 == 0) + std::cout << "\tIter " << iter << ": Training weights: " << *this + << "\tAvg error: " << avg_pred_error << std::endl; } - /** - * Operator to print the weights of the model - */ - friend std::ostream &operator<<(std::ostream &out, const adaline &ada) { - out << "<"; - for (int i = 0; i < ada.weights.size(); i++) { - out << ada.weights[i]; - if (i < ada.weights.size() - 1) - out << ", "; - } - out << ">"; - return out; + if (iter < MAX_ITER) + + std::cout << "Converged after " << iter << " iterations." + << std::endl; + else + std::cout << "Did not converge after " << iter << " iterations." + << std::endl; + } + + int activation(double x) { return x > 0 ? 1 : -1; } + + private: + /** + * convenient function to check if input feature vector size matches the + * model weights size + * \param[in] x fecture vector to check + * \returns `true` size matches + * \returns `false` size does not match + */ + bool check_size_match(const std::vector &x) { + if (x.size() != (weights.size() - 1)) { + std::cerr << __func__ << ": " + << "Number of features in x does not match the feature " + "dimension in model!" 
+ << std::endl; + return false; } + return true; + } - /** - * predict the output of the model for given set of features - * \param[in] x input vector - * \param[out] out optional argument to return neuron output before - * applying activation function (optional, `nullptr` to ignore) \returns - * model prediction output - */ - int predict(const std::vector &x, double *out = nullptr) { - if (!check_size_match(x)) - return 0; - - double y = weights.back(); // assign bias value - - // for (int i = 0; i < x.size(); i++) y += x[i] * weights[i]; - y = std::inner_product(x.begin(), x.end(), weights.begin(), y); - - if (out != nullptr) // if out variable is provided - *out = y; - - return activation( - y); // quantizer: apply ADALINE threshold function - } - - /** - * Update the weights of the model using supervised learning for one - * feature vector \param[in] x feature vector \param[in] y known output - * value \returns correction factor - */ - double fit(const std::vector &x, const int &y) { - if (!check_size_match(x)) - return 0; - - /* output of the model with current weights */ - int p = predict(x); - int prediction_error = y - p; // error in estimation - double correction_factor = eta * prediction_error; - - /* update each weight, the last weight is the bias term */ - for (int i = 0; i < x.size(); i++) { - weights[i] += correction_factor * x[i]; - } - weights[x.size()] += correction_factor; // update bias - - return correction_factor; - } - - /** - * Update the weights of the model using supervised learning for an - * array of vectors. \param[in] X array of feature vector \param[in] y - * known output value for each feature vector - */ - template - void fit(std::vector const (&X)[N], const int *y) { - double avg_pred_error = 1.f; - - int iter; - for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy); - iter++) { - avg_pred_error = 0.f; - - // perform fit for each sample - for (int i = 0; i < N; i++) { - double err = fit(X[i], y[i]); - avg_pred_error += std::abs(err); - } - avg_pred_error /= N; - - // Print updates every 200th iteration - // if (iter % 100 == 0) - std::cout << "\tIter " << iter - << ": Training weights: " << *this - << "\tAvg error: " << avg_pred_error << std::endl; - } - - if (iter < MAX_ITER) - - std::cout << "Converged after " << iter << " iterations." - << std::endl; - else - std::cout << "Did not converge after " << iter << " iterations." - << std::endl; - } - - int activation(double x) { return x > 0 ? 1 : -1; } - - private: - /** - * convenient function to check if input feature vector size matches the - * model weights size - * \param[in] x fecture vector to check - * \returns `true` size matches - * \returns `false` size does not match - */ - bool check_size_match(const std::vector &x) { - if (x.size() != (weights.size() - 1)) { - std::cerr - << __func__ << ": " - << "Number of features in x does not match the feature " - "dimension in model!" 
- << std::endl; - return false; - } - return true; - } - - const double eta; ///< learning rate of the algorithm - const double accuracy; ///< model fit convergence accuracy - std::vector weights; ///< weights of the neural network - }; + const double eta; ///< learning rate of the algorithm + const double accuracy; ///< model fit convergence accuracy + std::vector weights; ///< weights of the neural network +}; } // namespace machine_learning diff --git a/math/realtime_stats.cpp b/math/realtime_stats.cpp index 03c816331..26b923625 100644 --- a/math/realtime_stats.cpp +++ b/math/realtime_stats.cpp @@ -16,95 +16,95 @@ */ namespace statistics { - /** - * continuous mean and variance computance using - * first value as an approximation for the mean. - * If the first number is much far form the mean, the algorithm becomes very - * inaccurate to compute variance and standard deviation. +/** + * continuous mean and variance computance using + * first value as an approximation for the mean. + * If the first number is much far form the mean, the algorithm becomes very + * inaccurate to compute variance and standard deviation. + */ +template +class stats_computer1 { + public: + /** Constructor + * \param[in] x new data sample */ - template - class stats_computer1 { - public: - /** Constructor - * \param[in] x new data sample - */ - void new_val(T x) { - if (n == 0) - K = x; - n++; - T tmp = x - K; - Ex += tmp; - Ex2 += tmp * tmp; - } + void new_val(T x) { + if (n == 0) + K = x; + n++; + T tmp = x - K; + Ex += tmp; + Ex2 += tmp * tmp; + } - /** return sample mean computed till last sample */ - double mean() const { return K + Ex / n; } + /** return sample mean computed till last sample */ + double mean() const { return K + Ex / n; } - /** return data variance computed till last sample */ - double variance() const { return (Ex2 - (Ex * Ex) / n) / (n - 1); } + /** return data variance computed till last sample */ + double variance() const { return (Ex2 - (Ex * Ex) / n) / (n - 1); } - /** return sample standard deviation computed till last sample */ - double std() const { return std::sqrt(this->variance()); } + /** return sample standard deviation computed till last sample */ + double std() const { return std::sqrt(this->variance()); } - /** short-hand operator to read new sample from input stream - * \n e.g.: `std::cin >> stats1;` - */ - friend std::istream &operator>>(std::istream &input, - stats_computer1 &stat) { - T val; - input >> val; - stat.new_val(val); - return input; - } - - private: - unsigned int n = 0; - double Ex, Ex2; - T K; - }; - - /** - * continuous mean and variance computance using - * Welford's algorithm (very accurate) + /** short-hand operator to read new sample from input stream + * \n e.g.: `std::cin >> stats1;` */ - template - class stats_computer2 { - public: - /** Constructor - * \param[in] x new data sample - */ - void new_val(T x) { - n++; - double delta = x - mu; - mu += delta / n; - double delta2 = x - mu; - M += delta * delta2; - } + friend std::istream &operator>>(std::istream &input, + stats_computer1 &stat) { + T val; + input >> val; + stat.new_val(val); + return input; + } - /** return sample mean computed till last sample */ - double mean() const { return mu; } + private: + unsigned int n = 0; + double Ex, Ex2; + T K; +}; - /** return data variance computed till last sample */ - double variance() const { return M / n; } +/** + * continuous mean and variance computance using + * Welford's algorithm (very accurate) + */ +template +class stats_computer2 { + public: + 
/** Constructor + * \param[in] x new data sample + */ + void new_val(T x) { + n++; + double delta = x - mu; + mu += delta / n; + double delta2 = x - mu; + M += delta * delta2; + } - /** return sample standard deviation computed till last sample */ - double std() const { return std::sqrt(this->variance()); } + /** return sample mean computed till last sample */ + double mean() const { return mu; } - /** short-hand operator to read new sample from input stream - * \n e.g.: `std::cin >> stats1;` - */ - friend std::istream &operator>>(std::istream &input, - stats_computer2 &stat) { - T val; - input >> val; - stat.new_val(val); - return input; - } + /** return data variance computed till last sample */ + double variance() const { return M / n; } - private: - unsigned int n = 0; - double mu = 0, var = 0, M = 0; - }; + /** return sample standard deviation computed till last sample */ + double std() const { return std::sqrt(this->variance()); } + + /** short-hand operator to read new sample from input stream + * \n e.g.: `std::cin >> stats1;` + */ + friend std::istream &operator>>(std::istream &input, + stats_computer2 &stat) { + T val; + input >> val; + stat.new_val(val); + return input; + } + + private: + unsigned int n = 0; + double mu = 0, var = 0, M = 0; +}; } // namespace statistics diff --git a/sorting/non_recursive_merge_sort.cpp b/sorting/non_recursive_merge_sort.cpp index c8c245b7d..b99b93108 100644 --- a/sorting/non_recursive_merge_sort.cpp +++ b/sorting/non_recursive_merge_sort.cpp @@ -9,84 +9,83 @@ #include // for std::move & std::remove_reference_t namespace sorting { - template - void merge(Iterator, Iterator, const Iterator, char[]); - /// bottom-up merge sort which sorts elements in a non-decreasing order - /** - * sorts elements non-recursively by breaking them into small segments, - * merging adjacent segments into larger sorted segments, then increasing - * the sizes of segments by factors of 2 and repeating the same process. - * best-case = worst-case = O(n log(n)) - * @param first points to the first element - * @param last points to 1-step past the last element - * @param n the number of elements - */ - template - void non_recursive_merge_sort(const Iterator first, const Iterator last, - const size_t n) { - // create a buffer large enough to store all elements - // dynamically allocated to comply with cpplint - char* buffer = new char[n * sizeof(*first)]; - // buffer size can be optimized to largest power of 2 less than n - // elements divide the container into equally-sized segments whose - // length start at 1 and keeps increasing by factors of 2 - for (size_t length(1); length < n; length <<= 1) { - // merge adjacent segments whose number is n / (length * 2) - Iterator left(first); - for (size_t counter(n / (length << 1)); counter; --counter) { - Iterator right(left + length), end(right + length); - merge(left, right, end, buffer); - left = end; - } - // if the number of remaining elements (n * 2 % length) is longer - // than a segment, merge the remaining elements - if ((n & ((length << 1) - 1)) > length) - merge(left, left + length, last, buffer); +template +void merge(Iterator, Iterator, const Iterator, char[]); +/// bottom-up merge sort which sorts elements in a non-decreasing order +/** + * sorts elements non-recursively by breaking them into small segments, + * merging adjacent segments into larger sorted segments, then increasing + * the sizes of segments by factors of 2 and repeating the same process. 
+ * best-case = worst-case = O(n log(n)) + * @param first points to the first element + * @param last points to 1-step past the last element + * @param n the number of elements + */ +template +void non_recursive_merge_sort(const Iterator first, const Iterator last, + const size_t n) { + // create a buffer large enough to store all elements + // dynamically allocated to comply with cpplint + char* buffer = new char[n * sizeof(*first)]; + // buffer size can be optimized to largest power of 2 less than n + // elements divide the container into equally-sized segments whose + // length start at 1 and keeps increasing by factors of 2 + for (size_t length(1); length < n; length <<= 1) { + // merge adjacent segments whose number is n / (length * 2) + Iterator left(first); + for (size_t counter(n / (length << 1)); counter; --counter) { + Iterator right(left + length), end(right + length); + merge(left, right, end, buffer); + left = end; } - delete[] buffer; - } - /// merges 2 sorted adjacent segments into a larger sorted segment - /** - * best-case = worst-case = O(n) - * @param l points to the left part - * @param r points to the right part, end of left part - * @param e points to end of right part - * @param b points at the buffer - */ - template - void merge(Iterator l, Iterator r, const Iterator e, char b[]) { - // create 2 pointers to point at the buffer - auto p(reinterpret_cast*>(b)), - c(p); - // move the left part of the segment - for (Iterator t(l); r != t; ++t) *p++ = std::move(*t); - // while neither the buffer nor the right part has been exhausted - // move the smallest element of the two back to the container - while (e != r && c != p) *l++ = std::move(*r < *c ? *r++ : *c++); - // notice only one of the two following loops will be executed - // while the right part hasn't bee exhausted, move it back - while (e != r) *l++ = std::move(*r++); - // while the buffer hasn't bee exhausted, move it back - while (c != p) *l++ = std::move(*c++); - } - /// bottom-up merge sort which sorts elements in a non-decreasing order - /** - * @param first points to the first element - * @param n the number of elements - */ - template - void non_recursive_merge_sort(const Iterator first, const size_t n) { - non_recursive_merge_sort(first, first + n, n); - } - /// bottom-up merge sort which sorts elements in a non-decreasing order - /** - * @param first points to the first element - * @param last points to 1-step past the last element - */ - template - void non_recursive_merge_sort(const Iterator first, const Iterator last) { - non_recursive_merge_sort(first, last, last - first); + // if the number of remaining elements (n * 2 % length) is longer + // than a segment, merge the remaining elements + if ((n & ((length << 1) - 1)) > length) + merge(left, left + length, last, buffer); } + delete[] buffer; +} +/// merges 2 sorted adjacent segments into a larger sorted segment +/** + * best-case = worst-case = O(n) + * @param l points to the left part + * @param r points to the right part, end of left part + * @param e points to end of right part + * @param b points at the buffer + */ +template +void merge(Iterator l, Iterator r, const Iterator e, char b[]) { + // create 2 pointers to point at the buffer + auto p(reinterpret_cast*>(b)), c(p); + // move the left part of the segment + for (Iterator t(l); r != t; ++t) *p++ = std::move(*t); + // while neither the buffer nor the right part has been exhausted + // move the smallest element of the two back to the container + while (e != r && c != p) *l++ = std::move(*r < 
*c ? *r++ : *c++); + // notice only one of the two following loops will be executed + // while the right part hasn't bee exhausted, move it back + while (e != r) *l++ = std::move(*r++); + // while the buffer hasn't bee exhausted, move it back + while (c != p) *l++ = std::move(*c++); +} +/// bottom-up merge sort which sorts elements in a non-decreasing order +/** + * @param first points to the first element + * @param n the number of elements + */ +template +void non_recursive_merge_sort(const Iterator first, const size_t n) { + non_recursive_merge_sort(first, first + n, n); +} +/// bottom-up merge sort which sorts elements in a non-decreasing order +/** + * @param first points to the first element + * @param last points to 1-step past the last element + */ +template +void non_recursive_merge_sort(const Iterator first, const Iterator last) { + non_recursive_merge_sort(first, last, last - first); +} } // namespace sorting diff --git a/sorting/quick_sort.cpp b/sorting/quick_sort.cpp index 78268cf73..5d102dc88 100644 --- a/sorting/quick_sort.cpp +++ b/sorting/quick_sort.cpp @@ -25,48 +25,48 @@ #include namespace sorting { - /** - * This function takes last element as pivot, places - * the pivot element at its correct position in sorted - * array, and places all smaller (smaller than pivot) - * to left of pivot and all greater elements to right - * of pivot - * - */ +/** + * This function takes last element as pivot, places + * the pivot element at its correct position in sorted + * array, and places all smaller (smaller than pivot) + * to left of pivot and all greater elements to right + * of pivot + * + */ - int partition(int arr[], int low, int high) { - int pivot = arr[high]; // taking the last element as pivot - int i = (low - 1); // Index of smaller element +int partition(int arr[], int low, int high) { + int pivot = arr[high]; // taking the last element as pivot + int i = (low - 1); // Index of smaller element - for (int j = low; j < high; j++) { - // If current element is smaller than or - // equal to pivot - if (arr[j] <= pivot) { - i++; // increment index of smaller element - int temp = arr[i]; - arr[i] = arr[j]; - arr[j] = temp; - } - } - int temp = arr[i + 1]; - arr[i + 1] = arr[high]; - arr[high] = temp; - return (i + 1); - } - - /** - * The main function that implements QuickSort - * arr[] --> Array to be sorted, - * low --> Starting index, - * high --> Ending index - */ - void quickSort(int arr[], int low, int high) { - if (low < high) { - int p = partition(arr, low, high); - quickSort(arr, low, p - 1); - quickSort(arr, p + 1, high); + for (int j = low; j < high; j++) { + // If current element is smaller than or + // equal to pivot + if (arr[j] <= pivot) { + i++; // increment index of smaller element + int temp = arr[i]; + arr[i] = arr[j]; + arr[j] = temp; } } + int temp = arr[i + 1]; + arr[i + 1] = arr[high]; + arr[high] = temp; + return (i + 1); +} + +/** + * The main function that implements QuickSort + * arr[] --> Array to be sorted, + * low --> Starting index, + * high --> Ending index + */ +void quickSort(int arr[], int low, int high) { + if (low < high) { + int p = partition(arr, low, high); + quickSort(arr, low, p - 1); + quickSort(arr, p + 1, high); + } +} } // namespace sorting diff --git a/sorting/shell_sort2.cpp b/sorting/shell_sort2.cpp index c8af7e1de..85186e752 100644 --- a/sorting/shell_sort2.cpp +++ b/sorting/shell_sort2.cpp @@ -33,35 +33,35 @@ void show_data(T (&arr)[N]) { * \brief Sorting algorithms */ namespace sorting { - /** - * Optimized algorithm - takes 
half the time by utilizing - * Mar - **/ - template - void shell_sort(T *arr, size_t LEN) { - const unsigned int gaps[] = {701, 301, 132, 57, 23, 10, 4, 1}; - const unsigned int gap_len = 8; - size_t i, j, g; +/** + * Optimized algorithm - takes half the time by utilizing + * Mar + **/ +template +void shell_sort(T *arr, size_t LEN) { + const unsigned int gaps[] = {701, 301, 132, 57, 23, 10, 4, 1}; + const unsigned int gap_len = 8; + size_t i, j, g; - for (g = 0; g < gap_len; g++) { - unsigned int gap = gaps[g]; - for (i = gap; i < LEN; i++) { - T tmp = arr[i]; + for (g = 0; g < gap_len; g++) { + unsigned int gap = gaps[g]; + for (i = gap; i < LEN; i++) { + T tmp = arr[i]; - for (j = i; j >= gap && (arr[j - gap] - tmp) > 0; j -= gap) - arr[j] = arr[j - gap]; + for (j = i; j >= gap && (arr[j - gap] - tmp) > 0; j -= gap) + arr[j] = arr[j - gap]; - arr[j] = tmp; - } + arr[j] = tmp; } } +} - /** function overload - when input array is of a known length array type - */ - template - void shell_sort(T (&arr)[N]) { - shell_sort(arr, N); - } +/** function overload - when input array is of a known length array type + */ +template +void shell_sort(T (&arr)[N]) { + shell_sort(arr, N); +} } // namespace sorting diff --git a/strings/brute_force_string_searching.cpp b/strings/brute_force_string_searching.cpp index fc2f543c0..fd5244b37 100644 --- a/strings/brute_force_string_searching.cpp +++ b/strings/brute_force_string_searching.cpp @@ -11,28 +11,28 @@ #include namespace string_search { - /** - * Find a pattern in a string by comparing the pattern to every substring. - * @param text Any string that might contain the pattern. - * @param pattern String that we are searching for. - * @return Index where the pattern starts in the text - * @return -1 if the pattern was not found. - */ - int brute_force(const std::string &text, const std::string &pattern) { - size_t pat_l = pattern.length(); - size_t txt_l = text.length(); - int index = -1; - if (pat_l <= txt_l) { - for (size_t i = 0; i < txt_l - pat_l + 1; i++) { - std::string s = text.substr(i, pat_l); - if (s == pattern) { - index = i; - break; - } +/** + * Find a pattern in a string by comparing the pattern to every substring. + * @param text Any string that might contain the pattern. + * @param pattern String that we are searching for. + * @return Index where the pattern starts in the text + * @return -1 if the pattern was not found. + */ +int brute_force(const std::string &text, const std::string &pattern) { + size_t pat_l = pattern.length(); + size_t txt_l = text.length(); + int index = -1; + if (pat_l <= txt_l) { + for (size_t i = 0; i < txt_l - pat_l + 1; i++) { + std::string s = text.substr(i, pat_l); + if (s == pattern) { + index = i; + break; } } - return index; } + return index; +} } // namespace string_search using string_search::brute_force; diff --git a/strings/knuth_morris_pratt.cpp b/strings/knuth_morris_pratt.cpp index ee569cccc..d116ddcff 100644 --- a/strings/knuth_morris_pratt.cpp +++ b/strings/knuth_morris_pratt.cpp @@ -21,50 +21,50 @@ #include namespace string_search { - /** - * Generate the partial match table aka failure function for a pattern to - * search. 
- * \param[in] pattern text for which to create the partial match table - * \returns the partial match table as a vector array - */ - std::vector getFailureArray(const std::string &pattern) { - int pattern_length = pattern.size(); - std::vector failure(pattern_length + 1); - failure[0] = -1; - int j = -1; +/** + * Generate the partial match table aka failure function for a pattern to + * search. + * \param[in] pattern text for which to create the partial match table + * \returns the partial match table as a vector array + */ +std::vector getFailureArray(const std::string &pattern) { + int pattern_length = pattern.size(); + std::vector failure(pattern_length + 1); + failure[0] = -1; + int j = -1; - for (int i = 0; i < pattern_length; i++) { - while (j != -1 && pattern[j] != pattern[i]) { - j = failure[j]; - } - j++; - failure[i + 1] = j; + for (int i = 0; i < pattern_length; i++) { + while (j != -1 && pattern[j] != pattern[i]) { + j = failure[j]; } - return failure; + j++; + failure[i + 1] = j; } + return failure; +} - /** - * KMP algorithm to find a pattern in a text - * \param[in] pattern string pattern to search - * \param[in] text text in which to search - * \returns `true` if pattern was found - * \returns `false` if pattern was not found - */ - bool kmp(const std::string &pattern, const std::string &text) { - int text_length = text.size(), pattern_length = pattern.size(); - std::vector failure = getFailureArray(pattern); +/** + * KMP algorithm to find a pattern in a text + * \param[in] pattern string pattern to search + * \param[in] text text in which to search + * \returns `true` if pattern was found + * \returns `false` if pattern was not found + */ +bool kmp(const std::string &pattern, const std::string &text) { + int text_length = text.size(), pattern_length = pattern.size(); + std::vector failure = getFailureArray(pattern); - int k = 0; - for (int j = 0; j < text_length; j++) { - while (k != -1 && pattern[k] != text[j]) { - k = failure[k]; - } - k++; - if (k == pattern_length) - return true; + int k = 0; + for (int j = 0; j < text_length; j++) { + while (k != -1 && pattern[k] != text[j]) { + k = failure[k]; } - return false; + k++; + if (k == pattern_length) + return true; } + return false; +} } // namespace string_search using string_search::kmp; diff --git a/strings/rabin_karp.cpp b/strings/rabin_karp.cpp index 6d1f4e7de..91e104188 100644 --- a/strings/rabin_karp.cpp +++ b/strings/rabin_karp.cpp @@ -16,87 +16,86 @@ #define PRIME 5 ///< Prime modulus for hash functions namespace string_search { - /** - * convert a string to an intger - called as hashing function - * \param[in] s source of string to hash - * \param[in] n length of substring to hash - * \returns hash integer - */ - int64_t create_hash(const std::string& s, int n) { - int64_t result = 0; - for (int i = 0; i < n; ++i) { - result += (int64_t)(s[i] * (int64_t)pow(PRIME, i)); - } - return result; +/** + * convert a string to an intger - called as hashing function + * \param[in] s source of string to hash + * \param[in] n length of substring to hash + * \returns hash integer + */ +int64_t create_hash(const std::string& s, int n) { + int64_t result = 0; + for (int i = 0; i < n; ++i) { + result += (int64_t)(s[i] * (int64_t)pow(PRIME, i)); } + return result; +} - /** - * re-hash a string using known existing hash - * \param[in] s source of string to hash - * \param[in] old_index previous index of string - * \param[in] new_index new index of string - * \param[in] old_hash previous hash of substring - * \param[in] patLength 
length of substring to hash - * \returns new hash integer - */ - int64_t recalculate_hash(const std::string& s, int old_index, int new_index, - int64_t old_hash, int patLength) { - int64_t new_hash = old_hash - s[old_index]; - new_hash /= PRIME; - new_hash += - (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1)); - return new_hash; +/** + * re-hash a string using known existing hash + * \param[in] s source of string to hash + * \param[in] old_index previous index of string + * \param[in] new_index new index of string + * \param[in] old_hash previous hash of substring + * \param[in] patLength length of substring to hash + * \returns new hash integer + */ +int64_t recalculate_hash(const std::string& s, int old_index, int new_index, + int64_t old_hash, int patLength) { + int64_t new_hash = old_hash - s[old_index]; + new_hash /= PRIME; + new_hash += (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1)); + return new_hash; +} + +/** + * compare if two sub-strings are equal + * \param[in] str1 string pattern to search + * \param[in] str2 text in which to search + * \param[in] start1,end1 start and end indices for substring in str1 + * \param[in] start2,end2 start and end indices for substring in str2 + * \returns `true` if pattern was found + * \returns `false` if pattern was not found + * @note can this be replaced by std::string::compare? + */ +bool check_if_equal(const std::string& str1, const std::string& str2, + int start1, int end1, int start2, int end2) { + if (end1 - start1 != end2 - start2) { + return false; } - - /** - * compare if two sub-strings are equal - * \param[in] str1 string pattern to search - * \param[in] str2 text in which to search - * \param[in] start1,end1 start and end indices for substring in str1 - * \param[in] start2,end2 start and end indices for substring in str2 - * \returns `true` if pattern was found - * \returns `false` if pattern was not found - * @note can this be replaced by std::string::compare? 
- */ - bool check_if_equal(const std::string& str1, const std::string& str2, - int start1, int end1, int start2, int end2) { - if (end1 - start1 != end2 - start2) { + while (start1 <= end1 && start2 <= end2) { + if (str1[start1] != str2[start2]) { return false; } - while (start1 <= end1 && start2 <= end2) { - if (str1[start1] != str2[start2]) { - return false; - } - start1++; - start2++; - } - return true; + start1++; + start2++; } + return true; +} - /** - * Perform string pattern search using Rabin-Karp algorithm - * @param[in] str string to search in - * @param[in] pat pattern to search for - * @return index of first occurrence of pattern - * @return -1 if pattern not found - */ +/** + * Perform string pattern search using Rabin-Karp algorithm + * @param[in] str string to search in + * @param[in] pat pattern to search for + * @return index of first occurrence of pattern + * @return -1 if pattern not found + */ - int rabin_karp(const std::string& str, const std::string& pat) { - int64_t pat_hash = create_hash(pat, pat.size()); - int64_t str_hash = create_hash(str, pat.size()); - for (int i = 0; i <= str.size() - pat.size(); ++i) { - if (pat_hash == str_hash && - check_if_equal(str, pat, i, i + pat.size() - 1, 0, - pat.size() - 1)) { - return i; - } - if (i < str.size() - pat.size()) { - str_hash = recalculate_hash(str, i, i + pat.size(), str_hash, - pat.size()); - } +int rabin_karp(const std::string& str, const std::string& pat) { + int64_t pat_hash = create_hash(pat, pat.size()); + int64_t str_hash = create_hash(str, pat.size()); + for (int i = 0; i <= str.size() - pat.size(); ++i) { + if (pat_hash == str_hash && + check_if_equal(str, pat, i, i + pat.size() - 1, 0, + pat.size() - 1)) { + return i; + } + if (i < str.size() - pat.size()) { + str_hash = + recalculate_hash(str, i, i + pat.size(), str_hash, pat.size()); } - return -1; // return -1 if given pattern not found } + return -1; // return -1 if given pattern not found +} } // namespace string_search
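
Note (illustrative only, not part of the diff above): the patch is whitespace-only, so the public interfaces of the touched files are unchanged. Below is a minimal usage sketch for two of those interfaces, assuming the definitions of sorting::quickSort (sorting/quick_sort.cpp) and string_search::kmp (strings/knuth_morris_pratt.cpp) are available when the sketch is built; the sketch introduces no names of its own beyond main.

#include <iostream>
#include <string>

// Forward declarations mirroring the signatures shown in the patch;
// the actual definitions live in the patched files and are assumed to
// be compiled/linked alongside this sketch.
namespace sorting {
void quickSort(int arr[], int low, int high);
}  // namespace sorting

namespace string_search {
bool kmp(const std::string &pattern, const std::string &text);
}  // namespace string_search

int main() {
    int data[] = {9, 4, 7, 1, 3};
    sorting::quickSort(data, 0, 4);  // sort indices 0..4 in place
    for (int v : data) {
        std::cout << v << ' ';  // expected output: 1 3 4 7 9
    }
    std::cout << '\n';

    // kmp(pattern, text) returns true when the pattern occurs in the text
    std::cout << std::boolalpha
              << string_search::kmp("lo wo", "hello world") << '\n';  // true
    return 0;
}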