formatting source-code for fd69530515

This commit is contained in:
github-actions
2020-06-01 03:10:34 +00:00
parent fd69530515
commit 4fe430d75f
8 changed files with 481 additions and 486 deletions

View File

@@ -35,156 +35,153 @@
* \brief Machine learning algorithms
*/
namespace machine_learning {
class adaline {
public:
/**
* Default constructor
* \param[in] num_features number of features present
* \param[in] eta learning rate (optional, default=0.1)
* \param[in] convergence accuracy (optional,
* default=\f$1\times10^{-5}\f$)
*/
adaline(int num_features, const double eta = 0.01f,
const double accuracy = 1e-5)
: eta(eta), accuracy(accuracy) {
if (eta <= 0) {
std::cerr << "learning rate should be positive and nonzero"
<< std::endl;
std::exit(EXIT_FAILURE);
class adaline {
public:
/**
 * Default constructor
 * \param[in] num_features number of features present
 * \param[in] eta learning rate (optional, default=0.01)
 * \param[in] accuracy convergence accuracy (optional,
 * default=\f$1\times10^{-5}\f$)
 *
 * Terminates the program with `EXIT_FAILURE` if `eta` is not positive.
 */
adaline(int num_features, const double eta = 0.01f,
        const double accuracy = 1e-5)
    : eta(eta), accuracy(accuracy) {
    if (eta <= 0) {
        std::cerr << "learning rate should be positive and nonzero"
                  << std::endl;
        std::exit(EXIT_FAILURE);
    }
    // one weight per feature plus a trailing weight for the constant bias
    // term; every weight starts at 1 (random initialisation is disabled)
    weights = std::vector<double>(num_features + 1, 1.0);
}
/**
 * Stream the model weights as `<w0, w1, ..., wn>`
 * \param[in,out] out stream to write to
 * \param[in] ada model whose weights are printed
 * \returns the same stream, to allow chaining
 */
friend std::ostream &operator<<(std::ostream &out, const adaline &ada) {
    out << "<";
    const char *separator = "";  // nothing before the first weight
    for (const double weight : ada.weights) {
        out << separator << weight;
        separator = ", ";
    }
    out << ">";
    return out;
}
/**
 * Predict the output of the model for a given set of features
 * \param[in] x input vector
 * \param[out] out optional pointer that receives the neuron output before
 * the activation function is applied (`nullptr` to ignore)
 * \returns model prediction (result of `activation`), or 0 if the size of
 * `x` does not match the model
 */
int predict(const std::vector<double> &x, double *out = nullptr) {
    const bool size_ok = check_size_match(x);
    if (!size_ok) {
        return 0;
    }

    // start from the bias (last weight) and add the weighted feature sum
    const double neuron_output = std::inner_product(
        x.begin(), x.end(), weights.begin(), weights.back());

    if (out) {
        *out = neuron_output;
    }
    // quantizer: apply ADALINE threshold function
    return activation(neuron_output);
}
/**
* Update the weights of the model using supervised learning for one
* feature vector \param[in] x feature vector \param[in] y known output
* value \returns correction factor
*/
double fit(const std::vector<double> &x, const int &y) {
if (!check_size_match(x))
return 0;
/* output of the model with current weights */
int p = predict(x);
int prediction_error = y - p; // error in estimation
double correction_factor = eta * prediction_error;
/* update each weight, the last weight is the bias term */
for (int i = 0; i < x.size(); i++) {
weights[i] += correction_factor * x[i];
}
weights[x.size()] += correction_factor; // update bias
return correction_factor;
}
/**
* Update the weights of the model using supervised learning for an
* array of vectors. \param[in] X array of feature vector \param[in] y
* known output value for each feature vector
*/
template <int N>
void fit(std::vector<double> const (&X)[N], const int *y) {
double avg_pred_error = 1.f;
int iter;
for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy);
iter++) {
avg_pred_error = 0.f;
// perform fit for each sample
for (int i = 0; i < N; i++) {
double err = fit(X[i], y[i]);
avg_pred_error += std::abs(err);
}
avg_pred_error /= N;
weights = std::vector<double>(
num_features +
1); // additional weight is for the constant bias term
// initialize with random weights in the range [-50, 49]
for (int i = 0; i < weights.size(); i++) weights[i] = 1.f;
// weights[i] = (static_cast<double>(std::rand() % 100) - 50);
// Print updates every 200th iteration
// if (iter % 100 == 0)
std::cout << "\tIter " << iter << ": Training weights: " << *this
<< "\tAvg error: " << avg_pred_error << std::endl;
}
/**
* Operator to print the weights of the model
*/
friend std::ostream &operator<<(std::ostream &out, const adaline &ada) {
out << "<";
for (int i = 0; i < ada.weights.size(); i++) {
out << ada.weights[i];
if (i < ada.weights.size() - 1)
out << ", ";
}
out << ">";
return out;
if (iter < MAX_ITER)
std::cout << "Converged after " << iter << " iterations."
<< std::endl;
else
std::cout << "Did not converge after " << iter << " iterations."
<< std::endl;
}
/**
 * Threshold function
 * \param[in] x neuron output
 * \returns 1 if `x` is strictly positive, -1 otherwise
 */
int activation(double x) {
    if (x > 0) {
        return 1;
    }
    return -1;
}
private:
/**
 * Check that an input feature vector has one element fewer than the
 * weights vector (the extra weight is the bias). Writes a message to
 * `std::cerr` on mismatch.
 * \param[in] x feature vector to check
 * \returns `true` size matches
 * \returns `false` size does not match
 */
bool check_size_match(const std::vector<double> &x) {
    const bool matches = (x.size() == (weights.size() - 1));
    if (matches) {
        return true;
    }

    std::cerr << __func__ << ": "
              << "Number of features in x does not match the feature "
                 "dimension in model!"
              << std::endl;
    return false;
}
/**
* predict the output of the model for given set of features
* \param[in] x input vector
* \param[out] out optional argument to return neuron output before
* applying activation function (optional, `nullptr` to ignore) \returns
* model prediction output
*/
int predict(const std::vector<double> &x, double *out = nullptr) {
if (!check_size_match(x))
return 0;
double y = weights.back(); // assign bias value
// for (int i = 0; i < x.size(); i++) y += x[i] * weights[i];
y = std::inner_product(x.begin(), x.end(), weights.begin(), y);
if (out != nullptr) // if out variable is provided
*out = y;
return activation(
y); // quantizer: apply ADALINE threshold function
}
/**
* Update the weights of the model using supervised learning for one
* feature vector \param[in] x feature vector \param[in] y known output
* value \returns correction factor
*/
double fit(const std::vector<double> &x, const int &y) {
if (!check_size_match(x))
return 0;
/* output of the model with current weights */
int p = predict(x);
int prediction_error = y - p; // error in estimation
double correction_factor = eta * prediction_error;
/* update each weight, the last weight is the bias term */
for (int i = 0; i < x.size(); i++) {
weights[i] += correction_factor * x[i];
}
weights[x.size()] += correction_factor; // update bias
return correction_factor;
}
/**
 * Update the weights of the model using supervised learning for an
 * array of vectors. Passes over all samples repeatedly until the average
 * absolute correction drops to `accuracy` or `MAX_ITER` (defined
 * elsewhere in the file) passes have been made. Progress is printed to
 * `std::cout` after every pass.
 * \param[in] X array of `N` feature vectors
 * \param[in] y known output value for each feature vector (`N` values are
 * read)
 */
template <int N>
void fit(std::vector<double> const (&X)[N], const int *y) {
    double avg_pred_error = 1.f;
    int iter = 0;

    while ((iter < MAX_ITER) && (avg_pred_error > accuracy)) {
        // accumulate the absolute correction over every sample
        double abs_error_sum = 0.f;
        for (int sample = 0; sample < N; ++sample) {
            abs_error_sum += std::abs(fit(X[sample], y[sample]));
        }
        avg_pred_error = abs_error_sum / N;

        std::cout << "\tIter " << iter << ": Training weights: " << *this
                  << "\tAvg error: " << avg_pred_error << std::endl;
        ++iter;
    }

    const bool converged = (iter < MAX_ITER);
    std::cout << (converged ? "Converged after " : "Did not converge after ")
              << iter << " iterations." << std::endl;
}
int activation(double x) { return x > 0 ? 1 : -1; }
private:
/**
* convenient function to check if input feature vector size matches the
* model weights size
* \param[in] x fecture vector to check
* \returns `true` size matches
* \returns `false` size does not match
*/
bool check_size_match(const std::vector<double> &x) {
if (x.size() != (weights.size() - 1)) {
std::cerr
<< __func__ << ": "
<< "Number of features in x does not match the feature "
"dimension in model!"
<< std::endl;
return false;
}
return true;
}
const double eta; ///< learning rate of the algorithm
const double accuracy; ///< model fit convergence accuracy
std::vector<double> weights; ///< weights of the neural network
};
const double eta; ///< learning rate of the algorithm
const double accuracy; ///< model fit convergence accuracy
std::vector<double> weights; ///< weights of the neural network
};
} // namespace machine_learning

View File

@@ -16,95 +16,95 @@
*/
namespace statistics {
/**
* continuous mean and variance computance using
* first value as an approximation for the mean.
* If the first number is much far form the mean, the algorithm becomes very
* inaccurate to compute variance and standard deviation.
/**
 * Continuous mean and variance computation using the
 * first value as an approximation for the mean.
 * If the first number is far from the mean, the algorithm becomes very
 * inaccurate when computing the variance and standard deviation.
 */
template <typename T>
class stats_computer1 {
public:
/** Constructor
* \param[in] x new data sample
*/
template <typename T>
class stats_computer1 {
public:
/** Add a new data sample to the running statistics
 * \param[in] x new data sample
 */
void new_val(T x) {
if (n == 0)
K = x;
n++;
T tmp = x - K;
Ex += tmp;
Ex2 += tmp * tmp;
}
/** Add a new data sample to the running sums
 * \param[in] x new data sample
 */
void new_val(T x) {
    // the very first sample becomes the reference offset K
    const bool is_first_sample = (n == 0);
    if (is_first_sample) {
        K = x;
    }
    ++n;

    // accumulate the shifted value and its square
    const T shifted = x - K;
    Ex += shifted;
    Ex2 += shifted * shifted;
}
/** return sample mean computed till last sample */
double mean() const { return K + Ex / n; }
/** \returns mean of the samples seen so far: the offset `K` plus the
 * average of the shifted samples */
double mean() const {
    const double avg_shift = Ex / n;
    return K + avg_shift;
}
/** return data variance computed till last sample */
double variance() const { return (Ex2 - (Ex * Ex) / n) / (n - 1); }
/** \returns variance of the samples seen so far, using the `n - 1`
 * divisor (note: `n` is unsigned, so `n - 1` wraps when no sample has
 * been added) */
double variance() const {
    const double squared_sum_term = (Ex * Ex) / n;
    return (Ex2 - squared_sum_term) / (n - 1);
}
/** return sample standard deviation computed till last sample */
double std() const { return std::sqrt(this->variance()); }
/** return sample standard deviation computed till last sample */
double std() const { return std::sqrt(this->variance()); }
/** short-hand operator to read new sample from input stream
* \n e.g.: `std::cin >> stats1;`
*/
friend std::istream &operator>>(std::istream &input,
stats_computer1 &stat) {
T val;
input >> val;
stat.new_val(val);
return input;
}
private:
unsigned int n = 0;
double Ex, Ex2;
T K;
};
/**
* continuous mean and variance computance using
* Welford's algorithm (very accurate)
/** short-hand operator to read new sample from input stream
* \n e.g.: `std::cin >> stats1;`
*/
template <typename T>
class stats_computer2 {
public:
/** Constructor
* \param[in] x new data sample
*/
void new_val(T x) {
n++;
double delta = x - mu;
mu += delta / n;
double delta2 = x - mu;
M += delta * delta2;
}
/** short-hand operator to read a new sample from an input stream
 * \n e.g.: `std::cin >> stats1;`
 *
 * The sample is recorded only when the extraction succeeds, so a failed
 * read (end of input, malformed token) does not add a value to the
 * statistics.
 * \param[in,out] input stream to read one value of type `T` from
 * \param[in,out] stat object that receives the sample
 * \returns the same stream, so its state can be tested by the caller
 */
friend std::istream &operator>>(std::istream &input,
                                stats_computer1 &stat) {
    T val;
    if (input >> val) {
        stat.new_val(val);
    }
    return input;
}
/** return sample mean computed till last sample */
double mean() const { return mu; }
private:
unsigned int n = 0;      ///< number of samples added so far
double Ex = 0, Ex2 = 0;  ///< running sums of (x - K) and (x - K)^2;
                         ///< zero-initialised because new_val() uses +=
T K{};                   ///< offset: overwritten by the first sample
};
/** return data variance computed till last sample */
double variance() const { return M / n; }
/**
 * Continuous mean and variance computation using
 * Welford's algorithm (very accurate)
 */
template <typename T>
class stats_computer2 {
public:
/** Add a new data sample and update the running mean and the running
 * sum of squared deviations
 * \param[in] x new data sample
 */
void new_val(T x) {
    ++n;
    // deviation from the mean before this sample is folded in
    const double diff_old_mean = x - mu;
    mu += diff_old_mean / n;
    // deviation from the mean after the update
    const double diff_new_mean = x - mu;
    M += diff_old_mean * diff_new_mean;
}
/** return sample standard deviation computed till last sample */
double std() const { return std::sqrt(this->variance()); }
/** return sample mean computed till last sample */
double mean() const { return mu; }
/** short-hand operator to read new sample from input stream
* \n e.g.: `std::cin >> stats1;`
*/
friend std::istream &operator>>(std::istream &input,
stats_computer2 &stat) {
T val;
input >> val;
stat.new_val(val);
return input;
}
/** \returns variance of the samples seen so far, computed as the
 * accumulated squared deviation `M` divided by the sample count `n` */
double variance() const {
    const double squared_deviation_sum = M;
    return squared_deviation_sum / n;
}
private:
unsigned int n = 0;
double mu = 0, var = 0, M = 0;
};
/** return sample standard deviation computed till last sample */
double std() const { return std::sqrt(this->variance()); }
/** short-hand operator to read a new sample from an input stream
 * \n e.g.: `std::cin >> stats2;`
 *
 * The sample is recorded only when the extraction succeeds, so a failed
 * read (end of input, malformed token) does not add a value to the
 * statistics.
 * \param[in,out] input stream to read one value of type `T` from
 * \param[in,out] stat object that receives the sample
 * \returns the same stream, so its state can be tested by the caller
 */
friend std::istream &operator>>(std::istream &input,
                                stats_computer2 &stat) {
    T val;
    if (input >> val) {
        stat.new_val(val);
    }
    return input;
}
private:
unsigned int n = 0;  ///< number of samples added so far
double mu = 0;       ///< running mean
double M = 0;        ///< running sum of squared deviations from the mean
};
} // namespace statistics

View File

@@ -9,84 +9,83 @@
#include <utility> // for std::move & std::remove_reference_t
namespace sorting {
template <class Iterator>
void merge(Iterator, Iterator, const Iterator, char[]);
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
* sorts elements non-recursively by breaking them into small segments,
* merging adjacent segments into larger sorted segments, then increasing
* the sizes of segments by factors of 2 and repeating the same process.
* best-case = worst-case = O(n log(n))
* @param first points to the first element
* @param last points to 1-step past the last element
* @param n the number of elements
*/
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const Iterator last,
const size_t n) {
// create a buffer large enough to store all elements
// dynamically allocated to comply with cpplint
char* buffer = new char[n * sizeof(*first)];
// buffer size can be optimized to largest power of 2 less than n
// elements divide the container into equally-sized segments whose
// length start at 1 and keeps increasing by factors of 2
for (size_t length(1); length < n; length <<= 1) {
// merge adjacent segments whose number is n / (length * 2)
Iterator left(first);
for (size_t counter(n / (length << 1)); counter; --counter) {
Iterator right(left + length), end(right + length);
merge(left, right, end, buffer);
left = end;
}
// if the number of remaining elements (n * 2 % length) is longer
// than a segment, merge the remaining elements
if ((n & ((length << 1) - 1)) > length)
merge(left, left + length, last, buffer);
template <class Iterator>
void merge(Iterator, Iterator, const Iterator, char[]);
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
* sorts elements non-recursively by breaking them into small segments,
* merging adjacent segments into larger sorted segments, then increasing
* the sizes of segments by factors of 2 and repeating the same process.
* best-case = worst-case = O(n log(n))
* @param first points to the first element
* @param last points to 1-step past the last element
* @param n the number of elements
*/
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const Iterator last,
const size_t n) {
// create a buffer large enough to store all elements
// dynamically allocated to comply with cpplint
char* buffer = new char[n * sizeof(*first)];
// buffer size can be optimized to largest power of 2 less than n
// elements divide the container into equally-sized segments whose
// length start at 1 and keeps increasing by factors of 2
for (size_t length(1); length < n; length <<= 1) {
// merge adjacent segments whose number is n / (length * 2)
Iterator left(first);
for (size_t counter(n / (length << 1)); counter; --counter) {
Iterator right(left + length), end(right + length);
merge(left, right, end, buffer);
left = end;
}
delete[] buffer;
}
/// merges 2 sorted adjacent segments into a larger sorted segment
/**
* best-case = worst-case = O(n)
* @param l points to the left part
* @param r points to the right part, end of left part
* @param e points to end of right part
* @param b points at the buffer
*/
template <class Iterator>
void merge(Iterator l, Iterator r, const Iterator e, char b[]) {
// create 2 pointers to point at the buffer
auto p(reinterpret_cast<std::remove_reference_t<decltype(*l)>*>(b)),
c(p);
// move the left part of the segment
for (Iterator t(l); r != t; ++t) *p++ = std::move(*t);
// while neither the buffer nor the right part has been exhausted
// move the smallest element of the two back to the container
while (e != r && c != p) *l++ = std::move(*r < *c ? *r++ : *c++);
// notice only one of the two following loops will be executed
// while the right part hasn't bee exhausted, move it back
while (e != r) *l++ = std::move(*r++);
// while the buffer hasn't bee exhausted, move it back
while (c != p) *l++ = std::move(*c++);
}
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
* @param first points to the first element
* @param n the number of elements
*/
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const size_t n) {
non_recursive_merge_sort(first, first + n, n);
}
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
* @param first points to the first element
* @param last points to 1-step past the last element
*/
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const Iterator last) {
non_recursive_merge_sort(first, last, last - first);
// if the number of remaining elements (n * 2 % length) is longer
// than a segment, merge the remaining elements
if ((n & ((length << 1) - 1)) > length)
merge(left, left + length, last, buffer);
}
delete[] buffer;
}
/// merges 2 sorted adjacent segments into a larger sorted segment
/**
 * The left segment is first moved into the buffer, then the buffer and
 * the right segment are merged back into the container. An element of the
 * right segment is taken only when it is strictly smaller, so equal
 * elements keep their relative order.
 *
 * NOTE(review): elements are move-assigned into the raw `char` buffer,
 * which assumes the element type can be assigned into storage that holds
 * no constructed object (true for trivial types) - confirm before using
 * with non-trivial element types.
 *
 * best-case = worst-case = O(n)
 * @param l points to the left part
 * @param r points to the right part, end of left part
 * @param e points to end of right part
 * @param b points at the buffer
 */
template <class Iterator>
void merge(Iterator l, Iterator r, const Iterator e, char b[]) {
    using value_type = std::remove_reference_t<decltype(*l)>;

    // [buf_cur, buf_end) is the part of the buffer still to be merged
    value_type* buf_end = reinterpret_cast<value_type*>(b);
    value_type* buf_cur = buf_end;

    // stash the left segment in the buffer
    for (Iterator src(l); src != r; ++src) {
        *buf_end = std::move(*src);
        ++buf_end;
    }

    // merge while both the right segment and the buffer have elements
    while (r != e && buf_cur != buf_end) {
        if (*r < *buf_cur) {
            *l = std::move(*r);
            ++r;
        } else {
            *l = std::move(*buf_cur);
            ++buf_cur;
        }
        ++l;
    }

    // at most one of the two loops below does any work
    for (; r != e; ++r, ++l) {
        *l = std::move(*r);
    }
    for (; buf_cur != buf_end; ++buf_cur, ++l) {
        *l = std::move(*buf_cur);
    }
}
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
 * Convenience overload: derives the end iterator from the element count
 * and forwards to the three-argument version.
 * @param first points to the first element
 * @param n the number of elements
 */
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const size_t n) {
    const Iterator last = first + n;
    non_recursive_merge_sort(first, last, n);
}
/// bottom-up merge sort which sorts elements in a non-decreasing order
/**
 * Convenience overload: derives the element count from the iterator
 * distance and forwards to the three-argument version.
 * @param first points to the first element
 * @param last points to 1-step past the last element
 */
template <class Iterator>
void non_recursive_merge_sort(const Iterator first, const Iterator last) {
    const auto count = last - first;
    non_recursive_merge_sort(first, last, count);
}
} // namespace sorting

View File

@@ -25,48 +25,48 @@
#include <iostream>
namespace sorting {
/**
* This function takes last element as pivot, places
* the pivot element at its correct position in sorted
* array, and places all smaller (smaller than pivot)
* to left of pivot and all greater elements to right
* of pivot
*
*/
/**
* This function takes last element as pivot, places
* the pivot element at its correct position in sorted
* array, and places all smaller (smaller than pivot)
* to left of pivot and all greater elements to right
* of pivot
*
*/
int partition(int arr[], int low, int high) {
int pivot = arr[high]; // taking the last element as pivot
int i = (low - 1); // Index of smaller element
/**
 * Partition `arr[low..high]` around its last element.
 * Elements smaller than or equal to the pivot end up to its left, larger
 * ones to its right.
 * \param[in,out] arr array to partition
 * \param[in] low index of the first element of the range
 * \param[in] high index of the last element of the range (the pivot)
 * \returns final index of the pivot
 */
int partition(int arr[], int low, int high) {
    const int pivot = arr[high];  // taking the last element as pivot
    int store = low;              // next slot for an element <= pivot

    for (int scan = low; scan < high; ++scan) {
        if (arr[scan] > pivot) {
            continue;  // larger elements stay on the right side
        }
        const int held = arr[store];
        arr[store] = arr[scan];
        arr[scan] = held;
        ++store;
    }

    // drop the pivot between the two groups
    const int held = arr[store];
    arr[store] = arr[high];
    arr[high] = held;
    return store;
}
/**
* The main function that implements QuickSort
* arr[] --> Array to be sorted,
* low --> Starting index,
* high --> Ending index
*/
void quickSort(int arr[], int low, int high) {
if (low < high) {
int p = partition(arr, low, high);
quickSort(arr, low, p - 1);
quickSort(arr, p + 1, high);
for (int j = low; j < high; j++) {
// If current element is smaller than or
// equal to pivot
if (arr[j] <= pivot) {
i++; // increment index of smaller element
int temp = arr[i];
arr[i] = arr[j];
arr[j] = temp;
}
}
int temp = arr[i + 1];
arr[i + 1] = arr[high];
arr[high] = temp;
return (i + 1);
}
/**
 * Recursive QuickSort: partitions the range, then sorts the parts on
 * either side of the pivot.
 * \param[in,out] arr array to be sorted
 * \param[in] low starting index
 * \param[in] high ending index (inclusive)
 */
void quickSort(int arr[], int low, int high) {
    if (low >= high) {
        return;  // zero or one element: nothing to do
    }
    const int pivot_index = partition(arr, low, high);
    quickSort(arr, low, pivot_index - 1);
    quickSort(arr, pivot_index + 1, high);
}
} // namespace sorting

View File

@@ -33,35 +33,35 @@ void show_data(T (&arr)[N]) {
* \brief Sorting algorithms
*/
namespace sorting {
/**
* Optimized algorithm - takes half the time by utilizing
* Mar
**/
template <class T>
void shell_sort(T *arr, size_t LEN) {
const unsigned int gaps[] = {701, 301, 132, 57, 23, 10, 4, 1};
const unsigned int gap_len = 8;
size_t i, j, g;
/**
 * Optimized algorithm - takes half the time by utilizing
 * Marcin Ciura's gap sequence (701, 301, 132, 57, 23, 10, 4, 1)
 **/
template <class T>
void shell_sort(T *arr, size_t LEN) {
const unsigned int gaps[] = {701, 301, 132, 57, 23, 10, 4, 1};
const unsigned int gap_len = 8;
size_t i, j, g;
for (g = 0; g < gap_len; g++) {
unsigned int gap = gaps[g];
for (i = gap; i < LEN; i++) {
T tmp = arr[i];
for (g = 0; g < gap_len; g++) {
unsigned int gap = gaps[g];
for (i = gap; i < LEN; i++) {
T tmp = arr[i];
for (j = i; j >= gap && (arr[j - gap] - tmp) > 0; j -= gap)
arr[j] = arr[j - gap];
for (j = i; j >= gap && (arr[j - gap] - tmp) > 0; j -= gap)
arr[j] = arr[j - gap];
arr[j] = tmp;
}
arr[j] = tmp;
}
}
}
/** function overload - when input array is of a known length array type
*/
template <class T, size_t N>
void shell_sort(T (&arr)[N]) {
shell_sort(arr, N);
}
/** function overload - when the input is an array of known length; the
 * length is deduced from the array type and passed to the pointer version
 */
template <class T, size_t N>
void shell_sort(T (&arr)[N]) {
    T *first = arr;
    shell_sort(first, N);
}
} // namespace sorting

View File

@@ -11,28 +11,28 @@
#include <vector>
namespace string_search {
/**
* Find a pattern in a string by comparing the pattern to every substring.
* @param text Any string that might contain the pattern.
* @param pattern String that we are searching for.
* @return Index where the pattern starts in the text
* @return -1 if the pattern was not found.
*/
int brute_force(const std::string &text, const std::string &pattern) {
size_t pat_l = pattern.length();
size_t txt_l = text.length();
int index = -1;
if (pat_l <= txt_l) {
for (size_t i = 0; i < txt_l - pat_l + 1; i++) {
std::string s = text.substr(i, pat_l);
if (s == pattern) {
index = i;
break;
}
/**
 * Find a pattern in a string by comparing the pattern to every substring
 * of the same length. The comparison is done in place, without building a
 * temporary substring for each position.
 * @param text Any string that might contain the pattern.
 * @param pattern String that we are searching for.
 * @return Index where the pattern starts in the text (0 for an empty
 * pattern)
 * @return -1 if the pattern was not found.
 */
int brute_force(const std::string &text, const std::string &pattern) {
    const size_t pat_l = pattern.length();
    const size_t txt_l = text.length();

    // a pattern longer than the text can never match
    if (pat_l > txt_l) {
        return -1;
    }

    for (size_t i = 0; i + pat_l <= txt_l; i++) {
        if (text.compare(i, pat_l, pattern) == 0) {
            return static_cast<int>(i);
        }
    }
    return -1;
}
} // namespace string_search
using string_search::brute_force;

View File

@@ -21,50 +21,50 @@
#include <vector>
namespace string_search {
/**
* Generate the partial match table aka failure function for a pattern to
* search.
* \param[in] pattern text for which to create the partial match table
* \returns the partial match table as a vector array
*/
std::vector<int> getFailureArray(const std::string &pattern) {
int pattern_length = pattern.size();
std::vector<int> failure(pattern_length + 1);
failure[0] = -1;
int j = -1;
/**
* Generate the partial match table aka failure function for a pattern to
* search.
* \param[in] pattern text for which to create the partial match table
* \returns the partial match table as a vector array
*/
std::vector<int> getFailureArray(const std::string &pattern) {
int pattern_length = pattern.size();
std::vector<int> failure(pattern_length + 1);
failure[0] = -1;
int j = -1;
for (int i = 0; i < pattern_length; i++) {
while (j != -1 && pattern[j] != pattern[i]) {
j = failure[j];
}
j++;
failure[i + 1] = j;
for (int i = 0; i < pattern_length; i++) {
while (j != -1 && pattern[j] != pattern[i]) {
j = failure[j];
}
return failure;
j++;
failure[i + 1] = j;
}
return failure;
}
/**
* KMP algorithm to find a pattern in a text
* \param[in] pattern string pattern to search
* \param[in] text text in which to search
* \returns `true` if pattern was found
* \returns `false` if pattern was not found
*/
bool kmp(const std::string &pattern, const std::string &text) {
int text_length = text.size(), pattern_length = pattern.size();
std::vector<int> failure = getFailureArray(pattern);
/**
* KMP algorithm to find a pattern in a text
* \param[in] pattern string pattern to search
* \param[in] text text in which to search
* \returns `true` if pattern was found
* \returns `false` if pattern was not found
*/
bool kmp(const std::string &pattern, const std::string &text) {
int text_length = text.size(), pattern_length = pattern.size();
std::vector<int> failure = getFailureArray(pattern);
int k = 0;
for (int j = 0; j < text_length; j++) {
while (k != -1 && pattern[k] != text[j]) {
k = failure[k];
}
k++;
if (k == pattern_length)
return true;
int k = 0;
for (int j = 0; j < text_length; j++) {
while (k != -1 && pattern[k] != text[j]) {
k = failure[k];
}
return false;
k++;
if (k == pattern_length)
return true;
}
return false;
}
} // namespace string_search
using string_search::kmp;

View File

@@ -16,87 +16,86 @@
#define PRIME 5 ///< Prime modulus for hash functions
namespace string_search {
/**
* convert a string to an intger - called as hashing function
* \param[in] s source of string to hash
* \param[in] n length of substring to hash
* \returns hash integer
*/
int64_t create_hash(const std::string& s, int n) {
int64_t result = 0;
for (int i = 0; i < n; ++i) {
result += (int64_t)(s[i] * (int64_t)pow(PRIME, i));
}
return result;
/**
 * convert a string to an integer - the hashing function.
 * Computes the sum of `s[i] * PRIME^i` for the first `n` characters. The
 * power is accumulated in integer arithmetic rather than through the
 * floating-point `pow`, so it stays exact; the arithmetic is unsigned so
 * that overflow wraps instead of being undefined.
 * \param[in] s source of string to hash
 * \param[in] n length of substring to hash (characters past the end of
 * `s` are not read)
 * \returns hash integer
 */
int64_t create_hash(const std::string& s, int n) {
    uint64_t result = 0;
    uint64_t power = 1;  // PRIME^i for the current index i
    for (int i = 0; i < n && static_cast<size_t>(i) < s.size(); ++i) {
        // go through int64_t first so a negative char keeps its sign
        result += static_cast<uint64_t>(static_cast<int64_t>(s[i])) * power;
        power *= PRIME;
    }
    return static_cast<int64_t>(result);
}
/**
* re-hash a string using known existing hash
* \param[in] s source of string to hash
* \param[in] old_index previous index of string
* \param[in] new_index new index of string
* \param[in] old_hash previous hash of substring
* \param[in] patLength length of substring to hash
* \returns new hash integer
*/
int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
int64_t old_hash, int patLength) {
int64_t new_hash = old_hash - s[old_index];
new_hash /= PRIME;
new_hash +=
(int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1));
return new_hash;
/**
 * re-hash a string using a known existing hash: removes the contribution
 * of the character leaving the window, divides by `PRIME`, and adds the
 * character entering the window weighted by `PRIME^(patLength - 1)`
 * \param[in] s source of string to hash
 * \param[in] old_index previous index of string
 * \param[in] new_index new index of string
 * \param[in] old_hash previous hash of substring
 * \param[in] patLength length of substring to hash
 * \returns new hash integer
 */
int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
                         int64_t old_hash, int patLength) {
    // drop the outgoing character and shift the remaining terms down
    const int64_t without_leading = (old_hash - s[old_index]) / PRIME;
    // weight of the last position in the window
    const int64_t top_weight =
        static_cast<int64_t>(pow(PRIME, patLength - 1));
    return without_leading + static_cast<int64_t>(s[new_index] * top_weight);
}
/**
* compare if two sub-strings are equal
* \param[in] str1 string pattern to search
* \param[in] str2 text in which to search
* \param[in] start1,end1 start and end indices for substring in str1
* \param[in] start2,end2 start and end indices for substring in str2
* \returns `true` if pattern was found
* \returns `false` if pattern was not found
* @note can this be replaced by std::string::compare?
*/
bool check_if_equal(const std::string& str1, const std::string& str2,
int start1, int end1, int start2, int end2) {
if (end1 - start1 != end2 - start2) {
return false;
}
/**
* compare if two sub-strings are equal
* \param[in] str1 string pattern to search
* \param[in] str2 text in which to search
* \param[in] start1,end1 start and end indices for substring in str1
* \param[in] start2,end2 start and end indices for substring in str2
* \returns `true` if pattern was found
* \returns `false` if pattern was not found
* @note can this be replaced by std::string::compare?
*/
bool check_if_equal(const std::string& str1, const std::string& str2,
int start1, int end1, int start2, int end2) {
if (end1 - start1 != end2 - start2) {
while (start1 <= end1 && start2 <= end2) {
if (str1[start1] != str2[start2]) {
return false;
}
while (start1 <= end1 && start2 <= end2) {
if (str1[start1] != str2[start2]) {
return false;
}
start1++;
start2++;
}
return true;
start1++;
start2++;
}
return true;
}
/**
* Perform string pattern search using Rabin-Karp algorithm
* @param[in] str string to search in
* @param[in] pat pattern to search for
* @return index of first occurrence of pattern
* @return -1 if pattern not found
*/
/**
* Perform string pattern search using Rabin-Karp algorithm
* @param[in] str string to search in
* @param[in] pat pattern to search for
* @return index of first occurrence of pattern
* @return -1 if pattern not found
*/
int rabin_karp(const std::string& str, const std::string& pat) {
int64_t pat_hash = create_hash(pat, pat.size());
int64_t str_hash = create_hash(str, pat.size());
for (int i = 0; i <= str.size() - pat.size(); ++i) {
if (pat_hash == str_hash &&
check_if_equal(str, pat, i, i + pat.size() - 1, 0,
pat.size() - 1)) {
return i;
}
if (i < str.size() - pat.size()) {
str_hash = recalculate_hash(str, i, i + pat.size(), str_hash,
pat.size());
}
/**
 * Perform string pattern search using Rabin-Karp algorithm.
 * A window the size of the pattern slides over `str`; the characters are
 * compared only when the window hash equals the pattern hash.
 * @param[in] str string to search in
 * @param[in] pat pattern to search for
 * @return index of first occurrence of pattern
 * @return -1 if pattern not found (including when `pat` is longer than
 * `str`)
 */
int rabin_karp(const std::string& str, const std::string& pat) {
    // without this guard the unsigned difference of the sizes would wrap
    if (pat.size() > str.size()) {
        return -1;
    }

    const int window = static_cast<int>(pat.size());
    const int last_start = static_cast<int>(str.size() - pat.size());

    const int64_t pat_hash = create_hash(pat, window);
    int64_t str_hash = create_hash(str, window);

    for (int i = 0; i <= last_start; ++i) {
        if (pat_hash == str_hash &&
            check_if_equal(str, pat, i, i + window - 1, 0, window - 1)) {
            return i;
        }
        // slide the window unless it is already at its last position
        if (i < last_start) {
            str_hash = recalculate_hash(str, i, i + window, str_hash, window);
        }
    }
    return -1;  // return -1 if given pattern not found
}
} // namespace string_search