From b82e2cd4e76f85f84d1bec26d1104cdcd1854544 Mon Sep 17 00:00:00 2001 From: RitikaGupta8734 <43800283+RitikaGupta8734@users.noreply.github.com> Date: Sat, 4 Sep 2021 12:39:19 +0530 Subject: [PATCH] feat: added z_algorithm in strings (#1581) * feat: added z_algorithm in strings * Updated z_function.cpp Updated z_function.cpp as per contribution guidelines. Fixed Link using github markdown syntax Created a separate function for tests and covered the corner case * Apply suggestions from code review More comments added to the code Co-authored-by: David Leal * Apply suggestions from code review Some more documentation added as per contribution guidelines. Co-authored-by: David Leal * Update strings/z_function.cpp comments added Co-authored-by: David Leal * Update strings/z_function.cpp Co-authored-by: David Leal * updating DIRECTORY.md * clang-format and clang-tidy fixes for 0c7515e9 * Updated int -> uint64_t Updated int -> uint64_t for non-negative values * clang-format and clang-tidy fixes for 12d51239 * Update strings/z_function.cpp Co-authored-by: David Leal * Update strings/z_function.cpp Co-authored-by: David Leal * More comments added * clang-format and clang-tidy fixes for 8a627ac9 * Update strings/z_function.cpp Co-authored-by: David Leal * Update strings/z_function.cpp Co-authored-by: David Leal Co-authored-by: David Leal Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> --- DIRECTORY.md | 1 + strings/z_function.cpp | 96 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 strings/z_function.cpp diff --git a/DIRECTORY.md b/DIRECTORY.md index ec07bbf52..97a648755 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -334,3 +334,4 @@ * [Knuth Morris Pratt](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/strings/knuth_morris_pratt.cpp) * [Manacher Algorithm](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/strings/manacher_algorithm.cpp) * [Rabin 
Karp](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/strings/rabin_karp.cpp) + * [Z Function](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/strings/z_function.cpp) diff --git a/strings/z_function.cpp b/strings/z_function.cpp new file mode 100644 index 000000000..6ce98c491 --- /dev/null +++ b/strings/z_function.cpp @@ -0,0 +1,96 @@ +/** + * @file + * @brief The [Z function](https://cp-algorithms.com/string/z-function.html) for + * finding occurrences of a pattern within a piece of text with time and space + * complexity O(n + m) + * @details + * 1. The Z-function for a string is an array of length n where the + * i-th element is equal to the greatest number of characters starting + * from the position i that coincide with the first characters of s. + * 2. E.g.: string: ababb then z[2]=2 as s[2]=s[0] and s[3]=s[1] and s[4]!=s[2] + * @author [Ritika Gupta](https://github.com/RitikaGupta8734) + */ + +#include <algorithm> /// for std::min +#include <cassert> /// for assert +#include <cstdint> /// for uint64_t +#include <iostream> /// for IO operations +#ifdef _MSC_VER +#include <string> /// for string (use this for MS Visual C++) +#else +#include <string> /// for string +#endif +#include <vector> /// for std::vector + +/** + * @brief Generate the Z-function for the inputted string. 
+ * \param[in] pattern text on which to apply the Z-function + * \returns the Z-function output as a vector array + */ +std::vector Z_function(const std::string &pattern) { + uint64_t pattern_length = pattern.size(); + std::vector z(pattern_length, 0); + + for (uint64_t i = 1, l = 0, r = 0; i < pattern_length; i++) { + if (i <= r) { + z[i] = std::min(r - i + 1, z[i - l]); + } + while (i + z[i] < pattern_length && + pattern[z[i]] == pattern[i + z[i]]) { + z[i]++; + } + if (i + z[i] - 1 > r) { + r = i + z[i] - 1; + } + } + return z; +} + +/** + * @brief Using Z_function to find a pattern in a text + * \param[in] pattern string pattern to search + * \param[in] text text in which to search + * \returns a vector of starting indexes where pattern is found in the text + */ +std::vector find_pat_in_text(const std::string &pattern, + const std::string &text) { + uint64_t text_length = text.size(), pattern_length = pattern.size(); + std::vector z = Z_function(pattern + '#' + text); + std::vector matching_indexes; + + for (uint64_t i = 0; i < text_length; i++) { + if (z[i + pattern_length + 1] == pattern_length) { + matching_indexes.push_back(i); + } + } + return matching_indexes; +} + +/** + * @brief Self-test implementations + * @returns void + */ +static void test() { + // usual case + std::string text1 = "alskfjaldsabc1abc1abcbksbcdnsdabcabc"; + std::string pattern1 = "abc"; + + // matching_indexes1 gets the indexes where pattern1 exists in text1 + std::vector matching_indexes1 = find_pat_in_text(pattern1, text1); + assert((matching_indexes1 == std::vector{10, 14, 18, 30, 33})); + + // corner case + std::string text2 = "greengrass"; + std::string pattern2 = "abc"; + + // matching_indexes2 gets the indexes where pattern2 exists in text2 + std::vector matching_indexes2 = find_pat_in_text(pattern2, text2); + assert((matching_indexes2 == std::vector{})); +} + +/** + * @brief Main function + * @returns 0 on exit + */ +int main() { + test(); // run self-test implementations + 
return 0; +}