diff --git a/DIRECTORY.md b/DIRECTORY.md index 2f3dba45e..e8be6a9c5 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -208,6 +208,7 @@ * [Intersection Of 2 Arrays](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/operations_on_datastructures/intersection_of_2_arrays.cpp) * [Reverse A Linked List Using Recusion](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/operations_on_datastructures/reverse_a_linked_list_using_recusion.cpp) * [Selectionsortlinkedlist](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/operations_on_datastructures/selectionsortlinkedlist.cpp) + * [Trie Multiple Search](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/operations_on_datastructures/trie_multiple_search.cpp) * [Union Of 2 Arrays](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/operations_on_datastructures/union_of_2_arrays.cpp) ## Others diff --git a/operations_on_datastructures/trie_multiple_search.cpp b/operations_on_datastructures/trie_multiple_search.cpp new file mode 100644 index 000000000..854354fdf --- /dev/null +++ b/operations_on_datastructures/trie_multiple_search.cpp @@ -0,0 +1,466 @@ +/** + * @file + * @brief [Trie datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/) + * with search variants + * @details + * This provides multiple variants of search functions + * on a trie structure utilizing STL. The trie is valid + * for only English alphabets. + * @author [Ghanashyam](https://github.com/g-s-k-zoro) + */ + +#include /// for std::count +#include /// for assert +#include /// for tolower +#include /// for string operations +#include /// for IO Operations +#include /// for std::priority_queue + +/** + * @namespace operations_on_datastructures + * @brief Operations on data structures + */ +namespace operations_on_datastructures { +/** + * @namespace trie_operations + * @brief Functions for [Trie datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/) + * implementation + */ +namespace trie_operations { +/** + * @brief Class defining the structure of trie node and containing the methods + * to perform operations on them. + */ +class Tnode { + private: + static constexpr uint8_t ENGLISH_ALPHABET_SIZE = 26; + // pointers to alphabets + std::vector english; + + // To mark the end of word + bool endOfWord; + + // To store the frequency of searches for the word + uint32_t frequency; + + public: + Tnode() { + english.resize(ENGLISH_ALPHABET_SIZE, nullptr); + endOfWord = false; + frequency = 0; + } + // Copy Constructor + Tnode(const Tnode &node) { + english = node.english; + endOfWord = node.endOfWord; + frequency = node.frequency; + } + + Tnode &operator=(const Tnode &node) = default; + + Tnode(Tnode &&) = default; + + Tnode &operator=(Tnode &&) = default; + /** + * @brief Function to count the number of children a node in the trie has + * @param node a trie node whose children need to be counted + * @return count of the number of children of the given node (max 26) + */ + inline uint8_t numberOfChildren(Tnode *node) { + return ENGLISH_ALPHABET_SIZE - + std::count(node->english.begin(), node->english.end(), nullptr); + } + + // Functions to perform operations on trie + void Insert(const std::string &entry); + void Delete(std::string entry); + void DeleteFrom(Tnode *delete_from, std::string delete_string, + int remove_index); + bool SearchPresence(const std::string &key); + void SuggestAutocomplete(Tnode *new_root, const std::string &prefix); + void SearchSuggestions(const std::string &key); + void SuggestFreqAutocomplete( + Tnode *new_root, const std::string &prefix, + std::priority_queue > *suggestions); + void SearchFreqSuggestions(const std::string &key); + void SelectionTop_3( + std::priority_queue > *suggestions); + + // To free up the dynamically allocated objects + ~Tnode() { + int i = 0; + for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { + if (english[i]) { + delete english[i]; + } + } + } +}; + +/** + * @brief Function to insert a word in the trie + * @param entry string entry to be inserted in the trie + */ +void Tnode::Insert(const std::string &entry) { + Tnode *cur_pos = this; + int letter_index = 0; + + for (auto &i : entry) { + // To ignore case + letter_index = tolower(i) - 97; + + // Allocate a node for each character of entry if not present in the + // trie + if (cur_pos->english[letter_index] == nullptr) { + cur_pos->english[letter_index] = new Tnode(); + } + + cur_pos = cur_pos->english[letter_index]; + } + // cur_pos points to the last char, mark it as end of word + cur_pos->endOfWord = true; +} + +/** + * @brief Function recursively deletes the substring character by + * character iterating through the string to be deleted. It traverses till the + * end of word in a recursive fashion, from there it deletes characters one by + * one till it reaches back to the initial call. + * @param delete_from the acting root to the required suffix to be deleted + * @param delete_string the string to be deleted from the trie + * @param remove_index index denoting the beginning of the substring to be + * deleted + */ +void Tnode::DeleteFrom(Tnode *delete_from, std::string delete_string, + int remove_index) { + if (delete_string.size() == remove_index) { + int letter_index = tolower(delete_string[remove_index]) - 97; + + DeleteFrom(delete_from->english[letter_index], delete_string, + remove_index + 1); + + delete delete_from; + } +} + +/** + * @brief Function to verify presence and hence delete an entry from the trie + * @param entry string entry to be deleted from the trie + */ +void Tnode::Delete(std::string entry) { + Tnode *cur_pos = this, + *delete_from = this; // Current pointer pointing to root + int letter_index = 0, delete_from_index = 0, i = 0, n = entry.size(); + + for (i = 0; i < n; i++) { + // To ignore case + letter_index = tolower(entry[i]) - 97; + + // Display error message when given entry is not present in the tree + if (cur_pos->english[letter_index] == nullptr) { + std::cout << "Entry not Found" << std::endl; + return; + } + // If the current node is end of word for the current prefix or if it + // has 2 or more branches It cannot be deleted while deleting the + // required entry. + if (numberOfChildren(cur_pos) > 1 || cur_pos->endOfWord) { + delete_from = cur_pos; // denotes the beginning of the shortest + // suffix that is allowed to be deleted + delete_from_index = i - 1; // Beginning index of the suffix + // corresponding to the 'entry' + } + + // Traversing through the entry + cur_pos = cur_pos->english[letter_index]; + } + + // cur_pos now points to the last char of entry. Display message if that + // entry does not exist + if (!cur_pos->endOfWord) { + std::cout << "Entry not Found" << std::endl; + return; + } + + // If cur_pos is not a leaf node, unmark end of word and assign 0 to it's + // frequency for deletion + if (numberOfChildren(cur_pos)) { + cur_pos->endOfWord = false; + cur_pos->frequency = 0; + return; + } + + // The first character of the suffix to be deleted + letter_index = tolower(entry[delete_from_index + 1]) - 97; + // Point cur_pos to the next node + cur_pos = delete_from->english[letter_index]; + // Sever the connection from the main trie + delete_from->english[letter_index] = nullptr; + + // If number of characters in the suffix are more than 1, recursively delete + // each character starting from cur_pos using the helper function + if (n > delete_from_index + 2) { + DeleteFrom(cur_pos, entry, delete_from_index + 2); + } + // If the suffix is only 1 char in length + else { + delete cur_pos; + } +} + +/** + * @brief Function to check a word's presence in the trie (Basic) + * @param key the string key to be searched in the trie + * @return true if the key is found + * @return false if the key is not found + */ +bool Tnode::SearchPresence(const std::string &key) { + Tnode *cur_pos = this; + int letter_index = 0; + + for (auto &i : key) { + letter_index = tolower(i) - 97; + // If any character in the order of the key is absent, word not found! + if (cur_pos->english[letter_index] == nullptr) { + return false; + } + cur_pos = cur_pos->english[letter_index]; + } + // Word is only present in the trie if the key is a valid complete entry and + // not just a prefix. + if (cur_pos->endOfWord) { + (cur_pos->frequency)++; + return true; + } else { + return false; + } +} + +/** + * @brief Recursive function to suggest all the entries of trie + * which have a given common prefix + * @param new_root pointer pointing to the node corresponding to the last char + * of prefix + * @param prefix the common prefix that all the suggestions must have + */ +void Tnode::SuggestAutocomplete(Tnode *new_root, const std::string &prefix) { + // Iterate through all 26 nodes as we have to print all strings with the + // given prefix + int i = 0; + for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { + if (new_root->english[i] != nullptr) { + // Print the sugestion only if it's a valid complete entry and not + // just a prefix + if (new_root->english[i]->endOfWord) { + std::cout << prefix + char(i + 97) << std::endl; + } + + SuggestAutocomplete(new_root->english[i], prefix + char(i + 97)); + } + } +} + +/** + * @brief Lists out all the words in trie with the longest prefix + * of the search key that is present in the trie. For example - if trie contains + * "abc", "abcde", "abcdefg", "abcddef" and if the search key is "abcdezz", then + * the longest common prefix is "abcde" and hence search results will be + * "abcde", "abcdefg". + * @param key the string key to be searched for suggestions + */ +void Tnode::SearchSuggestions(const std::string &key) { + Tnode *cur_pos = nullptr, *prev_pos = nullptr; + cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root + int letter_index = 0; + std::string prefix = + ""; // variable storing the updated value of longest common prefix + + for (auto &i : key) { + letter_index = tolower(i) - 97; + prev_pos = cur_pos; // Previous pointer updated to point to the last + // char of the longest common prefix + + // When the node for the character does not exist, longest prefix has + // been determined and SuggestAutocomplete is called + if (cur_pos->english[letter_index] == nullptr) { + SuggestAutocomplete(prev_pos, prefix); + std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " + << std::endl; + return; + } + // Updating the longest common prefix + prefix += char(tolower(i)); + cur_pos = cur_pos->english[letter_index]; + } + // If the key is a valid entry of trie, display it @ top of the suggestions + if (cur_pos->endOfWord) { + std::cout << key << std::endl; + (cur_pos->frequency)++; + } + + (void)prev_pos; // Idiom to ignore previous pointer + + // Call for suggestions when the search key is present as an entry/a prefix + // in the trie + SuggestAutocomplete(cur_pos, prefix); + std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " + << std::endl; + return; +} + +/** + * @brief Function to display the 3 suggestions with highest frequency + * of search hits + * @param suggestions a max heap that contains pairs of (frequency, word) + * heapified based on frequency + */ +void Tnode::SelectionTop_3( + std::priority_queue > *suggestions) { + // Display Either top 3 or total number of suggestions, whichever is smaller + int n = suggestions->size(), Top = 0; + Top = n < 3 ? n : 3; + while (Top--) { + std::cout << suggestions->top().second << std::endl; + suggestions->pop(); + } +} + +/** + * @brief Recursive function to suggest most frequently + * searched entries of trie which have a given common prefix + * @param new_root pointer pointing to the node corresponding to the last char + * of prefix + * @param prefix the common prefix that all the suggestions must have + * @param suggestions a max heap that contains pairs of (frequency, word) + * heapified based on frequency + */ +void Tnode::SuggestFreqAutocomplete( + Tnode *new_root, const std::string &prefix, + std::priority_queue > *suggestions) { + int i = 0; + for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { + if (new_root->english[i] != nullptr) { + // Add to sugestions only if it's a valid complete entry and not + // just a prefix + if (new_root->english[i]->endOfWord) { + suggestions->push(std::make_pair( + new_root->english[i]->frequency, prefix + char(i + 97))); + } + + SuggestFreqAutocomplete(new_root->english[i], prefix + char(i + 97), + suggestions); + } + } +} + +/** + * @brief Lists out the most frequent words in trie with the + * longest prefix of the search key that is present in the trie. For example - + * if trie contains "abc", "abcde", "abcdefg", "abcddef" and they have been + * previously searched for 3, 1, 2, 4 times respectively, if the search key is + * "ab", then the longest common prefix is "ab" and only the top 3 frequencies + * among the matches would be displayed viz. "abcddef", "abc", "abcdefg". + * @param key the string key to be searched for suggestions + */ +void Tnode::SearchFreqSuggestions(const std::string &key) { + Tnode *cur_pos = nullptr, *prev_pos = nullptr; + cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root + int letter_index = 0; + std::string prefix = + ""; // variable storing the updated value of longest common prefix + std::priority_queue > + suggestions; // max heap to store (frequency, word) in descending order + // of freq + + std::priority_queue > *Suggestions = + &suggestions; + + for (auto &i : key) { + letter_index = tolower(i) - 97; + prev_pos = cur_pos; // Previous pointer updated to point to the last + // char of the longest common prefix + + // When the node for the character does not exist, longest prefix has + // been determined and SuggestFreqAutocomplete is called + if (cur_pos->english[letter_index] == nullptr) { + SuggestFreqAutocomplete(prev_pos, prefix, Suggestions); + // To display the top 3 results + SelectionTop_3(Suggestions); + std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " + << std::endl; + return; + } + // Updating the longest common prefix + prefix += char(tolower(i)); + cur_pos = cur_pos->english[letter_index]; + } + // If the key is a valid entry of trie, display it @ top of the suggestions + if (cur_pos->endOfWord) { + (cur_pos->frequency)++; + std::cout << key << std::endl; + } + + (void)prev_pos; // Idiom to ignore previous pointer + + // Call for Suggestions when the search key is present as an entry/a prefix + // in the trie + SuggestFreqAutocomplete(cur_pos, prefix, Suggestions); + // Display the top 3 results + SelectionTop_3(Suggestions); + + std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " + << std::endl; + return; +} +} // namespace trie_operations +} // namespace operations_on_datastructures + +/** + * @brief Function to test a simple search before and after deleting + * an entry. And to test out the multiple variants of search. + */ +static void test() { + auto root = new operations_on_datastructures::trie_operations::Tnode(); + std::vector inputs = { + "abcde", "sss", "ssss", "ssst", "sssu", "sssv", + "sst", "ssts", "sstt", "sstu", "tutu", "tutuv", + "tutuu", "tutuvs", "tutus", "tvst", "tvsu", "vvvv"}; + + for (auto &i : inputs) { + root->Insert(i); + } + // Search an existing entry + assert(root->SearchPresence("vvvv")); + std::cout << root->SearchPresence("vvvv") << std::endl; + // Delete it + root->Delete("vvvv"); + // Search for the entry again + assert(!root->SearchPresence("vvvv")); + std::cout << root->SearchPresence("vvvv") << std::endl; + + std::cout << root->SearchPresence("tutu") << std::endl; + root->SearchSuggestions("tutu"); + std::cout << root->SearchPresence("tutu") << std::endl; + + root->SearchSuggestions("tutuv"); + std::cout << root->SearchPresence("tutuv") << std::endl; + + root->SearchSuggestions("tutuvs"); + + root->SearchFreqSuggestions( + "tu"); // The top 3 frequent entries with prefix tu are tutu, tutuv & + // tutuvs respectively + root->SearchSuggestions( + ""); // Empty search to list all the entries in the trie +} + +/** + * @brief Main function + * @param argc commandline argument count (ignored) + * @param argv commandline array of arguments (ignored) + * @returns 0 on exit + */ +int main(int argc, char const *argv[]) { + test(); // run self-test implementations + return 0; +}