Skip to content

🔍 Thuật toán tìm kiếm (Searching Algorithms)

📖 Giới thiệu

Tìm kiếm là một trong những operations cơ bản và quan trọng nhất trong lập trình. Từ việc tìm một contact trong danh bạ đến search engine indexing hàng tỷ web pages, các thuật toán tìm kiếm là backbone của hầu hết các ứng dụng hiện đại.

Tại sao tìm kiếm quan trọng?

  • Database queries: Tìm records thỏa mãn điều kiện
  • Web search: Google, Bing search trong billions of pages
  • AI và Machine Learning: Feature selection, nearest neighbor
  • Gaming: Pathfinding, collision detection
  • Data analysis: Pattern recognition, anomaly detection

Phân loại thuật toán tìm kiếm:

  • Sequential search: Linear search - O(n)
  • Binary search: Divide and conquer - O(log n)
  • Hash-based search: Hash tables - O(1) average
  • Tree-based search: BST, B-trees - O(log n)
  • Advanced techniques: Interpolation, exponential search

Factors ảnh hưởng performance:

  • Data structure: Array, linked list, tree, hash table
  • Data organization: Sorted vs unsorted
  • Search frequency: One-time vs repeated searches
  • Memory constraints: RAM vs disk-based search

🔧 Cú pháp

cpp
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

// Basic linear search - O(n)
int linearSearch(const vector<int>& arr, int target) {
    for (int i = 0; i < arr.size(); i++) {
        if (arr[i] == target) {
            return i;  // Return index of found element
        }
    }
    return -1;  // Element not found
}

// Linear search with early termination for sorted array
int linearSearchSorted(const vector<int>& sortedArr, int target) {
    for (int i = 0; i < sortedArr.size(); i++) {
        if (sortedArr[i] == target) {
            return i;
        }
        if (sortedArr[i] > target) {
            break;  // No need to search further
        }
    }
    return -1;
}

// Find all occurrences
vector<int> findAllOccurrences(const vector<int>& arr, int target) {
    vector<int> indices;
    for (int i = 0; i < arr.size(); i++) {
        if (arr[i] == target) {
            indices.push_back(i);
        }
    }
    return indices;
}

// STL linear search
bool linearSearchSTL(const vector<int>& arr, int target) {
    auto it = find(arr.begin(), arr.end(), target);
    return it != arr.end();
}
cpp
// Iterative binary search - O(log n)
int binarySearch(const vector<int>& sortedArr, int target) {
    int left = 0;
    int right = sortedArr.size() - 1;
    
    while (left <= right) {
        int mid = left + (right - left) / 2;  // Avoid overflow
        
        if (sortedArr[mid] == target) {
            return mid;
        } else if (sortedArr[mid] < target) {
            left = mid + 1;   // Search right half
        } else {
            right = mid - 1;  // Search left half
        }
    }
    
    return -1;  // Element not found
}

// Recursive binary search
int binarySearchRecursive(const vector<int>& arr, int target, int left, int right) {
    if (left > right) {
        return -1;  // Base case: not found
    }
    
    int mid = left + (right - left) / 2;
    
    if (arr[mid] == target) {
        return mid;
    } else if (arr[mid] < target) {
        return binarySearchRecursive(arr, target, mid + 1, right);
    } else {
        return binarySearchRecursive(arr, target, left, mid - 1);
    }
}

// STL binary search functions
bool binarySearchSTL(const vector<int>& sortedArr, int target) {
    return binary_search(sortedArr.begin(), sortedArr.end(), target);
}

// Find position where element should be inserted
int lowerBound(const vector<int>& sortedArr, int target) {
    auto it = lower_bound(sortedArr.begin(), sortedArr.end(), target);
    return it - sortedArr.begin();
}

int upperBound(const vector<int>& sortedArr, int target) {
    auto it = upper_bound(sortedArr.begin(), sortedArr.end(), target);
    return it - sortedArr.begin();
}

// Find range of equal elements
pair<int, int> equalRange(const vector<int>& sortedArr, int target) {
    auto range = equal_range(sortedArr.begin(), sortedArr.end(), target);
    return {range.first - sortedArr.begin(), range.second - sortedArr.begin()};
}

Advanced Search Techniques

cpp
// Interpolation Search - O(log log n) for uniformly distributed data
int interpolationSearch(const vector<int>& sortedArr, int target) {
    int left = 0;
    int right = sortedArr.size() - 1;
    
    while (left <= right && target >= sortedArr[left] && target <= sortedArr[right]) {
        if (left == right) {
            return (sortedArr[left] == target) ? left : -1;
        }
        
        // Estimate position based on value distribution
        int pos = left + ((double)(target - sortedArr[left]) / 
                          (sortedArr[right] - sortedArr[left])) * (right - left);
        
        if (sortedArr[pos] == target) {
            return pos;
        } else if (sortedArr[pos] < target) {
            left = pos + 1;
        } else {
            right = pos - 1;
        }
    }
    
    return -1;
}

// Exponential Search - O(log n)
int exponentialSearch(const vector<int>& sortedArr, int target) {
    int n = sortedArr.size();
    
    if (sortedArr[0] == target) {
        return 0;
    }
    
    // Find range for binary search
    int bound = 1;
    while (bound < n && sortedArr[bound] <= target) {
        bound *= 2;
    }
    
    // Perform binary search in found range
    return binarySearch(sortedArr, target, bound / 2, min(bound, n - 1));
}

// Jump Search - O(√n)
int jumpSearch(const vector<int>& sortedArr, int target) {
    int n = sortedArr.size();
    int step = sqrt(n);
    int prev = 0;
    
    // Find block where element is present
    while (sortedArr[min(step, n) - 1] < target) {
        prev = step;
        step += sqrt(n);
        if (prev >= n) {
            return -1;
        }
    }
    
    // Linear search in identified block
    while (sortedArr[prev] < target) {
        prev++;
        if (prev == min(step, n)) {
            return -1;
        }
    }
    
    return (sortedArr[prev] == target) ? prev : -1;
}
cpp
#include <unordered_map>
#include <unordered_set>

class HashSearch {
private:
    unordered_map<int, vector<int>> hashMap;  // value -> indices
    
public:
    // Build hash index - O(n)
    void buildIndex(const vector<int>& arr) {
        hashMap.clear();
        for (int i = 0; i < arr.size(); i++) {
            hashMap[arr[i]].push_back(i);
        }
    }
    
    // Search using hash - O(1) average case
    vector<int> search(int target) {
        auto it = hashMap.find(target);
        if (it != hashMap.end()) {
            return it->second;  // Return all indices
        }
        return {};  // Not found
    }
    
    bool contains(int target) {
        return hashMap.find(target) != hashMap.end();
    }
};

🔬 Phân tích & Giải thích chi tiết

Comparison of Search Algorithms

AlgorithmTime ComplexitySpacePrerequisitesBest Use Case
LinearO(n)O(1)NoneSmall datasets, unsorted data
BinaryO(log n)O(1)Sorted dataLarge sorted datasets
HashO(1) avgO(n)Hash functionFrequent lookups
InterpolationO(log log n)O(1)Sorted, uniformUniformly distributed data
ExponentialO(log n)O(1)SortedUnknown size, infinite arrays
JumpO(√n)O(1)SortedBlock-based storage

Khi nào sử dụng thuật toán nào?

Linear Search:

  • Unsorted data
  • Small datasets (< 100 elements)
  • Simple implementation needed
  • Memory-constrained environments

Binary Search:

  • Large sorted datasets
  • Repeated searches on same data
  • When sorting cost is amortized
  • Memory access patterns matter

Hash-based Search:

  • Frequent lookups
  • Memory available for hash table
  • Non-comparison based search needed
  • When O(1) access is critical

Search trong Different Data Structures

cpp
// Array-based search
vector<int> arr = {1, 3, 5, 7, 9, 11, 13, 15};

// Binary search tree search
struct TreeNode {
    int val;
    TreeNode* left;
    TreeNode* right;
    TreeNode(int x) : val(x), left(nullptr), right(nullptr) {}
};

bool searchBST(TreeNode* root, int target) {
    if (!root) return false;
    
    if (root->val == target) {
        return true;
    } else if (target < root->val) {
        return searchBST(root->left, target);
    } else {
        return searchBST(root->right, target);
    }
}

// Trie (Prefix Tree) for string search
struct TrieNode {
    unordered_map<char, TrieNode*> children;
    bool isEndOfWord;
    
    TrieNode() : isEndOfWord(false) {}
};

class Trie {
private:
    TrieNode* root;
    
public:
    Trie() : root(new TrieNode()) {}
    
    void insert(const string& word) {
        TrieNode* current = root;
        for (char ch : word) {
            if (current->children.find(ch) == current->children.end()) {
                current->children[ch] = new TrieNode();
            }
            current = current->children[ch];
        }
        current->isEndOfWord = true;
    }
    
    bool search(const string& word) {
        TrieNode* current = root;
        for (char ch : word) {
            if (current->children.find(ch) == current->children.end()) {
                return false;
            }
            current = current->children[ch];
        }
        return current->isEndOfWord;
    }
    
    bool startsWith(const string& prefix) {
        TrieNode* current = root;
        for (char ch : prefix) {
            if (current->children.find(ch) == current->children.end()) {
                return false;
            }
            current = current->children[ch];
        }
        return true;
    }
};

💻 Ví dụ minh họa

Ví dụ 1: Search Performance Benchmark

cpp
#include <iostream>
#include <vector>
#include <chrono>
#include <random>
#include <algorithm>
#include <unordered_set>
using namespace std;

class SearchBenchmark {
private:
    mt19937 rng;
    
public:
    SearchBenchmark() : rng(chrono::steady_clock::now().time_since_epoch().count()) {}
    
    vector<int> generateSortedData(size_t size) {
        vector<int> data;
        for (int i = 0; i < size; i++) {
            data.push_back(i * 2);  // Even numbers: 0, 2, 4, 6, ...
        }
        return data;
    }
    
    vector<int> generateRandomTargets(const vector<int>& data, size_t count) {
        vector<int> targets;
        uniform_int_distribution<int> dist(0, data.back());
        
        for (size_t i = 0; i < count; i++) {
            targets.push_back(dist(rng));
        }
        return targets;
    }
    
    template<typename SearchFunction>
    chrono::microseconds timeSearch(SearchFunction searchFunc, 
                                   const vector<int>& data,
                                   const vector<int>& targets,
                                   const string& name) {
        int foundCount = 0;
        
        auto start = chrono::high_resolution_clock::now();
        
        for (int target : targets) {
            if (searchFunc(data, target) != -1) {
                foundCount++;
            }
        }
        
        auto end = chrono::high_resolution_clock::now();
        auto duration = chrono::duration_cast<chrono::microseconds>(end - start);
        
        cout << name << ": " << duration.count() << " μs (found " 
             << foundCount << "/" << targets.size() << ")" << endl;
        
        return duration;
    }
    
    void runBenchmark() {
        cout << "=== SEARCH ALGORITHMS PERFORMANCE COMPARISON ===" << endl;
        
        vector<size_t> sizes = {1000, 10000, 100000};
        const size_t NUM_SEARCHES = 1000;
        
        for (size_t size : sizes) {
            cout << "\n📊 Data size: " << size << " elements, " 
                 << NUM_SEARCHES << " searches" << endl;
            cout << string(60, '-') << endl;
            
            vector<int> data = generateSortedData(size);
            vector<int> targets = generateRandomTargets(data, NUM_SEARCHES);
            
            // Linear Search
            timeSearch([](const vector<int>& arr, int target) {
                return linearSearch(arr, target);
            }, data, targets, "Linear Search O(n)");
            
            // Binary Search
            timeSearch([](const vector<int>& arr, int target) {
                return binarySearch(arr, target);
            }, data, targets, "Binary Search O(log n)");
            
            // STL Binary Search (with find)
            timeSearch([](const vector<int>& arr, int target) {
                auto it = lower_bound(arr.begin(), arr.end(), target);
                return (it != arr.end() && *it == target) ? 
                       (it - arr.begin()) : -1;
            }, data, targets, "STL Binary Search");
            
            // Interpolation Search
            timeSearch([](const vector<int>& arr, int target) {
                return interpolationSearch(arr, target);
            }, data, targets, "Interpolation Search");
            
            // Hash-based search (with preprocessing)
            unordered_set<int> hashSet(data.begin(), data.end());
            auto hashStart = chrono::high_resolution_clock::now();
            int hashFound = 0;
            for (int target : targets) {
                if (hashSet.find(target) != hashSet.end()) {
                    hashFound++;
                }
            }
            auto hashEnd = chrono::high_resolution_clock::now();
            auto hashDuration = chrono::duration_cast<chrono::microseconds>(hashEnd - hashStart);
            
            cout << "Hash Search O(1): " << hashDuration.count() 
                 << " μs (found " << hashFound << "/" << targets.size() 
                 << ") *excluding preprocessing" << endl;
        }
    }
};

int main() {
    SearchBenchmark benchmark;
    benchmark.runBenchmark();
    
    return 0;
}

Ví dụ 2: Real-world Search Applications

cpp
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <unordered_map>
#include <map>
using namespace std;

// Product search in e-commerce
struct Product {
    int id;
    string name;
    string category;
    double price;
    vector<string> tags;
    
    Product(int i, const string& n, const string& c, double p, const vector<string>& t)
        : id(i), name(n), category(c), price(p), tags(t) {}
};

class ProductSearchEngine {
private:
    vector<Product> products;
    unordered_map<string, vector<int>> nameIndex;      // name -> product indices
    unordered_map<string, vector<int>> categoryIndex;  // category -> product indices
    unordered_map<string, vector<int>> tagIndex;       // tag -> product indices
    map<double, vector<int>> priceIndex;               // price -> product indices (sorted)
    
public:
    void addProduct(const Product& product) {
        int index = products.size();
        products.push_back(product);
        
        // Build indices
        nameIndex[product.name].push_back(index);
        categoryIndex[product.category].push_back(index);
        priceIndex[product.price].push_back(index);
        
        for (const string& tag : product.tags) {
            tagIndex[tag].push_back(index);
        }
    }
    
    // Exact name search - O(1)
    vector<Product> searchByName(const string& name) {
        vector<Product> results;
        auto it = nameIndex.find(name);
        if (it != nameIndex.end()) {
            for (int index : it->second) {
                results.push_back(products[index]);
            }
        }
        return results;
    }
    
    // Category search - O(1)
    vector<Product> searchByCategory(const string& category) {
        vector<Product> results;
        auto it = categoryIndex.find(category);
        if (it != categoryIndex.end()) {
            for (int index : it->second) {
                results.push_back(products[index]);
            }
        }
        return results;
    }
    
    // Price range search using binary search - O(log n + k)
    vector<Product> searchByPriceRange(double minPrice, double maxPrice) {
        vector<Product> results;
        
        auto start = priceIndex.lower_bound(minPrice);
        auto end = priceIndex.upper_bound(maxPrice);
        
        for (auto it = start; it != end; ++it) {
            for (int index : it->second) {
                results.push_back(products[index]);
            }
        }
        
        return results;
    }
    
    // Fuzzy name search using linear search with substring matching
    vector<Product> fuzzySearchByName(const string& query) {
        vector<Product> results;
        string lowerQuery = query;
        transform(lowerQuery.begin(), lowerQuery.end(), lowerQuery.begin(), ::tolower);
        
        for (const Product& product : products) {
            string lowerName = product.name;
            transform(lowerName.begin(), lowerName.end(), lowerName.begin(), ::tolower);
            
            if (lowerName.find(lowerQuery) != string::npos) {
                results.push_back(product);
            }
        }
        
        return results;
    }
    
    // Tag-based search - O(1)
    vector<Product> searchByTag(const string& tag) {
        vector<Product> results;
        auto it = tagIndex.find(tag);
        if (it != tagIndex.end()) {
            for (int index : it->second) {
                results.push_back(products[index]);
            }
        }
        return results;
    }
    
    void printProducts(const vector<Product>& products, const string& title) {
        cout << "\n🔍 " << title << " (" << products.size() << " results):" << endl;
        for (const Product& p : products) {
            cout << "  [" << p.id << "] " << p.name 
                 << " - $" << p.price << " (" << p.category << ")" << endl;
        }
    }
    
    void demonstrateSearch() {
        cout << "=== E-COMMERCE PRODUCT SEARCH DEMO ===" << endl;
        
        // Search demonstrations
        auto laptops = searchByCategory("Electronics");
        printProducts(laptops, "Electronics Category");
        
        auto budgetProducts = searchByPriceRange(50.0, 200.0);
        printProducts(budgetProducts, "Price Range $50-$200");
        
        auto phoneResults = fuzzySearchByName("phone");
        printProducts(phoneResults, "Fuzzy Search 'phone'");
        
        auto wirelessProducts = searchByTag("wireless");
        printProducts(wirelessProducts, "Tag 'wireless'");
    }
};

// Contact search in address book
struct Contact {
    string name;
    string phone;
    string email;
    string company;
    
    Contact(const string& n, const string& p, const string& e, const string& c)
        : name(n), phone(p), email(e), company(c) {}
};

class ContactSearchEngine {
private:
    vector<Contact> contacts;
    
public:
    void addContact(const Contact& contact) {
        contacts.push_back(contact);
        // Keep contacts sorted by name for binary search
        sort(contacts.begin(), contacts.end(), 
             [](const Contact& a, const Contact& b) {
                 return a.name < b.name;
             });
    }
    
    // Binary search by name
    Contact* searchByNameExact(const string& name) {
        auto comparator = [](const Contact& contact, const string& name) {
            return contact.name < name;
        };
        
        auto it = lower_bound(contacts.begin(), contacts.end(), name, comparator);
        
        if (it != contacts.end() && it->name == name) {
            return &(*it);
        }
        return nullptr;
    }
    
    // Prefix search using binary search
    vector<Contact> searchByNamePrefix(const string& prefix) {
        vector<Contact> results;
        
        auto start = lower_bound(contacts.begin(), contacts.end(), prefix,
                                [](const Contact& contact, const string& prefix) {
                                    return contact.name < prefix;
                                });
        
        for (auto it = start; it != contacts.end(); ++it) {
            if (it->name.substr(0, prefix.length()) == prefix) {
                results.push_back(*it);
            } else {
                break;  // No more matches
            }
        }
        
        return results;
    }
    
    // Linear search by phone (since phones aren't sorted)
    Contact* searchByPhone(const string& phone) {
        for (Contact& contact : contacts) {
            if (contact.phone == phone) {
                return &contact;
            }
        }
        return nullptr;
    }
    
    void demonstrateContactSearch() {
        cout << "\n=== CONTACT SEARCH DEMO ===" << endl;
        
        // Exact name search
        Contact* john = searchByNameExact("John Smith");
        if (john) {
            cout << "\n📞 Found contact: " << john->name 
                 << " - " << john->phone << endl;
        }
        
        // Prefix search
        vector<Contact> matches = searchByNamePrefix("John");
        cout << "\n📇 Contacts starting with 'John' (" 
             << matches.size() << " found):" << endl;
        for (const Contact& c : matches) {
            cout << "  " << c.name << " - " << c.phone << endl;
        }
        
        // Phone search
        Contact* byPhone = searchByPhone("555-0101");
        if (byPhone) {
            cout << "\n📱 Found by phone: " << byPhone->name << endl;
        }
    }
};

int main() {
    // Product search demo
    ProductSearchEngine productEngine;
    
    // Add sample products
    productEngine.addProduct(Product(1, "iPhone 15", "Electronics", 999.99, {"phone", "apple", "wireless"}));
    productEngine.addProduct(Product(2, "Samsung Galaxy", "Electronics", 899.99, {"phone", "android", "wireless"}));
    productEngine.addProduct(Product(3, "MacBook Pro", "Electronics", 1999.99, {"laptop", "apple"}));
    productEngine.addProduct(Product(4, "Coffee Mug", "Home", 15.99, {"kitchen", "ceramic"}));
    productEngine.addProduct(Product(5, "Wireless Headphones", "Electronics", 299.99, {"audio", "wireless", "bluetooth"}));
    productEngine.addProduct(Product(6, "Desk Chair", "Furniture", 199.99, {"office", "ergonomic"}));
    
    productEngine.demonstrateSearch();
    
    // Contact search demo
    ContactSearchEngine contactEngine;
    
    contactEngine.addContact(Contact("Alice Johnson", "555-0101", "alice@email.com", "Tech Corp"));
    contactEngine.addContact(Contact("Bob Smith", "555-0102", "bob@email.com", "Design Inc"));
    contactEngine.addContact(Contact("John Doe", "555-0103", "john@email.com", "Startup LLC"));
    contactEngine.addContact(Contact("John Smith", "555-0104", "johnsmith@email.com", "Big Corp"));
    
    contactEngine.demonstrateContactSearch();
    
    return 0;
}

🏋️ Thực hành

Implement search system cho student database với multiple criteria:

cpp
struct Student {
    int id;
    string name;
    int age;
    double gpa;
    string major;
};

// TODO: Implement search functions:
// - searchByGPARange(double min, double max)
// - searchByAgeAndMajor(int age, string major)  
// - searchByNamePrefix(string prefix)
// - complexSearch(multiple criteria with AND/OR logic)

Bài tập 2: Search in 2D Matrix

Implement search trong 2D matrix với properties:

  • Rows sorted left to right
  • Columns sorted top to bottom
  • Optimize better than O(mn)

Bài tập 3: Autocomplete System

Build autocomplete system cho search suggestions:

  • Trie-based prefix matching
  • Ranking by frequency
  • Real-time suggestions as user types

Bài tập 4: Search Analytics

Implement search analytics system:

  • Track search queries và results
  • Popular searches ranking
  • Search performance metrics
  • Query optimization suggestions

Lời giải chi tiết

Bài tập 2 - 2D Matrix Search:

cpp
bool searchMatrix(vector<vector<int>>& matrix, int target) {
    if (matrix.empty() || matrix[0].empty()) {
        return false;
    }
    
    int m = matrix.size();
    int n = matrix[0].size();
    
    // Start from top-right corner
    int row = 0;
    int col = n - 1;
    
    while (row < m && col >= 0) {
        if (matrix[row][col] == target) {
            return true;
        } else if (matrix[row][col] > target) {
            col--;  // Move left
        } else {
            row++;  // Move down
        }
    }
    
    return false;
}

// Time complexity: O(m + n)
// Space complexity: O(1)

📋 Tóm tắt

Các điểm quan trọng

Search Algorithm Classification:

  • Linear O(n): Best for unsorted, small data
  • Binary O(log n): Requires sorted data, excellent for large datasets
  • Hash O(1): Best average case, requires extra space
  • Advanced: Interpolation, exponential, jump search for specific cases

Choosing the Right Algorithm:

  • Data size: Linear cho small, binary cho large
  • Data organization: Sorted vs unsorted
  • Search frequency: One-time vs repeated
  • Memory constraints: Space-time tradeoffs

Data Structure Impact:

  • Arrays: Direct indexing, cache-friendly
  • Trees: Logarithmic search, dynamic
  • Hash tables: Constant average time
  • Tries: Excellent cho prefix searches

Best Practices

Implementation:

  • Use STL binary search functions khi có thể
  • Consider preprocessing costs vs search benefits
  • Handle edge cases (empty data, not found)
  • Choose appropriate data structures

Optimization:

  • Index frequently searched fields
  • Use appropriate search algorithm cho use case
  • Consider memory vs time tradeoffs
  • Profile real performance với actual data

Chuẩn bị cho bài tiếp theo

Bài học tiếp theo sẽ tìm hiểu về Advanced Recursion - đệ quy nâng cao:

  • Classic Problems: Tower of Hanoi, N-Queens
  • Backtracking: Maze solving, sudoku solver
  • Tree Recursion: Binary tree traversals
  • Optimization: Memoization, tail recursion
  • Complexity Analysis: Recurrence relations

Search algorithms là foundation cho data retrieval. Master these concepts để build efficient data access patterns!

Khóa học C++ miễn phí