LCR062-LCR067前缀树

剑指offer（专项突破版）10 前缀树

LCR062.实现前缀树

分析

class Trie {
private:
    vector<Trie*> children;
    bool isEnd;
    Trie* searchPrefix(string prefix) {
        Trie* node = this;
        for(char ch : prefix){
            ch -= 'a';
            if(node->children[ch] == NULL){
                return NULL;
            }
            node = node->children[ch];
        }
        return node;
    }

public:
    /** Initialize your data structure here. */
    Trie() : children(26), isEnd(false) {}
    
    /** Inserts a word into the trie. */
    void insert(string word) {
        // 初始化node
        Trie* node = this;
        // 遍历每一个字符
        for(char ch : word){
            ch -= 'a';
            // 如果没有这个字符，new一个结点
            if(node->children[ch] == NULL){
                node->children[ch] = new Trie();
            }
            node = node->children[ch];
        }
        node->isEnd = true;
    }
    
    /** Returns if the word is in the trie. */
    bool search(string word) {
        // 初始化node
        Trie* node = this->searchPrefix(word);
        return node != NULL && node->isEnd;
    }
    
    /** Returns if there is any word in the trie that starts with the given prefix. */
    bool startsWith(string prefix) {
        return this->searchPrefix(prefix) != NULL;
    }
};

/**
 * Your Trie object will be instantiated and called as such:
 * Trie* obj = new Trie();
 * obj->insert(word);
 * bool param_2 = obj->search(word);
 * bool param_3 = obj->startsWith(prefix);
 */

时间复杂度：如果输入的单词的长度为n，那么函数insert、search和startWith的时间复杂度都是O（n）。
空间复杂度：$O(∣T∣⋅Σ)$，其中$|T|$为所有插入字符串的长度之和，$Σ$为字符集的大小，本题$Σ=26$。

LCR062结果

LCR063.单词替换

分析

对于句子的每个单词，判断字典中有没有其前缀，如果有，就用前缀替换它。如果有多个满足的前缀，则用最短的那个，即每次判断isEnd，为true就直接返回，不需要再找下一个节点。

// 前缀树
class Trie{
private:
    vector<Trie*> children;
    bool isEnd;
public:
    // 初始化
    Trie() : children(26), isEnd(false) {}
    // 插入单词
    void insert(string prefix){
        Trie* node = this;
        for(char ch : prefix){
            if(node->children[ch-'a'] == NULL){
                node->children[ch-'a'] = new Trie();
            }
            node = node->children[ch-'a'];
        }
        node->isEnd = true;
    }
    // 查找前缀
    string searchPrefix(string prefix){
        Trie* node = this;
        string res = "";
        for(char ch : prefix){
            if(node->children[ch-'a'] == NULL){
                break;
            }
            res += ch;
            node = node->children[ch-'a'];
            if(node->isEnd){
                return res;
            }  
        }
        return prefix;
    }
};
class Solution {
public:
    string replaceWords(vector<string>& dictionary, string sentence) {
        // 建立前缀树
        Trie* root = new Trie();
        for(string word : dictionary){
            root->insert(word);
        }

        // 按照空格分隔字符串
        istringstream iss(sentence);
        string str;
        string res = "";
        while(iss >> str){
            res += root->searchPrefix(str);
            res += " ";
        }
        // 返回结果
        res = res.substr(0, res.length()-1);
        return res;
    }
};

时间复杂度：O((N+K)⋅L)，N为词典的单词总数，K为句子中的单词总数，L 是单词的平均长度。
空间复杂度：O((N+K)⋅L)

LCR063结果

LCR064.实现一个魔法字典

分析

首先建立一棵前缀树，然后根据深度优先的顺序搜索前缀树的每条路径。如果到达的节点与字符串中的字符不匹配，则表示此时修改了字符串中的一个字符以匹配前缀树中的路径。如果到达对应字符串最后一个字符对应的节点时该节点的isWord字段的值为true，而且此时正好修改了字符串中的一个字符，那么就找到了修改字符串中一个字符对应的路径，符合题目的条件，可以返回true。

// 构造前缀树
class Trie{
public:
    vector<Trie*> children;
    bool isEnd;
    Trie() : children(26), isEnd(false){}
    // 插入单词
    void insert(string prefix){
        Trie* node = this;
        for(char ch : prefix){
            if(node->children[ch-'a'] == NULL){
                node->children[ch-'a'] = new Trie();
            }
            node = node->children[ch-'a'];
        }
        node->isEnd = true;
    }
};
class MagicDictionary {
public:
    /** Initialize your data structure here. */
    Trie* root;
    MagicDictionary() {
        root = new Trie();
    }
    
    void buildDict(vector<string> dictionary) {
        for(string str : dictionary){
            root->insert(str);
        }
    }

    bool dfs(const string& word, Trie* node, int pos, int modified) {
        if (!node) {
            return false;
        }
        if (pos == word.length()) {
            return node->isEnd && modified == 1;
        }

        int idx = word[pos] - 'a';

        // 尝试不修改当前字符继续搜索
        if (node->children[idx] && dfs(word, node->children[idx], pos + 1, modified)) {
            return true;
        }

        // 尝试修改当前字符后继续搜索
        if (modified == 0) {
            for (int i = 0; i < 26; ++i) {
                if (i != idx && node->children[i]) {
                    if (dfs(word, node->children[i], pos + 1, modified + 1)) {
                        return true;
                    }
                }
            }
        }

        return false;
    }
    
    bool search(string searchWord) {
        return dfs(searchWord, root, 0, 0);
    }
};

/**
 * Your MagicDictionary object will be instantiated and called as such:
 * MagicDictionary* obj = new MagicDictionary();
 * obj->buildDict(dictionary);
 * bool param_2 = obj->search(searchWord);
 */

LCR064复杂度分析

LCR064结果

LCR065.单词的压缩编码

分析

首先反转每个单词，由于在最短编码之中出现的每个单词之后都有一个字符’＃’，因此计算长度时出现的每个单词的长度都要加1。在前缀树中统计路径长度时，可以统计从根节点到每个叶节点的路径的长度。前缀树的根节点并不对应单词的任何字符，在统计路径时将根节点包括进去相当于将单词的长度加1。通常用深度优先遍历的算法统计路径的长度。

// 构造前缀树
class Trie{
public:
    vector<Trie*> childen;
    bool isEnd;

    Trie() : childen(26), isEnd(false) {}

    void insert(string prefix){
        Trie* node = this;
        for(char ch : prefix){
            if(node->childen[ch-'a'] == NULL){
                node->childen[ch-'a'] = new Trie();
            }
            node = node->childen[ch-'a'];
        }
        node->isEnd = true;
    }
};

class Solution {
public:
    int minimumLengthEncoding(vector<string>& words) {
        Trie* root = new Trie();
        // 逆转字符串
        for(string str :words){
            reverse(str.begin(), str.end());
            // 插入前缀树
            root->insert(str);
        }
        // 计算长度
        int res = 0;
        dfs(root, 1, res);
        return res;
    }
    void dfs(Trie* root, int curLen, int& res){
        bool isLeaf = true;
        for(Trie* child : root->childen){
            if(child){
                isLeaf = false;
                dfs(child, curLen + 1, res);
            }
        }
        if(isLeaf){
            res += curLen;
        }
    }
};

时间复杂度：O(N⋅L)，N为单词个数，L为单词平均长度。
空间复杂度：构造前缀树的空间复杂度为O(N⋅L)。DFS递归栈的空间复杂度为O(L)，整体空间复杂度为O(N⋅L)。

LCR065结果

LCR066.单词之和

分析

在查找每个包含给定前缀的单词时，需要加上它的值，因此可以在构造前缀树时提供一个val。因为val是大于等于1的，所以可以取消之前的isEnd字段，用一个整型来替代，当val=0时就不是单词，但是也不需要关注，可以直接相加。


class Trie{
public:
    vector<Trie*> children;
    int val;

    Trie() : children(26), val(0) {}
};
class MapSum {
public:
    Trie* root;
    /** Initialize your data structure here. */
    MapSum() {
        root = new Trie();
    }
    
    void insert(string key, int val) {
        Trie* node = root;
        for(char ch : key){
            if(node->children[ch-'a'] == NULL){
                node->children[ch-'a'] = new Trie();
            }
            node = node->children[ch-'a'];
        }
        node->val = val;
    }
    
    int sum(string prefix) {
        int res = 0;
        Trie* node = root;
        // 先找到前缀处
        for(char ch : prefix){
            if(node->children[ch-'a'] == NULL){
                return 0;
            }
            node = node->children[ch-'a'];
        }
        // 从前缀处除法计算各条路线的和
        dfs(node, res);
        return res;
    }
    void dfs(Trie* pre, int& res){
        if(pre == NULL){
            return;
        }
        Trie* node = pre;
        res += node->val;
        for(Trie* child : node->children){
            dfs(child, res);
        }
    }
};

/**
 * Your MapSum object will be instantiated and called as such:
 * MapSum* obj = new MapSum();
 * obj->insert(key,val);
 * int param_2 = obj->sum(prefix);
 */

LCR066结果

LCR067.数组中两个数的最大异或值

分析

可以利用前缀树，相同前缀越长的其异或值越小。然后遍历每个数字，找到对于它来说异或值最大的。因为每个数字都是32位的，因此无需标记isEnd。

class Trie{
public:
    vector<Trie*> children;

    Trie(): children(2) {}

    void insert(int num){
        Trie* node  = this;
        // 通过右移来遍历数字的每一位
        for(int i = 31; i >=0 ; i--){
            int bit = (num >> i) & 1;
            if(node->children[bit] == NULL){
                node->children[bit] = new Trie();
            }
            node = node->children[bit];
        }
    }
};
class Solution {
public:
    
    int findMaximumXOR(vector<int>& nums) {
        // 建立前缀树
        Trie* root = new Trie();
        for(int num : nums){
            root->insert(num);
        }
        // 对每个数字遍历
        int res = 0;
        for(int num : nums){
            int cur = 0;
            Trie* node = root;
            for(int i = 31; i >= 0; i--){
                int bit = (num >> i) & 1;
                // 1-bit是为了找到和当前位相反的数字位，如果不为空，则顺着它遍历
                if(node->children[1- bit]){
                    cur = (cur << 1) + 1;
                    node = node->children[1 - bit];
                }
                // 如果没有相反的，只能顺着相同的找
                else{
                    cur = cur << 1;
                    node = node->children[bit];
                }
            }
            res = max(res, cur);
        }
        return res;
    }
};

时间复杂度：建立前缀树和findMaximumXOR都有两层循环。第1层循环逐个扫描数组中的每个整数，而第2层循环的执行次数是32次，是一个常数。如果数组nums的长度为n，那么这种算法的时间复杂度是O（n）。
空间复杂度：O(n)。

LCR067结果

总结

本章介绍了前缀树这种数据结构。前缀树通常用来保存字符串，它的节点和字符串的字符对应，而路径和字符串对应。如果只考虑英文小写字母，那么前缀树的每个节点有26个子节点。为了标注某些节点和字符串的最后一个字符对应，前缀树节点中通常需要一个布尔类型的字段。

前缀树经常用来解决与字符串查找相关的问题。和哈希表相比，在前缀树中查找更灵活。既可以从哈希表中找出所有以某个前缀开头的所有单词，也可以找出修改了一个（或多个）字符的字符串。

使用前缀树解决问题通常需要两步：第1步是创建前缀树，第2步是在前缀树中查找。虽然相关的面试题千变万化，但这两步及其代码却大同小异。如果应聘者能够熟练地编写出创建前缀树和在前缀树中查找的正确代码，那么就能得心应手地解决与前缀树相关的面试题。

算法笔记 > 前缀树

#剑指Offer

LCR062-LCR067前缀树

http://example.com/2024/05/22/posts/LCR062/

作者

Xuan Yang

发布于

2024年5月22日

许可协议

LCR068-LCR073二分查找上一篇

LCR059-LCR061堆的应用下一篇