Chapter 4 Trees and Graphs

@smilence 2016-10-08T00:00:56.000000Z 字数 8213 阅读 7732

"The Rules"

推荐教材： http://people.cs.vt.edu/shaffer/Book/

1.关于Tree

//Definition for binary tree
  struct TreeNode {
      int val;
      TreeNode *left;
      TreeNode *right;
      TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 };

Keywords: Binary Search Tree, Balanced Tree, Full tree, Complete Tree
BST适用于快速检索（ $O(logn)$ )，通过in-order遍历可以ascending输出sorted data。
DFS Traversal: In-order traversal, pre-order and post-order.
注意尽量不要在DFS中调用有DFS操作的函数，可以通过统一两个函数的返回值的办法来合并。
对树的所有操作务必注意边界条件( root == NULL)

2.Trie: 又称prefix tree或字典树。N-aray tree, 方便提供string或string前缀的快速检索。 Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated.

class TrieNode {
private:
    char mContent;
    bool mMarker;
    vector<TrieNode*> mChildren;
public:
    Node() { mContent = ' '; mMarker = false; }
    ~Node() {}
    friend class Trie;
};
class Trie {
public:
    Trie();
    ~Trie();
    void addWord(string s);
    bool searchWord(string s);
    void deleteWord(string s);
private:
    TrieNode* root;
};

3.Heap与priority_queue
Heap首先是一个Binary complete tree，其次是它满足partial order；从具体实现上说通常用array来实现，节点的位置关系通过index体现。
值得注意的是，heap的插入操作逐层sift-up，耗时 $O( log(n) )$ ，与Binary Tree的插入相同。但Build heap通过sift-down所有非叶子节点实现，耗时 $O(n)$ ，小于BST的 $O( nlog(n) )$ 。

priority_queue是STL中heap的类模板。常用函数：top(),pop(),push(const val_type&),empty()，类似<stack>。
在初始化priority_queue时，应尽量使用其constructor，而非循环插入元素。在constructor模板中，需要声明comparison class，注意默认的param1 < param2对应max heap。

//example: min heap for nodes based on their values
struct comp{
    bool operator() ( Node * param1, Node *param2 ){
        return param1->val > param2->val;
    }
};
void buildHeap(){
    vector<Node *> vn;
    priority_queue<Node *, vector<Node *>,comp> heap( vn.begin(), vn.end() );
}

对于user-define的类型，也可以重载<预算符，而不声明comparison class。

4.Graph implementations
每个vertical用一个int类型的ID来代表，实际的数据可以存储在<vector>中。
a. adjacency matrix，space cost: $O( |V|^2 )$ ，适合密集graph, 可以用int[N][N]二维数组实现;
b. adjacency list, space cost: $O( |V| + |E|)$ , 适合稀疏graph, 可以用vector<list<int> >来实现,vector的size为 $|V|$ 。
注：对于undirected graph中的edge，可以视作双向edge；
而标记visit状态的变量，可以作为node本身的属性，或利用数组/hashtable来实现

5.Graph Traversal (DFS and BFS)

void graphTraverse( Graph *G ){
    //much depends on your implementation for the graph
    int v;
    for( v = 0; v != G->n(); v++)
        G->setMark( v, UNVISITED );
    for( v = 0; v != G->n(); v++)
        if( G->getMark(v) == UNVISITED )
            doTraverse(G,v);  // can be DFS or BFS here
}

//DFS
void DFS( Graph *G, int v ){
    preVisit( G, v );
    G->setMark( v, VISITING ); //use "VISITNG" to tag the state between previsit and postvisit
    for( int w = G->first(v); w != G->n(); w = G->next(v,w) )
        if( G->getMark(v) == UNVISITED )
            DFS( G, w);
    postVisit( G,v );
    G->setMark( v, VISITED );
}

//BFS
void BFS( Graph *G, int start ){
    int v,w;
    queue<int> Q;
    G->setMark( start, VISITING );
    Q.push( start );
    while(!Q.empty()){
        v = Q.front();
        Q.pop();
        visit( G, v );
        G->setMark( v ,VISITED);
        for( w = G->first(v); w != G->n(); w = G->next( v, w) )
            if( G->getMark(w) == UNVISITED ){
                //you can also choose to visit here so you visit "earlier"
                G->setMark(w) = VISITING;  //use "VISITING" to identify "candidates" for visits
                Q.push(w);
            }
    }
}

6.单源最短路径问题
Dijkstra算法，贪心算法( 局部最优解决定全局最优解），可以利用数学归纳法证明其正确性。
基本思路是找到局部最优的前驱节点，并据此update该节点的后驱节点，直到所有节点得到update。

class DijkElem{
public:
    int vertex, distance;
    DijkElem( int v = -1, int d = numeric_limit<int>::max() ){
        vertex = v;
        distance = d;
    }
}
bool operator<(const DijkElem &param1, const DijkElem &param2){
    return param1.distance> param2.distance; //considering we need a min heap here.
}
void Dijkstra( Graph *G, int *D, int s){
    int i,v,w;
    DijkElem temp;
    priority_queue<DijkElem> minHeap;
    //initialize distance array, every node considered to be infinitely far from source
    D = new int[G->n()];
    for( i = 0; i < G->n(); ++i){
        D[i] = numeric_limit<int>::max();
    }
    //start from the source;
    temp.vertex = s;
    temp.distance = 0;
    minHeap.push(temp);
    D[s] = 0;
    for( i = 0; i < G->n(); ++i ){
         //find a vertex with minimum distance each time
        do{
            if( minHeap.empty() ) return;
            temp = minHeap.top();
            minHeap.pop();
            v = temp.vertex;
        }while( G->getMark(v) == VISITED );
        G->setMark(v,VISITED);
        if( D[v] == numeric_limit<int>::max() )
            return;
        //update its children
        for( w = G->first(v); w != G->n(); w = G->next(v,w) ){
            if( D[w] > D[v] + G->weight(v,w) ){
                D[w] = D[v] + G->weight(v,w);
                temp.distance = D[w];
                temp.vertex = w;
                minHeap.push(w);
            }
        }
}

模式识别

1.subGraph 或 subTree 的局部最优解解问题
如果全局解可以分割成subTree或subGraph 互相独立的局部解（即局部最优解推出全局最优解），一般可以采用D&C策略，用DFS递归判断subtree或subgraph的解，结合当前节点返回局部解。另见Chapter 7 Backtracking问题的相关说明。

e.g.1 Check if a binary tree is balanced. ( CtCI: 4.1, Leetcode:Balanced Binary Tree )

// How to improve over this? (call another recursion within recursion)
bool isBalanced(TreeNode* root) {
    if (root == NULL) {
        return true;
    }
    if (abs(depth(root->left) - depth(root->right)) > 1)
        return false;
    return isBalanced(root->left) && isBalanced(root->right);
}
int depth(TreeNode* root) {
    if (root == NULL) {
        return 0;
    }
    return max(depth(root->left), depth(root->right)) + 1;
}

// Optimal solution with aggregating two recursions
bool isBalanced(TreeNode* root) {
    if (root == NULL) {
        return true;
    }
    return depth(root) != -1;
}
int depth(TreeNode* root) {
    if (root == NULL) {
        return 0;
    }
    int leftDepth = depth(root->left);
    if (leftDepth == -1) {
        return -1;
    }
    int rightDepth = depth(root->right);
    if (rightDepth == -1) {
        return -1;
    }
    if (abs(leftDepth-rightDepth) > 1) {
        return -1;
    }
    return max(leftDepth,rightDepth) + 1;
}

e.g.2 Check if a binary tree is a BST. ( CtCI: 4.5, Leetcode:Validate Binary Search Tree)
e.g.3 Check if T2 is a subtree of T1. ( CtCI: 4.8 )
e.g.4 Check if a graph has a cycle.
e.g.5 Given a binary tree, find its minimum depth.( Leetcode:Minimum Depth of Binary Tree）

    int minDepth(TreeNode *root) {
       if(root == NULL) return 0;       
       int left = 0, right = 0;
       if(root->left) left = minDepth(root->left);   
       if(root->right) right = minDepth(root->right);
       if(!left) return 1 + right;
       else if(!right) return 1 + left;
       else return 1 + min(left,right);
    }

2.Tree的path问题
一般来说，属于 $backtracking$ 问题。( 将在Recursion一章提到）
对于tree的单向path问题（每条path，每层只经过一个节点），则使用DFS来访问tree，可以利用一个vector<int> &path来记录路径（毕竟，对于backtracking来说，每次只需考虑一条path即可，无论这条path通往“胜利条件”与否），并在每次访问时判定是否满足“胜利条件”，返回bool值或在胜利情形下将当前的路径path记录下来。注意在postVisit节点时path.pop_back();

e.g.1 Given a binary tree, print all paths which sum to a give value.(CtCI: 4.9)
e.g.2 Given a binary tree, find all root-to-leaf paths which sum to a given value.(Leetcode: Path Sum II)
Solution: http://paste.ofcode.org/JvvHJVBCYtRRvLnKHw6puW

3.将Tree转换成其他Data Structure的问题
如果是分层转换，则使用BFS，用list<TreeNode *> parents与list<TreeNode *> children记录邻近的两层，代替<queue>。如果不是，则类似 1类问题，采用D&C策略，将root->left与root->right两个subtree分别转换成目标数据结构，返回“适当的变量”，再处理root节点。如果是将其他数据结构转换为tree，则类似：将两部分递归转换成subtree，然后处理当前节点。

e.g.1 Given a binary tree, creates linked lists of all the nodes at each depth( CtCI 4.4 )
e.g.2 Given a binary tree, flatten it to a linked list in-place. （Leetcode: Flatten Binary Tree to Linked List)
e.g.3 Convert a binary tree into a doubly linked list in place. (CtCI 17.13)
e.g.4 Convert a sorted array( increasing order ) to a balanced BST. ( CtCI 4.3, Leetcode: Convert Sorted Array to Binary Search Tree )

4.寻找特定节点
给定节点，寻找与之有相对关系的特定节点的问题。首先应该以当前节点为root 考虑当前子树（左支、右支）；如果不在当前子树，则向上考虑：a. 有parent节点，则沿parent节点逐层往上查找 b.无parent节点，则由root节点DFS查找给定节点；特别地，对于BST，则只需由root节点单向向下查找。

e.g. 1 Find the in-order successor of a given node, with parent links. (CtCI: 4.6)
e.g. 2 Find the in-order successor of a given node in BST, without parent links.

TreeNode *inorderSucc( TreeNode *given, TreeNode *root ){
    if(given == NULL )
        return NULL;
    if(given->right){
        given = given->right;
        while(given->left)
            given = given->right;
        return given;
    }
    TreeNode* succ = NULL;
    while(root && root != given){
        if( given->val <= root->val ){
            succ = root;
            root = root->left;
        }else{
            root = root->right;
        }
    }
    return succ;
}

e.g.3 Find the first common ancestor of two nodes in a binary tree, with .( CtCI 4.7 )
e.g.4 Find the immediate right neighbor of the given node, with parent links given, but without root node.( Ebay interview question)

TreeNode *findByLvl( TreeNode *root, int lvl ){
    if( root == NULL )
        return NULL;
    if( lvl == 0 )
        return root;
    TreeNode *left = findByLvl(root->left,lvl+1);
    if( left ) return left;
    else    return findByLvl(root->right,lvl+1);
}       
TreeNode* rNeighbor( TreeNode *given ){
    TreeNode *parent = given->parent;
    int level = -1;
    while(parent){
        while( parent && parent->left != given ){
            given = parent;
            parent = given->parent;
            level--;
        }
        if( parent == NULL )
            return NULL;
        TreeNode *local = findByLvl( parent->right, level + 1);
        if(local)
            return local;
        else{
            given = parent;
            parent = given->parent;
            level--;
        }
    }
    return NULL;        
}

5.Graph的访问
绝大多数关于Graph的interview question, 都基于简单的DFS或BFS。通常来说，与路径或最短路径相关的问题，用BFS解决；全局的判定问题，可以用DFS。
寻找最短路径的问题，原则上使用Dijkstra算法，特别地，对于unweighted graph，由于从source开始访问的最小深度就等效于到该节点的最短路径，因此Dijkstra算法就退化成BFS，min heap退化成<queue>，并且可以用distance[i] = -1来表示该节点unvisited。
如果需要记录路径，对于DFS可以用vector<int> &path单向记录，对于BFS可以记录每一个节点的前驱节点，即int prev[G->n()]，最后用递归来整理vector<int> path。

e.g.1 Given a directed graph,find out whether there is a route between two nodes ( CtCI: 4.2 )
e.g.2 Given two words (start and end), and a dictionary, find all shortest transformation sequence(s) from start to end, such that:
1. Only one letter can be changed at a time
2. Each intermediate word must exist in the dictionary ( CtCI: 18.10, Leetcode: Word Ladder II )
Solution: http://paste.ofcode.org/Vwd3BDguN3fdKQkJDRjeyN