@smilence
2016-10-17T06:50:39.000000Z
字数 7875
阅读 2198
算法面试指南
数据结构推荐教材: http://people.cs.vt.edu/shaffer/Book/
1.关于Tree
//Definition for binary tree
struct TreeNode {
int val;
TreeNode *left;
TreeNode *right;
TreeNode(int x) : val(x), left(NULL), right(NULL) {}
};
Keywords: Binary Search Tree, Balanced Tree, Full tree, Complete Tree
BST适用于快速检索( ),通过in-order遍历可以ascending输出sorted data。
DFS Traversal: In-order traversal, pre-order and post-order.
注意尽量不要在DFS中调用有DFS操作的函数,可以通过统一两个函数的返回值的办法来合并。
对树的所有操作务必注意边界条件( root == NULL
)
2.Trie: 又称prefix tree或字典树。N-aray tree, 方便提供string或string前缀的快速检索。 Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated.
class TrieNode {
private:
char mContent;
bool mMarker;
vector<TrieNode*> mChildren;
public:
Node() { mContent = ' '; mMarker = false; }
~Node() {}
friend class Trie;
};
class Trie {
public:
Trie();
~Trie();
void addWord(string s);
bool searchWord(string s);
void deleteWord(string s);
private:
TrieNode* root;
};
3.Heap与priority_queue
Heap首先是一个Binary complete tree,其次是它满足partial order;从具体实现上说通常用array来实现,节点的位置关系通过index体现。
值得注意的是,heap的插入操作逐层sift-up,耗时,与Binary Tree的插入相同。但Build heap通过sift-down所有非叶子节点实现,耗时,小于BST的。
priority_queue
是STL中heap的类模板。常用函数:top()
,pop()
,push(const val_type&)
,empty()
,类似<stack>
。
在初始化priority_queue
时,应尽量使用其constructor,而非循环插入元素。在constructor模板中,需要声明comparison class,注意默认的param1 < param2
对应max heap。
//example: min heap for nodes based on their values
struct comp{
bool operator() ( Node * param1, Node *param2 ){
return param1->val > param2->val;
}
};
void buildHeap(){
vector<Node *> vn;
priority_queue<Node *, vector<Node *>,comp> heap( vn.begin(), vn.end() );
}
对于user-define的类型,也可以重载<
预算符,而不声明comparison class。
4.Graph implementations
每个vertical用一个int类型的ID来代表,实际的数据可以存储在<vector>
中。
a. adjacency matrix,space cost: ,适合密集graph, 可以用int[N][N]
二维数组实现;
b. adjacency list, space cost: , 适合稀疏graph, 可以用vector<list<int> >
来实现,vector
的size为。
注:对于undirected graph中的edge,可以视作双向edge;
而标记visit状态的变量,可以作为node本身的属性,或利用数组/hashtable来实现
5.Graph Traversal (DFS and BFS)
void graphTraverse( Graph *G ){
//much depends on your implementation for the graph
int v;
for( v = 0; v != G->n(); v++)
G->setMark( v, UNVISITED );
for( v = 0; v != G->n(); v++)
if( G->getMark(v) == UNVISITED )
doTraverse(G,v); // can be DFS or BFS here
}
//DFS
void DFS( Graph *G, int v ){
preVisit( G, v );
G->setMark( v, VISITING ); //use "VISITNG" to tag the state between previsit and postvisit
for( int w = G->first(v); w != G->n(); w = G->next(v,w) )
if( G->getMark(v) == UNVISITED )
DFS( G, w);
postVisit( G,v );
G->setMark( v, VISITED );
}
//BFS
void BFS( Graph *G, int start ){
int v,w;
queue<int> Q;
G->setMark( start, VISITING );
Q.push( start );
while(!Q.empty()){
v = Q.front();
Q.pop();
visit( G, v );
G->setMark( v ,VISITED);
for( w = G->first(v); w != G->n(); w = G->next( v, w) )
if( G->getMark(w) == UNVISITED ){
//you can also choose to visit here so you visit "earlier"
G->setMark(w) = VISITING; //use "VISITING" to identify "candidates" for visits
Q.push(w);
}
}
}
6.单源最短路径问题
Dijkstra算法,贪心算法( 局部最优解决定全局最优解),可以利用数学归纳法证明其正确性。
基本思路是找到局部最优的前驱节点,并据此update该节点的后驱节点,直到所有节点得到update。
class DijkElem{
public:
int vertex, distance;
DijkElem( int v = -1, int d = numeric_limit<int>::max() ){
vertex = v;
distance = d;
}
}
bool operator<(const DijkElem ¶m1, const DijkElem ¶m2){
return param1.distance> param2.distance; //considering we need a min heap here.
}
void Dijkstra( Graph *G, int *D, int s){
int i,v,w;
DijkElem temp;
priority_queue<DijkElem> minHeap;
//initialize distance array, every node considered to be infinitely far from source
D = new int[G->n()];
for( i = 0; i < G->n(); ++i){
D[i] = numeric_limit<int>::max();
}
//start from the source;
temp.vertex = s;
temp.distance = 0;
minHeap.push(temp);
D[s] = 0;
for( i = 0; i < G->n(); ++i ){
//find a vertex with minimum distance each time
do{
if( minHeap.empty() ) return;
temp = minHeap.top();
minHeap.pop();
v = temp.vertex;
}while( G->getMark(v) == VISITED );
G->setMark(v,VISITED);
if( D[v] == numeric_limit<int>::max() )
return;
//update its children
for( w = G->first(v); w != G->n(); w = G->next(v,w) ){
if( D[w] > D[v] + G->weight(v,w) ){
D[w] = D[v] + G->weight(v,w);
temp.distance = D[w];
temp.vertex = w;
minHeap.push(w);
}
}
}
1.subGraph 或 subTree 的局部最优解解问题
如果全局解可以分割成subTree或subGraph 互相独立的局部解(即局部最优解推出全局最优解),一般可以采用D&C策略,用DFS递归判断subtree或subgraph的解,结合当前节点返回局部解。另见Chapter 7 Backtracking问题的相关说明。
e.g.1 Check if a binary tree is balanced. ( CtCI: 4.1, Leetcode:Balanced Binary Tree )
// How to improve over this? (call another recursion within recursion)
bool isBalanced(TreeNode* root) {
if (root == NULL) {
return true;
}
if (abs(depth(root->left) - depth(root->right)) > 1)
return false;
return isBalanced(root->left) && isBalanced(root->right);
}
int depth(TreeNode* root) {
if (root == NULL) {
return 0;
}
return max(depth(root->left), depth(root->right)) + 1;
}
// Optimal solution with aggregating two recursions
bool isBalanced(TreeNode* root) {
if (root == NULL) {
return true;
}
return depth(root) != -1;
}
int depth(TreeNode* root) {
if (root == NULL) {
return 0;
}
int leftDepth = depth(root->left);
if (leftDepth == -1) {
return -1;
}
int rightDepth = depth(root->right);
if (rightDepth == -1) {
return -1;
}
if (abs(leftDepth-rightDepth) > 1) {
return -1;
}
return max(leftDepth,rightDepth) + 1;
}
e.g.2 Check if a binary tree is a BST. ( CtCI: 4.5, Leetcode:Validate Binary Search Tree)
bool isValidBST(TreeNode *root,int min, int max){
if(!root) return true;
// visit before recursion
if(root->val <= min || root->val >= max) return false;
// how compiler would handle this part?
return isValidBST(root->left,min,root->val) && isValidBST(root->right,root->val,max);
}
bool isValidBST(TreeNode *root) {
return isValidBST(root,numeric_limits<int>::min(), numeric_limits<int>::max());
}
e.g.3 Check if T2 is a subtree of T1. ( CtCI: 4.8 )
e.g.4 Check if a graph has a cycle.
e.g.5 Given a binary tree, find its minimum depth.( Leetcode:Minimum Depth of Binary Tree)
2.将Tree转换成其他Data Structure的问题
如果是分层转换,则使用BFS,用list<TreeNode *> parents
与list<TreeNode *> children
记录邻近的两层,代替<queue>
。如果不是,则类似 1类问题,采用D&C策略,将root->left
与root->right
两个subtree分别转换成目标数据结构,返回“适当的变量”,再处理root
节点。如果是将其他数据结构转换为tree,则类似:将两部分递归转换成subtree,然后处理当前节点。
e.g.1 Given a binary tree, creates linked lists of all the nodes at each depth( CtCI 4.4 )
e.g.2 Given a binary tree, flatten it to a linked list in-place. (Leetcode: Flatten Binary Tree to Linked List)
e.g.3 Convert a binary tree into a doubly linked list in place. (CtCI 17.13)
e.g.4 Convert a sorted array( increasing order ) to a balanced BST. ( CtCI 4.3, Leetcode: Convert Sorted Array to Binary Search Tree )
1.Tree的path问题
一般来说,属于 问题。( 将在Recursion一章提到)
对于tree的单向path问题(每条path,每层只经过一个节点),则使用DFS来访问tree,可以利用一个vector<int> &path
来记录路径(毕竟,对于backtracking来说,每次只需考虑一条path即可,无论这条path通往“胜利条件”与否 ),并在每次访问时判定是否满足“胜利条件”,返回bool
值或在胜利情形下将当前的路径path
记录下来。注意在postVisit节点时path.pop_back();
e.g.1 Given a binary tree, print all paths which sum to a give value.(CtCI: 4.9)
e.g.2 Given a binary tree, find all root-to-leaf paths which sum to a given value.(Leetcode: Path Sum II)
2.寻找特定节点
给定节点,寻找与之有相对关系的特定节点的问题。首先应该以当前节点为root
考虑当前子树(左支、右支);如果不在当前子树,则向上考虑:a. 有parent节点,则沿parent节点逐层往上查找 b.无parent节点,则由root
节点出发查找给定节点;特别地,对于BST,则只需由root
节点单向向下查找。
至于选择DFS还是BFS作为查找手段,需要考虑全局查找的效率来决定
e.g. 1 Find the in-order successor of a given node, with parent links. (CtCI: 4.6)
e.g. 2 Find the in-order successor of a given node in BST, without parent links.
TreeNode *inorderSucc( TreeNode *given, TreeNode *root ){
if(given == NULL )
return NULL;
if(given->right){
given = given->right;
while(given->left)
given = given->right;
return given;
}
TreeNode* succ = NULL;
while(root && root != given){
if( given->val <= root->val ){
succ = root;
root = root->left;
}else{
root = root->right;
}
}
return succ;
}
e.g.3 Find the first common ancestor of two nodes in a binary tree, with parent links.( CtCI 4.7 )
e.g.4 Find the immediate right neighbor of the given node, with parent links given, but without root node.( Ebay interview question)
对树与图的操作未必是以subGraph与subTree为单位的Divide&Conquer,也可能是对node为单位的遍历。通常树与图的问题不外乎这两条思路。
Graph的访问
绝大多数关于Graph的interview question, 都基于简单的DFS或BFS。通常来说,与路径或最短路径相关的问题,用BFS解决;全局的判定问题,可以用DFS。
寻找最短路径的问题,原则上使用Dijkstra算法,特别地,对于unweighted graph,由于从source开始访问的最小深度就等效于到该节点的最短路径,因此Dijkstra算法就退化成BFS,min heap退化成<queue>
,并且可以用distance[i] = -1
来表示该节点unvisited。
如果需要记录路径,对于DFS可以用vector<int> &path
单向记录,对于BFS可以记录每一个节点的前驱节点,即int prev[G->n()]
,最后用递归来整理vector<int> path
。
e.g.1 Given a directed graph,find out whether there is a route between two nodes ( CtCI: 4.2 )
e.g.2 Given two words (start and end), and a dictionary, find all shortest transformation sequence(s) from start to end, such that:
1. Only one letter can be changed at a time
2. Each intermediate word must exist in the dictionary ( CtCI: 18.10, Leetcode: Word Ladder II )