@Spongcer
2015-04-10T06:49:50.000000Z
字数 5417
阅读 1819
PG
test=# EXPLAIN SELECT * FROM T1 WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1) ;QUERY PLAN---------------------------------------------------------Seq Scan on t1 (cost=0.00..45.40 rows=1 width=16)Filter: ((a = 1) AND ((b = 1) OR (c = 1) OR (d = 1)))(2 行记录)test=#
/** process_duplicate_ors* Given a list of exprs which are ORed together, try to apply* the inverse OR distributive law.** Returns the resulting expression (could be an AND clause, an OR* clause, or maybe even a single subexpression).*//** 假设表T1有四列:A、B、C、D。** 这个函数处理这种情况,对于一个选择,SELECT * FROM TEST* WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1);** 语句中的WHERE条件:* (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)* 可以改写为:* (A=1)AND (B=1 OR C=1 OR D=1)* 这就是这个函数的主要功能。** 这个函数的参数是一个list, 对于上述的WHERE条件,orlist的结构如下:* orlist中有一个元素,是OR_EXPR类型的BoolExpr,BoolExpr中的结构如下:* typedef struct BoolExpr* {* Expr xpr; = 略* BoolExprType boolop; = OR_EXPR* List *args; = OR中的3个条件,即(A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)* bool plusFlag; = 略* } BoolExpr;** 下面分析函数的具体实现,大致的步骤为:* 1)分析每个OR中的公共项, 2)提取公共项, 3)合并剩余项为AND。*/static Expr *process_duplicate_ors(List *orlist){List *reference = NIL;int num_subclauses = 0;List *winners;List *neworlist;ListCell *temp;if (orlist == NIL)return NULL; /* probably can't happen *//* 如果只有一个。。。。,那就算了吧 */if (list_length(orlist) == 1) /* single-expression OR (can this* happen?) */return linitial(orlist);/** Choose the shortest AND clause as the reference list --- obviously, any* subclause not in this clause isn't in all the clauses. If we find a* clause that's not an AND, we can treat it as a one-element AND clause,* which necessarily wins as shortest.*//** “找最短”。* 在WHERE语句中:* (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)* OR操作串联了3个子语句,找到其中最短的一个,因为如果有公共项,那么最短的那个也一定* 包含公共项,那么通过找到最短的那个,在后面的操作里能减少 比较 的次数。* 在上面的WHERE语句中,3个子语句的长度相同,按照如下执行过程,找到的应该是(A=1 AND B=1),* 即第一个。*/foreach(temp, orlist){Expr *clause = (Expr *) lfirst(temp);if (and_clause((Node *) clause)){List *subclauses = ((BoolExpr *) clause)->args;int nclauses = list_length(subclauses);/** 这里判断子语句里的长度,比如对于(A=1 AND B=1)子语句,* 他实际上是一个AND连接起来的两个 子子语句, 那么他的长度就是2。** 通过nclauses记录最短的子语句,如果有更短的(nclauses < num_subclauses),* 那么就替换成最短的。*/if (reference == NIL || nclauses < num_subclauses){reference = subclauses;num_subclauses = nclauses;}}else{/** 还有一种情况, 就是可能子句不是一个AND语句,这样看上去不大符合规则,* 那么把他看做一个整体,那这个就是最短元素。******************************** 如果代码执行到这里,那么只有两种情况:* 一种是 ... WHERE (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1)。* 一种是 ... WHERE ((A=1 OR C=1) AND B=1) OR (A=1 OR C=1).* 如果是这两种情况,都可以做如下简化:* 第一种情况简化为 A=1* 第二种情况化简为 (A=1 OR C=1)** 第三种情况待补充...*/reference = list_make1(clause);break;}}/** Just in case, eliminate any duplicates in the reference list.*//* 找到最短的, 存到List */reference = list_union(NIL, reference);/** Check each element of the reference list to see if it's in all the OR* clauses. Build a new list of winning clauses.*//** “找公共项”。** NOTE:这时候就能体现“找最短”带来的优势,外层循环次数会少一些。** 如果WHERE语句是:* (A=1 AND B=1) OR (A=1 AND C=1) OR (A=1 AND D=1)* “找最短”中找到的一定是(A=1 AND B=1)。* 则外层会有两次循环...(foreach(temp, reference)),两次循环的变量分别为* A=1 和 B=1。* 内层有三次循环...(foreach(temp2, orlist)),三次循环的变量分别为* (A=1 AND B=1) 和 (A=1 AND C=1) 和 (A=1 AND D=1)** 示例如下:* 假如现在外层循环第一次执行,即查找A=1的公共项,进而假如内层循环也是第一次执行,* 即在(A=1 AND B=1)中查找是否存在A=1这个公共项,发现是存在的(list_member),* 则依次判断内层循环的第二个子句...** 如上例,具体来说,这些循环分别作的操作是:* 外层第一次:* 判断A=1是否在(A=1 AND B=1),在,判断下一个* 判断A=1是否在(A=1 AND C=1),在,判断下一个* 判断A=1是否在(A=1 AND D=1),在,A=1是公共项,记录(winners = lappend...)* 外层第二次:* 判断B=1是否在(A=1 AND B=1),在,判断下一个* 判断B=1是否在(A=1 AND C=1),不在,跳出循环,下一个不用判断了。* 判断B=1是否在(A=1 AND D=1),未执行,因为上一个不含公共项,就不可能提取了。*/winners = NIL;foreach(temp, reference){Expr *refclause = (Expr *) lfirst(temp);bool win = true;ListCell *temp2;foreach(temp2, orlist){Expr *clause = (Expr *) lfirst(temp2);if (and_clause((Node *) clause)){if (!list_member(((BoolExpr *) clause)->args, refclause)){win = false;break;}}else{if (!equal(refclause, clause)){win = false;break;}}}if (win)winners = lappend(winners, refclause);}/** If no winners, we can't transform the OR*/if (winners == NIL)return make_orclause(orlist);/** Generate new OR list consisting of the remaining sub-clauses.** If any clause degenerates to empty, then we have a situation like (A* AND B) OR (A), which can be reduced to just A --- that is, the* additional conditions in other arms of the OR are irrelevant.** Note that because we use list_difference, any multiple occurrences of a* winning clause in an AND sub-clause will be removed automatically.*//** “提取公共项”。* 用list_difference删除公共项,实现细节不在赘述。*/neworlist = NIL;foreach(temp, orlist){Expr *clause = (Expr *) lfirst(temp);if (and_clause((Node *) clause)){List *subclauses = ((BoolExpr *) clause)->args;/* 看这里...看这里..., 消除公共项 */subclauses = list_difference(subclauses, winners);if (subclauses != NIL){/* 消除后,剩余的拼接起来,拼接成:(B=1 OR C=1 OR D=1)*/if (list_length(subclauses) == 1)neworlist = lappend(neworlist, linitial(subclauses));elseneworlist = lappend(neworlist, make_andclause(subclauses));}else{/** 这说明子语句中,有一个全部是公共项,也就是如下形式:* ... WHERE (A=1 AND B=1) OR (A=1)** 这时候公共项是A=1,第一个子句是(A=1 AND B=1),第二个子句是(A=1),* 第二个子句经过list_difference,返回的结果是NULL。* 对于这种情况,实际上可以化简为:A=1,因为(A=1 AND B=1)一定满足A=1的情况。*/neworlist = NIL; /* degenerate case, see above */break;}}else{if (!list_member(winners, clause))neworlist = lappend(neworlist, clause);else{neworlist = NIL; /* degenerate case, see above */break;}}}/** Append reduced OR to the winners list, if it's not degenerate, handling* the special case of one element correctly (can that really happen?).* Also be careful to maintain AND/OR flatness in case we pulled up a* sub-sub-OR-clause.*/if (neworlist != NIL){if (list_length(neworlist) == 1)winners = lappend(winners, linitial(neworlist));else/*neworlist里面应该是(B=1 OR C=1 OR D=1),所以用make_orclause */winners = lappend(winners, make_orclause(pull_ors(neworlist)));}/** And return the constructed AND clause, again being wary of a single* element and AND/OR flatness.*/if (list_length(winners) == 1)return (Expr *) linitial(winners);else/* 返回的形式是:(A=1)AND (B=1 OR C=1 OR D=1),所以会用make_andclause */return make_andclause(pull_ands(winners));}