Index: costsize.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v
retrieving revision 1.207
diff -c -r1.207 costsize.c
*** costsize.c	17 Apr 2009 15:33:33 -0000	1.207
--- costsize.c	24 Apr 2009 22:44:36 -0000
***************
*** 119,124 ****
--- 119,126 ----
  			 RestrictInfo *rinfo, PathKey *pathkey);
  static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
+ static Selectivity semi_join_fraction(JoinPath *path, PlannerInfo *root,
+ 				   SpecialJoinInfo *sjinfo);
  static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
***************
*** 1399,1408 ****
--- 1401,1414 ----
  	double		outer_path_rows = PATH_ROWS(outer_path);
  	double		inner_path_rows = nestloop_inner_path_rows(inner_path);
  	double		ntuples;
+ 	Selectivity semi_fraction;
  
  	if (!enable_nestloop)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(path, root, sjinfo);
+ 
  	/* cost of source data */
  
  	/*
***************
*** 1429,1440 ****
  		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
  	}
  	run_cost += outer_path_rows *
! 		(inner_path->total_cost - inner_path->startup_cost);
  
  	/*
  	 * Compute number of tuples processed (not number emitted!)
  	 */
! 	ntuples = outer_path_rows * inner_path_rows;
  
  	/* CPU costs */
  	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
--- 1435,1446 ----
  		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
  	}
  	run_cost += outer_path_rows *
! 		(inner_path->total_cost - inner_path->startup_cost) * semi_fraction;
  
  	/*
  	 * Compute number of tuples processed (not number emitted!)
  	 */
! 	ntuples = outer_path_rows * inner_path_rows * semi_fraction;
  
  	/* CPU costs */
  	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
***************
*** 1485,1490 ****
--- 1491,1497 ----
  				outerendsel,
  				innerstartsel,
  				innerendsel;
+ 	Selectivity semi_fraction;
  	Path		sort_path;		/* dummy for result of cost_sort */
  
  	/* Protect some assumptions below that rowcounts aren't zero */
***************
*** 1496,1501 ****
--- 1503,1511 ----
  	if (!enable_mergejoin)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(&path->jpath, root, sjinfo);
+ 
  	/*
  	 * Compute cost of the mergequals and qpquals (other restriction clauses)
  	 * separately.
***************
*** 1718,1723 ****
--- 1728,1735 ----
  	 * The number of tuple comparisons needed is approximately number of outer
  	 * rows plus number of inner rows plus number of rescanned tuples (can we
  	 * refine this?).  At each one, we need to evaluate the mergejoin quals.
+ 	 * NOTE: semi/anti join mode does not save any work here, so do NOT apply
+ 	 * semi_fraction.
  	 */
  	startup_cost += merge_qual_cost.startup;
  	startup_cost += merge_qual_cost.per_tuple *
***************
*** 1730,1740 ****
  	 * For each tuple that gets through the mergejoin proper, we charge
  	 * cpu_tuple_cost plus the cost of evaluating additional restriction
  	 * clauses that are to be applied at the join.  (This is pessimistic since
! 	 * not all of the quals may get evaluated at each tuple.)
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * mergejointuples;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
--- 1742,1753 ----
  	 * For each tuple that gets through the mergejoin proper, we charge
  	 * cpu_tuple_cost plus the cost of evaluating additional restriction
  	 * clauses that are to be applied at the join.  (This is pessimistic since
! 	 * not all of the quals may get evaluated at each tuple.)  Here we should
! 	 * apply semi_fraction, since mergejointuples doesn't account for that.
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * mergejointuples * semi_fraction;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
***************
*** 1824,1834 ****
--- 1837,1851 ----
  	int			num_skew_mcvs;
  	double		virtualbuckets;
  	Selectivity innerbucketsize;
+ 	Selectivity semi_fraction;
  	ListCell   *hcl;
  
  	if (!enable_hashjoin)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(&path->jpath, root, sjinfo);
+ 
  	/*
  	 * Compute cost of the hashquals and qpquals (other restriction clauses)
  	 * separately.
***************
*** 1972,1987 ****
  
  	/*
  	 * The number of tuple comparisons needed is the number of outer tuples
! 	 * times the typical number of tuples in a hash bucket, which is the inner
! 	 * relation size times its bucketsize fraction.  At each one, we need to
! 	 * evaluate the hashjoin quals.  But actually, charging the full qual eval
! 	 * cost at each tuple is pessimistic, since we don't evaluate the quals
! 	 * unless the hash values match exactly.  For lack of a better idea, halve
! 	 * the cost estimate to allow for that.
  	 */
  	startup_cost += hash_qual_cost.startup;
  	run_cost += hash_qual_cost.per_tuple *
! 		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
  
  	/*
  	 * For each tuple that gets through the hashjoin proper, we charge
--- 1989,2005 ----
  
  	/*
  	 * The number of tuple comparisons needed is the number of outer tuples
! 	 * times the typical number of tuples in a hash bucket (which is the inner
! 	 * relation size times its bucketsize fraction) times the semi_fraction.
! 	 * At each tuple, we need to evaluate the hashjoin quals.  But actually,
! 	 * charging the full qual eval cost at each tuple is pessimistic, since we
! 	 * don't evaluate the quals unless the hash values match exactly.  For
! 	 * lack of a better idea, halve the cost estimate to allow for that.
  	 */
  	startup_cost += hash_qual_cost.startup;
  	run_cost += hash_qual_cost.per_tuple *
! 		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
! 		semi_fraction * 0.5;
  
  	/*
  	 * For each tuple that gets through the hashjoin proper, we charge
***************
*** 1991,1997 ****
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * hashjointuples;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
--- 2009,2015 ----
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * hashjointuples * semi_fraction;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
***************
*** 2321,2326 ****
--- 2339,2483 ----
  
  /*
+  * semi_join_fraction
+  *	  Determines the fraction of the inner input that a SEMI or ANTI join
+  *	  can be expected to scan.
+  *
+  * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
+  * inner rows as soon as it finds a match to the current outer row.
+  * We should therefore adjust some of the cost components for this effect.
+  * This function computes the fraction of the inner relation that can be
+  * expected to be scanned on average.  (In a hash join, this can be taken
+  * as the fraction of the selected hash bucket, not of the whole relation,
+  * but the calculation here is the same.)  Mergejoins only avoid evaluating
+  * otherquals, so the effect is much weaker for them, but it still exists.
+  *
+  * 'path' is already filled in except for the cost fields
+  * 'sjinfo' is extra info about the join for selectivity estimation
+  */
+ static Selectivity
+ semi_join_fraction(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+ {
+ 	Selectivity scanfrac;
+ 	Selectivity matchfrac;
+ 	Selectivity avgmatch;
+ 	Selectivity jselec;
+ 	Selectivity nselec;
+ 	SpecialJoinInfo norm_sjinfo;
+ 	JoinType	jointype = path->jointype;
+ 	List	   *joinquals;
+ 	ListCell   *l;
+ 
+ 	/* Return 1.0 whenever it's not JOIN_SEMI or JOIN_ANTI */
+ 	if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
+ 		return 1.0;
+ 
+ 	/*
+ 	 * Note: it's annoying to repeat this selectivity estimation on each call,
+ 	 * when the joinclause list will be the same for all path pairs
+ 	 * implementing a given join.  clausesel.c will save us from the worst
+ 	 * effects of this by caching at the RestrictInfo level; but perhaps it'd
+ 	 * be worth finding a way to cache the results at a higher level.
+ 	 */
+ 
+ 	/*
+ 	 * In an ANTI join, we must ignore clauses that are "pushed down", since
+ 	 * those won't affect the match logic.  In a SEMI join, we do not
+ 	 * distinguish joinquals from "pushed down" quals, so just use the whole
+ 	 * restrictinfo list.
+ 	 */
+ 	if (jointype == JOIN_ANTI)
+ 	{
+ 		joinquals = NIL;
+ 		foreach(l, path->joinrestrictinfo)
+ 		{
+ 			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ 
+ 			Assert(IsA(rinfo, RestrictInfo));
+ 			if (!rinfo->is_pushed_down)
+ 				joinquals = lappend(joinquals, rinfo);
+ 		}
+ 	}
+ 	else
+ 		joinquals = path->joinrestrictinfo;
+ 
+ 	/*
+ 	 * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+ 	 */
+ 	jselec = clauselist_selectivity(root,
+ 									joinquals,
+ 									0,
+ 									jointype,
+ 									sjinfo);
+ 
+ 	/*
+ 	 * Also get the normal inner-join selectivity of the join clauses.
+ 	 */
+ 	norm_sjinfo.type = T_SpecialJoinInfo;
+ 	norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
+ 	norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
+ 	norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
+ 	norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ 	norm_sjinfo.jointype = JOIN_INNER;
+ 	/* we don't bother trying to make the remaining fields valid */
+ 	norm_sjinfo.lhs_strict = false;
+ 	norm_sjinfo.delay_upper_joins = false;
+ 	norm_sjinfo.join_quals = NIL;
+ 
+ 	nselec = clauselist_selectivity(root,
+ 									joinquals,
+ 									0,
+ 									JOIN_INNER,
+ 									&norm_sjinfo);
+ 
+ 	/* Avoid leaking a lot of ListCells */
+ 	if (jointype == JOIN_ANTI)
+ 		list_free(joinquals);
+ 
+ 	/*
+ 	 * jselec can be interpreted as the fraction of outer-rel rows that have
+ 	 * any matches (this is true for both SEMI and ANTI cases).  And nselec
+ 	 * is the fraction of the Cartesian product that matches.  So, the
+ 	 * average number of matches for each outer-rel row that has at least
+ 	 * one match is nselec * inner_rows / jselec.
+ 	 *
+ 	 * Note: it is correct to use the inner rel's "rows" count here, not
+ 	 * PATH_ROWS(), even if the inner path under consideration is an inner
+ 	 * indexscan.  This is because we have included all the join clauses
+ 	 * in the selectivity estimate, even ones used in an inner indexscan.
+ 	 */
+ 	if (jselec > 0)				/* protect against zero divide */
+ 		avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+ 	else
+ 		avgmatch = 0;
+ 
+ 	/*
+ 	 * For an outer-rel row that has at least one match, we can expect the
+ 	 * inner scan to stop after a fraction 1/(avgmatch+1) of the inner rows,
+ 	 * if the matches are evenly distributed.  Since they probably aren't
+ 	 * quite evenly distributed, we apply a fuzz factor of 2.0 to that
+ 	 * fraction (but, of course, clamping at an upper limit of 1.0).
+ 	 */
+ 	matchfrac = 2.0 / (avgmatch + 1.0);
+ 	matchfrac = Min(1.0, matchfrac);
+ 
+ 	/*
+ 	 * For an outer-rel row that has no match, the executor will have to scan
+ 	 * the whole input relation to prove that.  Remembering the meaning of
+ 	 * jselec, we therefore estimate the average scan fraction over all
+ 	 * outer-rel rows thus:
+ 	 */
+ 	scanfrac = jselec * matchfrac + (1.0 - jselec) /* times 1.0 */ ;
+ 
+ 	/* clamp the result to sane range in case of roundoff errors */
+ 	scanfrac = Max(scanfrac, 0.0);
+ 	scanfrac = Min(scanfrac, 1.0);
+ 
+ 	return scanfrac;
+ }
+ 
+ 
+ /*
   * approx_tuple_count
   *		Quick-and-dirty estimation of the number of join rows passing
   *		a set of qual conditions.