Index: costsize.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v
retrieving revision 1.207
diff -c -r1.207 costsize.c
*** costsize.c	17 Apr 2009 15:33:33 -0000	1.207
--- costsize.c	24 Apr 2009 22:44:36 -0000
***************
*** 119,124 ****
--- 119,126 ----
  			 RestrictInfo *rinfo, PathKey *pathkey);
  static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
+ static Selectivity semi_join_fraction(JoinPath *path, PlannerInfo *root,
+ 				   SpecialJoinInfo *sjinfo);
  static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
***************
*** 1399,1408 ****
--- 1401,1414 ----
  	double		outer_path_rows = PATH_ROWS(outer_path);
  	double		inner_path_rows = nestloop_inner_path_rows(inner_path);
  	double		ntuples;
+ 	Selectivity semi_fraction;
  
  	if (!enable_nestloop)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(path, root, sjinfo);
+ 
  	/* cost of source data */
  
  	/*
***************
*** 1429,1440 ****
  		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
  	}
  	run_cost += outer_path_rows *
! 		(inner_path->total_cost - inner_path->startup_cost);
  
  	/*
  	 * Compute number of tuples processed (not number emitted!)
  	 */
! 	ntuples = outer_path_rows * inner_path_rows;
  
  	/* CPU costs */
  	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
--- 1435,1446 ----
  		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
  	}
  	run_cost += outer_path_rows *
! 		(inner_path->total_cost - inner_path->startup_cost) * semi_fraction;
  
  	/*
  	 * Compute number of tuples processed (not number emitted!)
  	 */
! 	ntuples = outer_path_rows * inner_path_rows * semi_fraction;
  
  	/* CPU costs */
  	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
***************
*** 1485,1490 ****
--- 1491,1497 ----
  				outerendsel,
  				innerstartsel,
  				innerendsel;
+ 	Selectivity semi_fraction;
  	Path		sort_path;		/* dummy for result of cost_sort */
  
  	/* Protect some assumptions below that rowcounts aren't zero */
***************
*** 1496,1501 ****
--- 1503,1511 ----
  	if (!enable_mergejoin)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(&path->jpath, root, sjinfo);
+ 
  	/*
  	 * Compute cost of the mergequals and qpquals (other restriction clauses)
  	 * separately.
***************
*** 1718,1723 ****
--- 1728,1735 ----
  	 * The number of tuple comparisons needed is approximately number of outer
  	 * rows plus number of inner rows plus number of rescanned tuples (can we
  	 * refine this?).  At each one, we need to evaluate the mergejoin quals.
+ 	 * NOTE: semi/anti join mode does not save any work here, so do NOT apply
+ 	 * semi_fraction.
  	 */
  	startup_cost += merge_qual_cost.startup;
  	startup_cost += merge_qual_cost.per_tuple *
***************
*** 1730,1740 ****
  	 * For each tuple that gets through the mergejoin proper, we charge
  	 * cpu_tuple_cost plus the cost of evaluating additional restriction
  	 * clauses that are to be applied at the join.  (This is pessimistic since
! 	 * not all of the quals may get evaluated at each tuple.)
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * mergejointuples;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
--- 1742,1753 ----
  	 * For each tuple that gets through the mergejoin proper, we charge
  	 * cpu_tuple_cost plus the cost of evaluating additional restriction
  	 * clauses that are to be applied at the join.  (This is pessimistic since
! 	 * not all of the quals may get evaluated at each tuple.)  Here we should
! 	 * apply semi_fraction, since mergejointuples doesn't account for that.
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * mergejointuples * semi_fraction;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
***************
*** 1824,1834 ****
--- 1837,1851 ----
  	int			num_skew_mcvs;
  	double		virtualbuckets;
  	Selectivity innerbucketsize;
+ 	Selectivity semi_fraction;
  	ListCell   *hcl;
  
  	if (!enable_hashjoin)
  		startup_cost += disable_cost;
  
+ 	/* Estimate cost savings fraction for JOIN_SEMI/ANTI cases */
+ 	semi_fraction = semi_join_fraction(&path->jpath, root, sjinfo);
+ 
  	/*
  	 * Compute cost of the hashquals and qpquals (other restriction clauses)
  	 * separately.
***************
*** 1972,1987 ****
  
  	/*
  	 * The number of tuple comparisons needed is the number of outer tuples
! 	 * times the typical number of tuples in a hash bucket, which is the inner
! 	 * relation size times its bucketsize fraction.  At each one, we need to
! 	 * evaluate the hashjoin quals.  But actually, charging the full qual eval
! 	 * cost at each tuple is pessimistic, since we don't evaluate the quals
! 	 * unless the hash values match exactly.  For lack of a better idea, halve
! 	 * the cost estimate to allow for that.
  	 */
  	startup_cost += hash_qual_cost.startup;
  	run_cost += hash_qual_cost.per_tuple *
! 		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
  
  	/*
  	 * For each tuple that gets through the hashjoin proper, we charge
--- 1989,2005 ----
  
  	/*
  	 * The number of tuple comparisons needed is the number of outer tuples
! 	 * times the typical number of tuples in a hash bucket (which is the inner
! 	 * relation size times its bucketsize fraction) times the semi_fraction.
! 	 * At each tuple, we need to evaluate the hashjoin quals.  But actually,
! 	 * charging the full qual eval cost at each tuple is pessimistic, since we
! 	 * don't evaluate the quals unless the hash values match exactly.  For
! 	 * lack of a better idea, halve the cost estimate to allow for that.
  	 */
  	startup_cost += hash_qual_cost.startup;
  	run_cost += hash_qual_cost.per_tuple *
! 		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
! 		semi_fraction * 0.5;
  
  	/*
  	 * For each tuple that gets through the hashjoin proper, we charge
***************
*** 1991,1997 ****
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * hashjointuples;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
--- 2009,2015 ----
  	 */
  	startup_cost += qp_qual_cost.startup;
  	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
! 	run_cost += cpu_per_tuple * hashjointuples * semi_fraction;
  
  	path->jpath.path.startup_cost = startup_cost;
  	path->jpath.path.total_cost = startup_cost + run_cost;
***************
*** 2321,2326 ****
--- 2339,2483 ----
  
  /*
+  * semi_join_fraction
+  *	  Determines the fraction of the inner input that a SEMI or ANTI join
+  *	  can be expected to scan.
+  *
+  * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
+  * inner rows as soon as it finds a match to the current outer row.
+  * We should therefore adjust some of the cost components for this effect.
+  * This function computes the fraction of the inner relation that can be
+  * expected to be scanned on average.  (In a hash join, this can be taken
+  * as the fraction of the selected hash bucket, not of the whole relation,
+  * but the calculation here is the same.)  Mergejoins only avoid evaluating
+  * otherquals, so the effect is much weaker for them, but it still exists.
+  *
+  * 'path' is already filled in except for the cost fields
+  * 'sjinfo' is extra info about the join for selectivity estimation
+  */
+ static Selectivity
+ semi_join_fraction(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+ {
+ 	Selectivity scanfrac;
+ 	Selectivity matchfrac;
+ 	Selectivity avgmatch;
+ 	Selectivity jselec;
+ 	Selectivity nselec;
+ 	SpecialJoinInfo norm_sjinfo;
+ 	JoinType	jointype = path->jointype;
+ 	List	   *joinquals;
+ 	ListCell   *l;
+ 
+ 	/* Return 1.0 whenever it's not JOIN_SEMI or JOIN_ANTI */
+ 	if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
+ 		return 1.0;
+ 
+ 	/*
+ 	 * Note: it's annoying to repeat this selectivity estimation on each call,
+ 	 * when the joinclause list will be the same for all path pairs
+ 	 * implementing a given join.  clausesel.c will save us from the worst
+ 	 * effects of this by caching at the RestrictInfo level; but perhaps it'd
+ 	 * be worth finding a way to cache the results at a higher level.
+ 	 */
+ 
+ 	/*
+ 	 * In an ANTI join, we must ignore clauses that are "pushed down", since
+ 	 * those won't affect the match logic.  In a SEMI join, we do not
+ 	 * distinguish joinquals from "pushed down" quals, so just use the whole
+ 	 * restrictinfo list.
+ 	 */
+ 	if (jointype == JOIN_ANTI)
+ 	{
+ 		joinquals = NIL;
+ 		foreach(l, path->joinrestrictinfo)
+ 		{
+ 			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ 
+ 			Assert(IsA(rinfo, RestrictInfo));
+ 			if (!rinfo->is_pushed_down)
+ 				joinquals = lappend(joinquals, rinfo);
+ 		}
+ 	}
+ 	else
+ 		joinquals = path->joinrestrictinfo;
+ 
+ 	/*
+ 	 * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+ 	 */
+ 	jselec = clauselist_selectivity(root,
+ 									joinquals,
+ 									0,
+ 									jointype,
+ 									sjinfo);
+ 
+ 	/*
+ 	 * Also get the normal inner-join selectivity of the join clauses.
+ 	 */
+ 	norm_sjinfo.type = T_SpecialJoinInfo;
+ 	norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
+ 	norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
+ 	norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
+ 	norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ 	norm_sjinfo.jointype = JOIN_INNER;
+ 	/* we don't bother trying to make the remaining fields valid */
+ 	norm_sjinfo.lhs_strict = false;
+ 	norm_sjinfo.delay_upper_joins = false;
+ 	norm_sjinfo.join_quals = NIL;
+ 
+ 	nselec = clauselist_selectivity(root,
+ 									joinquals,
+ 									0,
+ 									JOIN_INNER,
+ 									&norm_sjinfo);
+ 
+ 	/* Avoid leaking a lot of ListCells */
+ 	if (jointype == JOIN_ANTI)
+ 		list_free(joinquals);
+ 
+ 	/*
+ 	 * jselec can be interpreted as the fraction of outer-rel rows that have
+ 	 * any matches (this is true for both SEMI and ANTI cases).  And nselec
+ 	 * is the fraction of the Cartesian product that matches.  So, the
+ 	 * average number of matches for each outer-rel row that has at least
+ 	 * one match is nselec * inner_rows / jselec.
+ 	 *
+ 	 * Note: it is correct to use the inner rel's "rows" count here, not
+ 	 * PATH_ROWS(), even if the inner path under consideration is an inner
+ 	 * indexscan.  This is because we have included all the join clauses
+ 	 * in the selectivity estimate, even ones used in an inner indexscan.
+ 	 */
+ 	if (jselec > 0)				/* protect against zero divide */
+ 		avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+ 	else
+ 		avgmatch = 0;
+ 
+ 	/*
+ 	 * For an outer-rel row that has at least one match, we can expect the
+ 	 * inner scan to stop after a fraction 1/(avgmatch+1) of the inner rows,
+ 	 * if the matches are evenly distributed.  Since they probably aren't
+ 	 * quite evenly distributed, we apply a fuzz factor of 2.0 to that
+ 	 * fraction (but, of course, clamping at an upper limit of 1.0).
+ 	 */
+ 	matchfrac = 2.0 / (avgmatch + 1.0);
+ 	matchfrac = Min(1.0, matchfrac);
+ 
+ 	/*
+ 	 * For an outer-rel row that has no match, the executor will have to scan
+ 	 * the whole input relation to prove that.  Remembering the meaning of
+ 	 * jselec, we therefore estimate the average scan fraction over all
+ 	 * outer-rel rows thus:
+ 	 */
+ 	scanfrac = jselec * matchfrac + (1.0 - jselec) /* times 1.0 */ ;
+ 
+ 	/* clamp the result to sane range in case of roundoff errors */
+ 	scanfrac = Max(scanfrac, 0.0);
+ 	scanfrac = Min(scanfrac, 1.0);
+ 
+ 	return scanfrac;
+ }
+ 
+ 
+ /*
   * approx_tuple_count
   *		Quick-and-dirty estimation of the number of join rows passing
   *		a set of qual conditions.