From 0f1d944c23c0f9170ffe8553ef2d22754fa3aab7 Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 22 Aug 2017 17:31:42 +0900 Subject: [PATCH 2/5] WIP: planner-side changes for partition-pruning Firstly, this adds a stub get_partitions_for_keys() in partition.c with an appropriate interface for the caller to specify bounding scan keys, along with other information about the scan keys extracted from the query, such as NULL-ness of the keys, inclusive-ness, etc. More importantly, this implements the planner-side logic to extract bounding scan keys to be passed to get_partitions_for_keys. That is, it will go through rel->baserestrictinfo and match individual clauses to partition keys and construct lower bound and upper bound tuples, which may cover only a prefix of a multi-column partition key. A bunch of smarts are still missing when mapping the clause operands with keys. For example, code to match a clause specified as (constant op var) doesn't exist. Also, redundant keys are not eliminated; for example, a combination of clauses a = 10 and a > 1 will cause the latter clause a > 1 to take over, resulting in needless scanning of partitions containing values a > 1 and a < 10. ...constraint exclusion is still used, because get_partitions_for_keys is just a stub... 
--- src/backend/catalog/partition.c | 42 +++++ src/backend/optimizer/path/allpaths.c | 308 +++++++++++++++++++++++++++++----- src/backend/optimizer/util/plancat.c | 120 +++++++++++++ src/backend/optimizer/util/relnode.c | 20 +++ src/include/catalog/partition.h | 8 + src/include/nodes/nodes.h | 1 + src/include/nodes/relation.h | 135 +++++++++++++++ src/include/optimizer/plancat.h | 2 + 8 files changed, 595 insertions(+), 41 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 50162632f5..bb3009e5b3 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -1138,6 +1138,48 @@ RelationGetPartitionDispatchInfo(Relation rel, return pd; } +/* + * get_partitions_for_keys + * Returns the list of indexes of rel's partitions that will need to be + * scanned given the bounding scan keys. + * + * Each value in the returned list can be used as an index into the oids array + * of the partition descriptor. + * + * Inputs: + * keynullness contains between 0 and (key->partnatts - 1) values, each + * telling what kind of NullTest has been applied to the corresponding + * partition key column. minkeys represents the lower bound on the partition + * key of the records that the query will return, while maxkeys represents + * the upper bound. min_inclusive and max_inclusive tell whether the bounds + * specified by minkeys and maxkeys are inclusive, respectively. + * + * Other outputs: + * *min_datum_index will return the index in boundinfo->datums of the first + * datum that the query's bounding keys allow to be returned for the query. + * Similarly, *max_datum_index. *null_partition_chosen returns whether + * the null partition will be scanned. + * + * TODO: Implement; the stub returns all partitions and sets no outputs. 
+ */ +List * +get_partitions_for_keys(Relation rel, + NullTestType *keynullness, + Datum *minkeys, int n_minkeys, bool min_inclusive, + Datum *maxkeys, int n_maxkeys, bool max_inclusive, + int *min_datum_index, int *max_datum_index, + bool *null_partition_chosen) +{ + List *result = NIL; + int i; + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + + for (i = 0; i < partdesc->nparts; i++) + result = lappend_int(result, i); + + return result; +} + /* Module-local functions */ /* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 6c3511bd47..97af646242 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -20,6 +20,7 @@ #include "access/sysattr.h" #include "access/tsmapi.h" +#include "catalog/partition.h" #include "catalog/pg_class.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -845,6 +846,222 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * get_rel_partitions + * Return the list of partitions of rel that pass the query clauses + * + * Returned list contains the AppendRelInfos of the chosen partitions. + */ +static List * +get_rel_partitions(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relation parent = heap_open(rte->relid, NoLock); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + List *indexes; + List *result = NIL; + ListCell *lc1, + *lc2; + int keyPos; + List *matchedclauses[PARTITION_MAX_KEYS]; + NullTestType keynullness[PARTITION_MAX_KEYS]; + Datum minkeys[PARTITION_MAX_KEYS], + maxkeys[PARTITION_MAX_KEYS]; + bool need_next_min, + need_next_max, + minkey_set[PARTITION_MAX_KEYS], + maxkey_set[PARTITION_MAX_KEYS], + min_incl = false, + max_incl = false; + int n_minkeys = 0, + n_maxkeys = 0, + i; + + /* + * Match individual (key op Const) OpExprs in the query's restriction with + * individual partition key columns. There is one list per key. 
+ */ + memset(keynullness, -1, sizeof(keynullness)); + memset(matchedclauses, 0, sizeof(matchedclauses)); + keyPos = 0; + for (i = 0; i < rel->part_scheme->partnatts; i++) + { + Node *partkey = linitial(rel->partexprs[i]); + + foreach(lc2, rel->baserestrictinfo) + { + RestrictInfo *rinfo = lfirst(lc2); + Expr *clause = rinfo->clause; + + if (is_opclause(clause)) + { + Node *leftop = get_leftop(clause); + + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + + if (equal(leftop, partkey) && get_rightop(clause) != NULL && IsA(get_rightop(clause), Const)) + { + matchedclauses[keyPos] = lappend(matchedclauses[keyPos], + clause); + /* Assumes the operator is strict, which implies NOT NULL argument. */ + keynullness[keyPos] = IS_NOT_NULL; + } + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Node *arg = (Node *) nulltest->arg; + + if (equal(arg, partkey)) + keynullness[keyPos] = nulltest->nulltesttype; + } + } + + /* Onto finding clauses matching the next partition key. */ + keyPos++; + } + + /* + * Determine the min keys and the max keys using btree semantics-based + * interpretation of the clauses' operators. + */ + + /* + * XXX - There should be a step similar to _bt_preprocess_keys() here, + * to eliminate any redundant scan keys for a given partition column. For + * example, among a <= 4 and a <= 5, we can only keep a <= 4 for being + * more restrictive and discard a <= 5. While doing that, we can also + * check to see if there exists a contradictory combination of scan keys + * that makes the query trivially false for all records in the table. + */ + memset(minkeys, 0, sizeof(minkeys)); + memset(maxkeys, 0, sizeof(maxkeys)); + memset(minkey_set, false, sizeof(minkey_set)); + memset(maxkey_set, false, sizeof(maxkey_set)); + need_next_min = true; + need_next_max = true; + for (i = 0; i < rel->part_scheme->partnatts; i++) + { + /* + * If no scan key existed for the previous column, we are done. 
+ */ + if (i > n_minkeys) + need_next_min = false; + + if (i > n_maxkeys) + need_next_max = false; + + foreach(lc1, matchedclauses[i]) + { + Expr *clause = lfirst(lc1); + Const *rightop = (Const *) get_rightop(clause); + Oid opno = ((OpExpr *) clause)->opno, + opfamily = rel->part_scheme->partopfamily[i]; + StrategyNumber strategy; + + strategy = get_op_opfamily_strategy(opno, opfamily); + switch (strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + if (need_next_max) + { + maxkeys[i] = rightop->constvalue; + if (!maxkey_set[i]) + n_maxkeys++; + maxkey_set[i] = true; + max_incl = (strategy == BTLessEqualStrategyNumber); + } + if (strategy == BTLessStrategyNumber) + need_next_max = false; + break; + + case BTGreaterStrategyNumber: + case BTGreaterEqualStrategyNumber: + if (need_next_min) + { + minkeys[i] = rightop->constvalue; + if (!minkey_set[i]) + n_minkeys++; + minkey_set[i] = true; + min_incl = (strategy == BTGreaterEqualStrategyNumber); + } + if (strategy == BTGreaterStrategyNumber) + need_next_min = false; + break; + + case BTEqualStrategyNumber: + if (need_next_min) + { + minkeys[i] = rightop->constvalue; + if (!minkey_set[i]) + n_minkeys++; + } + minkey_set[i] = true; + min_incl = true; + + if (need_next_max) + { + maxkeys[i] = rightop->constvalue; + if (!maxkey_set[i]) + n_maxkeys++; + } + maxkey_set[i] = true; + max_incl = true; + break; + + /* + * This might mean '<>', but we don't have anything for that + * case yet. Perhaps, handle that as key < const OR + * key > const, once we have props needed for handling OR + * clauses. + */ + default: + /* Ignore; must not disturb bounds found so far. */ + break; + } + } + } + + /* Ask partition.c which partitions it thinks match the keys. 
*/ + indexes = get_partitions_for_keys(parent, keynullness, + minkeys, n_minkeys, min_incl, + maxkeys, n_maxkeys, max_incl, + &rel->painfo->min_datum_idx, + &rel->painfo->max_datum_idx, + &rel->painfo->contains_null_partition); + + if (indexes != NIL) + { +#ifdef USE_ASSERT_CHECKING + int first_index, + last_index; + first_index = linitial_int(indexes); + last_index = llast_int(indexes); + Assert(first_index <= last_index || + rel->part_scheme->strategy != PARTITION_STRATEGY_RANGE); +#endif + + foreach(lc1, indexes) + { + int partidx = lfirst_int(lc1); + AppendRelInfo *appinfo = rel->child_appinfos[partidx]; +#ifdef USE_ASSERT_CHECKING + RangeTblEntry *childrte = planner_rt_fetch(appinfo->child_relid, root); + Assert(partdesc->oids[partidx] == childrte->relid); +#endif + result = lappend(result, appinfo); + } + } + + /* Remember for future users such as set_append_rel_pathlist(). */ + rel->painfo->live_partition_appinfos = result; + + heap_close(parent, NoLock); + + return result; +} + +/* * set_append_rel_size * Set size estimates for a simple "append relation" * @@ -859,6 +1076,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + List *rel_appinfos = NIL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -869,6 +1087,24 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + { + rel_appinfos = get_rel_partitions(root, rel, rte); + Assert(rel->painfo != NULL); + rel->painfo->live_partitioned_rels = list_make1_int(rti); + } + /* * Initialize to compute size estimates for whole append relation. 
* @@ -889,7 +1125,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, nattrs = rel->max_attr - rel->min_attr + 1; parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -902,10 +1138,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ListCell *childvars; ListCell *lc; - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - childRTindex = appinfo->child_relid; childRTE = root->simple_rte_array[childRTindex]; @@ -1114,6 +1346,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, has_live_children = true; /* + * If childrel is itself partitioned, add it and its partitioned + * children to the list being propagated up to the root rel. + */ + if (childrel->painfo && rel->painfo) + { + rel->painfo->live_partitioned_rels = + list_concat(rel->painfo->live_partitioned_rels, + list_copy(childrel->painfo->live_partitioned_rels)); + } + + /* * If any live child is not parallel-safe, treat the whole appendrel * as not parallel-safe. In future we might be able to generate plans * in which some children are farmed out to workers while others are @@ -1209,14 +1452,29 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; - List *live_childrels = NIL; + List *rel_appinfos = NIL, + *live_childrels = NIL; ListCell *l; + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + rel_appinfos = rel->painfo->live_partition_appinfos; + /* * Generate access paths for each member relation, and remember the * non-dummy children. 
*/ - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -1289,40 +1547,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte; rte = planner_rt_fetch(rel->relid, root); - - /* - * Get the partitioned_rels list from root->pcinfo_list after - * confirming that rel is actually a root partitioned table. - */ - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - { - int parent_relid; - bool is_root_partitioned_table = false; - - /* - * Normally, only the root partitioned rel will be RELOPT_BASEREL - * in a given partitione tree, except when the root table itself - * is a child in the case of a UNION ALL query. - */ - if (!IS_OTHER_REL(rel)) - is_root_partitioned_table = true; - else if (bms_get_singleton_member(rel->top_parent_relids, - &parent_relid)) - { - RelOptInfo *parent_rel; - - parent_rel = root->simple_rel_array[parent_relid]; - is_root_partitioned_table = - (parent_rel->rtekind != RTE_RELATION); - } - - if (is_root_partitioned_table) - { - partitioned_rels = get_partitioned_child_rels(root, rel->relid); - /* The root partitioned table is included as a child rel */ - Assert(list_length(partitioned_rels) >= 1); - } - } + if (rte->relkind == RELKIND_PARTITIONED_TABLE && IS_SIMPLE_REL(rel)) + partitioned_rels = rel->painfo->live_partitioned_rels; /* * For every non-dummy child, remember the cheapest path. 
Also, identify diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index bfc05a1af5..de50b5d86a 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -68,6 +68,8 @@ static List *get_relation_constraints(PlannerInfo *root, static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); static List *get_relation_statistics(RelOptInfo *rel, Relation relation); +static void get_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation); /* * get_relation_info - @@ -420,6 +422,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Collect info about relation's foreign keys, if relevant */ get_relation_foreign_keys(root, rel, relation, inhparent); + /* Collect partitioning info, if relevant. */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + get_relation_partition_info(root, rel, relation); + heap_close(relation, NoLock); /* @@ -1805,3 +1811,117 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event) heap_close(relation, NoLock); return result; } + +static void +get_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation) +{ + int i; + ListCell *l; + PartitionKey key = RelationGetPartitionKey(relation); + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + + rel->part_scheme = find_partition_scheme(root, relation); + rel->partexprs = (List **) palloc0(key->partnatts * sizeof(List *)); + + l = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + Expr *keyCol; + + if (key->partattrs[i] != 0) + { + keyCol = (Expr *) makeVar(rel->relid, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + if (l == NULL) + elog(ERROR, "wrong number of partition key expressions"); + keyCol = (Expr *) copyObject(lfirst(l)); + l = lnext(l); + } + + rel->partexprs[i] = list_make1(keyCol); + } + + /* 
Values are filled in build_simple_rel(). */ + rel->child_appinfos = (AppendRelInfo **) palloc0(partdesc->nparts * + sizeof(AppendRelInfo *)); + + /* + * A PartitionAppendInfo to map this table to its immediate partitions + * that will be scanned by this query. At the same time, it records the + * table's partitioning properties reflecting any partition-pruning that + * might occur to satisfy the query. Rest of the fields are set in + * get_rel_partitions() and set_append_rel_size(). + */ + rel->painfo = makeNode(PartitionAppendInfo); + rel->painfo->boundinfo = partdesc->boundinfo; +} + +/* + * find_partition_scheme + * + * The function returns a canonical partition scheme which exactly matches the + * partitioning scheme of the given relation if one exists in the list of + * canonical partitioning schemes maintained in PlannerInfo. If none of the + * existing partitioning schemes match, the function creates a canonical + * partition scheme and adds it to the list. + * + * The relation must be a partitioned table: its partition key is dereferenced + * unconditionally and no NULL result is ever returned. + */ +PartitionScheme +find_partition_scheme(PlannerInfo *root, Relation relation) +{ + ListCell *lc; + PartitionKey key = RelationGetPartitionKey(relation); + char strategy = key->strategy; + int partnatts = key->partnatts; + PartitionScheme part_scheme = NULL; + + /* Search for a matching partition scheme and return if found one. */ + foreach(lc, root->partition_schemes) + { + part_scheme = lfirst(lc); + + /* Match various partitioning attributes. 
*/ + if (strategy != part_scheme->strategy || + partnatts != part_scheme->partnatts || + memcmp(key->parttypid, part_scheme->parttypid, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->parttypmod, part_scheme->parttypmod, + sizeof(int32) * partnatts) != 0 || + memcmp(key->partcollation, part_scheme->partcollation, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->partopfamily, part_scheme->partopfamily, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->partopcintype, part_scheme->partopcintype, + sizeof(Oid) * partnatts) != 0) + continue; + + /* Found a matching partition scheme. */ + return part_scheme; + } + + /* Did not find matching partition scheme. Create one. */ + part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData)); + + part_scheme->strategy = strategy; + part_scheme->partnatts = partnatts; + part_scheme->parttypid = key->parttypid; + part_scheme->parttypmod = key->parttypmod; + part_scheme->partcollation = key->partcollation; + part_scheme->partopfamily = key->partopfamily; + part_scheme->partopcintype = key->partopcintype; + + /* Add the partitioning scheme to PlannerInfo. 
*/ + root->partition_schemes = lappend(root->partition_schemes, part_scheme); + + return part_scheme; +} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8ad0b4a669..390d3b4956 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -17,6 +17,7 @@ #include #include "miscadmin.h" +#include "catalog/pg_class.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -163,6 +164,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) else rel->top_parent_relids = NULL; + rel->child_appinfos = NULL; + rel->part_scheme = NULL; + rel->partexprs = NULL; + rel->painfo = NULL; + /* Check type of rtable entry */ switch (rte->rtekind) { @@ -218,7 +224,18 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) if (rte->inh) { ListCell *l; + AppendRelInfo **child_appinfos = NULL; + int i; + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + Assert(rel->part_scheme != NULL); + Assert(rel->child_appinfos != NULL); + Assert(rel->painfo != NULL); + child_appinfos = rel->child_appinfos; + } + + i = 0; foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); @@ -229,6 +246,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) (void) build_simple_rel(root, appinfo->child_relid, rel); + + if (child_appinfos) + child_appinfos[i++] = appinfo; } } diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2283c675e9..fd16494909 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -99,4 +99,12 @@ extern int get_partition_for_tuple(PartitionDispatch *pd, EState *estate, PartitionDispatchData **failed_at, TupleTableSlot **failed_slot); + +/* Planner support stuff. 
*/ +extern List *get_partitions_for_keys(Relation rel, + NullTestType *keynullness, + Datum *minkeys, int n_minkeys, bool min_inclusive, + Datum *maxkeys, int n_maxkeys, bool max_inclusive, + int *min_datum_index, int *max_datum_index, + bool *null_partition_chosen); #endif /* PARTITION_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 27bd4f3363..63196a1211 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_SpecialJoinInfo, T_AppendRelInfo, T_PartitionedChildRelInfo, + T_PartitionAppendInfo, T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index a39e59d8ac..2b535984a7 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -266,6 +266,8 @@ typedef struct PlannerInfo List *distinct_pathkeys; /* distinctClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */ + List *partition_schemes; /* List of PartitionScheme objects. */ + List *initial_rels; /* RelOptInfos we are now trying to join */ /* Use fetch_upper_rel() to get any particular upper rel */ @@ -326,6 +328,48 @@ typedef struct PlannerInfo ((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \ rt_fetch(rti, (root)->parse->rtable)) +/* + * Partitioning scheme + * Structure to hold partitioning scheme for a given relation. + * + * Multiple relations may be partitioned in the same way. The relations + * resulting from joining such relations may be partitioned in the same way as + * the joining relations. Similarly, relations derived from such relations by + * grouping, sorting may be partitioned in the same way as the underlying scan + * relations. All such relations partitioned in the same way share the + * partitioning scheme. + * + * PlannerInfo stores a list of distinct "canonical" partitioning schemes. 
+ * RelOptInfo of a partitioned relation holds the pointer to "canonical" + * partitioning scheme. + * + * We store opclass declared input data types instead of partition key + * datatypes since those are the ones used to compare partition bounds instead + * of actual partition key data types. Since partition key data types and the + * opclass declared input data types are expected to be binary compatible (per + * ResolveOpClass()), both of those should have same byval and length + * properties. + * + * The structure caches information about partition key data type to be used + * while matching partition bounds. While comparing partition schemes we don't + * need to compare this information as it should be same when opclass declared + * input data types are same for two partitioned relations. + */ +typedef struct PartitionSchemeData +{ + char strategy; /* Partitioning strategy */ + int16 partnatts; /* Number of partitioning attributes */ + + /* The following arrays each have partnatts members. */ + Oid *parttypid; /* Type OIDs */ + int32 *parttypmod; /* Typemod values */ + Oid *partcollation; /* Partitioning collation */ + Oid *partopfamily; /* Operator family OIDs */ + Oid *partopcintype; /* Operator class-declared input type OIDs */ +} PartitionSchemeData; + +typedef struct PartitionSchemeData *PartitionScheme; + /*---------- * RelOptInfo @@ -515,6 +559,9 @@ typedef enum RelOptKind /* Is the given relation an "other" relation? 
*/ #define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) +typedef struct AppendRelInfo AppendRelInfo; +typedef struct PartitionAppendInfo PartitionAppendInfo; + typedef struct RelOptInfo { NodeTag type; @@ -592,6 +639,48 @@ typedef struct RelOptInfo /* used by "other" relations */ Relids top_parent_relids; /* Relids of topmost parents */ + + /* Fields set for partitioned relations */ + + /* + * Information about the partitioning attributes, such as the number of + * attributes, arrays containing per-attribute type/typmod, partitioning + * collation, operator family OIDs, etc. + */ + PartitionScheme part_scheme; + + /* + * Following contains the exact identities of the individual partitioning + * attributes. For example, if the attribute is a table's column, then + * it will be represented herein by a Var node for the same. This is + * structured as an array of Lists with part_scheme->partnatts members, + * with each list containing the expression(s) corresponding to the ith + * partitioning attribute (0 <= i < part_scheme->partnatts) of this rel. + * For baserels, there is just a single expression in each slot (the ith + * list) of the array, because it corresponds to just one table. But for + * a joinrel, there will be as many expressions as there are tables + * involved in that joinrel. We have to do it that way, because in the + * joinrel case, the same corresponding partitioning attribute may have + * different identities in different tables involved in the join; for + * example, a Var node's varno will differ and so might varattnos. + */ + List **partexprs; + + /* AppendRelInfos of *all* partitions of the table. */ + AppendRelInfo **child_appinfos; + + /* + * For a partitioned relation, the following represents the identities + * of its live partitions (their RT indexes) and some information about + * the bounds that the live partitions satisfy. 
+ */ + PartitionAppendInfo *painfo; + + /* + * RT index of the root partitioned table in the partition tree of + * which this rel is a member. + */ + Index root_parent_relid; } RelOptInfo; /* @@ -2031,6 +2120,52 @@ typedef struct PartitionedChildRelInfo List *child_rels; } PartitionedChildRelInfo; +/* Forward declarations, to avoid including other headers */ +typedef struct PartitionDispatchData *PartitionDispatch; +typedef struct PartitionBoundInfoData *PartitionBoundInfo; +typedef struct PartitionKeyData *PartitionKey; + +/* + * PartitionAppendInfo - Properties of partitions contained in the Append path + * of a given partitioned table + */ +typedef struct PartitionAppendInfo +{ + NodeTag type; + + /* + * List of AppendRelInfos of the table's partitions that satisfy a given + * query. + */ + List *live_partition_appinfos; + + /* + * RT indexes of live partitions that are partitioned tables themselves. + * This includes the RT index of the table itself. + */ + List *live_partitioned_rels; + + /* + * The following simply copies the pointer to boundinfo in the table's + * PartitionDesc. + */ + PartitionBoundInfo boundinfo; + + /* + * Indexes in the boundinfo->datums array of the smallest and the largest + * value of the partition key that the query allows. They are set by + * calling get_partitions_for_keys(). + */ + int min_datum_idx; + int max_datum_idx; + + /* + * Does this Append contain the null-accepting partition, if one exists + * and is allowed by the query's quals. + */ + bool contains_null_partition; +} PartitionAppendInfo; + /* * For each distinct placeholder expression generated during planning, we * store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list. 
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 71f0faf938..c45db074c6 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -56,5 +56,7 @@ extern Selectivity join_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo); extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); +extern PartitionScheme find_partition_scheme(PlannerInfo *root, + Relation relation); #endif /* PLANCAT_H */ -- 2.11.0