PostgreSQL 源码解读(178)- 查询#95(聚合函数)#1相关数据结构

/* ---------------------
 *    AggState information
 *    ss.ss_ScanTupleSlot refers to output of underlying plan.
 *  ss.ss_ScanTupleSlot指的是基础计划的输出.
 *    (ss = ScanState,ps = PlanState)
 *    Note: contains ecxt_aggvalues and
 *    ecxt_aggnulls arrays, which hold the computed agg values for the current
 *    input group during evaluation of an Agg node's output tuple(s).  We
 *    create a second ExprContext, tmpcontext, in which to evaluate input
 *    expressions and run the aggregate transition functions.
 *    注意包含了ecxt_aggvalues和ecxt_aggnulls数组,
 *      这两个数组保存了在计算agg节点的输出元组时当前输入组已计算的agg值.
 * ---------------------
 */
/* these structs are private in nodeAgg.c: */
/*
 * Forward declarations of the per-aggregate bookkeeping structs, followed by
 * the pointer aliases through which the rest of the code refers to them.
 */
struct AggStatePerAggData;
struct AggStatePerTransData;
struct AggStatePerGroupData;
struct AggStatePerPhaseData;
struct AggStatePerHashData;

typedef struct AggStatePerAggData   *AggStatePerAgg;    /* per-aggregate information */
typedef struct AggStatePerTransData *AggStatePerTrans;  /* per aggregate state value information */
typedef struct AggStatePerGroupData *AggStatePerGroup;  /* per-aggregate-per-group working state */
typedef struct AggStatePerPhaseData *AggStatePerPhase;  /* per-grouping-set-phase state */
typedef struct AggStatePerHashData  *AggStatePerHash;   /* per-hashtable state */
typedef struct AggState
    ScanState    ss;                /* its first field is NodeTag */
    List       *aggs;            /* all Aggref nodes in targetlist & quals */
    int            numaggs;        /* length of list (could be zero!) */
    int            numtrans;        /* number of pertrans items */
    AggStrategy aggstrategy;    /* strategy mode */
    AggSplit    aggsplit;        /* agg-splitting mode, see nodes.h */
    AggStatePerPhase phase;        /* pointer to current phase data */
    int            numphases;        /* number of phases (including phase 0) */
    int            current_phase;    /* current phase number */
    AggStatePerAgg peragg;        /* per-Aggref information */
    AggStatePerTrans pertrans;    /* per-Trans state information */
    ExprContext *hashcontext;    /* econtexts for long-lived data (hashtable) */
    ExprContext **aggcontexts;    /* econtexts for long-lived data (per GS) */
    ExprContext *tmpcontext;    /* econtext for input expressions */
    ExprContext *curaggcontext; /* currently active aggcontext */
    AggStatePerAgg curperagg;    /* currently active aggregate, if any */
    //当前活跃的trans state
    AggStatePerTrans curpertrans;    /* currently active trans state, if any */
    bool        input_done;        /* indicates end of input */
    bool        agg_done;        /* indicates completion of Agg scan */
    //最后一个grouping set
    int            projected_set;    /* The last projected grouping set */
    //将要解析的当前grouping set
    int            current_set;    /* The current grouping set being evaluated */
    Bitmapset  *grouped_cols;    /* grouped cols in current projection */
    List       *all_grouped_cols;    /* list of all grouped cols in DESC order */
    /* These fields are for grouping set phase data */
    //-------- 下面的列用于grouping set步骤数据
    int            maxsets;        /* The max number of sets in any phase */
    AggStatePerPhase phases;    /* array of all phases */
    //对于phases > 1,已排序的输入信息
    Tuplesortstate *sort_in;    /* sorted input to phases > 1 */
    Tuplesortstate *sort_out;    /* input is copied here for next phase */
    TupleTableSlot *sort_slot;    /* slot for sort results */
    /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
    //------- 下面的列用于AGG_PLAIN和AGG_SORTED模式:
    //per-group指针的grouping set编号数组
    AggStatePerGroup *pergroups;    /* grouping set indexed array of per-group
                                     * pointers */
    HeapTuple    grp_firstTuple; /* copy of first tuple of current group */
    /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
    //--------- 下面的列用于AGG_HASHED和AGG_MIXED模式:
    bool        table_filled;    /* hash table filled yet? */
    int            num_hashes;
    AggStatePerHash perhash;    /* array of per-hashtable data */
    //per-group指针的grouping set编号数组
    AggStatePerGroup *hash_pergroup;    /* grouping set indexed array of
                                         * per-group pointers */
    /* support for evaluation of agg input expressions: */
    //---------- agg输入表达式解析支持
    AggStatePerGroup *all_pergroups;    /* array of first ->pergroups, than
                                         * ->hash_pergroup */
    ProjectionInfo *combinedproj;    /* projection machinery */
} AggState;
/*
 * Aggregate-splitting support: primitive option bits, the supported
 * combinations of those bits, and macros to test a mode for each bit.
 */

/* Primitive options supported by nodeAgg.c: */
#define AGGSPLITOP_COMBINE      0x01    /* substitute combinefn for transfn */
#define AGGSPLITOP_SKIPFINAL    0x02    /* skip finalfn, return state as-is */
#define AGGSPLITOP_SERIALIZE    0x04    /* apply serializefn to output */
#define AGGSPLITOP_DESERIALIZE  0x08    /* apply deserializefn to input */

/* Supported operating modes (i.e., useful combinations of these options): */
typedef enum AggSplit
{
    /* Basic, non-split aggregation: */
    AGGSPLIT_SIMPLE = 0,
    /* Initial phase of partial aggregation, with serialization: */
    AGGSPLIT_INITIAL_SERIAL = AGGSPLITOP_SKIPFINAL | AGGSPLITOP_SERIALIZE,
    /* Final phase of partial aggregation, with deserialization: */
    AGGSPLIT_FINAL_DESERIAL = AGGSPLITOP_COMBINE | AGGSPLITOP_DESERIALIZE
} AggSplit;

/* Test whether an AggSplit value selects each primitive option: */
#define DO_AGGSPLIT_COMBINE(as)     (((as) & AGGSPLITOP_COMBINE) != 0)
#define DO_AGGSPLIT_SKIPFINAL(as)   (((as) & AGGSPLITOP_SKIPFINAL) != 0)
#define DO_AGGSPLIT_SERIALIZE(as)   (((as) & AGGSPLITOP_SERIALIZE) != 0)
#define DO_AGGSPLIT_DESERIALIZE(as) (((as) & AGGSPLITOP_DESERIALIZE) != 0)


/*
 * AggStatePerAggData - per-aggregate information
 * AggStatePerAggData - per-aggregate信息
 * This contains the information needed to call the final function, to produce
 * a final aggregate result from the state value. If there are multiple
 * identical Aggrefs in the query, they can all share the same per-agg data.
 * 这个结构体包含了调用最终函数的信息,用以从状态值中产生一个最终的聚合结果.
 * 如果查询中有多个相同的Aggrefs,共享相同的per-agg数据.
 * These values are set up during ExecInitAgg() and do not change thereafter.
 * 这些值在ExecInitAgg()中设置,此后不再变化.
 */
typedef struct AggStatePerAggData
     * Link to an Aggref expr this state value is for.
     * 链接到该状态值代表的Aggref expr上.
     * There can be multiple identical Aggref's sharing the same per-agg. This
     * points to the first one of them.
     * 可能有多个相同的Aggref共享相同的per-agg.指向第一个.
    Aggref       *aggref;
    /* index to the state value which this agg should use */
    int            transno;
    /* Optional Oid of final function (may be InvalidOid) */
    //final function函数的Oid(可以是InvalidOid)
    Oid            finalfn_oid;
     * fmgr lookup data for final function --- only valid when finalfn_oid is
     * not InvalidOid.
     * final function的fmgr检索数据 --- 在finalfn_oid不是InvalidOid时才有效
    FmgrInfo    finalfn;
     * Number of arguments to pass to the finalfn.  This is always at least 1
     * (the transition state value) plus any ordered-set direct args. If the
     * finalfn wants extra args then we pass nulls corresponding to the
     * aggregated input columns.
     * 传递给finalfn的参数个数.
     * 这通常不小于1(转换状态值)加上所有已排序集合的直接参数.
     * 如果finalfn需要特别的参数,那么会传递nulls对应聚合的输入列.
    int            numFinalArgs;
    /* ExprStates for any direct-argument expressions */
    List       *aggdirectargs;
     * We need the len and byval info for the agg's result data type in order
     * to know how to copy/delete values.
     * 对于agg结果数据类型需要长度和byval信息,用以知道如何拷贝和删除值.
    int16        resulttypeLen;
    bool        resulttypeByVal;
     * "shareable" is false if this agg cannot share state values with other
     * aggregates because the final function is read-write.
     * 如因为final function是RW,agg不能与其他aggregates共享状态值,则shareable为F
    bool        shareable;
}            AggStatePerAggData;

聚合状态值信息(per aggregate state value information), 通过输入行调用转换函数更新聚合状态值的工作状态.该结构体不会存储从转换状态而来的用于产生最终聚合结果的相关信息,这些信息会存储在AggStatePerAggData中.

/*
 * AggStatePerTransData - per aggregate state value information
 * AggStatePerTransData - 聚合状态值信息
 * Working state for updating the aggregate's state value, by calling the
 * transition function with an input row. This struct does not store the
 * information needed to produce the final aggregate result from the transition
 * state, that's stored in AggStatePerAggData instead. This separation allows
 * multiple aggregate results to be produced from a single state value.
 * 通过输入行调用转换函数更新聚合状态值的工作状态.
 * 该结构体不会存储从转换状态而来的用于产生最终聚合结果的相关信息,这些信息会存储在AggStatePerAggData中.
 * 这样的分离可以做到多个聚合结果可通过单个状态值产生.
 */
typedef struct AggStatePerTransData
     * These values are set up during ExecInitAgg() and do not change
     * thereafter:
     * 这些值在ExecInitAgg()执行期间设置,以后不会修改.
     * Link to an Aggref expr this state value is for.
     * 链接到该状态值所代表的Aggref表达式上面.
     * There can be multiple Aggref's sharing the same state value, so long as
     * the inputs and transition functions are identical and the final
     * functions are not read-write.  This points to the first one of them.
     * 参见AggStatePerAggData结构体注释
    Aggref       *aggref;
     * Is this state value actually being shared by more than one Aggref?
     * 是否共享?
    bool        aggshared;
     * Number of aggregated input columns.  This includes ORDER BY expressions
     * in both the plain-agg and ordered-set cases.  Ordered-set direct args
     * are not counted, though.
     * 聚合输入列个数.
    int            numInputs;
     * Number of aggregated input columns to pass to the transfn.  This
     * includes the ORDER BY columns for ordered-set aggs, but not for plain
     * aggs.  (This doesn't count the transition state value!)
     * 传递给transfn的聚合输入列个数.
    int            numTransInputs;
    /* Oid of the state transition or combine function */
    Oid            transfn_oid;
    /* Oid of the serialization function or InvalidOid */
    Oid            serialfn_oid;
    /* Oid of the deserialization function or InvalidOid */
    Oid            deserialfn_oid;
    /* Oid of state value's datatype */
    Oid            aggtranstype;
     * fmgr lookup data for transition function or combine function.  Note in
     * particular that the fn_strict flag is kept here.
     * 转换函数或组合函数的fmgr检索数据.
    FmgrInfo    transfn;
    /* fmgr lookup data for serialization function */
    FmgrInfo    serialfn;
    /* fmgr lookup data for deserialization function */
    FmgrInfo    deserialfn;
    /* Input collation derived for aggregate */
    Oid            aggCollation;
    /* number of sorting columns */
    int            numSortCols;
    /* number of sorting columns to consider in DISTINCT comparisons */
    /* (this is either zero or the same as numSortCols) */
    int            numDistinctCols;
    /* deconstructed sorting information (arrays of length numSortCols) */
    AttrNumber *sortColIdx;
    Oid           *sortOperators;
    Oid           *sortCollations;
    bool       *sortNullsFirst;
     * Comparators for input columns --- only set/used when aggregate has
     * DISTINCT flag. equalfnOne version is used for single-column
     * comparisons, equalfnMulti for the case of multiple columns.
     * 输入列比较器,在聚合有DISTINCT标记时才会设置/使用
     * equalfnOne用于单个列比较,equalfnMulti用于多列.
    FmgrInfo    equalfnOne;
    ExprState  *equalfnMulti;
     * initial value from pg_aggregate entry
     * pg_aggregate条目的初始值
    Datum        initValue;
    bool        initValueIsNull;
     * We need the len and byval info for the agg's input and transition data
     * types in order to know how to copy/delete values.
     * 需要聚合输入的len和byval信息以及转换数据类型,以便知道如何拷贝/删除值
     * Note that the info for the input type is used only when handling
     * DISTINCT aggs with just one argument, so there is only one input type.
     * 注意:输入类型的信息仅用于处理单个参数的DISTINCT聚合,因此只有一个输入类型
    int16        inputtypeLen,
    bool        inputtypeByVal,
     * Slots for holding the evaluated input arguments.  These are set up
     * during ExecInitAgg() and then used for each input row requiring either
     * FILTER or ORDER BY/DISTINCT processing.
     * 保存解析输入参数的slots.
     * 在ExecInitAgg()中设置用于每个输入行,在FILTER或ORDER BY/DISTINCT处理过程中需要.
    TupleTableSlot *sortslot;    /* current input tuple */
    TupleTableSlot *uniqslot;    /* used for multi-column DISTINCT */
    TupleDesc    sortdesc;        /* descriptor of input tuples */
     * These values are working state that is initialized at the start of an
     * input tuple group and updated for each input tuple.
     * 这些值是在输入tuple group被初始化时的工作状态,在处理每个tuple都会更新.
     * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input
     * values straight to the transition function.  If it's DISTINCT or
     * requires ORDER BY, we pass the input values into a Tuplesort object;
     * then at completion of the input tuple group, we scan the sorted values,
     * eliminate duplicates if needed, and run the transition function on the
     * rest.
     * 对于简单的(不是DISTINCT/ORDER BY)聚合,直接把输入值提供给转换函数即可.
     * 如果是DISTINCT/ORDER BY,传递输入值给Tuplesort对象,
     *   在输入的tuple组结束时,扫描已存储值,如需要去重并在剩余的元组上执行转换函数
     * We need a separate tuplesort for each grouping set.
     * 需要为每一个grouping set提供tuplesort
    //排序对象,仅用于DISTINCT/ORDER BY
    Tuplesortstate **sortstates;    /* sort objects, if DISTINCT or ORDER BY */
     * This field is a pre-initialized FunctionCallInfo struct used for
     * calling this aggregate's transfn.  We save a few cycles per row by not
     * re-initializing the unchanging fields; which isn't much, but it seems
     * worth the extra space consumption.
     * 该字段是预初始化FunctionCallInfo结构体,用于调用聚合的转换函数transfn.
     * 对于每一行,通过减少不会改变的字段的初始化可以节省一些CPU处理周期,
     *   但这个收益不会太大,但看起来值得额外的空间消耗.
    FunctionCallInfoData transfn_fcinfo;
    /* Likewise for serialization and deserialization functions */
    FunctionCallInfoData serialfn_fcinfo;
    FunctionCallInfoData deserialfn_fcinfo;
}            AggStatePerTransData;

per-aggregate-per-group工作状态,这些工作状态值在第一个输入tuple group时初始化,后续在处理每个输入tuple时更新.

/*
 * AggStatePerGroupData - per-aggregate-per-group working state
 * AggStatePerGroupData - per-aggregate-per-group工作状态
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 * 这些工作状态值在第一个输入tuple group时初始化,后续在处理每个输入tuple时更新.
 * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
 * structs (pointed to by aggstate->pergroup); we re-use the array for
 * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
 * hash table contains an array of these structs for each tuple group.
 * 在AGG_PLAIN/AGG_SORTED模式,这些结构体都有一个单独的数组(aggstate->pergroup指向这些结构体);
 * 在AGG_SORTED模式,对于每一个输入group,都会重用这些数组.
 * 在AGG_HASHED模式,hash表中都有对应每一个tuple group的这些结构体的数组.
 * Logically, the sortstate field belongs in this struct, but we do not
 * keep it here for space reasons: we don't support DISTINCT aggregates
 * in AGG_HASHED mode, so there's no reason to use up a pointer field
 * in every entry of the hashtable.
 * 逻辑上来说,sortstate字段属于该结构体,但出于空间大小考虑,不在这里保存:
 *   在AGG_HASHED模式,不支持DISTINCT聚合,因此没有理由在hash表的每一个条目中都包含指针域.
 */
typedef struct AggStatePerGroupData
    Datum        transValue;        /* current transition value */
    bool        transValueIsNull;
    bool        noTransValue;    /* true if transValue not set yet */
     * Note: noTransValue initially has the same value as transValueIsNull,
     * and if true both are cleared to false at the same time.  They are not
     * the same though: if transfn later returns a NULL, we want to keep that
     * NULL and not auto-replace it with a later input value. Only the first
     * non-NULL input will be auto-substituted.
     * 注意:noTransValue与transValueIsNull在初始化时值一样,如同为T,则同时将二者设置为F.
     * 但它们并不相同,如果transfn后续返回NULL,需要保存该NULL值而不是用随后的输入值自动替换之.
     * 只有在第一个非NULL输入会被自动替换.
}            AggStatePerGroupData;

per-grouping-set-phase状态.Grouping sets会被分拆为多个“步骤”,每一个单独的步骤在输入上都会完成一轮处理.

/*
 * AggStatePerPhaseData - per-grouping-set-phase state
 * AggStatePerPhaseData - per-grouping-set-phase状态
 * Grouping sets are divided into "phases", where a single phase can be
 * processed in one pass over the input. If there is more than one phase, then
 * at the end of input from the current phase, state is reset and another pass
 * taken over the data which has been re-sorted in the mean time.
 * Grouping sets会被分拆为多个"步骤",每一个单独的步骤在输入上都会完成一轮处理.
 * 如果步骤多于一个,在当前步骤的最后一个输入处,状态会被重置,同时另一次传递接管了在此期间重排的数据.
 * Accordingly, each phase specifies a list of grouping sets and group clause
 * information, plus each phase after the first also has a sort order.
 * 相应的,每一个步骤指定了grouping sets和group clause信息链表,外加在第一个步骤的排序.
 */
typedef struct AggStatePerPhaseData
    AggStrategy aggstrategy;    /* strategy for this phase */
    //grouping sets个数,如无则为0
    int            numsets;        /* number of grouping sets (or 0) */
    //grouping sets的大小
    int           *gset_lengths;    /* lengths of grouping sets */
    Bitmapset **grouped_cols;    /* column groupings for rollup */
    ExprState **eqfunctions;    /* expression returning equality, indexed by
                                 * nr of cols to compare */
    Agg           *aggnode;        /* Agg node for phase data */
    Sort       *sortnode;        /* Sort node for input ordering for phase */
    ExprState  *evaltrans;        /* evaluation of transition functions  */
}            AggStatePerPhaseData;

per-hashtable状态.使用哈希进行grouping set,每一个grouping set都会有一个这样的结构体.

/*
 * AggStatePerHashData - per-hashtable state
 * AggStatePerHashData - per-hashtable状态
 * When doing grouping sets with hashing, we have one of these for each
 * grouping set. (When doing hashing without grouping sets, we have just one of
 * them.)
 * 使用哈希进行grouping set,每一个grouping set都会有一个这样的结构体.
 * (如无grouping sets执行哈希,则只需要一个即可)
 */
typedef struct AggStatePerHashData
    TupleHashTable hashtable;    /* hash table with one entry per group */
    TupleHashIterator hashiter; /* for iterating through hash table */
    TupleTableSlot *hashslot;    /* slot for loading hash table */
    FmgrInfo   *hashfunctions;    /* per-grouping-field hash fns */
    Oid           *eqfuncoids;        /* per-grouping-field equality fns */
    int            numCols;        /* number of hash key columns */
    int            numhashGrpCols; /* number of columns in hash table */
    int            largestGrpColIdx;    /* largest col required for hashing */
    //输入slot中的hash col索引数组
    AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
    AttrNumber *hashGrpColIdxHash;    /* indices in hashtbl tuples */
    Agg           *aggnode;        /* original Agg node, for numGroups etc. */
}            AggStatePerHashData;



