ITPub博客

首页 > 数据库 > PostgreSQL > PostgreSQL 源码解读(126)- MVCC#10(vacuum过程)

PostgreSQL 源码解读(126)- MVCC#10(vacuum过程)

原创 PostgreSQL 作者:husthxd 时间:2019-01-22 14:03:19 0 删除 编辑

本节简单介绍了PostgreSQL手工执行vacuum的主处理流程,主要分析了ExecVacuum->vacuum函数的实现逻辑。该函数是VACUUM/ANALYZE命令的内部处理入口。

一、数据结构

宏定义
Vacuum和Analyze命令选项


/* ----------------------
 *      Vacuum and Analyze Statements
 *
 * VACUUM and ANALYZE are nominally two statements, but a single node type
 * serves both.  The values below are independent bit flags that are OR'ed
 * together into an options bitmask; at least one of VACOPT_VACUUM and
 * VACOPT_ANALYZE must be set in that bitmask.
 * ----------------------
 */
typedef enum VacuumOption
{
    VACOPT_VACUUM                = 0x01,    /* bit 0: do VACUUM */
    VACOPT_ANALYZE               = 0x02,    /* bit 1: do ANALYZE */
    VACOPT_VERBOSE               = 0x04,    /* bit 2: print progress info */
    VACOPT_FREEZE                = 0x08,    /* bit 3: FREEZE option */
    VACOPT_FULL                  = 0x10,    /* bit 4: FULL (non-concurrent)
                                             * vacuum */
    VACOPT_SKIP_LOCKED           = 0x20,    /* bit 5: skip if cannot get lock */
    VACOPT_SKIPTOAST             = 0x40,    /* bit 6: don't process the TOAST
                                             * table, if any */
    VACOPT_DISABLE_PAGE_SKIPPING = 0x80     /* bit 7: don't skip any pages */
} VacuumOption;

VacuumStmt
存储vacuum命令的option&Relation链表


/*
 * Statement node carrying the option bitmask and the list of target
 * relations for a VACUUM/ANALYZE command.
 */
typedef struct VacuumStmt
{
    NodeTag     type;           /* node tag */
    int         options;        /* OR of VacuumOption flags */
    List       *rels;           /* list of VacuumRelation, or NIL for all */
} VacuumStmt;

VacuumParams
vacuum命令参数


/*
 * Parameters customizing behavior of VACUUM and ANALYZE.
 *
 * Where a field comment says so, -1 requests the default value.
 */
typedef struct VacuumParams
{
    int         freeze_min_age; /* min freeze age, -1 to use default */
    int         freeze_table_age;   /* age at which to scan whole table */
    int         multixact_freeze_min_age;   /* min multixact freeze age, -1 to
                                             * use default */
    int         multixact_freeze_table_age; /* multixact age at which to scan
                                             * whole table */
    bool        is_wraparound;  /* force a for-wraparound vacuum */
    int         log_min_duration;   /* minimum execution threshold in ms at
                                     * which  verbose logs are activated, -1
                                     * to use default */
} VacuumParams;

VacuumRelation
VACUUM/ANALYZE命令的目标表信息


/*
 * Info about a single target table of VACUUM/ANALYZE.
 *
 * If the OID field is set, it always identifies the table to process.
 * Then the relation field can be NULL; if it isn't, it's used only to report
 * failure to open/lock the relation.
 */
typedef struct VacuumRelation
{
    NodeTag     type;           /* node tag */
    RangeVar   *relation;       /* table name to process, or NULL */
    Oid         oid;            /* table's OID; InvalidOid if not looked up */
    List       *va_cols;        /* list of column names, or NIL for all */
} VacuumRelation;

二、源码解读

vacuum是VACUUM/ANALYZE命令的内部处理入口.
逻辑比较简单:
1.配置vacuum处理的相关参数,如命令类型等
2.执行相关检查
3.构造vacuum处理上下文
4.构造vacuum需处理的relation链表
5.循环遍历relation链表
5.1 获取relation
5.2 执行vacuum_rel
6.收尾工作


/*
 * Internal entry point for VACUUM and ANALYZE commands.
 *
 * Outline: validate the calling context and the option combination, create
 * the cross-transaction memory context, build the list of relations to
 * process, decide whether each relation is processed in its own
 * transaction, run vacuum_rel and/or analyze_rel for every relation, then
 * restore the transaction state and release the working storage.
 *
 * options is a bitmask of VacuumOption flags, indicating what to do.
 *
 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
 * we process all relevant tables in the database.  For each VacuumRelation,
 * if a valid OID is supplied, the table with that OID is what to process;
 * otherwise, the VacuumRelation's RangeVar indicates what to process.
 *
 * params contains a set of parameters that can be used to customize the
 * behavior.
 *
 * bstrategy is normally given as NULL, but in autovacuum it can be passed
 * in to use the same buffer strategy object across multiple vacuum() calls.
 *
 * isTopLevel should be passed down from ProcessUtility.
 *
 * It is the caller's responsibility that all parameters are allocated in a
 * memory context that will not disappear at transaction commit.
 */
void
vacuum(int options, List *relations, VacuumParams *params,
       BufferAccessStrategy bstrategy, bool isTopLevel)
{
    static bool in_vacuum = false;  /* true while a vacuum() call is running */
    const char *stmttype;           /* "VACUUM" or "ANALYZE", for messages */
    volatile bool in_outer_xact,
                use_own_xacts;
    Assert(params != NULL);
    stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
    /*
     * We cannot run VACUUM inside a user transaction block; if we were inside
     * a transaction, then our commit- and start-transaction-command calls
     * would not have the intended effect!  There are numerous other subtle
     * dependencies on this, too.
     *
     * ANALYZE (without VACUUM) can run either way.
     */
    if (options & VACOPT_VACUUM)
    {
        PreventInTransactionBlock(isTopLevel, stmttype);
        in_outer_xact = false;
    }
    else
        in_outer_xact = IsInTransactionBlock(isTopLevel);
    /*
     * Due to static variables vac_context, anl_context and vac_strategy,
     * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
     * calls a hostile index expression that itself calls ANALYZE.
     */
    if (in_vacuum)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("%s cannot be executed from VACUUM or ANALYZE",
                        stmttype)));
    /*
     * Sanity check DISABLE_PAGE_SKIPPING option: it is rejected when combined
     * with FULL.
     */
    if ((options & VACOPT_FULL) != 0 &&
        (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
    /*
     * Send info about dead objects to the statistics collector, unless we are
     * in autovacuum --- autovacuum.c does this for itself.
     */
    if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
        pgstat_vacuum_stat();
    /*
     * Create special memory context for cross-transaction storage.
     *
     * Since it is a child of PortalContext, it will go away eventually even
     * if we suffer an error; there's no need for special abort cleanup logic.
     */
    vac_context = AllocSetContextCreate(PortalContext,
                                        "Vacuum",
                                        ALLOCSET_DEFAULT_SIZES);
    /*
     * If caller didn't give us a buffer strategy object, make one in the
     * cross-transaction memory context.
     */
    if (bstrategy == NULL)
    {
        MemoryContext old_context = MemoryContextSwitchTo(vac_context);
        bstrategy = GetAccessStrategy(BAS_VACUUM);
        MemoryContextSwitchTo(old_context);
    }
    vac_strategy = bstrategy;
    /*
     * Build list of relation(s) to process, putting any new data in
     * vac_context for safekeeping.
     */
    if (relations != NIL)
    {
        List       *newrels = NIL;
        ListCell   *lc;
        foreach(lc, relations)
        {
            VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
            List       *sublist;
            MemoryContext old_context;
            sublist = expand_vacuum_rel(vrel, options);
            /* only the list concatenation runs with vac_context current */
            old_context = MemoryContextSwitchTo(vac_context);
            newrels = list_concat(newrels, sublist);
            MemoryContextSwitchTo(old_context);
        }
        relations = newrels;
    }
    else
        relations = get_all_vacuum_rels(options);
    /*
     * Decide whether we need to start/commit our own transactions.
     *
     * For VACUUM (with or without ANALYZE): always do so, so that we can
     * release locks as soon as possible.  (We could possibly use the outer
     * transaction for a one-table VACUUM, but handling TOAST tables would be
     * problematic.)
     *
     * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
     * start/commit our own transactions.  Also, there's no need to do so if
     * only processing one relation.  For multiple relations when not within a
     * transaction block, and also in an autovacuum worker, use own
     * transactions so we can release locks sooner.
     */
    if (options & VACOPT_VACUUM)
        use_own_xacts = true;
    else
    {
        Assert(options & VACOPT_ANALYZE);
        if (IsAutoVacuumWorkerProcess())
            use_own_xacts = true;
        else if (in_outer_xact)
            use_own_xacts = false;
        else if (list_length(relations) > 1)
            use_own_xacts = true;
        else
            use_own_xacts = false;
    }
    /*
     * vacuum_rel expects to be entered with no transaction active; it will
     * start and commit its own transaction.  But we are called by an SQL
     * command, and so we are executing inside a transaction already. We
     * commit the transaction started in PostgresMain() here, and start
     * another one before exiting to match the commit waiting for us back in
     * PostgresMain().
     */
    if (use_own_xacts)
    {
        Assert(!in_outer_xact);
        /* ActiveSnapshot is not set by autovacuum */
        if (ActiveSnapshotSet())
            PopActiveSnapshot();
        /* matches the StartTransaction in PostgresMain() */
        CommitTransactionCommand();
    }
    /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
    PG_TRY();
    {
        ListCell   *cur;
        in_vacuum = true;
        VacuumCostActive = (VacuumCostDelay > 0);
        VacuumCostBalance = 0;
        VacuumPageHit = 0;
        VacuumPageMiss = 0;
        VacuumPageDirty = 0;
        /*
         * Loop to process each selected relation.
         */
        foreach(cur, relations)
        {
            VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
            if (options & VACOPT_VACUUM)
            {
                /*
                 * Vacuum this relation; when vacuum_rel returns false, move
                 * on to the next relation without analyzing this one.
                 */
                if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
                    continue;
            }
            if (options & VACOPT_ANALYZE)
            {
                /*
                 * If using separate xacts, start one for analyze. Otherwise,
                 * we can use the outer transaction.
                 */
                if (use_own_xacts)
                {
                    StartTransactionCommand();
                    /* functions in indexes may want a snapshot set */
                    PushActiveSnapshot(GetTransactionSnapshot());
                }
                /* Analyze this relation (restricted to va_cols, if given) */
                analyze_rel(vrel->oid, vrel->relation, options, params,
                            vrel->va_cols, in_outer_xact, vac_strategy);
                if (use_own_xacts)
                {
                    /* undo the snapshot push and commit the analyze xact */
                    PopActiveSnapshot();
                    CommitTransactionCommand();
                }
            }
        }
    }
    PG_CATCH();
    {
        /* reset the static/global state before propagating the error */
        in_vacuum = false;
        VacuumCostActive = false;
        PG_RE_THROW();
    }
    PG_END_TRY();
    /* normal exit: same state reset as in the error path */
    in_vacuum = false;
    VacuumCostActive = false;
    /*
     * Finish up processing.
     */
    if (use_own_xacts)
    {
        /* here, we are not in a transaction */
        /*
         * This matches the CommitTransaction waiting for us in
         * PostgresMain().
         */
        StartTransactionCommand();
    }
    if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
    {
        /*
         * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
         * (autovacuum.c does this for itself.)
         */
        vac_update_datfrozenxid();
    }
    /*
     * Clean up working storage --- note we must do this after
     * StartTransactionCommand, else we might be trying to delete the active
     * context!
     */
    MemoryContextDelete(vac_context);
    vac_context = NULL;
}

三、跟踪分析

测试脚本


17:19:28 (xdb@[local]:5432)testdb=# vacuum t1;

启动gdb,设置断点


(gdb) b vacuum
Breakpoint 1 at 0x6b9b8c: file vacuum.c, line 175.
(gdb) c
Continuing.
Breakpoint 1, vacuum (options=1, relations=0x2294988, params=0x7fff403d8880, bstrategy=0x0, isTopLevel=true) at vacuum.c:175
175     Assert(params != NULL);
(gdb)

输入参数
options=1 —> VACOPT_VACUUM
relations=0x2294988,relation链表,里面只有一个item,即t1
params=0x7fff403d8880,默认参数
bstrategy=NULL,
isTopLevel=true,表示该命令是顶层命令(该标记由ProcessUtility向下传递)


(gdb) p *params
$2 = {freeze_min_age = -1, freeze_table_age = -1, multixact_freeze_min_age = -1, multixact_freeze_table_age = -1, 
  is_wraparound = false, log_min_duration = -1}
(gdb)

变量赋值并执行相关判断


(gdb) n
177     stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
(gdb) 
187     if (options & VACOPT_VACUUM)
(gdb) 
189         PreventInTransactionBlock(isTopLevel, stmttype);
(gdb) 
190         in_outer_xact = false;
(gdb) 
200     if (in_vacuum)
(gdb) 
209     if ((options & VACOPT_FULL) != 0 &&
(gdb)

统计信息


219     if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
(gdb) 
220         pgstat_vacuum_stat();
(gdb)

创建并设置内存上下文


(gdb) n
228     vac_context = AllocSetContextCreate(PortalContext,
(gdb) 
236     if (bstrategy == NULL)
(gdb) 
238         MemoryContext old_context = MemoryContextSwitchTo(vac_context);
(gdb) 
240         bstrategy = GetAccessStrategy(BAS_VACUUM);
(gdb) 
241         MemoryContextSwitchTo(old_context);
(gdb) 
243     vac_strategy = bstrategy;
(gdb) 
249     if (relations != NIL)
(gdb)

构造VacuumRelation链表


(gdb) 
251         List       *newrels = NIL;
(gdb) 
254         foreach(lc, relations)
(gdb) 
256             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
(gdb) 
260             sublist = expand_vacuum_rel(vrel);
(gdb) p *vrel
$3 = {type = T_VacuumRelation, relation = 0x22948d0, oid = 0, va_cols = 0x0}
(gdb) p *vrel->relation
$4 = {type = T_RangeVar, catalogname = 0x0, schemaname = 0x0, relname = 0x22948b0 "t1", inh = true, 
  relpersistence = 112 'p', alias = 0x0, location = 7}
(gdb) 
(gdb) n
261             old_context = MemoryContextSwitchTo(vac_context);
(gdb) 
262             newrels = list_concat(newrels, sublist);
(gdb) 
263             MemoryContextSwitchTo(old_context);
(gdb) 
254         foreach(lc, relations)
(gdb) 
265         relations = newrels;
(gdb)

使用自主事务


284     if (options & VACOPT_VACUUM)
(gdb) 
285         use_own_xacts = true;
(gdb) 
307     if (use_own_xacts)
(gdb) 
307     if (use_own_xacts)
(gdb) 
309         Assert(!in_outer_xact);
(gdb) 
312         if (ActiveSnapshotSet())
(gdb) 
313             PopActiveSnapshot();
(gdb) 
316         CommitTransactionCommand();
(gdb) 
320     PG_TRY();
(gdb)

开始执行,设置vacuum成本计数on/off,并set/clear in_vacuum参数


(gdb) 
324         in_vacuum = true;
(gdb) 
325         VacuumCostActive = (VacuumCostDelay > 0);
(gdb) 
326         VacuumCostBalance = 0;
(gdb) 
327         VacuumPageHit = 0;
(gdb) 
328         VacuumPageMiss = 0;
(gdb) 
329         VacuumPageDirty = 0;
(gdb)

循环relation,调用vacuum_rel


334         foreach(cur, relations)
(gdb) 
336             VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
(gdb) 
338             if (options & VACOPT_VACUUM)
(gdb) 
340                 if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
(gdb) 
344             if (options & VACOPT_ANALYZE)
(gdb) 
334         foreach(cur, relations)
(gdb) 
374     PG_END_TRY();
(gdb)

执行收尾工作


(gdb) 
376     in_vacuum = false;
(gdb) 
377     VacuumCostActive = false;
(gdb) 
382     if (use_own_xacts)
(gdb) 
390         StartTransactionCommand();
(gdb) 
393     if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
(gdb) 
399         vac_update_datfrozenxid();
(gdb) 
407     MemoryContextDelete(vac_context);
(gdb) 
408     vac_context = NULL;
(gdb)

完成调用


409 }
(gdb) 
ExecVacuum (vacstmt=0x22949c0, isTopLevel=true) at vacuum.c:142
142 }
(gdb)

DONE!

四、参考资料

PG Source Code

来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/6906/viewspace-2564392/,如需转载,请注明出处,否则将追究法律责任。

请登录后发表评论 登录
全部评论
长期从事政务、金融等行业产品研发和架构设计工作,对Oracle、PostgreSQL以及大数据等相关技术有深入研究。现就职于广州云图数据技术有限公司,系统架构师。

注册时间:2007-12-28

  • 博文量
    1169
  • 访问量
    3634699