Postgresql数据库为了定时清理因为MVCC引入的垃圾数据,实现了自动清理的机制。[基于pg12]
- autovacuum launcher
- autovacuum worker
其中autovaucuum launcher主要负责调度worker,
autovacuum worker进程进行具体的自动清理工作。
其中 visibility map[vm:PG8.4版本后增加]起到重要作用,通过为每个数据块设置标志位来标记哪些page是没有dead tuple.
好处:
- vacuum 进行扫描时,可以跳过page。
- index-only sacn,可以减少tuple的可见性判断, 减少IO操作. 提高性能。
autovacuum launcher
…… 待更新…
autovacuum worker
src/backend/postmaster/autovacuum.c:AutoVacWorkerMain()
;
函数工作流程如下:
- 注册信号处理函数
BaseInit()
- 更新GUC参数配置
zero_damaged_pages
设置为false
statement_timeout
设置为0
lock_timeout
设置为0
idle_in_transaction_session_timeout
设置为0
default_transaction_isolation
设置为read committed
,尽可能的减少死锁与堵塞其他事务的可能性。synchronous_commit
设置为local
, 允许我们不接受备库的影响,能够进行正常的清理任务。
- 获取共享内存AutoVacuumShmem中av_startingWorker,并更新 dbid 和 MyWorkerInfo->wi_proc;
- 重新唤醒autovacuum launch;
- 获取 提交的最新xid 和 MultiXactId
执行do_autovacuum清理函数: => 从pg_class遍历所有表,进行清理工作
- 初始化上下文
- 更新统计信息
- 获取effective_multixact_freeze_max_age
- 设置
default_freeze_min_age, default_freeze_table_age, default_multixact_freeze_min_age, default_multixact_freeze_table_age
开始遍历pg_class表记录,进行相应的逻辑处理
孤儿临时表, continue。
1
2
3if (classForm->relpersistence == RELPERSISTENCE_TEMP)
if (!isTempNamespaceInUse(classForm->relnamespace))
orphan_oids = lappend_oid(orphan_oids, relid);检查表 || [toase table] 是否需要vacuum & analyzed & multixact wraparound 过程如下:
relation_needs_vacanalyze
do_vacuum: 该表统计信息中标记为dead的tuple数量大于
vac_base_thresh + vac_scale_factor * reltuples
时。具体细节如下:1
2
3
4
5
6
7
8
9
10
11
12
13vactuples = tabentry->n_dead_tuples; //pgstat统计信息
vac_base_thresh = (relopts && relopts->vacuum_threshold >= 0)
? relopts->vacuum_threshold
: autovacuum_vac_thresh;
vac_scale_factor = (relopts && relopts->vacuum_scale_factor >= 0)
? relopts->vacuum_scale_factor
: autovacuum_vac_scale;
vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
*dovacuum = force_vacuum || (vactuples > vacthresh);do_analyze: 该表统计信息中上次analyze之后改变的元组数大于
anl_base_thresh + anl_scale_factor * reltuples
时. 具体细节如下:1
2
3
4
5
6
7
8
9
10
11
12anltuples = tabentry->changes_since_analyze; //pgstat统计信息
anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
? relopts->analyze_threshold
: autovacuum_anl_thresh;
anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
? relopts->analyze_scale_factor
: autovacuum_anl_scale;
anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
*doanalyze = (anltuples > anlthresh);do_multixact_wraparound:
如果wraparound置为true,则将会强制执行do_vacuum;;1
2
3
4
5
6
7
8
9
10
11
12
13
14
15xidForceLimit = recentXid - freeze_max_age;
if (xidForceLimit < FirstNormalTransactionId)
xidForceLimit -= FirstNormalTransactionId;
force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit));
if (!force_vacuum)
{
multiForceLimit = recentMulti - multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
force_vacuum = MultiXactIdIsValid(classForm->relminmxid) &&
MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit);
}
*wraparound = force_vacuum;
获取需要进行vacuum or analyze的对象,存至table_oids;并进行遍历
- 加载最新的GUC参数
- 并检查当前运行的worker个数, 检查是否有其他worker进程对该relation进行清理, 如果有,则跳过,skipit 置为 true.
- 再次检查该relid是否需要清理, 并生成用于追踪的autovac_table结构。
table_recheck_autovac
- 对所有的vacuum worker进行资源平衡
autovac_balance_cost
- 调用函数autovacuum_do_vac_analyze. 进行vacuum or analyze
vacuum
入口点.
- 释放缓存,更新之前的MyWorkerInfo结构
- 更新该database的datfrozenxid:
vac_update_datfrozenxid
结构体
autovac_table autovacuum利用共享内存AutoVacuumShmem获取并行worker运行情况。传递的autovac_tab结构体参数如下:
1
2
3
4
5
6
7
8
9
10
11
12
13/* struct to keep track of tables to vacuum and/or analyze, after rechecking */
typedef struct autovac_table
{
Oid at_relid;
VacuumParams at_params;
double at_vacuum_cost_delay;
int at_vacuum_cost_limit;
bool at_dobalance;
bool at_sharedrel;
char *at_relname;
char *at_nspname;
char *at_datname;
} autovac_table;其中的VacuumParams
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24/*
* Parameters customizing behavior of VACUUM and ANALYZE.
*
* Note that at least one of VACOPT_VACUUM and VACOPT_ANALYZE must be set
* in options.
*/
typedef struct VacuumParams
{
int options; /* bitmask of VacuumOption */
int freeze_min_age; /* min freeze age, -1 to use default */
int freeze_table_age; /* age at which to scan whole table */
int multixact_freeze_min_age; /* min multixact freeze age, -1 to
* use default */
int multixact_freeze_table_age; /* multixact age at which to scan
* whole table */
bool is_wraparound; /* force a for-wraparound vacuum */
int log_min_duration; /* minimum execution threshold in ms at
* which verbose logs are activated, -1
* to use default */
VacOptTernaryValue index_cleanup; /* Do index vacuum and cleanup,
* default value depends on reloptions */
VacOptTernaryValue truncate; /* Truncate empty pages at the end,
* default value depends on reloptions */
} VacuumParams;VacuumParams中options参数 定义了vacuum的操作类型.
1
2
3
4
5
6
7
8
9
10
11typedef enum VacuumOption
{
VACOPT_VACUUM = 1 << 0, /* do VACUUM */
VACOPT_ANALYZE = 1 << 1, /* do ANALYZE */
VACOPT_VERBOSE = 1 << 2, /* print progress info */
VACOPT_FREEZE = 1 << 3, /* FREEZE option */
VACOPT_FULL = 1 << 4, /* FULL (non-concurrent) vacuum */
VACOPT_SKIP_LOCKED = 1 << 5, /* skip if cannot get lock */
VACOPT_SKIPTOAST = 1 << 6, /* don't process the TOAST table, if any */
VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7 /* don't skip any pages */
} VacuumOption;
vacuum
针对单个relation 其工作流程如下:
src/backend/commands/vacuum.c:vacuum()
1 | void |
传递tab0->at_params
校验vacuum类别;
- 检查param参数
更新pgstat统计信息 [在AutoVacuumWorkerProcess下,则跳过.]
1
2if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
pgstat_vacuum_stat();设置内存上下文
进行relations链表循环
VACOPT_VACUUM: 调用vacuum_rel函数
1
2
3if (params->options & VACOPT_VACUUM) //vacuum flag -
if (!vacuum_rel(vrel->oid, vrel->relation, params))
continue;从此处我们看得出,默认的autovacuum 执行的是lazy vacuum: 只是将dead元组,标记为可用; 并不会进行碎片整理,释放交换给操作系统。
函数的主要流程如下:
vacuum_rel—>table_relation_vacuum->heap_vacuum_rel->lazy_scan_heap -> {lazy_vacuum_index->index_bulk_delete, lazy_vacuum_heap [循环page/更新fsm]->lazy_vacuum_page};
- 确认表的可读性与操作性.
- 检查用户对表的操作权限.
- 检查relation->relkind 是否可操作的类型.
- 忽略其余的临时表.
- 将relation所有页面,标记dead元组为可用
ItemIdSetUnused
, 并设置脏页,刷新到磁盘; - 更新visibility map.
- 更新pgstat统计信息.
- VACOPT_ANALYZE: 调用analyze_rel函数
……
释放上下文, 并更新vac_update_datfrozenxid();
1
2if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
vac_update_datfrozenxid();
引申问题
lazy vacuum & vacuum full区别
LAZY vacuum:只是找到dead的元组,把它们的状态标记为可用状态。但是它不进行空间合并。
1
2VACUUM t_user_info;
VACUUM ANALYZE t_user_info;FULL vacuum:除了 LAZY vacuum,还进行空间合并,临时文件
pg_temp_%u
,因此它需要锁表。1
2VACUUM FULL VERBOSE;
VACUUM FULL t_user_info;
TupleDesc & Relation 本质数据结构的区别以及作用
1
2
3
4
5
6
7
8
9
10
11typedef struct TupleDescData
{
int natts; /* number of attributes in the tuple */
Oid tdtypeid; /* composite type ID for tuple type */
int32 tdtypmod; /* typmod for tuple type */
int tdrefcount; /* reference count, or -1 if not counting */
TupleConstr *constr; /* constraints, or NULL if none */
/* attrs[N] is the description of Attribute Number N+1 */
FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
} TupleDescData;
typedef struct TupleDescData *TupleDesc;此结构体是在后端间描述元组的结构.
表结构: pg_attribute
1
2
3
4
5
6
7
8
9
10
11
12
13
14tupdesc = CreateTemplateTupleDesc(6); //初始化构建;
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
BOOLOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
BOOLOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
BOOLOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
TIMESTAMPTZOID, -1, 0);
.....生成记录:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22HeapTuple tuple;
Relation rel;
TupleDesc access_audit_desc;
Datum new_record[Natts_pg_accessaudit_count];
bool nulls[Natts_pg_accessaudit_count];
rel = heap_open(AccessAuditID, RowExclusiveLock);
access_audit_desc = RelationGetDescr(rel);
MemSet(new_record, 1, sizeof(new_record));
MemSet(nulls, false, sizeof(nulls));
new_record[Anum_pg_accessaudit_tid - 1] = ObjectIdGetDatum(toid); //插入新的记录;
new_record[Anum_pg_accessaudit_tname - 1] = get_rel_name(toid);
new_record[Anum_pg_accessaudit_selectcount - 1] = UInt32GetDatum(1000);
new_record[Anum_pg_accessaudit_insertcount - 1] = UInt32GetDatum(1000);
new_record[Anum_pg_accessaudit_updatecount - 1] = UInt32GetDatum(1000);
new_record[Anum_pg_accessaudit_deletecount - 1] = UInt32GetDatum(1000);
tuple = heap_form_tuple(access_audit_desc, new_record, nulls);
CatalogTupleInsert(rel, tuple);
heap_freetuple(tuple);
heap_close(rel, NoLock);…. 待更新…
配置参数
1 | autovacuum_max_workers |
.... 待更新...