[linux內核分析———SLAB原理及實現 ](https://blog.csdn.net/chenxiancool/article/details/7638804) Slab原理及實現 1. 整體關係圖 ! 註:SLAB,SLOB,SLUB都是內核提供的分配器,其前端介面都是一致的,其中SLAB ...
Slab原理及實現
1. 整體關係圖
!
註:SLAB,SLOB,SLUB都是內核提供的分配器,其前端介面都是一致的,其中SLAB是通用的分配器,SLOB針對微小的嵌入式系統,其演算法較為簡單(最先適配演算法),SLUB是面向配備大量物理記憶體的大規模並行系統,通過頁描述符中未使用的欄位來管理頁組,降低SLUB本身數據結構的記憶體開銷。
2. 相關數據結構
2.1 緩存kmem_cache (/mm/slab.c)
struct kmem_cache {
struct array_cache *array[NR_CPUS];
unsigned int batchcount;//從本地高速緩存交換的對象的數量
unsigned int limit;//本地高速緩存中空閑對象的數量
unsigned int shared;//是否存在共用CPU高速緩存
unsigned int buffer_size;//對象長度+填充位元組
u32 reciprocal_buffer_size;//倒數,加快計算
unsigned int flags;/*高速緩存永久性的標誌,當前只CFLGS_OFF_SLAB*/
unsigned int num;/*封裝在一個單獨的slab中的對象數目*/
unsigned int gfporder;/*每個slab包含的頁框數取2為底的對數*/
gfp_t gfpflags;/* e.g. GFP_DMA分配頁框是傳遞給伙伴系統的標誌*/
size_t colour; /* cache colouring range緩存的顏色個數free/aln*/
unsigned int colour_off;
/*slab的基本對齊偏移,為aln的整數倍,用來計算left_over*/
struct kmem_cache *slabp_cache;
//slab描述符放在外部時使用,其指向的高速緩存來存儲描述符
unsigned int slab_size;//單個slab頭的大小,包括SLAB和對象描述符
unsigned int dflags; /*描述高速緩存動態屬性,目前沒用*/
/*構造函數*/
void(*ctor)(struct kmem_cache *, void *);
const char *name;
struct list_head next;//高速緩存描述符雙向鏈表指針
/*統計量*/
#if STATS
unsigned long num_active;
unsigned long num_allocations;
unsigned long high_mark;
unsigned long grown;
unsigned long reaped;
unsigned long errors;
unsigned long max_freeable;
unsignedlong node_allocs;
unsigned long node_frees;
unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
#endif
#if DEBUG
into bj_offset;//對象間的偏移
int obj_size;//對象本身的大小,
#endif
//存放的是所有節點對應的相關數據。每個節點擁有各自的數據;
struc tkmem_list3 *nodelists[MAX_NUMNODES];/
}
2.2 array_cache本地高速緩存,每個CPU對應一個該結構
/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 * One array_cache exists per CPU for every kmem_cache (the "local cache").
 */
struct array_cache {
unsigned int avail;//number of objects currently available here
unsigned int limit;//max objects it may hold; same as kmem_cache.limit
unsigned int batchcount;//same meaning as kmem_cache.batchcount
unsigned int touched;//set when the cache was used since the last shrink
spinlock_t lock;
void *entry[]; /*flexible array member; followed by the array of free-object pointers*/
};
2.3 kmem_list3管理slab鏈表的數據結構
/*
 * The slab lists for all objects.
 * One kmem_list3 exists per NUMA node for each cache.
 */
struct kmem_list3 {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long free_objects;//number of objects on the partial and free lists
unsigned int free_limit;//upper bound on unused objects kept across all slabs
unsigned int colour_next; /* colour to use for the next slab */
spinlock_t list_lock;
struct array_cache *shared; /* shared per node */
struct array_cache **alien; /* on other nodes */
unsigned long next_reap; /* interval between two cache shrinks; fewer shrinks, better performance */
int free_touched; /* 0 = shrunk, 1 = an object was taken since */
};
2.4 slab對象
/* Management header of one slab. */
struct slab {
struct list_head list;//the list (full/partial/free) this slab is on
unsigned long colouroff;//offset of the first object in the slab
void *s_mem; /* including colour offset: address of the first object */
unsigned int inuse; /* num of objs active in slab (objects handed out) */
kmem_bufctl_t free;//index of the next free object
unsigned short nodeid;//index into kmem_cache.nodelists for this slab's node
};
3. 相關函數
3.1 kmem_cache_create (mm/slab.c)
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within a int, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size,size_t align,unsigned long flags,
void (*ctor)(struct kmem_cache *, void *))
{
size_t left_over, slab_size, ralign;
struct kmem_cache *cachep = NULL, *pc;
/* Parameter sanity checks: valid name, object no smaller than a machine
 * word, no larger than the maximum allocatable size; must not run in
 * interrupt context because this function may sleep. */
if (!name || in_interrupt() || (size <BYTES_PER_WORD) ||
size > KMALLOC_MAX_SIZE) {
printk(KERN_ERR "%s: Early error in slab%s\n", __FUNCTION__,
name);
BUG();
}
/* take the global cache_chain mutex */
mutex_lock(&cache_chain_mutex);
....
/*
 * Round the size up to a multiple of the processor word size.
 */
if (size & (BYTES_PER_WORD - 1)) {
size += (BYTES_PER_WORD - 1);
size &= ~(BYTES_PER_WORD - 1);
}
/* compute the alignment value */
//if requested, align objects to the hardware cache line
if (flags & SLAB_HWCACHE_ALIGN) {
ralign = cache_line_size();//hardware cache line size
//while the object is small enough, halve the alignment to fit more objects per line
while (size <= ralign )
ralign /= 2;
} else {//otherwise align to the processor word size
ralign = BYTES_PER_WORD;
}
/* architecture-enforced minimum alignment */
if (ralign < ARCH_SLAB_MINALIGN) {
ralign = ARCH_SLAB_MINALIGN;
}
/* caller-enforced alignment */
if (ralign < align) {
ralign = align;
}
/* final alignment value */
align = ralign;
/* allocate a new kmem_cache instance from the cache_cache cache */
cachep = kmem_cache_zalloc(&cache_cache,GFP_KERNEL);
//fill in the cachep fields
cachep->obj_size = size;//store the word-rounded object size
//decide where the slab head lives:
//if the object is at least 1/8 of a page, keep the head off-slab
if ((size >= (PAGE_SIZE >> 3)) &&!slab_early_init)
flags |= CFLGS_OFF_SLAB;//store the slab head outside the slab
size = ALIGN(size, align);//round the size to the alignment
//compute the cache geometry:
//calculate_slab_order iterates to find the ideal slab length; size is the object length
left_over = calculate_slab_order(cachep, size,align, flags);
if (!cachep->num) {//num is the number of objects per slab
printk(KERN_ERR
"kmem_cache_create: couldn't createcache %s.\n", name);
kmem_cache_free(&cache_cache, cachep);
cachep = NULL;
goto oops;
}
//re-check where the slab head should live:
//slab head size = object count x object-descriptor size + slab descriptor
slab_size = ALIGN(cachep->num *sizeof(kmem_bufctl_t)
+ sizeof(struct slab), align);
//if the leftover space can hold the head, move it back on-slab
if (flags & CFLGS_OFF_SLAB && left_over>= slab_size) {
flags &= ~CFLGS_OFF_SLAB;
left_over -= slab_size;
}
//if the flag is still set, compute the off-slab head size
if (flags & CFLGS_OFF_SLAB) {
/* no alignment needed here */
slab_size =
cachep->num * sizeof(kmem_bufctl_t) +sizeof(struct slab);
}
//colouring
cachep->colour_off =cache_line_size();//base colour offset = one cache line
/* Offset must be a multiple of the alignment. */
if (cachep->colour_off< align)
cachep->colour_off = align;
cachep->colour = left_over /cachep->colour_off;//number of available colours
cachep->slab_size = slab_size;
cachep->flags = flags;
cachep->gfpflags = 0; //flags for page-frame allocation
if (CONFIG_ZONE_DMA_FLAG && (flags &SLAB_CACHE_DMA))
cachep->gfpflags |= GFP_DMA;
cachep->buffer_size = size;
cachep->reciprocal_buffer_size =reciprocal_value(size);
//for off-slab heads, pick a suitably sized general cache for slabp_cache
if (flags & CFLGS_OFF_SLAB) {
cachep->slabp_cache= kmem_find_general_cachep(slab_size, 0u);
BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
}
cachep->ctor = ctor;
cachep->name = name;
//set up the per-CPU caches
if (setup_cpu_cache(cachep)){
__kmem_cache_destroy(cachep);
cachep = NULL;
goto oops;
}
/* add to the global cache list */
list_add(&cachep->next, &cache_chain);
/* unlock */
mutex_unlock(&cache_chain_mutex);
return cachep;
}
3.2 對象分配函數kmem_cache_alloc(kmem_cache_t* cachep, gfp_t flags)
/* Core allocation path: hand out one object from the per-CPU cache,
 * refilling it from the slab lists when it runs empty. */
static inline void *____cache_alloc(struct kmem_cache *cachep,gfp_t flags)
{
void *objp;
struct array_cache *ac;
check_irq_off();
ac = cpu_cache_get(cachep);//this CPU's local cache for the kmem_cache
if (likely(ac->avail)) {//fast path: the local cache still holds objects
STATS_INC_ALLOCHIT(cachep);
ac->touched = 1;
objp = ac->entry[--ac->avail];
} else {//slow path: the local CPU cache must be refilled first
STATS_INC_ALLOCMISS(cachep);
objp = cache_alloc_refill(cachep,flags);
}
return objp;
}
//Refill the per-CPU cache of `cachep` and return one object from it.
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;
	int node;

	node = numa_node_id();	/* elided in the original excerpt; node must be set before use */
	ac = cpu_cache_get(cachep);	/* this CPU's local cache */
retry:
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/* cache not used recently: only partially refill it */
		batchcount = BATCHREFILL_LIMIT;	/* 16 */
	}
	l3 = cachep->nodelists[node];	/* this node's slab lists */
	...	/* elided: BUG_ON checks and spin_lock(&l3->list_lock) */
	/* first try to take objects from the shared per-node cache */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
		goto alloc_done;
	while (batchcount > 0) {	/* otherwise pull objects slab by slab */
		struct list_head *entry;
		struct slab *slabp;
		/* Get slab alloc is to come from. */
		entry = l3->slabs_partial.next;	/* prefer partially used slabs */
		if (entry == &l3->slabs_partial) {	/* none partial: try the free list */
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)	/* none free either: grow the cache */
				cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
		}
		/* at this point a slab (partial or fully free) has been found */
		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);
		while (slabp->inuse < cachep->num && batchcount--) {
			/* move objects from the slab into the CPU cache */
			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, node);
		}
		check_slabp(cachep, slabp);
		/* put the slab back on the list that matches its new state */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)	/* no free objects left: full list */
			list_add(&slabp->list, &l3->slabs_full);
		else	/* otherwise it stays on the partial list */
			list_add(&slabp->list, &l3->slabs_partial);
	}
	...	/* elided: must_grow/alloc_done labels and spinlock release */
	ac->touched = 1;
	return ac->entry[--ac->avail];
}
//Take one object out of a slab, following the slab's free-list order.
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
int nodeid)
{
	/* Object sitting at the current head of the free list. */
	void *objp = index_to_obj(cachep, slabp, slabp->free);

	slabp->inuse++;	/* one more object handed out */
	/* slab_bufctl(slabp)[free] holds the index of the next free object;
	 * advance the free-list head to it. */
	slabp->free = slab_bufctl(slabp)[slabp->free];
	return objp;
}
3.4 cache_grow
//Grow a cache: allocate one new, fully free slab and put it on the free list.
static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *objp)
{
	struct slab *slabp;
	size_t offset;
	gfp_t local_flags;
	struct kmem_list3 *l3;
	...	/* elided: flag checks and local_flags initialisation */
	l3 = cachep->nodelists[nodeid];
	...	/* elided: list_lock acquisition around colour_next */
	/* pick this slab's colour and advance the per-node colour cursor */
	offset = l3->colour_next;
	l3->colour_next++;	/* wraps back to 0 once the maximum is reached */
	if (l3->colour_next >= cachep->colour)
		l3->colour_next = 0;
	offset *= cachep->colour_off;	/* byte offset implementing the colouring */
	/* obtain the physical pages from the buddy allocator */
	objp = kmem_getpages(cachep, local_flags, nodeid);
	...
	/* allocate the slab management head (placed on- or off-slab) */
	slabp = alloc_slabmgmt(cachep, objp, offset,
		local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
	slabp->nodeid = nodeid;	/* index into cachep->nodelists */
	/* for each physical page set lru.next = cache, lru.prev = slab */
	slab_map_pages(cachep, slabp, objp);
	/* run the constructor on every object of the new slab */
	cache_init_objs(cachep, slabp);
	/* add the new slab to the fully-free list */
	list_add_tail(&slabp->list, &(l3->slabs_free));
	STATS_INC_GROWN(cachep);
	l3->free_objects += cachep->num;	/* account the new free objects */
	...
	return 0;
}
3.5 釋放對象kmem_cache_free
//The real free path: return the object to the per-CPU cache.
static inline void __cache_free(struct kmem_cache *cachep, void*objp)
{
struct array_cache *ac = cpu_cache_get(cachep);
...
if (likely(ac->avail < ac->limit)){//room left in the CPU cache: just stash the pointer
ac->entry[ac->avail++] = objp;
return;
} else {//cache full: flush a batch of objects back to the slabs first
STATS_INC_FREEMISS(cachep);
cache_flusharray(cachep, ac);
ac->entry[ac->avail++] = objp;
}
}
//將部分CPU高速緩存FLUSH到SLAB中
static void cache_flusharray(struct kmem_cache *cachep, structarray_cache *ac)
{
int batchcount;
struct kmem_list3 *l3;
int node = numa_node_id();
batchcount = ac->batchcount;//指定數量
l3 = cachep->nodelists[node];
if (l3->shared) {//如果共用CPU緩存存在,則將共用緩存填滿,然後返回
struct array_cache *shared_array = l3->shared;
int max = shared_array->limit - shared_array->avail;
if (max) {//
if (batchcount > max)
batchcount = max;
//這裡只是拷貝,並沒有移除
memcpy(&(shared_array->entry[shared_array->avail]),
ac->entry, sizeof(void *) * batchcount);
shared_array->avail += batchcount;
goto free_done;
}
}
//否則需要釋放到SLAB中了
free_block(cachep,ac->entry, batchcount, node);
free_done:
//對CPU高速緩存進行移除操作
spin_unlock(&l3->list_lock);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]),sizeof(void *)*ac->avail);
}
//Return nr_objects objects to their slabs; objpp is a CPU-cache pointer array.
static void free_block(struct kmem_cache *cachep, void **objpp,int nr_objects, int node)
{
int i;
struct kmem_list3 *l3;
for (i = 0; i < nr_objects; i++) {//handle each object, head of the array first (LIFO)
void *objp = objpp[i];
struct slab *slabp;
slabp = virt_to_slab(objp);//find the object's slab descriptor
l3 = cachep->nodelists[node];
list_del(&slabp->list);//unlink the slab from whichever list it is on
check_spinlock_acquired_node(cachep, node);
check_slabp(cachep, slabp);
slab_put_obj(cachep, slabp, objp,node);//put objp back into the slab; inverse of slab_get_obj
STATS_DEC_ACTIVE(cachep);
l3->free_objects++;//one more free object on this node
check_slabp(cachep, slabp);
/* re-insert the slab on the proper list */
if (slabp->inuse == 0) {//the slab is now completely free
if (l3->free_objects > l3->free_limit)
{//too many free objects are cached: return the slab to the page allocator
l3->free_objects -= cachep->num;
slab_destroy(cachep, slabp);
} else {//otherwise keep it on the free list
list_add(&slabp->list, &l3->slabs_free);
}
} else {//still partially used
list_add_tail(&slabp->list, &l3->slabs_partial);
}
}
}
3.6 高速緩存的銷毀kmem_cache_destroy,此函數在模塊卸載時使用,釋放以前分配的空間
4. 通用緩存
即kmalloc和kfree使用的,放在malloc_sizes表中,從32到33554432位元組共21個成員。成員的結構如下:
/* Size description struct for general caches. */
struct cache_sizes {
size_t cs_size;//object size served by this general cache
struct kmem_cache *cs_cachep;//the cache for normal allocations
struct kmem_cache *cs_dmacachep;//the cache for GFP_DMA allocations
};
//the general caches; the size table comes from <linux/kmalloc_sizes.h>
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
CACHE(ULONG_MAX)
#undef CACHE
};
Kmalloc_sizes.h
/* Contents of kmalloc_sizes.h: the table of general-purpose cache sizes,
 * from 32 bytes up to 32 MB, gated by page size, L1 line size and
 * KMALLOC_MAX_SIZE. Each CACHE(x) expands per the macro defined at the
 * inclusion site (see malloc_sizes[] above). */
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif
4.1 kmalloc函數
//Allocation entry point backed by the general caches.
static inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size))
{//size known at compile time: the cache index can be chosen statically
int i = 0;
//find the smallest index i whose cache fits `size` (loop elided)
...
//pick the DMA or the normal cache depending on flags
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
flags);
#endif
return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
}//size not a compile-time constant
return __kmalloc(size, flags);
}
//大小不用指定的分配
static __always_inline void *__do_kmalloc(size_t size, gfp_tflags, void *caller)
{
struct kmem_cache *cachep;
cachep = __find_general_cachep(size, flags);//找一個合適大小的高速緩存
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
return __cache_alloc(cachep, flags, caller);//分配函數
}
4.2 釋放函數kfree
//Release function matching kmalloc.
void kfree(const void *objp)
{
struct kmem_cache *c;
unsigned long flags;
...
/* NOTE(review): local_irq_save(flags) was elided above; the restore below pairs with it */
c =virt_to_cache(objp);//find the cache the object came from
debug_check_no_locks_freed(objp, obj_size(c));
__cache_free(c, (void*)objp);//this call performs the actual free
local_irq_restore(flags);
}