Linux内核链表结构-CFANZ编程社区

Linux内核链表结构

常见的单向链表和双向链表的指针指向的是链表节点的起始位置，这种实现方式在Linux内核的实际使用中有一些局限性，如数据区必须是固定的，而实际需求是多种多样的。这种方法无法构建一套通用的链表，因为每个不同的数据区需要一套链表。为此，Linux内核把所有链表操作方法的共同部分提取出来，把不同的部分留给代码编写者自己去处理。

Linux内核实现了一套纯链表的封装，链表节点数据结构只有指针区而没有数据区，另外还封装了各种操作函数，如创建节点函数、插入节点函数、删除节点函数、遍历节点函数等。

链表结构

Linux内核链表使用struct list_head数据结构来描述，代码位于include/linux/types.h文件中。

/*
 * Bare link node of a doubly linked list: a forward pointer and a
 * backward pointer, with no payload of its own.
 */
struct list_head {
    struct list_head *next;
    struct list_head *prev;
};

struct list_head数据结构不包含链表节点的数据区，通常是嵌入其他数据结构中，如struct page数据结构中嵌入了一个lru链表节点，通常是把page数据结构挂入LRU链表。

struct page {
    ...
    struct list_head lru;
    ...
}

链表初始化

链表头的初始化有两种方法，一种是静态初始化，另一种是动态初始化。把next和prev指针都初始化并指向自己，这样便初始化了一个带头节点的空链表。

静态初始化

/*
 * Circular doubly linked list implementation.
 *
 * Some of the internal functions ("__xxx") are useful when
 * manipulating whole lists rather than single entries, as
 * sometimes we already know the next/prev entries and we can
 * generate better code by using them directly rather than
 * using the generic single-entry routines.
 */

/* Initializer for the list head @name: both links refer to @name itself. */
#define LIST_HEAD_INIT(name) { .next = &(name), .prev = &(name) }

/* Define a struct list_head variable @name set up by LIST_HEAD_INIT(). */
#define LIST_HEAD(name) \
    struct list_head name = LIST_HEAD_INIT(name)

动态初始化

/**
 * INIT_LIST_HEAD - Initialize a list_head structure
 * @list: list_head structure to be initialized.
 *
 * Initializes the list_head to point to itself.  If it is a list header,
 * the result is an empty list.
 *
 * Both links are stored through WRITE_ONCE() rather than by plain
 * assignment.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
    /* Point the forward link, then the backward link, at the node itself. */
    WRITE_ONCE(list->next, list);
    WRITE_ONCE(list->prev, list);
}

添加链表节点

有两种添加节点的函数：添加到链表头和添加到链表尾；添加到链表头即将新节点添加到head和head->next之间；添加到链表尾即将新节点添加到head->prev和head之间。

添加节点到链表头

/**
 * list_add - link a new entry in directly behind @head
 * @new: entry being inserted
 * @head: entry that @new will follow
 *
 * @new ends up between @head and the entry that used to follow @head,
 * so repeated calls yield last-in-first-out (stack) order.
 */
static inline void list_add(struct list_head *new, struct list_head *head)
{
    struct list_head *first = head->next;

    __list_add(new, head, first);
}

/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 *
 * Nothing is modified when __list_add_valid() rejects the arguments.
 */
static inline void __list_add(struct list_head *new,
                  struct list_head *prev,
                  struct list_head *next)
{
    if (!__list_add_valid(new, prev, next))
        return;

    /* Set next's back link and both of new's links first ... */
    next->prev = new;
    new->next = next;
    new->prev = prev;
    /* ... and store prev->next last, through WRITE_ONCE(). */
    WRITE_ONCE(prev->next, new);
}

添加节点到链表尾

/**
 * list_add_tail - link a new entry in directly in front of @head
 * @new: entry being inserted
 * @head: entry that @new will precede
 *
 * @new ends up between the entry that used to precede @head and @head
 * itself, so repeated calls yield first-in-first-out (queue) order.
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
    struct list_head *last = head->prev;

    __list_add(new, last, head);
}

删除链表节点

节点删除函数有两个，list_del()只做删除；list_del_init()删除后，将删除的节点初始化为空链表。

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty() on entry does not return true after this, the entry is
 * in an undefined state.
 *
 * After unlinking, both of @entry's pointers are overwritten with the
 * LIST_POISON1 / LIST_POISON2 marker values.
 */
static inline void list_del(struct list_head *entry)
{
    __list_del_entry(entry);
    /* Poison the stale links so they no longer refer to the old neighbours. */
    entry->next = LIST_POISON1;
    entry->prev = LIST_POISON2;
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 *
 * Unlike list_del(), @entry is not poisoned: INIT_LIST_HEAD() leaves both
 * of its links pointing at @entry itself.
 */
static inline void list_del_init(struct list_head *entry)
{
    __list_del_entry(entry);
    /* Make the removed node self-referencing again. */
    INIT_LIST_HEAD(entry);
}

/*
 * Unlink @entry from its neighbours, but only when
 * __list_del_entry_valid() accepts it; otherwise do nothing.
 * @entry's own next/prev fields are not written here.
 */
static inline void __list_del_entry(struct list_head *entry)
{
    if (__list_del_entry_valid(entry))
        __list_del(entry->prev, entry->next);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 *
 * Whatever sat between @prev and @next is bypassed; that entry itself
 * is not touched.
 */
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
    /* Back link by plain store, forward link through WRITE_ONCE(). */
    next->prev = prev;
    WRITE_ONCE(prev->next, next);
}

/*
 * Offset used when building the list poison pointer values.  An
 * architecture can provide CONFIG_ILLEGAL_POINTER_VALUE to shift them
 * into a well-recognized area (0xdead000000000000 is the example given)
 * that user-space exploits cannot map; without it the offset is zero.
 */
#ifndef CONFIG_ILLEGAL_POINTER_VALUE
# define POISON_POINTER_DELTA 0
#else
# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
#endif

/*
 * These are non-NULL pointers that will result in page faults
 * under normal circumstances, used to verify that nobody uses
 * non-initialized list entries.
 *
 * The offset is added in integer arithmetic and the sum is cast
 * afterwards, so the expressions do not rely on the GNU extension that
 * permits arithmetic on void pointers.  The resulting values are the same.
 */
#define LIST_POISON1  ((void *) (0x100 + POISON_POINTER_DELTA))
#define LIST_POISON2  ((void *) (0x122 + POISON_POINTER_DELTA))

替换链表节点

节点替换函数也有两个，list_replace()只做替换；list_replace_init()替换后，将old节点初始化为空链表。

/**
 * list_replace - replace old entry by new one
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * If @old was empty, it will be overwritten.
 *
 * @old's own next/prev fields are only read, never written, so they still
 * refer to its former neighbours afterwards.
 */
static inline void list_replace(struct list_head *old,
                struct list_head *new)
{
    /* Take over old's successor and make it point back at new. */
    new->next = old->next;
    new->next->prev = new;
    /* Take over old's predecessor and make it point forward at new. */
    new->prev = old->prev;
    new->prev->next = new;
}

/**
 * list_replace_init - replace old entry by new one and initialize the old one
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * If @old was empty, it will be overwritten.
 *
 * After the replacement, INIT_LIST_HEAD() leaves @old pointing at itself.
 */
static inline void list_replace_init(struct list_head *old,
                     struct list_head *new)
{
    list_replace(old, new);
    /* list_replace() leaves old's links stale; reset them. */
    INIT_LIST_HEAD(old);
}

交换链表节点

节点交换是依次使用删除、替换、添加操作完成的；先删除entry2，然后用entry2替换entry1，最后将entry1添加到entry2的位置。

/**
 * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position
 * @entry1: the location to place entry2
 * @entry2: the location to place entry1
 *
 * Implemented as: unlink @entry2, put @entry2 where @entry1 was, then
 * insert @entry1 after the entry that used to precede @entry2.
 */
static inline void list_swap(struct list_head *entry1,
                 struct list_head *entry2)
{
    /* Remember entry2's predecessor; entry1 is re-inserted after it. */
    struct list_head *pos = entry2->prev;

    list_del(entry2);
    list_replace(entry1, entry2);
    /*
     * If entry1 itself was that predecessor, entry2 now occupies its
     * slot, so entry1 has to go in after entry2 instead.
     */
    if (pos == entry1)
        pos = entry2;
    list_add(entry1, pos);
}

移动链表节点

移动链表节点是指从现有链表中删除节点，然后将删除的节点添加到另一个链表中。依次执行了删除、添加操作。

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 *
 * @list is unlinked with __list_del_entry() (no poisoning, no
 * re-initialization) and then inserted right after @head via list_add().
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
    /* Unlink first; list_add() then overwrites both of list's links. */
    __list_del_entry(list);
    list_add(list, head);
}

遍历链表

/**
 * list_for_each    -   iterate over a list
 * @pos:    the &struct list_head to use as a loop cursor.
 * @head:   the head for your list.
 *
 * Starts at @head->next and follows ->next until list_is_head(@pos, @head)
 * reports true.  @pos is expanded several times without parentheses, so
 * pass a plain variable.  The cursor is advanced through pos->next after
 * each pass, so the loop body must keep that link intact.
 */
#define list_for_each(pos, head) \
    for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)

这个宏只是逐个遍历链表节点（即struct list_head）的位置，那么如何获取包含该节点的数据结构本身呢？这里还需要使用list_entry()宏。

/**
 * list_entry - get the struct for this entry
 * @ptr:    the &struct list_head pointer.
 * @type:   the type of the struct this is embedded in.
 * @member: the name of the list_head within the struct.
 *
 * Thin alias: all three arguments are passed unchanged to container_of().
 */
#define list_entry(ptr, type, member) \
    container_of(ptr, type, member)

/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr:        the pointer to the member.
 * @type:       the type of the container struct this is embedded in.
 * @member:     the name of the member within the struct.
 *
 * The temporary __mptr is declared with the member's own type, so the
 * compiler can diagnose a @ptr of an incompatible type; ((type *)0)->member
 * appears only inside typeof() and is never evaluated.  The result is
 * __mptr moved back by offsetof(type, member) bytes.  Uses the GNU
 * statement-expression and typeof extensions.
 */
#define container_of(ptr, type, member) ({                      \
    const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
    (type *)( (char *)__mptr - offsetof(type,member) );})

/*
 * Byte offset of MEMBER inside TYPE, obtained by taking the member's
 * address relative to a TYPE pointer at address 0 and converting it to size_t.
 */
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

其中offsetof()宏是通过把0地址转换为type类型的指针，然后去获取该结构体中member成员的指针，也就是获取了member在type结构体中的偏移量。最后用指针ptr减去offset，就得到type结构体的真实地址了。

WRITE_ONCE宏

在动态初始化、添加节点、删除节点过程中都用到了一个WRITE_ONCE()宏来完成部分赋值操作。

/*
 * Store @val into @x: first a compile-time check of @x via
 * compiletime_assert_rwonce_type(), then the store itself via
 * __WRITE_ONCE().  Wrapped in do { } while (0) so it acts as one statement.
 */
#define WRITE_ONCE(x, val)                      \
do {                                    \
    compiletime_assert_rwonce_type(x);              \
    __WRITE_ONCE(x, val);                       \
} while (0)

/*
 * Compile-time size check for {READ,WRITE}_ONCE() operands: accepted when
 * __native_word(t) holds or when sizeof(t) equals sizeof(long long).
 *
 * Yes, this permits 64-bit accesses on 32-bit architectures. These will
 * actually be atomic in some cases (namely Armv7 + LPAE), but for others we
 * rely on the access being split into 2x32-bit accesses for a 32-bit quantity
 * (e.g. a virtual address) and a strong prevailing wind.
 */
#define compiletime_assert_rwonce_type(t)                   \
    compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
        "Unsupported access size for {READ,WRITE}_ONCE().")

/*
 * Assign @val to @x through a volatile-qualified lvalue of @x's own type:
 * the address of @x is cast to a pointer to volatile typeof(x) and the
 * store goes through that pointer.
 */
#define __WRITE_ONCE(x, val)                        \
do {                                    \
    *(volatile typeof(x) *)&(x) = (val);                \
} while (0)

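可以看到，WRITE_ONCE()宏通过volatile类型转换来完成赋值，其作用是禁止编译器对这一次写操作进行优化（例如省略、合并，或拆分成多次写入），保证这次存储确实以一次访问的方式发出。需要注意的是，它只约束编译器，本身并不是CPU内存屏障，不能保证多核之间的访问顺序；需要顺序保证时，仍要配合smp_wmb()、smp_store_release()等屏障原语使用。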
但是为什么有的赋值操作需要使用WRITE_ONCE()，有的不需要呢？我们以添加为例，展开内联函数如下：

/*
 * list_add() with __list_add() substituted in by hand for illustration
 * (prev = head, next = head->next); the __list_add_valid() check is
 * left out of this expanded form.
 */
static inline void list_add(struct list_head *new, struct list_head *head)
{
    /* head->next still names the old first entry for these two stores. */
    head->next->prev = new;
    new->next = head->next;
    new->prev = head;
    /* Only this final store, which redirects head->next, uses WRITE_ONCE(). */
    WRITE_ONCE(head->next, new);
}

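可以看出，只有对head->next的赋值使用了WRITE_ONCE()。head->next是多核竞态的关键：内核中存在不加锁读取next指针的代码（例如list_empty()通过READ_ONCE()读取head->next来判空），这些读者可能与写者并发访问head->next，如果这次写入被编译器拆分或未按预期写回内存，读者就可能看到不完整的指针值，从而引发链表异常。其它几个赋值在新节点对读者可见之前完成，或者只在持锁的情况下被访问，使用普通赋值并不容易引发错误。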

另外，compiletime_assert_rwonce_type()宏主要用于在编译时检查参数的合法性，即x参数是否可以执行WRITE_ONCE。继续展开可以看到，它判断t的大小是否等于char、short、int、long或long long的大小（通常即8、16、32、64位），如果不是则不允许使用WRITE_ONCE（编译报错）。

/*
 * Compile-time size check for {READ,WRITE}_ONCE() operands: accepted when
 * __native_word(t) holds or when sizeof(t) equals sizeof(long long);
 * otherwise the message below is handed to compiletime_assert().
 */
#define compiletime_assert_rwonce_type(t)                   \
    compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
        "Unsupported access size for {READ,WRITE}_ONCE().")

/*
 * Non-zero when sizeof(t) matches the size of one of the basic integer
 * types long, int, short or char -- handy for atomic operations.
 */
#define __native_word(t) \
    (sizeof(t) == sizeof(long) || sizeof(t) == sizeof(int) || \
     sizeof(t) == sizeof(short) || sizeof(t) == sizeof(char))