linux内核启动过程

阅读 90

2022-02-16

setup.bin

# Flags for the objcopy step below: emit a raw binary image.
OBJCOPYFLAGS_setup.bin  := -O binary
# setup.bin is produced from setup.elf by the objcopy command.
# The recipe line must begin with a hard TAB, otherwise make reports
# "missing separator".
$(obj)/setup.bin: $(obj)/setup.elf FORCE
	$(call if_changed,objcopy)

  setup.bin依赖于setup.elf,先看下setup.elf分支:

# Linker flags for setup.elf: link as 32-bit x86 ELF. The trailing -T takes
# the linker script, which is the first prerequisite (setup.ld).
LDFLAGS_setup.elf       := -m elf_i386 -T
# The recipe line must begin with a hard TAB, otherwise make reports
# "missing separator".
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
	$(call if_changed,ld)

  setup.elf依赖于arch/x86/boot/setup.ld *.o,然后执行ld(搜索到arch/x86/boot/.setup.elf.cmd文件),分析.setup.elf.cmd:

cmd_arch/x86/boot/setup.elf := ld -m elf_x86_64   -m elf_i386 -T \
arch/x86/boot/setup.ld arch/x86/boot/a20.o arch/x86/boot/bioscall.o \
arch/x86/boot/cmdline.o arch/x86/boot/copy.o arch/x86/boot/cpu.o \
arch/x86/boot/cpuflags.o arch/x86/boot/cpucheck.o \
arch/x86/boot/early_serial_console.o arch/x86/boot/edd.o arch/x86/boot/header.o \
arch/x86/boot/main.o arch/x86/boot/memory.o arch/x86/boot/pm.o \
arch/x86/boot/pmjump.o arch/x86/boot/printf.o arch/x86/boot/regs.o \
arch/x86/boot/string.o arch/x86/boot/tty.o arch/x86/boot/video.o arch/x86/boot/video-mode.o \
arch/x86/boot/version.o arch/x86/boot/video-vga.o arch/x86/boot/video-vesa.o \
arch/x86/boot/video-bios.o -o arch/x86/boot/setup.elf

  链接输出setup.elf文件,现在分析arch/x86/boot/setup.ld文件:

/*
 * setup.ld
 *
 * Linker script for the i386 setup code
 */
OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)	/* entry symbol of the linked image */

SECTIONS
{
	/* Boot-sector code and data come first, starting at address 0. */
	. = 0;
	.bstext		: { *(.bstext) }
	.bsdata		: { *(.bsdata) }

	/*
	 * The header section starts at 495 (0x1ef); an assertion at the end
	 * of this script requires the hdr symbol to sit at 0x1f1.
	 */
	. = 495;
	.header		: { *(.header) }
	.entrytext	: { *(.entrytext) }
	.inittext	: { *(.inittext) }
	.initdata	: { *(.initdata) }
	/* End of the init sections; checked against 5*512 below. */
	__end_init = .;

	.text		: { *(.text) }
	.text32		: { *(.text32) }

	. = ALIGN(16);
	.rodata		: { *(.rodata*) }

	/* video_cards / video_cards_end bracket the .videocards input sections. */
	.videocards	: {
		video_cards = .;
		*(.videocards)
		video_cards_end = .;
	}

	. = ALIGN(16);
	.data		: { *(.data*) }

	/* setup_sig labels a 32-bit marker value emitted after the data. */
	.signature	: {
		setup_sig = .;
		LONG(0x5a5aaa55)
	}


	/* __bss_start / __bss_end bracket the .bss input sections. */
	. = ALIGN(16);
	.bss		:
	{
		__bss_start = .;
		*(.bss)
		__bss_end = .;
	}
	/* _end is the 16-byte aligned end of the image. */
	. = ALIGN(16);
	_end = .;

	/* Drop all .note* input sections from the output. */
	/DISCARD/	: {
		*(.note*)
	}

	/*
	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
	 */
	. = ASSERT(_end <= 0x8000, "Setup too big!");
	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
	/* Necessary for the very-old-loader check to work... */
	. = ASSERT(__end_init <= 5*512, "init sections too big!");

}

  setup.ld属于链接脚本(Linker Script),.bstext和.bsdata属于BS (1) 定义。接下来分析header.S文件,.header和入口(ENTRY)_start函数都在arch/x86/boot/header.S中定义:

(1) 引导扇区(Boot Sector) 通常指设备的第一个扇区,用于加载并转让处理器控制权给操作系统。.bstext:引导扇区代码段。.bsdata:引导扇区数据段。


1. BS
BOOTSEG         = 0x07C0                /* original address of boot-sector */
SYSSEG          = 0x1000                /* historical load address >> 4 */

  BOOTSEG为引导扇区所在的段地址0x07C0(对应物理地址0x7C00);SYSSEG为历史遗留的加载地址右移4位后得到的段值0x1000(对应物理地址0x10000),保留它是为了兼容早期启动方式。

#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA  // #define ASK_VGA         0xfffd          /* ask for it at bootup */
#endif

#ifndef ROOT_RDONLY
#define ROOT_RDONLY 1
#endif

  SVGA_MODE是默认的显示模式,缺省为ASK_VGA(0xfffd),即在启动时询问用户;ROOT_RDONLY为1表示根文件系统默认以只读方式挂载。

.code16
        .section ".bstext", "ax"

        .global bootsect_start
bootsect_start:
#ifdef CONFIG_EFI_STUB
        # "MZ", MS-DOS header
        .word   MZ_MAGIC
#endif

        # Normalize the start address
        ljmp    $BOOTSEG, $start2

  开始是16位汇编代码(.code16),随后切换到.bstext段;段标志"ax"中,a表示可分配(allocatable),x表示可执行(executable)。
  CONFIG_EFI_STUB用于支持efi stub启动模式,默认(make menuconfig直接保存)没有启动这个选项。
  ljmp $BOOTSEG, $start2:远跳转到0x07C0:start2,把%cs规范化为0x07C0后继续执行start2分支。现代linux内核必须由bootloader加载,不能直接从引导扇区启动;这段代码只在内核镜像被直接当作引导扇区启动时才会运行,作用是打印提示信息,等待用户按任意键后重启电脑。

start2:
	...
	movw    $bugger_off_msg, %si

        # Still in .bstext: print the NUL-terminated string at %si, one
        # character per iteration, using BIOS int 0x10 with %ah = 0xe.
msg_loop:
        lodsb
        andb    %al, %al        # NUL byte ends the message
        jz      bs_die
        movb    $0xe, %ah
        movw    $7, %bx
        int     $0x10
        jmp     msg_loop

bs_die:
        # Allow the user to press a key, then reboot
        xorw    %ax, %ax
        int     $0x16
        int     $0x19

        # int 0x19 should never return.  In case it does anyway,
        # invoke the BIOS reset code...
        ljmp    $0xf000,$0xfff0

...

        # Only the message is data, so the switch to the boot-sector data
        # section happens here, after the executable code above.
        .section ".bsdata", "a"
bugger_off_msg:
        .ascii  "Use a boot loader.\r\n"
        .ascii  "\n"
        .ascii  "Remove disk and press any key to reboot...\r\n"
        .byte   0

  输出:
    Use a boot loader.

    Remove disk and press any key to reboot…


2. _start
        # offset 512, entry point

        .globl  _start
_start:
                # Explicitly enter this as bytes, or the assembler
                # tries to generate a 3-byte jump here, which causes
                # everything else to push off to the wrong offset.
                .byte   0xeb            # short (2-byte) jump
                .byte   start_of_setup-1f

  _start位于setup镜像(setup.bin)偏移512字节处,setup.ld通过ENTRY(_start)把它声明为入口点,bootloader加载内核后跳转到这里;_start处的2字节短跳转指令再跳到start_of_setup分支:

# Real-mode setup entry: make %es match %ds, ensure a usable stack, normalize
# %cs, verify the setup signature, clear the BSS and call main().
start_of_setup:
# Force %es = %ds
        movw    %ds, %ax
        movw    %ax, %es
        cld                     # Clear the direction flag so string instructions
                                # advance %si/%di upwards (front to back)

# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code.  Recalculate the stack
# pointer if %ss is invalid.  Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.

        movw    %ss, %dx
        cmpw    %ax, %dx        # %ds == %ss? (if equal, the je below goes to 2)
        movw    %sp, %dx        # mov leaves the flags from cmpw untouched
        je      2f              # -> assume %sp is reasonably set

        # Invalid %ss, make up a new stack
        movw    $_end, %dx      # default: stack space starts after _end
        testb   $CAN_USE_HEAP, loadflags
        jz      1f
        movw    heap_end_ptr, %dx       # heap usable: start from heap_end_ptr instead
1:      addw    $STACK_SIZE, %dx
        jnc     2f              # no carry: the sum fits in 16 bits
        xorw    %dx, %dx        # Prevent wraparound

2:      # Now %dx should point to the end of our stack space
        andw    $~3, %dx        # dword align (might as well...)
        jnz     3f
        movw    $0xfffc, %dx    # Make sure we're not zero
3:      movw    %ax, %ss        # %ax still holds %ds, so %ss = %ds
        movzwl  %dx, %esp       # Clear upper half of %esp
        sti                     # Now we should have a working stack

# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
        pushw   %ds             # far-return target: segment = %ds ...
        pushw   $6f             # ... offset = label 6 below
        lretw
6:

# Check signature at end of setup
        cmpl    $0x5a5aaa55, setup_sig
        jne     setup_bad

# Zero the bss
        movw    $__bss_start, %di
        movw    $_end+3, %cx
        xorl    %eax, %eax
        subw    %di, %cx        # %cx = _end + 3 - __bss_start (bytes, rounded up)
        shrw    $2, %cx         # convert to a count of dwords
        rep; stosl              # store %cx zero dwords starting at %es:%di

# Jump to C code (should not return)
        calll   main

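  start_of_setup执行以下操作:
    1. 令%es = %ds,并用cld清除方向标志
    2. 检查%ss是否等于%ds,若不相等则重新计算栈顶(标号1、2),并做4字节对齐
    3. 设置%ss和%esp,然后用sti开启中断(标号3)
    4. 通过lretw把%cs规范化为与%ds一致(标号6)
    5. 检查setup末尾的签名(0x5a5aaa55),不匹配则跳到setup_bad;然后将bss段清零
    6. 调用main函数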


3. main

  arch/x86/boot/main.c文件中定义了main函数,现在分析main执行流程:

void main(void)
{
        /* First, copy the boot header into the "zeropage" */
        copy_boot_params();

  首先将引导头拷贝到零页中,进入copy_boot_params函数:

/*
 * Copy the setup header into boot_params and, when the old-style command
 * line protocol is in use, derive hdr.cmd_line_ptr from the legacy
 * (magic, offset) pair found at OLD_CL_ADDRESS.
 */
static void copy_boot_params(void)
{
        /* Layout of the legacy command-line descriptor. */
        struct old_cmdline {
                u16 cl_magic;
                u16 cl_offset;
        };
        const struct old_cmdline * const oldcmd =
                (const struct old_cmdline *)OLD_CL_ADDRESS;

        /* Compile-time check that boot_params is exactly 4096 bytes. */
        BUILD_BUG_ON(sizeof(boot_params) != 4096);
        memcpy(&boot_params.hdr, &hdr, sizeof(hdr)); /* copy the hdr structure into boot_params.hdr */

        /*
         * hdr is defined as "struct setup_header hdr;". A boot loader is
         * expected to take only the setup_header and place it into an
         * otherwise clean boot-parameter buffer.
         *
         * #define OLD_CL_MAGIC            0xA33F
         * #define OLD_CL_ADDRESS          0x020   (relative to real mode data)
         * #define NEW_CL_POINTER          0x228   (relative to real mode data)
         */


        /* Fall back only when no cmd_line_ptr was given and the old magic is present. */
        if (!boot_params.hdr.cmd_line_ptr &&
            oldcmd->cl_magic == OLD_CL_MAGIC) {
                /* Old-style command line protocol. */
                u16 cmdline_seg;

                /* Figure out if the command line falls in the region
                   of memory that an old kernel would have copied up
                   to 0x90000... */
                if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
                        cmdline_seg = ds();
				else
                        cmdline_seg = 0x9000;

                /* Segment << 4 gives the segment base; add the offset to it. */
                boot_params.hdr.cmd_line_ptr =
                        (cmdline_seg << 4) + oldcmd->cl_offset;
        }
}

  拷贝hdr结构对象到boot_params.hdr(hdr位于第一个扇区偏移497即0x1f1字节处)。如果cmd_line_ptr为0且旧式命令行魔数(0xA33F)存在,说明bootloader使用的是旧版命令行协议,此时根据cl_offset计算出命令行地址并写入boot_params.hdr.cmd_line_ptr。参考hdr定义:

        .globl  hdr
hdr:
setup_sects:    .byte 0                 /* Filled in by build.c */
root_flags:     .word ROOT_RDONLY
syssize:        .long 0                 /* Filled in by build.c */
ram_size:       .word 0                 /* Obsolete */
vid_mode:       .word SVGA_MODE
root_dev:       .word 0                 /* Filled in by build.c */
boot_flag:      .word 0xAA55

  除此之外,boot_params别的成员对象都是干净的(未赋值)

/* Initialize the early-boot console */
        console_init();
        if (cmdline_find_option_bool("debug"))
                puts("early console in setup code\n");

  初始化控制台,进入console_init函数:

/*
 * Initialize the early-boot console: run the earlyprintk parser first and,
 * if early_serial_base is still zero afterwards, run the uart8250 console
 * parser as a fallback.
 */
void console_init(void)
{
        parse_earlyprintk();

        /* NOTE(review): early_serial_base is presumably set by parse_earlyprintk() — not shown here. */
        if (!early_serial_base)
                parse_console_uart8250();
}

  初始化早期控制台:先解析earlyprintk参数;如果没有得到串口基地址(early_serial_base为0),再解析uart8250形式的console参数

/* End of heap check */
        init_heap();

// #define CAN_USE_HEAP    (1<<7) 

  检查并设置堆(heap)的尾部

/* Make sure we have all the proper CPU support */
        if (validate_cpu()) {
                puts("Unable to boot - please use a kernel appropriate "
                     "for your CPU.\n");
                die();
        }

  检查当前CPU是否具备运行该内核所需的特性,不满足则打印错误信息并停止启动

/* Tell the BIOS what CPU mode we intend to run in. */
        set_bios_mode();

  告知BIOS内核将要运行的CPU模式(仅64位内核即CONFIG_X86_64时生效,通知BIOS将使用长模式);此时CPU仍处于实模式

/* Detect memory layout */
        detect_memory();

...

/*
 * Detect the memory layout by running the three detection helpers in order:
 * e820, e801 and 88. All three are always called.
 * NOTE(review): the helpers presumably wrap the corresponding BIOS memory
 * queries — their implementations are not shown here.
 */
void detect_memory(void)
{
        detect_memory_e820();

        detect_memory_e801();

        detect_memory_88();
}

  检测内存

/* Set keyboard repeat rate (why?) and query the lock flags */
        keyboard_init();

/*
 * Query the keyboard lock status as given by the BIOS, and
 * set the keyboard repeat rate to maximum.  Unclear why the latter
 * is done here; this might be possible to kill off as stale code.
 */
static void keyboard_init(void)
{
        struct biosregs ireg, oreg;
        initregs(&ireg);

        ireg.ah = 0x02;         /* Get keyboard status */
        intcall(0x16, &ireg, &oreg);
        boot_params.kbd_status = oreg.al;       /* status comes back in AL */

        /* ireg is reused; the output of this second call is discarded (NULL). */
        ireg.ax = 0x0305;       /* Set keyboard repeat rate */
        intcall(0x16, &ireg, NULL);
}

  查询键盘状态并保存到boot_params.kbd_status,然后设置键盘重复频率

/* Query Intel SpeedStep (IST) information */
        query_ist();
        
...

/*
 * Get Intel SpeedStep (IST) information.
 *
 * Issues int 0x15 with AX = 0xe980 and stores the four returned registers
 * in boot_params.ist_info. Does nothing when cpu.level is below 6.
 */
static void query_ist(void)
{
        struct biosregs ireg, oreg;

        /* Some older BIOSes apparently crash on this call, so filter
           it from machines too old to have SpeedStep at all. */
        if (cpu.level < 6)
                return;

        initregs(&ireg);
        ireg.ax  = 0xe980;       /* IST Support */
        ireg.edx = 0x47534943;   /* Request value */
        intcall(0x15, &ireg, &oreg);

        /* Save the raw register contents returned by the BIOS. */
        boot_params.ist_info.signature  = oreg.eax;
        boot_params.ist_info.command    = oreg.ebx;
        boot_params.ist_info.event      = oreg.ecx;
        boot_params.ist_info.perf_level = oreg.edx;
}

  检查英特尔IST

        /* Query APM information */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
        query_apm_bios();
#endif

        /* Query EDD information */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
        query_edd();
#endif
/* Set the video mode */
        set_video();

/* Internal svga startup constants */
#define NORMAL_VGA      0xffff          /* 80x25 mode */
#define EXTENDED_VGA    0xfffe          /* 80x50 mode */
#define ASK_VGA         0xfffd          /* ask for it at bootup */

  设置视频模式

/* Do the last things and invoke protected mode */
        go_to_protected_mode();
}

...
/*
 * Final real-mode steps followed by the jump into protected mode.
 * Prints a message and calls die() if the A20 gate cannot be enabled.
 */
void go_to_protected_mode(void)
{
        /* Hook before leaving real mode, also disables interrupts */
        realmode_switch_hook();

        /* Enable the A20 gate */
        if (enable_a20()) {
                /* A non-zero return is treated as failure. */
                puts("A20 gate not responding, unable to boot...\n");
                die();
        }

        /* Reset coprocessor (IGNNE#) */
        reset_coprocessor();

        /* Mask all interrupts in the PIC */
        mask_all_interrupts();

        /* Actual transition to protected mode... */
        setup_idt();
        setup_gdt();
        /*
         * Arguments: the 32-bit entry point taken from the setup header, and
         * boot_params' address with the data segment base (ds() << 4) added.
         */
        protected_mode_jump(boot_params.hdr.code32_start,
                            (u32)&boot_params + (ds() << 4));
}

  无论32位还是64位内核,这里都是从实模式切换到保护模式;64位内核之后会在保护模式下再切换到长模式。

精彩评论(0)

0 0 举报