0%

start_kernel前的中断初始化

前言

  接下来的几篇文章我想捋一捋中断的前世今生,从kernel执行的第一个C函数x86_64_start_kernel开始分析。

正文

早期的IDT设置

设置IDT的前32项并加载IDT

  x86_64_start_kernel函数包含下面这段代码,这段代码循环NUM_EXCEPTION_VECTORS(32)次调用set_intr_gate。set_intr_gate的两个参数分别表示中断向量号和中断处理程序的入口地址。中断向量号的范围是0~255,通常0~31用于异常,32~255表示用户定义的中断。

1
2
3
4
5
#define NUM_EXCEPTION_VECTORS 32

for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
set_intr_gate(i, early_idt_handler_array[i]);
load_idt((const struct desc_ptr *)&idt_descr);

set_intr_gate

函数中还有一些关于tracepoint的代码,这里不做分析。

  首先BUG_ON宏确保中断向量号不超过255(0xFF),然后调用 _set_gate 函数将中断门写入idt_table。

1
2
3
4
5
6
7
8
/*
 * set_intr_gate(n, addr) - install an interrupt gate for vector n.
 *
 * BUG_ON fires when n, converted to unsigned, is greater than 0xFF, so
 * vectors 0..255 are accepted. The gate for addr is then written through
 * _set_gate() with dpl 0, ist 0 and __KERNEL_CS, and the token-pasted
 * trace_<addr> handler is passed to _trace_set_gate() with the same
 * arguments.
 */
#define set_intr_gate(n, addr)						\
do { \
BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
__KERNEL_CS); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \
} while (0)

  _set_gate调用pack_gate填充gate_desc,然后调用write_idt_entry将gate_desc通过memcpy拷贝到idt_table中。

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Build a gate descriptor and store it in slot `gate` of idt_table.
 *
 * gate: IDT index (vector number) to write.
 * type: gate type forwarded to pack_gate().
 * addr: handler entry address.
 * dpl, ist, seg: forwarded unchanged to pack_gate().
 *
 * The descriptor is assembled in a local gate_desc and then copied out by
 * write_idt_entry(); write_trace_idt_entry() receives the same descriptor.
 */
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
gate_desc s;

pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
/*
* does not need to be atomic because it is only done once at
* setup time
*/
write_idt_entry(idt_table, gate, &s);
write_trace_idt_entry(gate, &s);
}

gate_desc在64位模式下占16字节,在32位模式下8个字节。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Split a value (widened to unsigned long long) into the three pieces a
 * 64-bit gate descriptor stores: bits 0-15, bits 16-31, and bits 32 and up.
 */
#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)

/*
 * Fill every field of *gate for a handler located at func.
 *
 * gate: descriptor to populate.
 * type: gate type stored in gate->type.
 * func: handler entry address, split across offset_low/middle/high.
 * dpl:  descriptor privilege level.
 * ist:  value stored in gate->ist.
 * seg:  accepted but not referenced in the body; the segment field is always
 *       set to __KERNEL_CS.
 */
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
gate->offset_low = PTR_LOW(func); // offset: low 16 bits of the handler entry address
gate->segment = __KERNEL_CS; // segment selector (indexes an entry in the segment descriptor table); the seg argument is ignored
gate->ist = ist;
gate->p = 1; // Segment Present flag; 0 would mean the segment is not in memory, so it is always set to 1 here
gate->dpl = dpl; // descriptor privilege level (minimum CPU privilege required to use the gate)
gate->zero0 = 0;
gate->zero1 = 0;
gate->type = type; // gate type
gate->offset_middle = PTR_MIDDLE(func); // middle 16 bits of the handler entry address
gate->offset_high = PTR_HIGH(func); // high 32 bits of the handler entry address
}
1
2
3
4
5
/* write_idt_entry() is a direct alias for the native implementation below. */
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)
/*
 * Copy one gate descriptor into slot `entry` of the table `idt`.
 *
 * idt:   base of the descriptor table.
 * entry: index of the slot to overwrite.
 * gate:  descriptor to copy; sizeof(*gate) bytes are written.
 */
static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
{
memcpy(&idt[entry], gate, sizeof(*gate));
}

load_idt

  回到最开始x86_64_start_kernel函数中的代码,循环调用了32次set_intr_gate之后,调用load_idt加载中断描述符表。

1
2
3
4
5
/* load_idt() is a direct alias for the native implementation below. */
#define load_idt(dtr)				native_load_idt(dtr)
/*
 * Execute the LIDT instruction with *dtr as its memory operand, which loads
 * the IDT register (IDTR) from the descriptor pointer.
 *
 * dtr: descriptor pointer describing the IDT to activate.
 */
static inline void native_load_idt(const struct desc_ptr *dtr)
{
asm volatile("lidt %0"::"m" (*dtr));
}

  load_idt 函数利用内嵌汇编执行了 lidt 指令,将中断描述符表的基地址和界限(limit)加载入IDTR 寄存器,下面是Intel手册中对lidt指令的描述。

早期的中断处理程序

early_idt_handler_array

1
2
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
set_intr_gate(i, early_idt_handler_array[i]);

  在上面的代码中,用early_idt_handler_array作为0~31号中断(前32个中断实际上为异常)的中断处理程序,early_idt_handler_array实际上是一个汇编函数,它定义在head_64.S中。
  注意rept指令,这实际上是NUM_EXCEPTION_VECTORS(32)个中断处理程序。

rept与loop不同,rept指令是在汇编程序把源文件翻译成目标文件时,把被重复的指令片段直接重复写入目标文件,而loop指令是在程序的执行期反复执行同一指令片段。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Table of NUM_EXCEPTION_VECTORS entry stubs, generated at assembly time by
 * .rept. Stub i:
 *   - pushes a dummy 0 when bit i of EXCEPTION_ERRCODE_MASK is clear, so the
 *     stack layout matches vectors where an error code is already present;
 *   - pushes its vector number i;
 *   - jumps to early_idt_handler_common.
 * The .fill pads each stub with 0xcc bytes up to the next multiple of
 * EARLY_IDT_HANDLER_SIZE, so stub i starts at
 * early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE.
 * The offsets in the comments below describe the stack as seen later in
 * early_idt_handler_common, after it has saved registers.
 */
ENTRY(early_idt_handler_array)
# 104(%rsp) %rflags
# 96(%rsp) %cs
# 88(%rsp) %rip
# 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
pushq $0 # Dummy error code, to make stack frame uniform
.endif
pushq $i # 72(%rsp) Vector number
jmp early_idt_handler_common
i = i + 1
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
ENDPROC(early_idt_handler_array)
1
2
/*
 * Bitmask of exception vectors which push an error code on the stack.
 * 0x00027d00 has bits 8, 10, 11, 12, 13, 14 and 17 set, i.e. vectors
 * #DF, #TS, #NP, #SS, #GP, #PF and #AC.
 */
#define EXCEPTION_ERRCODE_MASK 0x00027d00

  EXCEPTION_ERRCODE_MASK是用来表示前32个异常向量是否有error code的一个Bitmask,ifeq那一小段代码判断异常如果没有error code,就手动在栈中压入一个0用于统一栈的布局。然后pushq $i向栈中压入中断向量号,jmp到通用的中断处理程序early_idt_handler_common

early_idt_handler_common

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
 * Common tail shared by every early exception stub.
 *
 * Flow as written below:
 *  - vector 2 (NMI) skips all handling and returns through is_nmi;
 *  - if early_recursion_flag already equals 2, halt at label 1;
 *  - otherwise increment the flag and save rax, rcx, rdx, rsi, rdi, r8-r11;
 *  - a saved %cs other than __KERNEL_CS goes straight to the report path (11);
 *  - vector 14 passes CR2 to early_make_pgtable; a zero return resumes at 20;
 *  - any other vector, or a non-zero early_make_pgtable return, tries
 *    early_fixup_exception; a non-zero return resumes at 20;
 *  - otherwise fall into 11: optional early_printk diagnostics, then hlt
 *    in an endless loop at label 1.
 * Label 20 restores the saved registers, decrements the flag, drops the
 * vector number and error code, and returns with INTERRUPT_RETURN.
 */
early_idt_handler_common:
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
*/
cld

cmpl $2,(%rsp) # X86_TRAP_NMI // check whether the current vector is the NMI
je is_nmi # Ignore NMI

cmpl $2,early_recursion_flag(%rip) // check for nested (recursive) early exceptions
jz 1f
incl early_recursion_flag(%rip)
// push the general-purpose registers onto the stack
pushq %rax # 64(%rsp)
pushq %rcx # 56(%rsp)
pushq %rdx # 48(%rsp)
pushq %rsi # 40(%rsp)
pushq %rdi # 32(%rsp)
pushq %r8 # 24(%rsp)
pushq %r9 # 16(%rsp)
pushq %r10 # 8(%rsp)
pushq %r11 # 0(%rsp)
// check that the saved segment selector is the kernel code segment
cmpl $__KERNEL_CS,96(%rsp)
jne 11f
// check the vector number
cmpl $14,72(%rsp) # Page fault?
jnz 10f
GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
call early_make_pgtable
andl %eax,%eax
jz 20f # All good

10:
leaq 88(%rsp),%rdi # Pointer to %rip
call early_fixup_exception
andl %eax,%eax
jnz 20f # Found an exception entry

11:
#ifdef CONFIG_EARLY_PRINTK
GET_CR2_INTO(%r9) # can clobber any volatile register if pv
movl 80(%rsp),%r8d # error code
movl 72(%rsp),%esi # vector number
movl 96(%rsp),%edx # %cs
movq 88(%rsp),%rcx # %rip
xorl %eax,%eax
leaq early_idt_msg(%rip),%rdi
call early_printk
cmpl $2,early_recursion_flag(%rip)
jz 1f
call dump_stack
#ifdef CONFIG_KALLSYMS
leaq early_idt_ripmsg(%rip),%rdi
/*
 * NOTE(review): per the layout comments above, 40(%rsp) is the saved %rsi
 * and %rip lives at 88(%rsp); this offset looks stale - confirm.
 */
movq 40(%rsp),%rsi # %rip again
call __print_symbol
#endif
#endif /* EARLY_PRINTK */
1: hlt
jmp 1b
// restore the general-purpose registers
20: # Exception table entry found or page table generated
popq %r11
popq %r10
popq %r9
popq %r8
popq %rdi
popq %rsi
popq %rdx
popq %rcx
popq %rax
decl early_recursion_flag(%rip)
is_nmi:
addq $16,%rsp # drop vector number and error code
INTERRUPT_RETURN
ENDPROC(early_idt_handler_common)

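  early_idt_handler_common首先会检查当前中断是否为NMI(不可屏蔽中断),如果是则直接忽略并返回;然后检查是否在early_idt_handler_common中递归地产生了异常(early_recursion_flag已经等于2时直接停机),接着将通用寄存器压栈。之后判断中断向量号:如果是缺页异常(Page Fault)且发生在内核代码段,则把cr2寄存器中的值赋给rdi,然后调用early_make_pgtable建立新的页表;如果不是缺页异常,或者建立页表失败,则调用early_fixup_exception在异常表中查找修复入口。处理成功后恢复通用寄存器并从中断处理程序返回,否则打印出错信息并执行hlt停机。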

cr2寄存器用来存储page fault的线性地址。
image

early_make_pgtable

  通过上面的分析,发现Kernel初期的中断处理程序主要处理page fault(其余异常交给early_fixup_exception尝试修复),下面是缺页异常处理函数early_make_pgtable。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*
 * Create a new PMD entry that maps the faulting address.
 *
 * address: virtual address to map (the early page-fault path passes CR2).
 *
 * Walks early_level4_pgt for `address`, taking fresh PUD/PMD tables from
 * early_dynamic_pgts whenever an intermediate entry is empty, then writes a
 * PMD entry built from the PMD_MASK-aligned physical address plus
 * early_pmd_flags.
 *
 * Returns 0 on success, or -1 when the derived physical address is
 * >= MAXMEM or CR3 no longer points at early_level4_pgt.
 */
int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
unsigned long i;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
// check whether the physical address physaddr is valid
/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
return -1;

again:
// load into pgd the early top-level page-table entry that covers the faulting address
pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
pgd = *pgd_p;

/*
* The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
// if the pgd entry is populated, derive pud_p (virtual address of the PUD table) from it
if (pgd)
pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
else {
/* Out of dynamic tables: reset the early page tables and restart the walk. */
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
reset_early_page_tables();
goto again;
}

/* Take the next dynamic table, zero it, and hook it into the pgd entry. */
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
for (i = 0; i < PTRS_PER_PUD; i++)
pud_p[i] = 0;
*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
pud = *pud_p;

/* Same procedure one level down: reuse the existing PMD table or allocate one. */
if (pud)
pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
else {
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
reset_early_page_tables();
goto again;
}

pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
for (i = 0; i < PTRS_PER_PMD; i++)
pmd_p[i] = 0;
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
/* Install the PMD entry: PMD_MASK-aligned physical address plus the early flags. */
pmd = (physaddr & PMD_MASK) + early_pmd_flags;
pmd_p[pmd_index(address)] = pmd;

return 0;
}

脑图

联系我

  你可以直接在下方留言,也可以E-Mail联系我。

  • 本文作者: Lauren
  • 本文链接: http://lihanlu.cn/intr-init-1/
  • 版权声明: 本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!