项目地址 How Does a Database Work? | Let’s Build a Simple Database (cstack.github.io)
上一次我们识别了select、insert,但没有实现功能,这次我们来实现一下。
目前我们存储数据先不用B树,先按数组的顺序排列凑合一下,但要排列得紧凑。
我们先假设这个数据库有三个属性。id,username,email。id是int类型,username是varchar(32),email是varchar(255)。
在数据库中,每行也就是id,username,email,我们就定义一个结构体,有这三个元素。
/* Bytes reserved for each fixed-width text column. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255

/*
 * In-memory form of one table row: a 32-bit id followed by two
 * fixed-size character buffers.
 */
typedef struct {
  uint32_t id;
  char     username[COLUMN_USERNAME_SIZE];
  char     email[COLUMN_EMAIL_SIZE];
} Row;
我们要序列化,要紧凑,每一行都要这样
column | size (bytes) | offset |
---|---|---|
id | 4 | 0 |
username | 32 | 4 |
email | 255 | 36 |
total | 291 | |
故我们定义长度及偏移量。
/*
 * Size in bytes of member Attribute of type Struct. The null pointer is
 * never dereferenced because sizeof does not evaluate its operand.
 */
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

/*
 * Private integer constants from which the public layout constants below
 * are derived. ISO C requires file-scope initializers to be constant
 * expressions, and a const-qualified object is not one, so the public
 * constants must not be initialized in terms of each other.
 */
enum {
  ROW_ID_BYTES = size_of_attribute(Row, id),
  ROW_USERNAME_BYTES = size_of_attribute(Row, username),
  ROW_EMAIL_BYTES = size_of_attribute(Row, email)
};

/* Width of each serialized field. */
const uint32_t ID_SIZE = ROW_ID_BYTES;
const uint32_t USERNAME_SIZE = ROW_USERNAME_BYTES;
const uint32_t EMAIL_SIZE = ROW_EMAIL_BYTES;

/* Byte offset of each field inside a serialized row (fields are packed). */
const uint32_t ID_OFFSET = 0;
const uint32_t USERNAME_OFFSET = ROW_ID_BYTES;
const uint32_t EMAIL_OFFSET = ROW_ID_BYTES + ROW_USERNAME_BYTES;

/* Total bytes occupied by one serialized row. */
const uint32_t ROW_SIZE = ROW_ID_BYTES + ROW_USERNAME_BYTES + ROW_EMAIL_BYTES;
为了序列化及反序列化,我们定义两个函数
/*
 * Copies the id, username and email of *source into the buffer at
 * destination, placing each field at its *_OFFSET and copying *_SIZE bytes.
 * The last byte written is at EMAIL_OFFSET + EMAIL_SIZE - 1, so destination
 * must provide at least that much room.
 */
void serialize_row(Row* source, void* destination) {
  /* ISO C has no arithmetic on void*; address the buffer as bytes. */
  unsigned char* out = destination;

  memcpy(out + ID_OFFSET, &source->id, ID_SIZE);
  memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}
/*
 * Inverse of serialize_row: reads the id, username and email fields from
 * the packed buffer at source (each at its *_OFFSET, *_SIZE bytes long)
 * and stores them into *destination.
 */
void deserialize_row(void* source, Row* destination) {
  /* ISO C has no arithmetic on void*; address the buffer as bytes. */
  const unsigned char* in = source;

  memcpy(&destination->id, in + ID_OFFSET, ID_SIZE);
  memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}
接着定义页(大小一般是4 KB)以及Table结构:Table指向存放行的各个页,并记录当前一共有多少行。
/* Number of page slots a table owns. */
#define TABLE_MAX_PAGES 100

/* Size of one page in bytes. */
const uint32_t PAGE_SIZE = 4096;

/*
 * Whole rows that fit in one page, and the resulting row capacity of a
 * table. The integer division drops the unused tail of each page.
 * NOTE(review): these initializers refer to other const objects, which
 * strict ISO C does not treat as constant expressions - confirm the
 * target compilers accept this.
 */
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;

/* A table: the count of stored rows plus one pointer per page slot. */
typedef struct {
  uint32_t num_rows;
  void*    pages[TABLE_MAX_PAGES];
} Table;
把页面大小设置为4 KB,是因为它与大多数计算机体系结构的虚拟内存系统所使用的页面大小相同。这意味着我们数据库中的一个页面正好对应操作系统使用的一个页面。操作系统会把页面作为一个整体移入和移出内存,而不会把它拆开。
以下是我们如何确定特定行在内存中读取/写入的位置。
/*
 * Returns the address at which row number row_num is stored.
 *
 * The page holding the row is allocated on first access. The function
 * never returns NULL: if row_num lies beyond the table's page array, or
 * the page cannot be allocated, it reports the problem on stderr and
 * terminates the process.
 */
void* row_slot(Table* table, uint32_t row_num) {
  uint32_t page_num = row_num / ROWS_PER_PAGE;

  /* Guard the pages[] index; the array length is taken from the field. */
  if (page_num >= sizeof table->pages / sizeof table->pages[0]) {
    fprintf(stderr, "Row %u is outside the table's pages.\n",
            (unsigned)row_num);
    exit(EXIT_FAILURE);
  }

  unsigned char* page = table->pages[page_num];
  if (page == NULL) {
    /* Allocate memory only when we try to access the page. */
    page = malloc(PAGE_SIZE);
    if (page == NULL) {
      fprintf(stderr, "Out of memory while allocating a page.\n");
      exit(EXIT_FAILURE);
    }
    table->pages[page_num] = page;
  }

  uint32_t row_offset = row_num % ROWS_PER_PAGE;
  uint32_t byte_offset = row_offset * ROW_SIZE;
  /* Byte pointer arithmetic: ISO C has no arithmetic on void*. */
  return page + byte_offset;
}
现在我们可以从表结构中进行读/写了。然后我们初始化表格
/*
 * Allocates an empty table: num_rows is 0 and every page slot is NULL.
 * Never returns NULL; if the allocation fails the error is reported on
 * stderr and the process terminates. Release the result with free_table().
 */
Table* new_table() {
  Table* table = malloc(sizeof *table);
  if (table == NULL) {
    fprintf(stderr, "Out of memory while creating the table.\n");
    exit(EXIT_FAILURE);
  }

  table->num_rows = 0;
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
    table->pages[i] = NULL;
  }
  return table;
}
/*
 * Releases every page of the table and then the table itself.
 * Passing NULL is a no-op.
 */
void free_table(Table* table) {
  if (table == NULL) {
    return;
  }

  /*
   * Walk the whole array with an explicit bound. Stopping at the first
   * NULL slot would read one element past the array when every page is
   * allocated. free(NULL) is a no-op, so empty slots need no test.
   */
  size_t page_count = sizeof table->pages / sizeof table->pages[0];
  for (size_t i = 0; i < page_count; i++) {
    free(table->pages[i]);
  }
  free(table);
}
现在修改程序,让它实现insert,select!先从预处理开始
/*
 * Parses the line in input_buffer->buffer into *statement.
 *
 * Accepted forms:
 *   insert <id> <username> <email>   (extra trailing text is ignored)
 *   select | select id | select username | select email
 *
 * Returns PREPARE_SUCCESS when the line was understood and
 * PREPARE_SYNTAX_ERROR otherwise, including when username or email does
 * not fit, with its terminating NUL, in the corresponding row field.
 * PREPARE_UNRECOGNIZED_STATEMENT is never returned by this function.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer,
                                Statement* statement) {
  /* The characters scanf's %s treats as token separators. */
  static const char blanks[] = " \t\n\v\f\r";
  const char* line = input_buffer->buffer;

  if (strncmp(line, "insert", 6) == 0) {
    statement->type = STATEMENT_INSERT;

    /*
     * Read the id into a real int so %d matches its argument type.
     * %n records where the first text token starts; it does not count
     * towards sscanf's return value.
     */
    int id = 0;
    int consumed = -1;
    if (sscanf(line, "insert %d %n", &id, &consumed) < 1 || consumed < 0) {
      return PREPARE_SYNTAX_ERROR;
    }

    /* Locate the two tokens by hand so their length is known before any
     * byte is copied; an unbounded %s would overrun the row fields. */
    const char* username = line + consumed;
    size_t username_len = strcspn(username, blanks);
    const char* email = username + username_len;
    email += strspn(email, blanks);
    size_t email_len = strcspn(email, blanks);

    if (username_len == 0 || email_len == 0) {
      return PREPARE_SYNTAX_ERROR;
    }
    if (username_len >= sizeof statement->row_to_insert.username ||
        email_len >= sizeof statement->row_to_insert.email) {
      return PREPARE_SYNTAX_ERROR;
    }

    statement->row_to_insert.id = (uint32_t)id;
    /* Zero-fill first: the whole field is copied when the row is
     * serialized, so no indeterminate bytes may remain after the text. */
    memset(statement->row_to_insert.username, 0,
           sizeof statement->row_to_insert.username);
    memcpy(statement->row_to_insert.username, username, username_len);
    memset(statement->row_to_insert.email, 0,
           sizeof statement->row_to_insert.email);
    memcpy(statement->row_to_insert.email, email, email_len);
    return PREPARE_SUCCESS;
  }

  if (strcmp(line, "select") == 0) {
    statement->row_to_select = ALL;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select id") == 0) {
    statement->row_to_select = ID;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select username") == 0) {
    statement->row_to_select = USERNAME;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select email") == 0) {
    statement->row_to_select = EMAIL;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  return PREPARE_SYNTAX_ERROR;
}
预处理完成后,需要接收statement,并根据它的类型调用相应的执行函数。
/*
 * Dispatches a prepared statement to execute_insert or execute_select
 * according to statement->type and returns that function's result.
 */
ExecuteResult execute_statement(Statement* statement, Table* table) {
  switch (statement->type) {
    case (STATEMENT_INSERT):
      return execute_insert(statement, table);
    case (STATEMENT_SELECT):
      return execute_select(statement, table);
  }

  /*
   * Deliberately outside the switch (no default label) so the compiler
   * can still warn about an unhandled StatementType. Reaching this point
   * means statement->type holds no known value; stop instead of falling
   * off the end of a non-void function.
   */
  fprintf(stderr, "Unknown statement type %d.\n", (int)statement->type);
  exit(EXIT_FAILURE);
}
我们在execute_insert和execute_select上实现功能。这里我们添加SelectResult作为选择参数,在PrepareResult中添加PREPARE_SYNTAX_ERROR,并在Statement中添加row_to_insert和row_to_select两个字段。
/* Which column(s) a select statement prints. */
typedef enum {
  ID = 0,       /* only the id column */
  USERNAME = 1, /* only the username column */
  EMAIL = 2,    /* only the email column */
  ALL = 3       /* all three columns */
} SelectResult;
/* Outcome of turning an input line into a Statement. */
typedef enum {
  PREPARE_SUCCESS = 0,
  PREPARE_SYNTAX_ERROR = 1,
  PREPARE_UNRECOGNIZED_STATEMENT = 2
} PrepareResult;
/* A parsed statement together with the data it operates on. */
typedef struct {
  StatementType type;          /* insert or select */
  Row           row_to_insert; /* filled in for insert statements */
  SelectResult  row_to_select; /* column choice for select statements */
} Statement;
/*
 * Appends statement->row_to_insert to the table.
 * Returns EXECUTE_TABLE_FULL, leaving the table untouched, when num_rows
 * has reached TABLE_MAX_ROWS; otherwise serializes the row into the next
 * slot, increments num_rows and returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  if (!(table->num_rows < TABLE_MAX_ROWS)) {
    return EXECUTE_TABLE_FULL;
  }

  /* The next free slot is the one indexed by the current row count. */
  void* slot = row_slot(table, table->num_rows);
  serialize_row(&statement->row_to_insert, slot);
  table->num_rows++;

  return EXECUTE_SUCCESS;
}
/*
 * Prints every stored row, one per line, restricted to the column(s)
 * selected by statement->row_to_select (ID, USERNAME, EMAIL or ALL).
 * Nothing is printed for any other selector value.
 * Always returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
  Row row;

  for (uint32_t i = 0; i < table->num_rows; i++) {
    /* Each row is unpacked into the local Row before printing; the
     * destination argument was missing from the original calls. */
    deserialize_row(row_slot(table, i), &row);

    switch (statement->row_to_select) {
      case ID:
        printf("(%d)\n", row.id);
        break;
      case USERNAME:
        printf("(%s)\n", row.username);
        break;
      case EMAIL:
        printf("(%s)\n", row.email);
        break;
      case ALL:
        printf("(%d, %s, %s)\n", row.id, row.username, row.email);
        break;
    }
  }
  return EXECUTE_SUCCESS;
}
最后修改main函数
/*
 * Read-eval-print loop: prompts, reads one line, handles it as a meta
 * command when it starts with '.', otherwise prepares and executes it as
 * a statement against a single table created at start-up. The loop itself
 * never ends; do_meta_command() is invoked for every '.' line.
 */
int main(int argc, char* argv[]) {
  (void)argc;
  (void)argv;

  Table* table = new_table();
  InputBuffer* input_buffer = new_input_buffer();

  for (;;) {
    print_prompt();
    read_input(input_buffer);

    const bool is_meta_command = (input_buffer->buffer[0] == '.');
    if (is_meta_command) {
      switch (do_meta_command(input_buffer)) {
        case (META_COMMAND_UNRECOGNIZED_COMMAND):
          printf("Unrecognized command '%s'\n", input_buffer->buffer);
          /* fallthrough */
        case (META_COMMAND_SUCCESS):
          continue;
      }
    }

    Statement statement;
    switch (prepare_statement(input_buffer, &statement)) {
      case (PREPARE_SYNTAX_ERROR):
        printf("Syntax error.\n");
        continue;
      case (PREPARE_UNRECOGNIZED_STATEMENT):
        printf("Unrecognized keyword at start of '%s'.\n",
               input_buffer->buffer);
        continue;
      case (PREPARE_SUCCESS):
        break;
    }

    switch (execute_statement(&statement, table)) {
      case (EXECUTE_TABLE_FULL):
        printf("Error: Table full.\n");
        break;
      case (EXECUTE_SUCCESS):
        printf("Executed.\n");
        break;
    }
  }
}
最后,它长这样
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/* Bytes reserved for each fixed-width text column. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255

/* Number of page slots a table owns. */
#define TABLE_MAX_PAGES 100

/*
 * Size in bytes of member Attribute of type Struct. The null pointer is
 * never dereferenced because sizeof does not evaluate its operand.
 */
#define size_of_attribute(Struct, Attribute) \
  (sizeof(((Struct*)0)->Attribute))
/* A table: the count of stored rows plus one pointer per page slot. */
typedef struct {
  uint32_t num_rows;
  void*    pages[TABLE_MAX_PAGES];
} Table;
/*
 * In-memory form of one table row: a 32-bit id followed by two
 * fixed-size character buffers.
 */
typedef struct {
  uint32_t id;
  char     username[COLUMN_USERNAME_SIZE];
  char     email[COLUMN_EMAIL_SIZE];
} Row;
/* State of the line reader used by the prompt loop. */
typedef struct {
  char*   buffer;        /* storage for the current line */
  size_t  buffer_length; /* capacity value passed to getline() */
  ssize_t input_length;  /* length recorded by read_input() */
} InputBuffer;
/* Outcome of handling a line that starts with '.'. */
typedef enum {
  META_COMMAND_SUCCESS = 0,
  META_COMMAND_UNRECOGNIZED_COMMAND = 1
} MetaCommandResult;
/* Outcome of turning an input line into a Statement. */
typedef enum {
  PREPARE_SUCCESS = 0,
  PREPARE_SYNTAX_ERROR = 1,
  PREPARE_UNRECOGNIZED_STATEMENT = 2
} PrepareResult;
/* Outcome of executing a prepared statement. */
typedef enum {
  EXECUTE_SUCCESS = 0,
  EXECUTE_TABLE_FULL = 1
} ExecuteResult;
/* The kinds of statement the program can execute. */
typedef enum {
  STATEMENT_INSERT = 0,
  STATEMENT_SELECT = 1
} StatementType;
/* Which column(s) a select statement prints. */
typedef enum {
  ID = 0,       /* only the id column */
  USERNAME = 1, /* only the username column */
  EMAIL = 2,    /* only the email column */
  ALL = 3       /* all three columns */
} SelectResult;
/* A parsed statement together with the data it operates on. */
typedef struct {
  StatementType type;          /* insert or select */
  Row           row_to_insert; /* filled in for insert statements */
  SelectResult  row_to_select; /* column choice for select statements */
} Statement;
/*
 * Private integer constants from which the public layout constants below
 * are derived. ISO C requires file-scope initializers to be constant
 * expressions, and a const-qualified object is not one, so the public
 * constants must not be initialized in terms of each other.
 */
enum {
  PAGE_BYTES = 4096,
  ROW_ID_BYTES = size_of_attribute(Row, id),
  ROW_USERNAME_BYTES = size_of_attribute(Row, username),
  ROW_EMAIL_BYTES = size_of_attribute(Row, email),
  ROW_BYTES = ROW_ID_BYTES + ROW_USERNAME_BYTES + ROW_EMAIL_BYTES
};

/* Size of one page in bytes. */
const uint32_t PAGE_SIZE = PAGE_BYTES;

/* Width of each serialized field. */
const uint32_t ID_SIZE = ROW_ID_BYTES;
const uint32_t USERNAME_SIZE = ROW_USERNAME_BYTES;
const uint32_t EMAIL_SIZE = ROW_EMAIL_BYTES;

/* Byte offset of each field inside a serialized row (fields are packed). */
const uint32_t ID_OFFSET = 0;
const uint32_t USERNAME_OFFSET = ROW_ID_BYTES;
const uint32_t EMAIL_OFFSET = ROW_ID_BYTES + ROW_USERNAME_BYTES;

/* Total bytes occupied by one serialized row. */
const uint32_t ROW_SIZE = ROW_BYTES;

/* Whole rows per page (integer division drops the unused tail of a page)
 * and the resulting row capacity of a table. */
const uint32_t ROWS_PER_PAGE = PAGE_BYTES / ROW_BYTES;
const uint32_t TABLE_MAX_ROWS = (PAGE_BYTES / ROW_BYTES) * TABLE_MAX_PAGES;
/*
 * Handles a meta command line. ".exit" terminates the process with
 * EXIT_SUCCESS; every other line yields META_COMMAND_UNRECOGNIZED_COMMAND.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer) {
  const bool wants_exit = (strcmp(input_buffer->buffer, ".exit") == 0);

  if (!wants_exit) {
    return META_COMMAND_UNRECOGNIZED_COMMAND;
  }
  exit(EXIT_SUCCESS);
}
/*
 * Allocates an InputBuffer with no line storage yet (buffer is NULL and
 * both lengths are 0). Never returns NULL; if the allocation fails the
 * error is reported on stderr and the process terminates.
 * Release the result with close_input_buffer().
 */
InputBuffer* new_input_buffer(){
  InputBuffer* input_buffer = malloc(sizeof *input_buffer);
  if (input_buffer == NULL) {
    fprintf(stderr, "Out of memory while creating the input buffer.\n");
    exit(EXIT_FAILURE);
  }

  input_buffer->buffer = NULL;
  input_buffer->buffer_length = 0;
  input_buffer->input_length = 0;
  return input_buffer;
}
/*
 * Returns the address at which row number row_num is stored.
 *
 * The page holding the row is allocated on first access. The function
 * never returns NULL: if row_num lies beyond the table's page array, or
 * the page cannot be allocated, it reports the problem on stderr and
 * terminates the process.
 */
void* row_slot(Table* table, uint32_t row_num) {
  uint32_t page_num = row_num / ROWS_PER_PAGE;

  /* Guard the pages[] index; the array length is taken from the field. */
  if (page_num >= sizeof table->pages / sizeof table->pages[0]) {
    fprintf(stderr, "Row %u is outside the table's pages.\n",
            (unsigned)row_num);
    exit(EXIT_FAILURE);
  }

  unsigned char* page = table->pages[page_num];
  if (page == NULL) {
    /* Allocate memory only when we try to access the page. */
    page = malloc(PAGE_SIZE);
    if (page == NULL) {
      fprintf(stderr, "Out of memory while allocating a page.\n");
      exit(EXIT_FAILURE);
    }
    table->pages[page_num] = page;
  }

  uint32_t row_offset = row_num % ROWS_PER_PAGE;
  uint32_t byte_offset = row_offset * ROW_SIZE;
  /* Byte pointer arithmetic: ISO C has no arithmetic on void*. */
  return page + byte_offset;
}
/*
 * Copies the id, username and email of *source into the buffer at
 * destination, placing each field at its *_OFFSET and copying *_SIZE bytes.
 * The last byte written is at EMAIL_OFFSET + EMAIL_SIZE - 1, so destination
 * must provide at least that much room.
 */
void serialize_row(Row* source, void* destination) {
  /* ISO C has no arithmetic on void*; address the buffer as bytes. */
  unsigned char* out = destination;

  memcpy(out + ID_OFFSET, &source->id, ID_SIZE);
  memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}
/*
 * Inverse of serialize_row: reads the id, username and email fields from
 * the packed buffer at source (each at its *_OFFSET, *_SIZE bytes long)
 * and stores them into *destination.
 */
void deserialize_row(void* source, Row* destination) {
  /* ISO C has no arithmetic on void*; address the buffer as bytes. */
  const unsigned char* in = source;

  memcpy(&destination->id, in + ID_OFFSET, ID_SIZE);
  memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}
/*
 * Parses the line in input_buffer->buffer into *statement.
 *
 * Accepted forms:
 *   insert <id> <username> <email>   (extra trailing text is ignored)
 *   select | select id | select username | select email
 *
 * Returns PREPARE_SUCCESS when the line was understood and
 * PREPARE_SYNTAX_ERROR otherwise, including when username or email does
 * not fit, with its terminating NUL, in the corresponding row field.
 * PREPARE_UNRECOGNIZED_STATEMENT is never returned by this function.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer,
                                Statement* statement) {
  /* The characters scanf's %s treats as token separators. */
  static const char blanks[] = " \t\n\v\f\r";
  const char* line = input_buffer->buffer;

  if (strncmp(line, "insert", 6) == 0) {
    statement->type = STATEMENT_INSERT;

    /*
     * Read the id into a real int so %d matches its argument type.
     * %n records where the first text token starts; it does not count
     * towards sscanf's return value.
     */
    int id = 0;
    int consumed = -1;
    if (sscanf(line, "insert %d %n", &id, &consumed) < 1 || consumed < 0) {
      return PREPARE_SYNTAX_ERROR;
    }

    /* Locate the two tokens by hand so their length is known before any
     * byte is copied; an unbounded %s would overrun the row fields. */
    const char* username = line + consumed;
    size_t username_len = strcspn(username, blanks);
    const char* email = username + username_len;
    email += strspn(email, blanks);
    size_t email_len = strcspn(email, blanks);

    if (username_len == 0 || email_len == 0) {
      return PREPARE_SYNTAX_ERROR;
    }
    if (username_len >= sizeof statement->row_to_insert.username ||
        email_len >= sizeof statement->row_to_insert.email) {
      return PREPARE_SYNTAX_ERROR;
    }

    statement->row_to_insert.id = (uint32_t)id;
    /* Zero-fill first: the whole field is copied when the row is
     * serialized, so no indeterminate bytes may remain after the text. */
    memset(statement->row_to_insert.username, 0,
           sizeof statement->row_to_insert.username);
    memcpy(statement->row_to_insert.username, username, username_len);
    memset(statement->row_to_insert.email, 0,
           sizeof statement->row_to_insert.email);
    memcpy(statement->row_to_insert.email, email, email_len);
    return PREPARE_SUCCESS;
  }

  if (strcmp(line, "select") == 0) {
    statement->row_to_select = ALL;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select id") == 0) {
    statement->row_to_select = ID;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select username") == 0) {
    statement->row_to_select = USERNAME;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  if (strcmp(line, "select email") == 0) {
    statement->row_to_select = EMAIL;
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }
  return PREPARE_SYNTAX_ERROR;
}
/*
 * Appends statement->row_to_insert to the table.
 * Returns EXECUTE_TABLE_FULL, leaving the table untouched, when num_rows
 * has reached TABLE_MAX_ROWS; otherwise serializes the row into the next
 * slot, increments num_rows and returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  if (!(table->num_rows < TABLE_MAX_ROWS)) {
    return EXECUTE_TABLE_FULL;
  }

  /* The next free slot is the one indexed by the current row count. */
  void* slot = row_slot(table, table->num_rows);
  serialize_row(&statement->row_to_insert, slot);
  table->num_rows++;

  return EXECUTE_SUCCESS;
}
/*
 * Prints every stored row, one per line, restricted to the column(s)
 * selected by statement->row_to_select (ID, USERNAME, EMAIL or ALL).
 * Nothing is printed for any other selector value.
 * Always returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
  Row row;

  for (uint32_t i = 0; i < table->num_rows; i++) {
    /* Each row is unpacked into the local Row before printing; the
     * destination argument was missing from the original calls. */
    deserialize_row(row_slot(table, i), &row);

    switch (statement->row_to_select) {
      case ID:
        printf("(%d)\n", row.id);
        break;
      case USERNAME:
        printf("(%s)\n", row.username);
        break;
      case EMAIL:
        printf("(%s)\n", row.email);
        break;
      case ALL:
        printf("(%d, %s, %s)\n", row.id, row.username, row.email);
        break;
    }
  }
  return EXECUTE_SUCCESS;
}
/*
 * Dispatches a prepared statement to execute_insert or execute_select
 * according to statement->type and returns that function's result.
 */
ExecuteResult execute_statement(Statement* statement, Table* table) {
  switch (statement->type) {
    case (STATEMENT_INSERT):
      return execute_insert(statement, table);
    case (STATEMENT_SELECT):
      return execute_select(statement, table);
  }

  /*
   * Deliberately outside the switch (no default label) so the compiler
   * can still warn about an unhandled StatementType. Reaching this point
   * means statement->type holds no known value; stop instead of falling
   * off the end of a non-void function.
   */
  fprintf(stderr, "Unknown statement type %d.\n", (int)statement->type);
  exit(EXIT_FAILURE);
}
/*
 * Allocates an empty table: num_rows is 0 and every page slot is NULL.
 * Never returns NULL; if the allocation fails the error is reported on
 * stderr and the process terminates. Release the result with free_table().
 */
Table* new_table() {
  Table* table = malloc(sizeof *table);
  if (table == NULL) {
    fprintf(stderr, "Out of memory while creating the table.\n");
    exit(EXIT_FAILURE);
  }

  table->num_rows = 0;
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
    table->pages[i] = NULL;
  }
  return table;
}
/*
 * Releases every page of the table and then the table itself.
 * Passing NULL is a no-op.
 */
void free_table(Table* table) {
  if (table == NULL) {
    return;
  }

  /*
   * Walk the whole array with an explicit bound. Stopping at the first
   * NULL slot would read one element past the array when every page is
   * allocated. free(NULL) is a no-op, so empty slots need no test.
   */
  size_t page_count = sizeof table->pages / sizeof table->pages[0];
  for (size_t i = 0; i < page_count; i++) {
    free(table->pages[i]);
  }
  free(table);
}
/* Writes the interactive prompt to stdout, without a trailing newline. */
void print_prompt() {
  fputs("sqlite> ", stdout);
}
/*
 * Reads one line from stdin into input_buffer using getline(), which
 * grows input_buffer->buffer as needed.
 *
 * On return the buffer holds the line without its trailing newline and
 * input_length is the number of bytes kept. If nothing could be read
 * (error or end of input) the function prints a message and terminates
 * the process with EXIT_FAILURE.
 */
void read_input(InputBuffer* input_buffer){
  ssize_t bytes_read = getline(&input_buffer->buffer,
                               &input_buffer->buffer_length, stdin);
  if (bytes_read <= 0) {
    printf("Error reading input\n");
    exit(EXIT_FAILURE);
  }

  /*
   * Strip the newline only when there is one: a final line that ends at
   * end-of-input has no '\n', and dropping its last byte unconditionally
   * would corrupt the command (".exit" would become ".exi").
   */
  if (input_buffer->buffer[bytes_read - 1] == '\n') {
    bytes_read -= 1;
    input_buffer->buffer[bytes_read] = 0;
  }
  input_buffer->input_length = bytes_read;
}
/* Releases the line storage owned by input_buffer and the buffer itself. */
void close_input_buffer(InputBuffer* input_buffer){
  /* Take the line pointer first so the struct can be released before it. */
  char* line = input_buffer->buffer;

  free(input_buffer);
  free(line);
}
/*
 * Read-eval-print loop: prompts, reads one line, handles it as a meta
 * command when it starts with '.', otherwise prepares and executes it as
 * a statement against a single table created at start-up. The loop itself
 * never ends; do_meta_command() is invoked for every '.' line.
 */
int main(int argc, char* argv[]) {
  (void)argc;
  (void)argv;

  Table* table = new_table();
  InputBuffer* input_buffer = new_input_buffer();

  for (;;) {
    print_prompt();
    read_input(input_buffer);

    const bool is_meta_command = (input_buffer->buffer[0] == '.');
    if (is_meta_command) {
      switch (do_meta_command(input_buffer)) {
        case (META_COMMAND_UNRECOGNIZED_COMMAND):
          printf("Unrecognized command '%s'\n", input_buffer->buffer);
          /* fallthrough */
        case (META_COMMAND_SUCCESS):
          continue;
      }
    }

    Statement statement;
    switch (prepare_statement(input_buffer, &statement)) {
      case (PREPARE_SYNTAX_ERROR):
        printf("Syntax error.\n");
        continue;
      case (PREPARE_UNRECOGNIZED_STATEMENT):
        printf("Unrecognized keyword at start of '%s'.\n",
               input_buffer->buffer);
        continue;
      case (PREPARE_SUCCESS):
        break;
    }

    switch (execute_statement(&statement, table)) {
      case (EXECUTE_TABLE_FULL):
        printf("Error: Table full.\n");
        break;
      case (EXECUTE_SUCCESS):
        printf("Executed.\n");
        break;
    }
  }
}
这就是运行结果了。
以上就是所有内容。因为有个人的感悟,就臭不要脸的投自制了。