Part 3 - An In-Memory, Append-Only, Single-Table Database

    We’re going to start small by putting a lot of limitations on our database. For now, it will:

    • support two operations: inserting a row and printing all rows
    • reside only in memory (no persistence to disk)
    • support a single, hard-coded table
      Our hard-coded table is going to store users and look like this:

    This is a simple schema, but it gets us to support multiple data types and multiple sizes of text data types.

    statements are now going to look like this:

    That means we need to upgrade our prepare_statement function to parse arguments

    1. if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
    2. statement->type = STATEMENT_INSERT;
    3. + int args_assigned = sscanf(
    4. + input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
    5. + statement->row_to_insert.username, statement->row_to_insert.email);
    6. + if (args_assigned < 3) {
    7. + return PREPARE_SYNTAX_ERROR;
    8. + }
    9. return PREPARE_SUCCESS;
    10. }
    11. if (strcmp(input_buffer->buffer, "select") == 0) {

    We store those parsed arguments into a new Row data structure inside the statement object:

    1. +const uint32_t COLUMN_USERNAME_SIZE = 32;
    2. +const uint32_t COLUMN_EMAIL_SIZE = 255;
    3. +struct Row_t {
    4. + uint32_t id;
    5. + char username[COLUMN_USERNAME_SIZE];
    6. + char email[COLUMN_EMAIL_SIZE];
    7. +};
    8. +typedef struct Row_t Row;
    9. +
    10. struct Statement_t {
    11. StatementType type;
    12. + Row row_to_insert; // only used by insert statement
    13. };
    14. typedef struct Statement_t Statement;

    Here’s my plan:

    • Store rows in blocks of memory called pages
    • Each page stores as many rows as it can fit
    • Rows are serialized into a compact representation with each page
    • Pages are only allocated as needed
    • Keep a fixed-size array of pointers to pages
      First we’ll define the compact representation of a row:
    1. +#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
    2. +
    3. +const uint32_t ID_SIZE = size_of_attribute(Row, id);
    4. +const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
    5. +const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
    6. +const uint32_t ID_OFFSET = 0;
    7. +const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
    8. +const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
    9. +const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;

    This means the layout of a serialized row will look like this:

    column size (bytes) offset
    id 4 0
    username 32 4
    email 255 36
    total 291

    We also need code to convert to and from the compact representation.

    Next, a Table structure that points to pages of rows and keeps track of how many rows there are:

    1. +const uint32_t PAGE_SIZE = 4096;
    2. +const uint32_t TABLE_MAX_PAGES = 100;
    3. +const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
    4. +const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
    5. +
    6. +struct Table_t {
    7. + void* pages[TABLE_MAX_PAGES];
    8. + uint32_t num_rows;
    9. +};
    10. +typedef struct Table_t Table;

    I’m making our page size 4 kilobytes because it’s the same size as a page used in the virtual memory systems of most computer architectures. This means one page in our database corresponds to one page used by the operating system. The operating system will move pages in and out of memory as whole units instead of breaking them up.

    I’m setting an arbitrary limit of 100 pages that we will allocate. When we switch to a tree structure, our database’s maximum size will only be limited by the maximum size of a file. (Although we’ll still limit how many pages we keep in memory at once)

    Speaking of which, here is how we figure out where to read/write in memory for a particular row:

    1. +void* row_slot(Table* table, uint32_t row_num) {
    2. + uint32_t page_num = row_num / ROWS_PER_PAGE;
    3. + void* page = table->pages[page_num];
    4. + if (!page) {
    5. + // Allocate memory only when we try to access page
    6. + page = table->pages[page_num] = malloc(PAGE_SIZE);
    7. + }
    8. + uint32_t row_offset = row_num % ROWS_PER_PAGE;
    9. + uint32_t byte_offset = row_offset * ROW_SIZE;
    10. + return page + byte_offset;
    11. +}

    Now we can make execute_statement read/write from our table structure:

    1. -void execute_statement(Statement* statement) {
    2. +ExecuteResult execute_insert(Statement* statement, Table* table) {
    3. + if (table->num_rows >= TABLE_MAX_ROWS) {
    4. + return EXECUTE_TABLE_FULL;
    5. + }
    6. +
    7. + Row* row_to_insert = &(statement->row_to_insert);
    8. +
    9. + serialize_row(row_to_insert, row_slot(table, table->num_rows));
    10. + table->num_rows += 1;
    11. +
    12. + return EXECUTE_SUCCESS;
    13. +}
    14. +
    15. +ExecuteResult execute_select(Statement* statement, Table* table) {
    16. + Row row;
    17. + for (uint32_t i = 0; i < table->num_rows; i++) {
    18. + deserialize_row(row_slot(table, i), &row);
    19. + print_row(&row);
    20. + }
    21. + return EXECUTE_SUCCESS;
    22. +}
    23. +
    24. +ExecuteResult execute_statement(Statement* statement, Table* table) {
    25. switch (statement->type) {
    26. case (STATEMENT_INSERT):
    27. - printf("This is where we would do an insert.\n");
    28. - break;
    29. + return execute_insert(statement, table);
    30. case (STATEMENT_SELECT):
    31. - printf("This is where we would do a select.\n");
    32. - break;
    33. + return execute_select(statement, table);
    34. }
    35. }

    Lastly, we need to initialize the table and handle a few more error cases:

    1. int main(int argc, char* argv[]) {
    2. + Table* table = new_table();
    3. InputBuffer* input_buffer = new_input_buffer();
    4. while (true) {
    5. print_prompt();
    6. @@ -105,13 +203,22 @@ int main(int argc, char* argv[]) {
    7. switch (prepare_statement(input_buffer, &statement)) {
    8. case (PREPARE_SUCCESS):
    9. + case (PREPARE_SYNTAX_ERROR):
    10. + continue;
    11. case (PREPARE_UNRECOGNIZED_STATEMENT):
    12. printf("Unrecognized keyword at start of '%s'.\n",
    13. input_buffer->buffer);
    14. continue;
    15. }
    16. - execute_statement(&statement);
    17. - printf("Executed.\n");
    18. + switch (execute_statement(&statement, table)) {
    19. + case (EXECUTE_SUCCESS):
    20. + printf("Executed.\n");
    21. + break;
    22. + case (EXECUTE_TABLE_FULL):
    23. + printf("Error: Table full.\n");
    24. + break;
    25. + }
    26. }
    27. }

    With those changes we can actually save data in our database!

    1. ~ ./db
    2. db > insert 1 cstack foo@bar.com
    3. Executed.
    4. db > insert 2 bob bob@example.com
    5. Executed.
    6. db > select
    7. (1, cstack, foo@bar.com)
    8. (2, bob, bob@example.com)
    9. Executed.
    10. db > insert foo bar 1
    11. Syntax error. Could not parse statement.
    12. db > .exit
    13. ~

    Now would be a great time to write some tests, for a couple reasons:

    • We’re planning to dramatically change the data structure storing our table, and tests would catch regressions.
    • There are a couple edge cases we haven’t tested manually (e.g. filling up the table)
      We’ll address those issues in the next part. For now, here’s the complete diff from this part:
    1. typedef struct InputBuffer_t InputBuffer;
    2. +enum ExecuteResult_t { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL };
    3. +typedef enum ExecuteResult_t ExecuteResult;
    4. +
    5. enum MetaCommandResult_t {
    6. META_COMMAND_SUCCESS,
    7. META_COMMAND_UNRECOGNIZED_COMMAND
    8. };
    9. typedef enum MetaCommandResult_t MetaCommandResult;
    10. -enum PrepareResult_t { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT };
    11. +enum PrepareResult_t {
    12. + PREPARE_SUCCESS,
    13. + PREPARE_SYNTAX_ERROR,
    14. + PREPARE_UNRECOGNIZED_STATEMENT
    15. +};
    16. typedef enum PrepareResult_t PrepareResult;
    17. enum StatementType_t { STATEMENT_INSERT, STATEMENT_SELECT };
    18. typedef enum StatementType_t StatementType;
    19. +const uint32_t COLUMN_USERNAME_SIZE = 32;
    20. +const uint32_t COLUMN_EMAIL_SIZE = 255;
    21. +struct Row_t {
    22. + uint32_t id;
    23. + char username[COLUMN_USERNAME_SIZE];
    24. + char email[COLUMN_EMAIL_SIZE];
    25. +};
    26. +typedef struct Row_t Row;
    27. +
    28. struct Statement_t {
    29. StatementType type;
    30. + Row row_to_insert; // only used by insert statement
    31. };
    32. typedef struct Statement_t Statement;
    33. +#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
    34. +
    35. +const uint32_t ID_SIZE = size_of_attribute(Row, id);
    36. +const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
    37. +const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
    38. +const uint32_t ID_OFFSET = 0;
    39. +const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
    40. +const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
    41. +const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
    42. +
    43. +const uint32_t PAGE_SIZE = 4096;
    44. +const uint32_t TABLE_MAX_PAGES = 100;
    45. +const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
    46. +const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
    47. +
    48. +struct Table_t {
    49. + void* pages[TABLE_MAX_PAGES];
    50. + uint32_t num_rows;
    51. +};
    52. +typedef struct Table_t Table;
    53. +
    54. +void print_row(Row* row) {
    55. + printf("(%d, %s, %s)\n", row->id, row->username, row->email);
    56. +}
    57. +
    58. +void serialize_row(Row* source, void* destination) {
    59. + memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE);
    60. + memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
    61. + memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
    62. +}
    63. +
    64. +void deserialize_row(void* source, Row* destination) {
    65. + memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE);
    66. + memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
    67. +
    68. +void* row_slot(Table* table, uint32_t row_num) {
    69. + uint32_t page_num = row_num / ROWS_PER_PAGE;
    70. + void* page = table->pages[page_num];
    71. + if (!page) {
    72. + // Allocate memory only when we try to access page
    73. + page = table->pages[page_num] = malloc(PAGE_SIZE);
    74. + }
    75. + uint32_t row_offset = row_num % ROWS_PER_PAGE;
    76. + uint32_t byte_offset = row_offset * ROW_SIZE;
    77. + return page + byte_offset;
    78. +}
    79. +
    80. +Table* new_table() {
    81. + Table* table = malloc(sizeof(Table));
    82. + table->num_rows = 0;
    83. +
    84. + return table;
    85. +}
    86. +
    87. InputBuffer* new_input_buffer() {
    88. InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
    89. input_buffer->buffer = NULL;
    90. @@ -64,6 +137,12 @@ PrepareResult prepare_statement(InputBuffer* input_buffer,
    91. Statement* statement) {
    92. if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
    93. statement->type = STATEMENT_INSERT;
    94. + int args_assigned = sscanf(
    95. + input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
    96. + statement->row_to_insert.username, statement->row_to_insert.email);
    97. + if (args_assigned < 3) {
    98. + return PREPARE_SYNTAX_ERROR;
    99. + }
    100. return PREPARE_SUCCESS;
    101. }
    102. if (strcmp(input_buffer->buffer, "select") == 0) {
    103. @@ -74,18 +153,39 @@ PrepareResult prepare_statement(InputBuffer* input_buffer,
    104. return PREPARE_UNRECOGNIZED_STATEMENT;
    105. }
    106. -void execute_statement(Statement* statement) {
    107. +ExecuteResult execute_insert(Statement* statement, Table* table) {
    108. + if (table->num_rows >= TABLE_MAX_ROWS) {
    109. + return EXECUTE_TABLE_FULL;
    110. + }
    111. +
    112. + Row* row_to_insert = &(statement->row_to_insert);
    113. +
    114. + serialize_row(row_to_insert, row_slot(table, table->num_rows));
    115. + table->num_rows += 1;
    116. +
    117. + return EXECUTE_SUCCESS;
    118. +}
    119. +
    120. +ExecuteResult execute_select(Statement* statement, Table* table) {
    121. + Row row;
    122. + for (uint32_t i = 0; i < table->num_rows; i++) {
    123. + deserialize_row(row_slot(table, i), &row);
    124. + print_row(&row);
    125. + }
    126. + return EXECUTE_SUCCESS;
    127. +}
    128. +
    129. +ExecuteResult execute_statement(Statement* statement, Table* table) {
    130. switch (statement->type) {
    131. case (STATEMENT_INSERT):
    132. - printf("This is where we would do an insert.\n");
    133. - break;
    134. + return execute_insert(statement, table);
    135. case (STATEMENT_SELECT):
    136. - printf("This is where we would do a select.\n");
    137. - break;
    138. + return execute_select(statement, table);
    139. }
    140. }
    141. int main(int argc, char* argv[]) {
    142. + Table* table = new_table();
    143. InputBuffer* input_buffer = new_input_buffer();
    144. while (true) {
    145. print_prompt();
    146. @@ -105,13 +205,22 @@ int main(int argc, char* argv[]) {
    147. switch (prepare_statement(input_buffer, &statement)) {
    148. case (PREPARE_SUCCESS):
    149. break;
    150. + case (PREPARE_SYNTAX_ERROR):
    151. + printf("Syntax error. Could not parse statement.\n");
    152. + continue;
    153. case (PREPARE_UNRECOGNIZED_STATEMENT):
    154. printf("Unrecognized keyword at start of '%s'.\n",
    155. input_buffer->buffer);
    156. continue;
    157. }
    158. - execute_statement(&statement);
    159. - printf("Executed.\n");
    160. + switch (execute_statement(&statement, table)) {
    161. + case (EXECUTE_SUCCESS):
    162. + printf("Executed.\n");
    163. + break;
    164. + case (EXECUTE_TABLE_FULL):
    165. + printf("Error: Table full.\n");
    166. + break;
    167. + }
    168. }

    Part 2 - World's Simplest SQL Compiler and Virtual Machine